From dcf74b0293c8e05c7492bd2006cefa24225db8f2 Mon Sep 17 00:00:00 2001 From: Matthew Creal Morgan Date: Mon, 17 Mar 2025 14:07:26 -0700 Subject: [PATCH] Morga471 cluster (#20) * yep * set back to normal * missed tempo * fix branch ref * change branch ref to test provider-resolution * fix min vals * 2 is the lowest * docs and keycloak * use default for eks again * tempo and kiali updates while working on keycloak * missed a comma * almost * no v * cleanup * namespaces * use main * fmt * namespace changes * update internal url ref * fmt * versions * more wip: * keycloak wip * update prom internal url input value * test changes on prom * deleted old cluster platform-eng-eks-test and created new cluster platform-eng-eks-srn * testing more autoscaling stuffs * wip * wip * wip * use my eks * isolate karpenter again for debug * 1.3.0 is not ready * make grafana work again * increment grafana operator chart version * otel added * fix gatekeeper chart version * ordering * test branch * use newer image * update loki memcached * vers * keycloak defaults * put keycloak in keycloak namespace for debug * removed a few folders from workspace * update grafana tg * remove old module from workspace * reset branches to default * missed one * fmt * more fmt * use client id and secret * fix service name regex violation * updates * update from lukes pr * disable gatekeeper * updated --------- Co-authored-by: Srini Nangunuri --- .checkov.yml | 24 ++ .github/platform-tg-infra.code-workspace | 28 +- .gitignore | 3 + configs/node-groups.yaml | 48 ++++ configs/resource-quotas.yml | 36 +++ docs/ARCHITECTURE.md | 88 ++++++ docs/DOCUMENTATION_STANDARDS.md | 56 ++++ docs/INFRASTRUCTURE_STANDARDS.md | 75 +++++ docs/MODULE_DEPENDENCIES.md | 45 +++ docs/MODULE_STANDARDS.md | 69 +++++ docs/OBSERVABILITY_STANDARDS.md | 67 +++++ docs/SECURITY_AUDIT_CHECKLIST.md | 43 +++ docs/SECURITY_BASELINE.md | 76 +++++ docs/TESTING_STANDARDS.md | 107 +++++++ docs/VERSION_CONTROL.md | 52 ++++ 
docs/templates/MODULE_README.md | 71 +++++ lab/_envcommon/default-versions.hcl | 98 ++++--- .../vpc/platform-eng-eks-mcm/cluster.hcl | 2 +- .../eks-cert-manager/terragrunt.hcl | 4 +- .../eks-config/terragrunt.hcl | 6 +- .../eks-dns/terragrunt.hcl | 4 +- .../eks-gogatekeeper/terragrunt.hcl.off | 80 ++++++ .../eks-grafana/terragrunt.hcl | 55 +++- .../eks-istio/terragrunt.hcl | 4 +- .../eks-k8s-dashboard/terragrunt.hcl | 19 +- .../eks-karpenter/terragrunt.hcl | 8 +- .../eks-keycloak/terragrunt.hcl | 76 +++++ .../eks-kiali/terragrunt.hcl | 126 ++++++++ .../eks-kiali/terragrunt.hcl.disable | 85 ------ .../eks-loki/terragrunt.hcl | 8 +- .../eks-metrics-server/terragrunt.hcl | 2 +- .../eks-otel/terragrunt.hcl | 61 ++++ .../eks-prometheus/terragrunt.hcl | 7 +- .../eks-tempo/terragrunt.hcl | 10 +- .../vpc/platform-eng-eks-srn/cluster.hcl | 28 ++ .../eks-cert-manager/terragrunt.hcl | 57 ++++ .../eks-config/terragrunt.hcl | 54 ++++ .../eks-dns/terragrunt.hcl | 60 ++++ .../eks-grafana/terragrunt.hcl | 63 ++++ .../eks-istio/terragrunt.hcl | 44 +++ .../eks-k8s-dashboard/terragrunt.hcl | 55 ++++ .../eks-karpenter/terragrunt.hcl | 50 ++++ .../eks-keycloak/terragrunt.hcl | 87 ++++++ .../eks-kiali/terragrunt.hcl | 113 ++++++++ .../eks-kiali/terragrunt.hcl.disabled | 0 .../eks-loki/terragrunt.hcl | 56 ++++ .../eks-metrics-server/terragrunt.hcl | 43 +++ .../eks-postgresql/terragrunt.hcl | 76 +++++ .../eks-prometheus/README.md | 0 .../eks-prometheus/terragrunt.hcl | 61 ++++ .../eks-tempo/terragrunt.hcl | 66 +++++ .../platform-eng-eks-srn/eks/terragrunt.hcl | 28 ++ .../vpc/platform-eng-eks-test/cluster.hcl | 20 -- .../eks-cert-manager/terragrunt.hcl | 40 --- .../eks-config/terragrunt.hcl | 42 --- .../eks-dns/terragrunt.hcl | 42 --- .../eks-grafana/terragrunt.hcl | 40 --- .../eks-istio/terragrunt.hcl | 32 --- .../eks-k8s-dashboard/terragrunt.hcl | 36 --- .../eks-karpenter/terragrunt.hcl | 43 --- .../eks-kiali/terragrunt.hcl.disable | 81 ------ .../eks-loki/terragrunt.hcl | 44 --- 
.../eks-metrics-server/terragrunt.hcl | 33 --- .../eks-prometheus/terragrunt.hcl | 38 --- .../eks-tempo/terragrunt.hcl | 46 --- .../platform-eng-eks-test/eks/terragrunt.hcl | 56 ---- monitoring/grafana-dashboards.json | 44 +++ monitoring/prometheus-rules.yaml | 39 +++ plan.md | 271 ++++++++++++++++++ tests/terraform.tftest.hcl | 40 +++ 70 files changed, 2701 insertions(+), 770 deletions(-) create mode 100644 .checkov.yml create mode 100644 configs/node-groups.yaml create mode 100644 configs/resource-quotas.yml create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/DOCUMENTATION_STANDARDS.md create mode 100644 docs/INFRASTRUCTURE_STANDARDS.md create mode 100644 docs/MODULE_DEPENDENCIES.md create mode 100644 docs/MODULE_STANDARDS.md create mode 100644 docs/OBSERVABILITY_STANDARDS.md create mode 100644 docs/SECURITY_AUDIT_CHECKLIST.md create mode 100644 docs/SECURITY_BASELINE.md create mode 100644 docs/TESTING_STANDARDS.md create mode 100644 docs/VERSION_CONTROL.md create mode 100644 docs/templates/MODULE_README.md create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl create mode 100644 
lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl rename lab/development/us-gov-east-1/vpc/{platform-eng-eks-mcm => platform-eng-eks-srn}/eks-kiali/terragrunt.hcl.disabled (100%) create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl rename lab/development/us-gov-east-1/vpc/{platform-eng-eks-test => platform-eng-eks-srn}/eks-prometheus/README.md (100%) create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl delete mode 100644 
lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl delete mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl create mode 100644 monitoring/grafana-dashboards.json create mode 100644 monitoring/prometheus-rules.yaml create mode 100644 plan.md create mode 100644 tests/terraform.tftest.hcl diff --git a/.checkov.yml b/.checkov.yml new file mode 100644 index 00000000..cc000299 --- /dev/null +++ b/.checkov.yml @@ -0,0 +1,24 @@ +branch: master +download-external-modules: true +evaluate-variables: true +external-checks-dir: + - security/custom_checks +framework: + - terraform + - kubernetes +output: + - cli + - json + - junitxml +skip-check: + - CKV_AWS_79 # Instance Metadata Service Version 1 + - CKV_AWS_130 # Ensure VPC subnets are not assigned public IP by default +quiet: true +compact: true +directory: + - . + - modules/* +secrets-scan-file-type: + - tf + - yaml + - json diff --git a/.github/platform-tg-infra.code-workspace b/.github/platform-tg-infra.code-workspace index 5047434c..d243b5d4 100644 --- a/.github/platform-tg-infra.code-workspace +++ b/.github/platform-tg-infra.code-workspace @@ -2,20 +2,12 @@ "folders": [ { "name": "platform-tg-infra", - "path": "../" + "path": ".." 
}, { "name": "tfmod-cert-mgr", "path": "../../tfmod-cert-mgr" }, - { - "name": "tfmod-config-job", - "path": "../../tfmod-config-job" - }, - { - "name": "tfmod-custom-iam-role-for-service-account-eks", - "path": "../../tfmod-custom-iam-role-for-service-account-eks" - }, { "name": "tfmod-eks", "path": "../../tfmod-eks" @@ -28,6 +20,10 @@ "name": "tfmod-eks-dns", "path": "../../tfmod-eks-dns" }, + { + "name": "tfmod-gogatekeeper", + "path": "../../tfmod-gogatekeeper" + }, { "name": "tfmod-grafana", "path": "../../tfmod-grafana" @@ -48,6 +44,10 @@ "name": "tfmod-karpenter", "path": "../../tfmod-karpenter" }, + { + "name": "tfmod-keycloak", + "path": "../../tfmod-keycloak" + }, { "name": "tfmod-kiali", "path": "../../tfmod-kiali" @@ -60,6 +60,10 @@ "name": "tfmod-metrics-server", "path": "../../tfmod-metrics-server" }, + { + "name": "tfmod-open-telemetry", + "path": "../../tfmod-open-telemetry" + }, { "name": "tfmod-prometheus", "path": "../../tfmod-prometheus" @@ -69,13 +73,15 @@ "path": "../../tfmod-tempo" }, { + "name": "terraform-aws-eks", "path": "../../terraform-aws-eks" }, { - "path": "../../karpenter-provider-aws" + "name": "terragrunt", + "path": "../../terragrunt" }, { - "path": "../../terragrunt" + "path": "../../tfmod-config-job" } ] } diff --git a/.gitignore b/.gitignore index 4b072ca2..e99855b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ # Local .terraform directories **/.terraform/* +**/apply.log +**/plan.log +**/destroy.log # terraform lock file. 
**/.terraform.lock.hcl diff --git a/configs/node-groups.yaml b/configs/node-groups.yaml new file mode 100644 index 00000000..11e09cad --- /dev/null +++ b/configs/node-groups.yaml @@ -0,0 +1,48 @@ +nodeGroups: + - name: general-purpose + instanceTypes: + - m6i.xlarge + - m6a.xlarge + - m5.xlarge + minSize: 2 + maxSize: 10 + desiredSize: 2 + labels: + node-type: general + taints: [] + updateConfig: + maxUnavailable: 1 + + - name: compute-optimized + instanceTypes: + - c6i.2xlarge + - c6a.2xlarge + - c5.2xlarge + minSize: 1 + maxSize: 20 + desiredSize: 2 + labels: + node-type: compute + taints: + - key: workload + value: batch + effect: NoSchedule + updateConfig: + maxUnavailable: 2 + + - name: memory-optimized + instanceTypes: + - r6i.2xlarge + - r6a.2xlarge + - r5.2xlarge + minSize: 1 + maxSize: 10 + desiredSize: 2 + labels: + node-type: memory + taints: + - key: workload + value: memory-intensive + effect: NoSchedule + updateConfig: + maxUnavailable: 1 diff --git a/configs/resource-quotas.yml b/configs/resource-quotas.yml new file mode 100644 index 00000000..655595d0 --- /dev/null +++ b/configs/resource-quotas.yml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ResourceQuota +metadata: + name: default-quota +spec: + hard: + requests.cpu: "20" + requests.memory: 40Gi + limits.cpu: "40" + limits.memory: 80Gi + pods: "100" + services: "50" + secrets: "100" + configmaps: "100" + persistentvolumeclaims: "50" + +--- +apiVersion: v1 +kind: LimitRange +metadata: + name: default-limits +spec: + limits: + - type: Container + default: + cpu: 500m + memory: 512Mi + defaultRequest: + cpu: 100m + memory: 256Mi + max: + cpu: "4" + memory: 8Gi + min: + cpu: 50m + memory: 64Mi diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 00000000..8ea6c671 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,88 @@ +# Platform Infrastructure Architecture + +## Complete Platform Architecture + +```mermaid +graph TD + %% Core Network Infrastructure + VPC[VPC Module] --> 
DNS[DNS Module] + VPC --> SUBNETS[Subnet Configuration] + SUBNETS --> PRIVATE[Private Subnets] + SUBNETS --> PUBLIC[Public Subnets] + + %% EKS Cluster and Core Components + VPC --> EKS[EKS Cluster] + EKS --> IAM[IAM Roles Module] + EKS --> EKS_CONFIG[EKS Configuration] + EKS --> KARPENTER[Karpenter] + + %% Security and Access Management + EKS --> CERT_MGR[Cert Manager] + EKS --> GATEKEEPER[GoGatekeeper] + + %% Service Mesh + EKS_CONFIG --> ISTIO[Istio Service Mesh] + ISTIO --> KIALI[Kiali Dashboard] + ISTIO --> INGRESS[Service Ingress] + + %% Monitoring and Observability + EKS --> MONITORING[Monitoring Stack] + MONITORING --> PROMETHEUS[Prometheus] + MONITORING --> GRAFANA[Grafana] + MONITORING --> LOKI[Loki Log Aggregation] + MONITORING --> TEMPO[Tempo Tracing] + + %% Additional Services + EKS --> DASHBOARD[Kubernetes Dashboard] + EKS --> METRICS[Metrics Server] + EKS --> KEYCLOAK[Keycloak SSO] + + %% Infrastructure Management + TERRAGRUNT[Terragrunt] --> VPC + TERRAGRUNT --> EKS + + %% Database Layer + VPC --> RDS[RDS Database] + + %% Styling + classDef core fill:#f9f,stroke:#333,stroke-width:2px + classDef security fill:#bbf,stroke:#333,stroke-width:2px + classDef monitoring fill:#bfb,stroke:#333,stroke-width:2px + + class VPC,EKS,EKS_CONFIG core + class CERT_MGR,GATEKEEPER,IAM security + class PROMETHEUS,GRAFANA,LOKI,TEMPO monitoring +``` + +## Component Descriptions + +### Core Infrastructure +- **VPC Module**: Network foundation with public/private subnets +- **EKS Cluster**: Managed Kubernetes service +- **Karpenter**: Autoscaling node management +- **DNS Module**: Route53 DNS management + +### Security Layer +- **Cert Manager**: Certificate lifecycle management +- **GoGatekeeper**: OIDC authentication proxy for Keycloak-protected services +- **IAM Roles**: AWS IAM integration + +### Service Mesh +- **Istio**: Service mesh implementation +- **Kiali**: Service mesh visualization +- **Service Ingress**: External traffic management + +### Monitoring Stack +- **Prometheus**: Metrics collection +- 
**Grafana**: Metrics visualization +- **Loki**: Log aggregation +- **Tempo**: Distributed tracing + +### Additional Services +- **Kubernetes Dashboard**: Cluster management UI +- **Metrics Server**: Resource metrics +- **Keycloak**: Identity management + +### Infrastructure Management +- **Terragrunt**: Infrastructure deployment orchestration +- **RDS**: Managed database services diff --git a/docs/DOCUMENTATION_STANDARDS.md b/docs/DOCUMENTATION_STANDARDS.md new file mode 100644 index 00000000..b00374bc --- /dev/null +++ b/docs/DOCUMENTATION_STANDARDS.md @@ -0,0 +1,56 @@ +# Documentation Standards Guide + +## README Structure +Each module must include a README.md with the following sections: + +1. Overview + - Purpose + - Key features + - Architecture diagram + +2. Prerequisites + - Required tooling + - Required permissions + - Dependencies + +3. Usage + - Basic example + - Advanced examples + - Input variables table + - Output variables table + +4. Architecture + - Component diagram + - Network flow + - Security considerations + +5. 
Operations + - Deployment guide + - Monitoring + - Troubleshooting + - Maintenance + +## Changelog Format +Use Commitizen convention: + +``` +feat: New feature +fix: Bug fix +docs: Documentation changes +style: Formatting changes +refactor: Code restructure without behavior change +test: Test updates +chore: Maintenance tasks +``` + +## Diagrams +- Use PlantUML for architecture diagrams +- Include source files in `docs/diagrams` +- Export PNG/SVG to `docs/images` +- Keep diagrams up to date with code changes + +## Usage Examples +- Provide basic and advanced examples +- Include realistic variable values +- Document required permissions +- Include expected outputs diff --git a/docs/INFRASTRUCTURE_STANDARDS.md b/docs/INFRASTRUCTURE_STANDARDS.md new file mode 100644 index 00000000..bdcdda6c --- /dev/null +++ b/docs/INFRASTRUCTURE_STANDARDS.md @@ -0,0 +1,75 @@ +# Infrastructure Standards + +## Node Group Configuration + +### Instance Types +```hcl +locals { + instance_types = { + general_purpose = ["m6i.xlarge", "m6a.xlarge", "m5.xlarge"] + compute_optimized = ["c6i.2xlarge", "c6a.2xlarge", "c5.2xlarge"] + memory_optimized = ["r6i.2xlarge", "r6a.2xlarge", "r5.2xlarge"] + } +} +``` + +### Node Labels +```yaml +labels: + node-type: [general|compute|memory] + environment: [dev|stage|prod] + workload-type: [service|batch|system] +``` + +## Auto-scaling Configuration + +### Cluster Autoscaler +```yaml +cluster-autoscaler: + scaleDownUnneededTime: 10m + scaleDownDelayAfterAdd: 10m + maxNodeProvisionTime: 15m + maxGracefulTermination: 10m +``` + +### Karpenter Settings +```yaml +provisioner: + requirements: + - key: karpenter.sh/capacity-type + operator: In + values: ["spot", "on-demand"] + limits: + resources: + cpu: 1000 + memory: 1000Gi +``` + +## Storage Classes + +### Standard Classes +```yaml +storage-classes: + standard: + type: gp3 + encrypted: true + reclaimPolicy: Delete + premium: + type: io2 + iops: 5000 + encrypted: true + reclaimPolicy: Retain +``` + +## 
Resource Quotas + +### Default Quotas +```yaml +quotas: + default: + requests.cpu: "20" + requests.memory: 40Gi + limits.cpu: "40" + limits.memory: 80Gi + pods: "100" +``` diff --git a/docs/MODULE_DEPENDENCIES.md b/docs/MODULE_DEPENDENCIES.md new file mode 100644 index 00000000..34372650 --- /dev/null +++ b/docs/MODULE_DEPENDENCIES.md @@ -0,0 +1,45 @@ +# Module Dependencies + +## Core Infrastructure Dependencies + +```mermaid +graph TD + VPC[VPC Module] --> EKS[EKS Module] + EKS --> EKS_CONFIG[EKS Config Module] + EKS --> KARPENTER[Karpenter Module] + EKS_CONFIG --> ISTIO[Istio Module] + ISTIO --> INGRESS[Service Ingress Module] + EKS --> MONITORING[Monitoring Stack] + MONITORING --> PROMETHEUS[Prometheus Module] + MONITORING --> GRAFANA[Grafana Module] +``` + +## Module Initialization Order + +1. Network Infrastructure + - VPC Module + - DNS Module + +2. Cluster Infrastructure + - EKS Module + - IAM Roles Module + - EKS Configuration + +3. Cluster Add-ons + - Metrics Server + - Cert Manager + - Karpenter + +4. Observability Stack + - Prometheus + - Grafana + - Loki + - Tempo + +## Version Compatibility Matrix + +| Module | Version | Dependencies | Breaking Changes | +|--------|---------|--------------|------------------| +| EKS | v1.0.0 | AWS Provider >= 4.0 | None | +| Karpenter | v0.5.0 | EKS >= 1.0.0 | Node group naming | +| Istio | v1.2.0 | EKS >= 1.0.0 | Service mesh config | diff --git a/docs/MODULE_STANDARDS.md b/docs/MODULE_STANDARDS.md new file mode 100644 index 00000000..88699ced --- /dev/null +++ b/docs/MODULE_STANDARDS.md @@ -0,0 +1,69 @@ +# Module Standards + +## Directory Structure +``` +module/ +├── README.md +├── main.tf +├── variables.tf +├── outputs.tf +├── versions.tf +├── examples/ +│ ├── basic/ +│ └── complete/ +└── tests/ + ├── defaults/ + └── complete/ +``` + +## Naming Conventions + +### Resource Naming +```hcl +resource "aws_iam_role" "example" { + name = format("%s-%s-%s", var.prefix, var.environment, var.name) + # ... 
+} +``` + +### Variable Structure +```hcl +variable "cluster_config" { + type = object({ + name = string + version = string + environment = string + vpc_id = string + }) + description = "EKS cluster configuration" +} +``` + +## Version Constraints + +### Provider Versions +```hcl +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + } + required_version = ">= 1.0" +} +``` + +## Documentation Requirements + +### README Structure +1. Overview +2. Usage +3. Inputs/Outputs +4. Examples +5. Requirements +6. Dependencies diff --git a/docs/OBSERVABILITY_STANDARDS.md b/docs/OBSERVABILITY_STANDARDS.md new file mode 100644 index 00000000..a6d95bbb --- /dev/null +++ b/docs/OBSERVABILITY_STANDARDS.md @@ -0,0 +1,67 @@ +# Observability Standards + +## Metrics Collection + +### Golden Signals +- Latency +- Traffic +- Errors +- Saturation + +### Standard Labels +```yaml +labels: + environment: [dev|stage|prod] + service: + team: + cost_center: +``` + +### SLO Definitions +```yaml +slos: + availability: + target: 99.9% + window: 30d + latency: + target: 95% + threshold: 500ms + window: 30d +``` + +## Logging Standards + +### Log Format +```json +{ + "timestamp": "ISO8601", + "level": "INFO|WARN|ERROR", + "service": "service_name", + "trace_id": "uuid", + "message": "log message", + "metadata": {} +} +``` + +### Retention Policy +- Hot storage: 7 days +- Warm storage: 30 days +- Cold storage: 365 days + +## Alerting Standards + +### Alert Severity Levels +- P1: Critical - Immediate action required +- P2: High - Action required within 1 hour +- P3: Medium - Action required within 24 hours +- P4: Low - Action required within 1 week + +### Alert Format +```yaml +alert: + name: AlertName + severity: P1|P2|P3|P4 + description: "Clear description of the alert" + runbook_url: "Link to runbook" + notification_channels: ["slack", "email"] +``` diff --git 
a/docs/SECURITY_AUDIT_CHECKLIST.md b/docs/SECURITY_AUDIT_CHECKLIST.md new file mode 100644 index 00000000..f0b1bc09 --- /dev/null +++ b/docs/SECURITY_AUDIT_CHECKLIST.md @@ -0,0 +1,43 @@ +# EKS Security Audit Checklist + +## Cluster Configuration +- [ ] EKS Control Plane Logging enabled +- [ ] Kubernetes API server endpoint private +- [ ] Secrets encryption enabled +- [ ] Latest EKS version deployed +- [ ] IRSA (IAM Roles for Service Accounts) enabled + +## Network Security +- [ ] Security groups follow least privilege +- [ ] Network policies implemented +- [ ] All ports documented and justified +- [ ] No public endpoints exposed +- [ ] VPC flow logs enabled + +## Authentication & Authorization +- [ ] IAM policies follow least privilege +- [ ] RBAC policies implemented +- [ ] Service account tokens auto-rotated +- [ ] AWS IAM authenticator configured +- [ ] Regular access review process + +## Data Protection +- [ ] EBS encryption enabled +- [ ] Secrets managed by AWS Secrets Manager +- [ ] etcd encryption enabled +- [ ] S3 bucket encryption enabled +- [ ] Regular key rotation configured + +## Compliance +- [ ] FIPS endpoints enabled +- [ ] Compliance tags implemented +- [ ] Regular security scans configured +- [ ] Audit logging enabled +- [ ] Compliance reports automated + +## Monitoring & Alerts +- [ ] Security event logging enabled +- [ ] Alert thresholds configured +- [ ] Incident response plan documented +- [ ] Regular security testing scheduled +- [ ] Compliance monitoring automated diff --git a/docs/SECURITY_BASELINE.md b/docs/SECURITY_BASELINE.md new file mode 100644 index 00000000..ffd32a28 --- /dev/null +++ b/docs/SECURITY_BASELINE.md @@ -0,0 +1,76 @@ +# EKS Security Baseline + +## Security Group Configuration + +### Node Group Security +```hcl +# Example security group configuration +resource "aws_security_group" "node_group" { + name_prefix = "eks-node-group" + vpc_id = var.vpc_id + + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + 
security_groups = [var.cluster_security_group_id] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} +``` + +## Encryption Standards + +### Data at Rest +- EBS Volumes: AWS KMS encryption required +- Secrets: Envelope encryption with automatic key rotation +- etcd: AWS KMS encryption enabled + +### Data in Transit +- TLS 1.2+ required for all API communications +- mTLS required for service-to-service communication +- Certificate rotation every 90 days + +## Network Policies + +### Default Deny Policy +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-deny +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress +``` + +## Pod Security Standards + +### Baseline Pod Security +```yaml +apiVersion: pod-security.admission.config.k8s.io/v1 +kind: PodSecurityConfiguration +defaults: + enforce: "baseline" + enforce-version: "latest" + audit: "restricted" + audit-version: "latest" + warn: "restricted" + warn-version: "latest" +``` + +## Compliance Requirements + +### GovCloud Specific +- FIPS 140-2 endpoints enabled +- NIST 800-53 controls implemented +- Regular security assessments +- Continuous monitoring enabled diff --git a/docs/TESTING_STANDARDS.md b/docs/TESTING_STANDARDS.md new file mode 100644 index 00000000..c731eaf3 --- /dev/null +++ b/docs/TESTING_STANDARDS.md @@ -0,0 +1,107 @@ +# Testing Standards + +## Validation Testing + +### Pre-commit Hooks +```yaml +repos: +- repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.64.0 + hooks: + - id: terraform_fmt + - id: terraform_docs + - id: terraform_tflint + - id: terraform_validate +``` + +### Static Analysis +```hcl +provider "aws" { + region = var.region + + default_tags { + tags = { + Environment = var.environment + Terraform = "true" + Project = var.project + } + } +} + +# Required variable validation +variable "environment" { + type = string + validation { + condition = contains(["dev", "stage", 
"prod"], var.environment) + error_message = "Environment must be dev, stage, or prod." + } +} +``` + +## Integration Testing + +### Test Structure +``` +tests/ +├── integration/ +│ ├── eks_cluster/ +│ │ ├── test_cluster.tf +│ │ └── variables.tf +│ └── monitoring/ +│ ├── test_prometheus.tf +│ └── variables.tf +└── e2e/ + └── complete_setup/ + ├── main.tf + └── outputs.tf +``` + +### Example Test Case +```hcl +module "test_eks" { + source = "../../" + + cluster_name = "test-cluster" + cluster_version = "1.24" + + vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + + enable_logging = true +} + +output "test_cluster_status" { + value = module.test_eks.cluster_status +} +``` + +## Security Testing + +### Checkov Configuration +```yaml +checkov: + skip-check: + - CKV_AWS_79 # Ensure Instance Metadata Service Version 1 is not enabled + external-checks-dir: + - security/custom_checks +``` + +### Custom Security Checks +```python +from checkov.common.models.enums import CheckResult, CheckCategories +from checkov.terraform.checks.resource.base_resource_check import BaseResourceCheck + +class EnsureEncryption(BaseResourceCheck): + def __init__(self): + name = "Ensure encryption is enabled" + id = "CKV_CUSTOM_1" + supported_resources = ['aws_ebs_volume'] + categories = [CheckCategories.ENCRYPTION] + super().__init__(name=name, id=id, categories=categories, supported_resources=supported_resources) + + def scan_resource_conf(self, conf): + if 'encrypted' in conf.keys(): + if conf['encrypted'][0]: + return CheckResult.PASSED + return CheckResult.FAILED +``` diff --git a/docs/VERSION_CONTROL.md b/docs/VERSION_CONTROL.md new file mode 100644 index 00000000..bc433f6a --- /dev/null +++ b/docs/VERSION_CONTROL.md @@ -0,0 +1,52 @@ +# Version Control Standards + +## Semantic Versioning + +### Version Format +- MAJOR.MINOR.PATCH +- Example: 1.2.3 + +### Version Rules +1. MAJOR version - Incompatible API changes +2. MINOR version - Backwards-compatible features +3. 
PATCH version - Bug fixes + +## Release Process + +### Release Branches +``` +main +├── release/1.0.x +├── release/1.1.x +└── release/2.0.x +``` + +### Version Tags +```bash +# Release tags +v1.0.0 +v1.0.1 +v1.1.0 +v2.0.0 +``` + +## Breaking Changes + +### Documentation Format +```markdown +# Breaking Changes + +## Version 2.0.0 +- Changed: Resource naming convention +- Removed: Deprecated variables +- Required: AWS Provider >= 4.0 +``` + +## Upgrade Guidelines + +### Module Updates +1. Review breaking changes +2. Update dependencies +3. Test in non-production +4. Update documentation +5. Create migration guide diff --git a/docs/templates/MODULE_README.md b/docs/templates/MODULE_README.md new file mode 100644 index 00000000..99123315 --- /dev/null +++ b/docs/templates/MODULE_README.md @@ -0,0 +1,71 @@ +# Module Name + +## Overview +Brief description of the module's purpose and functionality. + +## Prerequisites +* Required tools and versions +* Required permissions +* Dependencies + +## Usage + +### Basic Example +```hcl +module "example" { + source = "path/to/module" + + // Required variables + environment = "production" + region = "us-west-2" +} +``` + +### Advanced Example +```hcl +module "example" { + source = "path/to/module" + + // Detailed configuration + environment = "production" + region = "us-west-2" + high_availability = true + backup_retention = 30 +} +``` + +## Architecture +[Insert architecture diagram] + +### Components +* Component 1 - Description +* Component 2 - Description + +### Network Flow +[Insert network flow diagram] + +## Operations + +### Deployment +Step-by-step deployment instructions + +### Monitoring +Key metrics and monitoring guidelines + +### Troubleshooting +Common issues and solutions + +## Input Variables +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| var1 | Description | type | default | yes/no | + +## Outputs +| Name | Description | +|------|-------------| +| out1 | 
Description | + +## Security Considerations +* Security group configurations +* IAM permissions +* Encryption settings diff --git a/lab/_envcommon/default-versions.hcl b/lab/_envcommon/default-versions.hcl index c2e4f946..478dc672 100644 --- a/lab/_envcommon/default-versions.hcl +++ b/lab/_envcommon/default-versions.hcl @@ -8,7 +8,7 @@ locals { custom_service_eks_account = "${local.release_version}" eks_module_version = "20.33.1" istio_ingress_version = "${local.release_version}" - release_version = "0.1.1" # "main" # change to main when testing updated modules + release_version = "main" # "0.1.1" # change back to the pinned release tag when not testing updated modules ##################### # TF Providers @@ -24,15 +24,19 @@ locals { ##################### # Namespaces Config ##################### + operator_namespace = "aoperator" + telemetry_namespace = "atelemetry" namespaces = { cert-manager = "kube-system" - karpenter = "kube-system" + karpenter = "karpenter" metrics-server = "kube-system" + postgresql = "kube-system" + keycloak = "keycloak" + gogatekeeper = "kube-system" istio = "istio-system" kiali = "istio-system" grafana = local.telemetry_namespace k8s-dashboard = local.telemetry_namespace - kiali = local.telemetry_namespace loki = local.telemetry_namespace otel = local.telemetry_namespace prometheus = local.telemetry_namespace @@ -42,16 +46,6 @@ locals { ##################### # EKS Config ##################### - operator_namespace = "operator" - telemetry_namespace = "telemetry" - # kubectl_image_tag = "1.30.4" - - ################ - # k8s-dashboard - ################ - dashboard_hostname = "k8s-dashboard" - # k8s_dashboard_metrics_scraper = "1.0.8" - k8s_dashboard_version = "6.0.6" ################ # Cert-Manager @@ -61,30 +55,53 @@ locals { cert_manager_controller_tag = "v${local.cert_manager_version}" cert_manager_helm_chart = "${local.cert_manager_version}" cert_manager_startupapicheck_tag = "v${local.cert_manager_version}" - cert_manager_version = "1.16.3" + cert_manager_version 
= "1.17.1" cert_manager_webhook_tag = "v${local.cert_manager_version}" + ################ + # GoGatekeeper + ################ + gogatekeeper_tag = "3.2.1" + gogatekeeper_chart_version = "0.1.53" + + ################ + # Grafana + ################ + grafana_hostname = "grafana" + grafana_operator_chart_version = "4.9.8" + grafana_operator_tag = "5.16.0" + grafana_tag = "11.5.2" + os_shell_image_tag = "12" + ################ # Istio ################ istio_namespace = "istio-system" - istio_version = "1.24.2" + istio_version = "1.25.0" ################ - # Grafana + # k8s-dashboard ################ - download_dashboards_image_tag = "7.85.0" - grafana_chart_version = "8.8.5" - grafana_hostname = "grafana" - grafana_namespace = "grafana" - grafana_tag = "11.4.0" - init_chown_data_image_tag = "1.31.1" + dashboard_hostname = "dashboard" + k8s_dashboard_metrics_scraper = "1.0.8" + k8s_dashboard_version = "6.0.6" ################ # Karpenter ################ - karpenter_helm_chart = "1.1.1" - karpenter_tag = "1.1.1" + karpenter_helm_chart = "1.3.1" + karpenter_tag = "1.3.1" + + ################ + # Keycloak + ################ + keycloak_chart_version = "24.4.11" + keycloak_tag = "26.1.3" + keycloak_hostname = "keycloak" + keycloak_database = "keycloak" + keycloak_username = "keycloak" + keycloak_password = "this is my very secure and totally random password horse battery staple now" + postgresql_tag = "17.4.0-debian-12-r2" ################ # Kiali @@ -95,37 +112,34 @@ locals { ################ # Loki ################ - loki_chart_version = "6.25.0" - loki_tag = "3.3.2" - canary_tag = "3.0.0" + loki_chart_version = "6.27.0" + loki_tag = "3.4.2" enterprise_logs_provisioner_tag = "v1.7.0" - gateway_tag = "1.25.2-alpine" - memcached_tag = "1.6.23-alpine" - exporter_tag = "v0.14.4" + gateway_tag = "1.27-alpine" + memcached_tag = "1.6.37" + exporter_tag = "v0.15.0" sidecar_tag = "1.27.4" ################ # Metrics Server ################ - metrics_server_helm_chart = "3.12.1" - 
metrics_server_tag = "v0.7.1" + metrics_server_helm_chart = "3.12.2" + metrics_server_tag = "0.7.2" ################ # Prometheus ################ - prometheus_chart_version = "25.26.0" - prometheus_namespace = "prometheus" - prometheus_server_tag = "v2.54.0" + prometheus_chart_version = "27.5.1" + prometheus_server_tag = "v3.2.1" prometheus_config_reloader_tag = "v0.75.2" - alertmanager_tag = "v0.27.0" - kube_state_metrics_tag = "v2.13.0" - node_exporter_tag = "v1.8.2" - pushgateway_tag = "v1.9.0" + alertmanager_tag = "v0.28.0" + kube_state_metrics_tag = "v2.15.0" + node_exporter_tag = "v1.9.0" + pushgateway_tag = "v1.11.0" ################ # Tempo ################ - tempo_chart_version = "1.18.1" - tempo_namespace = "tempo" - tempo_tag = "2.7.0" + tempo_chart_version = "1.18.2" + tempo_tag = "2.7.1" } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl index 29eb18d8..e52f9d23 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl @@ -6,7 +6,7 @@ locals { eks_instance_disk_size = 100 eks_ng_desired_size = 2 eks_ng_max_size = 10 - eks_ng_min_size = 0 + eks_ng_min_size = 2 enable_cluster_creator_admin_permissions = true tags = { "slim:schedule" = "8:00-17:00" diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl index ea7cc82f..5e03cd4a 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl @@ -17,7 +17,8 @@ dependencies { paths = [ "../eks", "../eks-config", - "../eks-karpenter" + "../eks-karpenter", + "../eks-metrics-server", ] } @@ -47,6 +48,7 @@ inputs = { # Cert Manager Configuration 
cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart cluster_issuer_name = include.root.inputs.cluster_issuer_name + namespace = include.root.inputs.namespaces["cert-manager"] # Version Tags cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl index 0dfc1d31..4a6a659f 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl @@ -7,7 +7,8 @@ include "root" { dependencies { paths = [ "../eks", - "../eks-karpenter" + "../eks-karpenter", + "../eks-metrics-server", ] } @@ -49,5 +50,6 @@ inputs = { security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id subnets = dependency.eks.outputs.subnets vpc_id = dependency.eks.outputs.vpc_id - + operators_ns = include.root.inputs.operator_namespace + telemetry_ns = include.root.inputs.telemetry_namespace } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl index 2bf9b72f..6ab98584 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl @@ -34,9 +34,9 @@ dependency "eks-istio" { dependencies { paths = [ - "../eks-config", + "../eks", "../eks-istio", - "../eks-karpenter" + "../eks-metrics-server", ] } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off new file mode 100644 index 00000000..119537e6 --- /dev/null +++ 
b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off @@ -0,0 +1,80 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-gogatekeeper.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + } +} + +dependency "eks_grafana" { + config_path = "../eks-grafana" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + public_endpoint = "mock.grafaba.example.com" + } +} + +dependency "eks_keycloak" { + config_path = "../eks-keycloak" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + public_endpoint = "mock.keycloak.example.com" + discovery_url = "mock.keycloak.example.com/auth" + client_id = "mock-client-id" + client_secret = "mock-client-secret" + } +} + +dependencies { + paths = [ + "../eks", + "../eks-dns", + "../eks-grafana", + "../eks-keycloak", + "../eks-prometheus", + ] +} + +inputs = { + # Base Cluster Config + cluster_domain = dependency.eks_dns.outputs.cluster_domain + namespace = include.root.inputs.namespaces["gogatekeeper"] + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Gatekeeper Config + gogatekeeper_tag = include.root.inputs.gogatekeeper_tag + 
gogatekeeper_chart_version = include.root.inputs.gogatekeeper_chart_version + keycloak_discovery_url = dependency.eks_keycloak.outputs.discovery_url + + # Service Behind Gatekeeper Config + service_name = "test-gc" + upstream_url = dependency.eks_grafana.outputs.public_endpoint + redirection_url = dependency.eks_grafana.outputs.public_endpoint + client_id = dependency.eks_keycloak.outputs.client_id + client_secret = dependency.eks_keycloak.outputs.client_secret + keycloak_public_url = dependency.eks_keycloak.outputs.public_endpoint +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl index e6db8bb5..7830797b 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl @@ -21,21 +21,54 @@ dependency "eks" { } } +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.domain.example.com" + } +} + dependency "eks_loki" { config_path = "../eks-loki" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { rwo_storage_class = "gp3-mocked" + gateway_internal_endpoint = { + url = "mock.loki.enpoint.example.com" + } + } +} + +dependency "eks_prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mocked" + prometheus_server_internal_endpoint = { + url = "mock.prometheus.enpoint.example.com" + } + } +} + +dependency "eks_tempo" { + config_path = "../eks-tempo" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mocked" + tempo_internal_endpoint = { + url = 
"mock.tempo.enpoint.example.com" + } } } dependencies { paths = [ "../eks", - "../eks-config", "../eks-dns", - "../eks-karpenter", - "../eks-loki" + "../eks-loki", + "../eks-prometheus", + "../eks-tempo" ] } @@ -47,16 +80,20 @@ inputs = { # Cluster Configuration cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = include.root.inputs.vpc_domain_name + cluster_domain = dependency.eks_dns.outputs.cluster_domain oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn # Storage Configuration rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class # Grafana Configuration - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag - namespace = include.root.inputs.grafana_namespace + grafana_operator_chart_version = include.root.inputs.grafana_operator_chart_version + grafana_operator_tag = include.root.inputs.grafana_operator_tag + grafana_tag = include.root.inputs.grafana_tag + namespace = include.root.inputs.namespaces["grafana"] + os_shell_image_tag = include.root.inputs.os_shell_image_tag + service_name = "grafana" + loki_endpoint = dependency.eks_loki.outputs.gateway_internal_endpoint.url + prometheus_endpoint = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url + tempo_endpoint = dependency.eks_tempo.outputs.tempo_internal_endpoint.url } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl index 2d1d87aa..0cd1e1f9 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl @@ -15,7 +15,7 @@ terraform { dependencies { paths = [ "../eks", - "../eks-config" + 
"../eks-cert-manager", ] } @@ -39,6 +39,6 @@ inputs = { oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn # Istio Configuration - namespace = include.root.inputs.istio_namespace + namespace = include.root.inputs.namespaces["istio"] istio_version = include.root.inputs.istio_version } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl index f2136034..1d02df66 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl @@ -5,7 +5,7 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=cert_clash" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20s"] @@ -15,8 +15,7 @@ terraform { dependencies { paths = [ "../eks", - "../eks-config", - "../eks-dns" + "../eks-dns", ] } @@ -29,6 +28,15 @@ dependency "eks" { } } +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id @@ -36,10 +44,11 @@ inputs = { region = include.root.inputs.aws_region # Cluster Configuration - cluster_domain = include.root.inputs.vpc_domain_name + cluster_domain = dependency.eks_dns.outputs.cluster_domain cluster_name = dependency.eks.outputs.cluster_name # Dashboard Configuration + service_name = include.root.inputs.dashboard_hostname k8s_dashboard_version = 
include.root.inputs.k8s_dashboard_version - namespace = include.root.inputs.dashboard_hostname + namespace = include.root.inputs.namespaces["k8s-dashboard"] } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl index 1ec3a41d..25c22d7c 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl @@ -14,13 +14,15 @@ terraform { } dependencies { - paths = ["../eks"] + paths = [ + "../eks", + "../eks-metrics-server", + ] } dependency "eks" { config_path = "../eks" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { cluster_name = "mock-cluster" cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" @@ -41,9 +43,11 @@ inputs = { cluster_endpoint = dependency.eks.outputs.cluster_endpoint cluster_name = dependency.eks.outputs.cluster_name oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + vpc_id = dependency.eks.outputs.vpc_id # Karpenter Configuration karpenter_tag = include.root.inputs.karpenter_tag karpenter_helm_chart = include.root.inputs.karpenter_helm_chart karpenter_node_group_name = dependency.eks.outputs.node_group_name + namespace = include.root.inputs.namespaces["karpenter"] } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl new file mode 100644 index 00000000..74132d72 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl @@ -0,0 +1,76 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-keycloak.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mock" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-prometheus", + ] +} + +inputs = { + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + namespace = include.root.inputs.namespaces["keycloak"] + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # keycloak config + default_storage_class = dependency.eks_config.outputs.rwo_storage_class + keycloak_chart_version = include.root.inputs.keycloak_chart_version + keycloak_hostname = include.root.inputs.keycloak_hostname + keycloak_tag = include.root.inputs.keycloak_tag + realm_email = include.root.inputs.cluster_mailing_list + realm_name = "master" + realm_password = include.root.inputs.keycloak_password + realm_username = include.root.inputs.keycloak_username + service_name = "keycloak" + telemetry_namespace = include.root.inputs.telemetry_namespace + + # # Database configuration + keycloak_database = include.root.inputs.keycloak_database + keycloak_user = include.root.inputs.keycloak_username 
+ keycloak_password = include.root.inputs.keycloak_password + + # Project information + project_name = include.root.inputs.project_name + tags = include.root.inputs.tags +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl new file mode 100644 index 00000000..f1c9bdcb --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl @@ -0,0 +1,126 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_cert_manager" { + config_path = "../eks-cert-manager" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_issuer_name = "mock-issuer" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + } +} + +dependency "eks_grafana" { + config_path = "../eks-grafana" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = { + hostname = "grafana.mock.svc.cluster.local" + port_number = "80" + url = "https://grafana.mock.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.mock.lab.csp2.census.gov" + port_number = "80" 
+ url = "https://grafana.mock.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + tempo_datasource_id = "mock-tempo-datasource-id" + } +} + +dependency "eks_prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus.mock.svc.cluster.local" + port_number = "80" + url = "https://prometheus.mock.svc.cluster.local:80/" + } + } +} + +dependency "eks_tempo" { + config_path = "../eks-tempo" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + tempo_internal_endpoint = { + hostname = "tempo.mock.svc.cluster.local" + port_number = "80" + url = "https://tempo.mock.svc.cluster.local:80/" + } + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-grafana", + "../eks-istio", + "../eks-prometheus", + "../eks-tempo", + ] +} + + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks_cert_manager.outputs.cluster_issuer_name + + # Kiali Configuration + service_name = "kiali" + namespace = include.root.inputs.namespaces["kiali"] + istio_namespace = include.root.inputs.namespaces["istio"] + grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks_grafana.outputs.namespace + grafana_secret_name = dependency.eks_grafana.outputs.secret_name + grafana_public_url = dependency.eks_grafana.outputs.public_endpoint + + kiali_application_version = include.root.inputs.kiali_application_version + kiali_operator_version = include.root.inputs.kiali_operator_version + + prometheus_internal_url = 
dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url + grafana_namespace = dependency.eks_grafana.outputs.namespace + grafana_secret_name = dependency.eks_grafana.outputs.secret_name + grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url + grafana_public_url = dependency.eks_grafana.outputs.public_endpoint + tempo_datasource_id = dependency.eks_grafana.outputs.tempo_datasource_id + tempo_internal_url = dependency.eks_tempo.outputs.tempo_internal_endpoint.url +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable deleted file mode 100644 index 27a255bb..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable +++ /dev/null @@ -1,85 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - # source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = "acmpca-clusterissuer" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } - -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - 
mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.dev.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.dev.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -inputs = { - kiali_operator_version = include.root.inputs.kiali_operator_version - kiali_application_version = include.root.inputs.kiali_application_version - - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - operators_namespace = "operators" - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url - grafana_secret_name = "grafana" - # grafana_secret_name = dependency.eks-grafana.outputs.secret_name - jaeger_internal_url = "" - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl index 9d0c933c..e126331b 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl @@ -9,7 +9,6 @@ 
dependencies { "../eks", "../eks-config", "../eks-metrics-server", - "../eks-dns" ] } @@ -22,11 +21,11 @@ dependency "eks" { } } -dependency "eks-config" { +dependency "eks_config" { config_path = "../eks-config" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - rwo_storage_class = "gp3-encrypted" + rwo_storage_class = "gp3-mock" } } @@ -51,5 +50,6 @@ inputs = { # Loki Configuration loki_chart_version = include.root.inputs.loki_chart_version loki_tag = include.root.inputs.loki_tag - rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class + namespace = include.root.inputs.namespaces["loki"] + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl index 4e4d198f..fd02a7ac 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl @@ -7,7 +7,6 @@ include "root" { dependencies { paths = [ "../eks", - "../eks-config" ] } @@ -39,4 +38,5 @@ inputs = { # Metrics Server Configuration metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart metrics_server_tag = include.root.inputs.metrics_server_tag + namespace = include.root.inputs.namespaces["metrics-server"] } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl new file mode 100644 index 00000000..2c93211d --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl @@ -0,0 +1,61 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-open-telemetry.git?ref=${include.root.inputs.release_version}" + # source = "../../../../../../../tfmod-open-telemetry" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-loki", + "../eks-prometheus", + "../eks-tempo" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks-loki" { + config_path = "../eks-loki" + mock_outputs = { + gateway_internal_endpoint = { + hostname = "loki-gateway.mock.svc.cluster.local" + portNumber = 3210 + url = "http://loki-gateway.mock.svc.cluster.local:3210/" + } + } +} + +dependency "eks-tempo" { + config_path = "../eks-tempo" + mock_outputs = { + tempo_otlp_endpoint = { + hostname = "tempo.mock.svc.cluster.local" + portNumber = 1234 + url = "http://tempo.mock.svc.cluster.local:1234/" + } + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region + namespace = include.root.inputs.namespaces["otel"] + loki_endpoint = dependency.eks-loki.outputs.gateway_internal_endpoint.url + tempo_endpoint = dependency.eks-tempo.outputs.tempo_otlp_endpoint.url +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl index 8b16a914..80e24e8f 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl @@ -17,7 +17,6 @@ dependencies { "../eks", "../eks-config", "../eks-metrics-server", - "../eks-dns" ] } @@ -30,7 +29,7 @@ dependency "eks" { } } -dependency "eks-config" { +dependency "eks_config" { config_path = "../eks-config" 
mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { @@ -50,12 +49,12 @@ inputs = { # Prometheus Configuration prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_namespace = include.root.inputs.prometheus_namespace prometheus_server_tag = include.root.inputs.prometheus_server_tag prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag alertmanager_tag = include.root.inputs.alertmanager_tag kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag + namespace = include.root.inputs.namespaces["prometheus"] node_exporter_tag = include.root.inputs.node_exporter_tag pushgateway_tag = include.root.inputs.pushgateway_tag - rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl index dc222715..e94ad7f0 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl @@ -27,9 +27,9 @@ dependency "eks-prometheus" { mock_outputs = { prometheus_namespace = "prometheus" prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" + hostname = "prometheus-server.mock.svc.cluster.local" port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + url = "http://prometheus-server.mock.svc.cluster.local:9090/" } } } @@ -37,9 +37,6 @@ dependency "eks-prometheus" { dependencies { paths = [ "../eks", - "../eks-config", - "../eks-dns", - "../eks-karpenter", "../eks-prometheus" ] } @@ -61,5 +58,6 @@ inputs = { # Tempo Configuration tempo_chart_version = include.root.inputs.tempo_chart_version tempo_tag = include.root.inputs.tempo_tag - 
namespace = include.root.inputs.tempo_namespace + namespace = include.root.inputs.namespaces["tempo"] + } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl new file mode 100644 index 00000000..656de00e --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl @@ -0,0 +1,28 @@ +locals { + # Cluster specific configuration + cluster_endpoint_public_access = true + cluster_name = "platform-eng-eks-srn" + cluster_mailing_list = "srinivasa.nangunuri@census.gov" + eks_instance_disk_size = 100 + eks_ng_desired_size = 2 + eks_ng_max_size = 10 + eks_ng_min_size = 2 + enable_cluster_creator_admin_permissions = true + tags = { + "slim:schedule" = "8:00-17:00" + "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" + } + + # Common configuration + common_retry_args = { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } + + common_dependencies = ["../eks", "../eks-config"] + + common_mock_eks = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl new file mode 100644 index 00000000..d1e69d00 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl @@ -0,0 +1,57 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" + + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + 
paths = [ + "../eks", + "../eks-config", + "../eks-karpenter" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_version = include.root.inputs.cluster_version + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = include.root.inputs.cluster_mailing_list + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Cert Manager Configuration + cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart + cluster_issuer_name = include.root.inputs.cluster_issuer_name + namespace = include.root.inputs.namespaces["cert-manager"] + + # Version Tags + cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag + cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag + cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag + cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl new file mode 100644 index 00000000..c1328ee7 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl @@ -0,0 +1,54 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +dependencies { + paths = [ + "../eks", + "../eks-karpenter" + ] +} + +terraform { + source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" + + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + + mock_outputs = { + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_certificate_authority_data = [{ data = "mock-cert-data" }] + eks_managed_node_groups_autoscaling_group_names = ["mock-asg-name"] + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + security_group_all_worker_mgmt_id = "sg-mock" + subnets = ["subnet-mock1", "subnet-mock2"] + vpc_id = "vpc-mock" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + subnets = dependency.eks.outputs.subnets + vpc_id = dependency.eks.outputs.vpc_id + operators_ns = include.root.inputs.operator_namespace + telemetry_ns = include.root.inputs.telemetry_namespace +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl new file mode 100644 index 00000000..2bf9b72f --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl @@ -0,0 +1,60 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + 
expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + subnets = ["subnet-mock1", "subnet-mock2", "subnet-mock3"] + } +} + +dependency "eks-istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + istio_ingress_lb = { + dns_name = "mock-${include.root.inputs.cluster_name}.elb.amazonaws.com" + zone_id = "MOCKZONEID" + } + } +} + +dependencies { + paths = [ + "../eks-config", + "../eks-istio", + "../eks-karpenter" + ] +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = include.root.inputs.cluster_name + + # Network Configuration + istio_ingress_lb = dependency.eks-istio.outputs.istio_ingress_lb + route53_endpoints = include.root.inputs.route53_endpoints + vpc_domain_name = include.root.inputs.vpc_domain_name + vpc_name = include.root.inputs.vpc_name + + # Additional Configuration + tags = include.root.inputs.tags +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl new file mode 100644 index 00000000..2bc7484b --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl @@ -0,0 +1,63 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_loki" { + config_path = "../eks-loki" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mocked" + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-loki" + ] +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = include.root.inputs.vpc_domain_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Storage Configuration + rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class + + # Grafana Configuration + grafana_chart_version = include.root.inputs.grafana_chart_version + grafana_tag = include.root.inputs.grafana_tag + download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag + init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag + namespace = include.root.inputs.namespaces["grafana"] + service_name = "grafana" +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl new file mode 100644 index 00000000..1c312166 --- /dev/null +++ 
b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl @@ -0,0 +1,44 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Istio Configuration + namespace = include.root.inputs.namespaces["istio"] + istio_version = include.root.inputs.istio_version +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl new file mode 100644 index 00000000..c32546cd --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl @@ -0,0 +1,55 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=mcmCluster" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + 
paths = [ + "../eks", + "../eks-config", + "../eks-dns" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + + # Dashboard Configuration + service_name = include.root.inputs.dashboard_hostname + k8s_dashboard_version = include.root.inputs.k8s_dashboard_version + namespace = include.root.inputs.namespaces["k8s-dashboard"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl new file mode 100644 index 00000000..7c2ff2db --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl @@ -0,0 +1,50 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" + + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = ["../eks"] +} + +dependency "eks" { + config_path = "../eks" + 
mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + + mock_outputs = { + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + node_group_name = "mock-node-group" + vpc_id = "vpc-mock" + subnets = ["subnet-mock1", "subnet-mock2"] + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Karpenter Configuration + karpenter_tag = include.root.inputs.karpenter_tag + karpenter_helm_chart = include.root.inputs.karpenter_helm_chart + karpenter_node_group_name = dependency.eks.outputs.node_group_name + namespace = include.root.inputs.namespaces["karpenter"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl new file mode 100644 index 00000000..248432dd --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl @@ -0,0 +1,87 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-keycloak.git?ref=standards" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + 
+dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mock" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_postgresql" { + config_path = "../eks-postgresql" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = { + url = "mock-internal-endpoint-url" + } + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-postgresql", + "../eks-prometheus", + ] +} + +inputs = { + admin_email = include.root.inputs.cluster_mailing_list + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + namespace = include.root.inputs.namespaces["keycloak"] + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # keycloak config + default_storage_class = dependency.eks_config.outputs.rwo_storage_class + keycloak_chart_version = include.root.inputs.keycloak_chart_version + keycloak_hostname = include.root.inputs.keycloak_hostname + keycloak_tag = include.root.inputs.keycloak_tag + service_name = "keycloak" + telemetry_namespace = include.root.inputs.telemetry_namespace + + # Database configuration + db_host = dependency.eks_postgresql.outputs.internal_endpoint.url + db_name = include.root.inputs.postgresql_database + db_password = include.root.inputs.postgresql_password + db_user = include.root.inputs.postgresql_username + + # Project information + project_name = include.root.inputs.project_name + tags = include.root.inputs.tags +} diff --git 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl new file mode 100644 index 00000000..c36c773c --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl @@ -0,0 +1,113 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-grafana", + "../eks-istio", + "../eks-prometheus" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + operators_namespace = "mock-namespace" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_grafana" { + config_path = "../eks-grafana" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = { + hostname = "grafana.mock.svc.cluster.local" + port_number = "80" + url = "https://grafana.mock.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.mock.lab.csp2.census.gov" + port_number = "80" + 
url = "https://grafana.mock.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +dependency "eks_istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + namespace = "mock-namespace-istio" + } +} + +dependency "eks_prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus.mock.svc.cluster.local" + port_number = "80" + url = "https://prometheus.mock.svc.cluster.local:80/" + } + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + + # Kiali Configuration + service_name = "kiali" + namespace = include.root.inputs.namespaces["kiali"] + grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks_grafana.outputs.namespace + grafana_secret_name = dependency.eks_grafana.outputs.secret_name + grafana_public_url = dependency.eks_grafana.outputs.public_endpoint.url + + kiali_operator_version = include.root.inputs.kiali_operator_version + + prometheus_internal_url = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url + # jager_internal_url = dependency.eks_prometheus.outputs.jager_internal_url +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl.disabled similarity index 100% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled rename to 
lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl.disabled diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl new file mode 100644 index 00000000..55d3830e --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl @@ -0,0 +1,56 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-metrics-server", + "../eks-dns" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mock" + } +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Loki Configuration + loki_chart_version = include.root.inputs.loki_chart_version + loki_tag = include.root.inputs.loki_tag + namespace = include.root.inputs.namespaces["loki"] + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class +} diff --git 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl new file mode 100644 index 00000000..5e520aad --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl @@ -0,0 +1,43 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +dependencies { + paths = [ + "../eks", + "../eks-config" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + } +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + + # Metrics Server Configuration + metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart + metrics_server_tag = include.root.inputs.metrics_server_tag + namespace = include.root.inputs.namespaces["metrics-server"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl new file mode 100644 index 00000000..4429d04a --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl @@ -0,0 +1,76 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-postgresql.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-prometheus", + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-mock" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class + + # PostgreSQL Configuration + namespace = include.root.inputs.namespaces["postgresql"] + os_shell_tag = include.root.inputs.os_shell_tag + pgpool_tag = include.root.inputs.pgpool_tag + postgres_exporter_tag = include.root.inputs.postgres_exporter_tag + postgresql_repmgr_tag = include.root.inputs.postgresql_repmgr_tag + postgresql_tag = include.root.inputs.postgresql_tag + service_name = "postgresql" + telemetry_namespace = include.root.inputs.telemetry_namespace + + # Database Consumer Configuration + postgresql_database = 
include.root.inputs.postgresql_database + postgresql_username = include.root.inputs.postgresql_username + postgresql_password = include.root.inputs.postgresql_password + +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/README.md similarity index 100% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/README.md rename to lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/README.md diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl new file mode 100644 index 00000000..76650e5e --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl @@ -0,0 +1,61 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=mcmCluster" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-metrics-server", + "../eks-dns" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = 
include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration + prometheus_chart_version = include.root.inputs.prometheus_chart_version + prometheus_server_tag = include.root.inputs.prometheus_server_tag + prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag + alertmanager_tag = include.root.inputs.alertmanager_tag + kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag + namespace = include.root.inputs.namespaces["prometheus"] + node_exporter_tag = include.root.inputs.node_exporter_tag + pushgateway_tag = include.root.inputs.pushgateway_tag + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl new file mode 100644 index 00000000..e1b17d6a --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl @@ -0,0 +1,66 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=keycloak" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + prometheus_svc = "prometheus-server" + 
prometheus_namespace = "prometheus" + prometheus_port = 80 + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.mock.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.mock.svc.cluster.local:9090/" + } + } +} + +dependencies { + paths = [ + "../eks", + "../eks-dns", + "../eks-prometheus" + ] +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration + prometheus_svc = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.hostname + prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + + # Tempo Configuration + tempo_chart_version = include.root.inputs.tempo_chart_version + tempo_tag = include.root.inputs.tempo_tag + namespace = include.root.inputs.namespaces["tempo"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl new file mode 100644 index 00000000..9eca1de2 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl @@ -0,0 +1,28 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" + + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = 
include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = include.root.inputs.cluster_name + cluster_version = include.root.inputs.cluster_version + + # Additional Configuration + tags = include.root.inputs.tags +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl deleted file mode 100644 index 8d2831cf..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. -locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index 35e355aa..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - 
-dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = dependency.eks.inputs.creator - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl deleted file mode 100644 index d4a60dbc..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_certificate_authority_data = [{ data = 
"THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - token = [{ token = "THISISTHETOKENTHATDOESNTEXISTTHEREAREMANYLIKEITBUTHISONEISFORACLUSTER" }] - vpc_id = "a-vpc-id" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - vpc_id = dependency.eks.outputs.vpc_id - cluster_name = dependency.eks.outputs.cluster_name - subnets = dependency.eks.outputs.subnets - security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id - eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - kubectl_image_tag = include.root.inputs.kubectl_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl deleted file mode 100644 index 6e28781b..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = 
get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] - } -} - -dependency "istio" { - config_path = "../eks-istio" - mock_outputs = { - istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" - } - } -} - -inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name - route53_endpoints = include.root.inputs.route53_endpoints -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl deleted file mode 100644 index 65ab33fe..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - mock_outputs = { - rwo_storage_class = "gp3-encrypted" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = 
dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.grafana_hostname - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl deleted file mode 100644 index c7c22c81..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,32 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index cd1961b6..00000000 --- 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl +++ /dev/null @@ -1,36 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.dashboard_hostname - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl deleted file mode 100644 index 6b1a862f..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,43 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_endpoint = 
"https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = "node_group_a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" - } -} - -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - karpenter_node_group_name = dependency.eks.outputs.node_group_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - vpc_id = dependency.eks.outputs.vpc_id - karpenter_helm_chart = include.root.inputs.karpenter_helm_chart - karpenter_tag = include.root.inputs.karpenter_tag - kubectl_tag = include.root.inputs.kubectl_image_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable deleted file mode 100644 index 1e04fe0d..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable +++ /dev/null @@ -1,81 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - # source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = 
"acmpca-clusterissuer" - } -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.dev.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.dev.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -inputs = { - kiali_operator_version = include.root.inputs.kiali_operator_version - kiali_application_version = include.root.inputs.kiali_application_version - - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - operators_namespace = "operators" - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url - grafana_secret_name = "grafana" - # grafana_secret_name = dependency.eks-grafana.outputs.secret_name - jaeger_internal_url = "" - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff 
--git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl deleted file mode 100644 index 2c6b6be5..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl deleted file mode 
100644 index 387653b9..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,33 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl deleted file mode 100644 index e6c54b16..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,38 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true -} - -inputs = { - profile = 
include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag - alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl deleted file mode 100644 index e9ebd485..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,46 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - prometheus_namespace = "prometheus" - } -} - -inputs = { - account_id = include.root.locals.account_id - profile = 
include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl deleted file mode 100644 index cc7c8935..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl +++ /dev/null @@ -1,56 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -locals { - # Set cluster/platform specific variables, or extract from the hierarchy. 
- account_id = include.root.inputs.aws_account_id - cluster_endpoint_public_access = include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - organization = include.root.inputs.organization - profile = include.root.inputs.aws_profile - project_name = include.root.inputs.project_name - project_number = include.root.inputs.project_number - project_role = include.root.inputs.project_role - region = include.root.inputs.aws_region - tags = include.root.inputs.tags - terraform = include.root.inputs.terraform - terragrunt = include.root.inputs.terragrunt - vpc_domain_name = include.root.inputs.vpc_domain_name -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -inputs = { - aws_account_id = local.account_id - cluster_endpoint_public_access = local.cluster_endpoint_public_access - cluster_name = local.cluster_name - cluster_version = local.cluster_version - creator = local.creator - eks_instance_disk_size = local.eks_instance_disk_size - eks_ng_desired_size = local.eks_ng_desired_size - eks_ng_max_size = local.eks_ng_max_size - eks_ng_min_size = local.eks_ng_min_size - eks_vpc_name = local.eks_vpc_name - enable_cluster_creator_admin_permissions = 
local.enable_cluster_creator_admin_permissions - os_username = local.creator - shared_vpc_label = local.environment_abbr - tags = local.tags -} diff --git a/monitoring/grafana-dashboards.json b/monitoring/grafana-dashboards.json new file mode 100644 index 00000000..01d36852 --- /dev/null +++ b/monitoring/grafana-dashboards.json @@ -0,0 +1,44 @@ +{ + "dashboards": [ + { + "name": "Cluster Overview", + "panels": [ + { + "title": "Node CPU Usage", + "type": "graph", + "targets": [ + { + "expr": "cluster:node_cpu:ratio_rate5m", + "legendFormat": "{{node}}" + } + ] + }, + { + "title": "Pod Resource Usage", + "type": "graph", + "targets": [ + { + "expr": "cluster:pod_cpu:usage_rate5m", + "legendFormat": "{{pod}}" + } + ] + } + ] + }, + { + "name": "Service SLOs", + "panels": [ + { + "title": "Request Latency", + "type": "graph", + "targets": [ + { + "expr": "http_request_duration_seconds:99percentile", + "legendFormat": "{{service}}" + } + ] + } + ] + } + ] +} diff --git a/monitoring/prometheus-rules.yaml b/monitoring/prometheus-rules.yaml new file mode 100644 index 00000000..fa63c5ee --- /dev/null +++ b/monitoring/prometheus-rules.yaml @@ -0,0 +1,39 @@ +groups: +- name: kubernetes.rules + rules: + - record: cluster:node_cpu:ratio_rate5m + expr: sum(rate(node_cpu_seconds_total{mode!="idle"}[5m])) by (node) / count(node_cpu_seconds_total{mode="idle"}) by (node) + + - alert: NodeCPUUsage + expr: cluster:node_cpu:ratio_rate5m > 0.8 + for: 10m + labels: + severity: warning + annotations: + description: "CPU usage on {{ $labels.node }} is above 80%" + +- name: kubernetes.pod.rules + rules: + - record: cluster:pod_cpu:usage_rate5m + expr: sum(rate(container_cpu_usage_seconds_total{container!=""}[5m])) by (pod, namespace) + + - alert: PodCPUThrottling + expr: rate(container_cpu_cfs_throttled_seconds_total[5m]) > 0 + for: 15m + labels: + severity: warning + annotations: + description: "Pod {{ $labels.pod }} in {{ $labels.namespace }} is being throttled" + +- name: 
application.slos + rules: + - record: http_request_duration_seconds:99percentile + expr: histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service)) + + - alert: HighLatency + expr: http_request_duration_seconds:99percentile > 0.5 + for: 5m + labels: + severity: critical + annotations: + description: "Service {{ $labels.service }} is experiencing high latency" diff --git a/plan.md b/plan.md new file mode 100644 index 00000000..bd058fd3 --- /dev/null +++ b/plan.md @@ -0,0 +1,271 @@ +Project Plan: EKS Infrastructure Codebase Improvements +1. Documentation Standardization + - Create centralized documentation standards guide + - Implement standardized README structure across all modules: + * Overview and purpose + * Prerequisites and dependencies + * Usage examples with variables + * Architecture diagrams + * Operations guide + - Establish changelog format using Commitizen convention + - Create architecture diagrams: + * High-level system architecture + * Module relationships + * Network flow diagrams + * Security group configurations + - Develop consistent module examples: + * Basic usage patterns + * Advanced configurations + * Migration guides + * Troubleshooting guides + - Implementation timeline: + * Week 1: Standards guide creation + * Week 2-3: README updates + * Week 4: Diagram creation + * Week 5: Example development + * Week 6: Review and refinement + +2. 
Security Enhancements + - EKS Security Group Configurations: + * Implement least-privilege access rules + * Restrict node group communication + * Define approved ingress/egress patterns + * Document security group dependencies + + - AWS GovCloud Security Implementation: + * Enable FIPS 140-2 compliant endpoints + * Implement NIST 800-53 controls + * Configure AWS KMS for all sensitive data + * Enable AWS Organizations SCPs + + - Encryption Configurations: + * Enable envelope encryption for secrets + * Implement at-rest encryption for EBS volumes + * Configure TLS for all service communications + * Rotate encryption keys automatically + + - Network Security Policies: + * Define default deny policies + * Create application-specific network policies + * Implement pod security policies + * Configure service mesh security + + - Implementation Timeline: + * Week 1: Security audit and gap analysis + * Week 2: Security group updates + * Week 3: Encryption improvements + * Week 4: Network policy implementation + * Week 5: Testing and validation + * Week 6: Documentation and training + +3. 
Observability Improvements + - Prometheus Configuration Standardization: + * Define standard metric collection rules + * Implement consistent recording rules + * Set up unified alerting rules + * Configure HA architecture + + - Metrics Collection Strategy: + * Define golden signals metrics + * Implement custom metric collectors + * Set up SLO/SLI tracking + * Configure cost metrics collection + + - Logging Framework: + * Implement structured logging + * Configure log aggregation + * Set up log retention policies + * Enable audit logging + + - Grafana Dashboards: + * Create cluster health dashboards + * Implement cost monitoring views + * Set up performance dashboards + * Configure security monitoring panels + + - Implementation Timeline: + * Week 1: Metrics standardization + * Week 2: Logging implementation + * Week 3: Dashboard creation + * Week 4: Alert configuration + * Week 5: Testing and validation + * Week 6: Documentation and training + +4. Infrastructure Optimization + - Node Group Configuration: + * Implement right-sized instance types + * Configure optimal scaling thresholds + * Set up mixed-instance policies + * Define node taints and labels + + - Auto-scaling Strategy: + * Configure Cluster Autoscaler settings + * Implement Karpenter provisioners + * Set up pod disruption budgets + * Define scaling policies + + - Storage Optimization: + * Define storage class specifications + * Implement volume encryption + * Configure backup policies + * Set up snapshot schedules + + - Resource Management: + * Implement namespace quotas + * Define limit ranges + * Configure resource requests/limits + * Set up cost allocation tags + + - Implementation Timeline: + * Week 1: Node group optimization + * Week 2: Auto-scaling implementation + * Week 3: Storage configuration + * Week 4: Resource quotas setup + * Week 5: Testing and validation + * Week 6: Documentation and training + +5. 
Module Organization + - Module Standardization: + * Create consistent module structure + * Implement standard naming conventions + * Define input/output patterns + * Establish version constraints + + - Variable Management: + * Create shared variable definitions + * Implement variable validation rules + * Define default value standards + * Document variable dependencies + + - Version Control: + * Implement semantic versioning + * Create version compatibility matrix + * Define upgrade paths + * Document breaking changes + + - Dependencies: + * Map module relationships + * Document cross-module dependencies + * Define initialization order + * Create dependency graphs + + - Implementation Timeline: + * Week 1: Module structure standardization + * Week 2: Variable management + * Week 3: Version control implementation + * Week 4: Dependency documentation + * Week 5: Testing and validation + * Week 6: Documentation and training + +6. Testing Framework + - Terraform Validation: + * Implement pre-commit hooks + * Configure format checking + * Add variable validation + * Set up static analysis + + - Integration Testing: + * Create test environments + * Implement end-to-end tests + * Configure smoke tests + * Set up regression testing + + - Security Testing: + * Implement security scanners + * Configure compliance checks + * Add vulnerability scanning + * Set up secret detection + + - Test Automation: + * Configure CI/CD pipelines + * Implement test reporting + * Set up coverage tracking + * Create automated rollbacks + + - Implementation Timeline: + * Week 1: Validation framework setup + * Week 2: Integration test development + * Week 3: Security scanning implementation + * Week 4: Automation configuration + * Week 5: Testing and validation + * Week 6: Documentation and training + +Implementation Priority: + - Security Enhancements (Critical) + - Observability Improvements (High) + - Infrastructure Optimization (High) + - Documentation Standardization (Medium) + - Module 
Organization (Medium) + - Testing Framework (Medium) + +Key Metrics: + - Security compliance score + - Resource utilization efficiency + - Documentation coverage + - Test coverage + - Code duplication reduction + - Deployment success rate + +Next Steps: + +1. Security Audit (Week 1-2) + - Perform comprehensive security assessment + * Review IAM roles and permissions + * Audit security group configurations + * Analyze network policies + * Review encryption settings + - Generate security findings report + - Prioritize security improvements + - Create remediation timeline + +2. Implementation Planning (Week 2-3) + - Create detailed project timeline + * Break down tasks by module + * Identify dependencies + * Assign ownership + * Set milestones + - Establish success criteria + - Define rollback procedures + - Create risk mitigation strategies + +3. Testing Pipeline Setup (Week 3-4) + - Configure CI/CD infrastructure + * Set up test environments + * Implement automated testing + * Configure quality gates + * Enable security scanning + - Create test data sets + - Develop test scenarios + - Implement monitoring for test environments + +4. Documentation Enhancement (Week 4-5) + - Audit existing documentation + - Create documentation templates + - Update README files + - Generate architecture diagrams + - Create operational runbooks + - Document emergency procedures + +5. Module Consolidation (Week 5-6) + - Analyze current module structure + - Identify consolidation opportunities + - Create module dependency map + - Plan refactoring phases + - Document migration steps + - Create validation checklist + +6. Validation and Review (Week 6-7) + - Conduct peer reviews + - Perform security validation + - Test documentation accuracy + - Validate monitoring setup + - Review automation effectiveness + - Gather stakeholder feedback + +7. 
Training and Handover (Week 7-8) + - Prepare training materials + - Schedule training sessions + - Document operational procedures + - Create troubleshooting guides + - Set up support channels + - Plan knowledge transfer sessions
diff --git a/tests/terraform.tftest.hcl b/tests/terraform.tftest.hcl
new file mode 100644
index 00000000..7dfcc8e8
--- /dev/null
+++ b/tests/terraform.tftest.hcl
@@ -0,0 +1,56 @@
+# Plan-time checks for the EKS cluster, its node groups and its security group rules.
+# Every run block uses `command = plan`, so nothing is applied.
+
+# Inputs shared by all run blocks below.
+# NOTE(review): the VPC/subnet IDs look like placeholders, and EKS 1.24 appears to be
+# past AWS support - confirm the intended fixture values.
+variables {
+  cluster_name    = "test-cluster"
+  cluster_version = "1.24"
+  vpc_id          = "vpc-12345678"
+  subnet_ids      = ["subnet-1", "subnet-2"]
+  region          = "us-gov-east-1"
+  environment     = "test"
+}
+
+# The cluster must be planned with the requested name and with encryption configured.
+run "cluster_creation" {
+  command = plan
+
+  assert {
+    # aws_eks_cluster.main is dereferenced as a single object in the next assert, so
+    # length() on it would count attributes and always pass; compare an attribute instead.
+    # NOTE(review): assumes the module uses var.cluster_name unchanged as the name - confirm.
+    condition     = aws_eks_cluster.main.name == var.cluster_name
+    error_message = "EKS cluster was not created with the expected name"
+  }
+
+  assert {
+    # Count the encryption_config blocks instead of reading [0].provider[0].key_arn:
+    # indexing errors out when the block is absent, and key_arn may be unknown at plan.
+    condition     = length(aws_eks_cluster.main.encryption_config) > 0
+    error_message = "EKS cluster encryption is not configured"
+  }
+}
+
+# At least one node group must be planned.
+# NOTE(review): only meaningful if aws_eks_node_group.main uses count/for_each - confirm.
+run "node_groups" {
+  command = plan
+
+  assert {
+    condition     = length(aws_eks_node_group.main) > 0
+    error_message = "Node groups were not created"
+  }
+}
+
+# At least one cluster security group rule must be planned.
+# NOTE(review): only meaningful if aws_security_group_rule.cluster uses count/for_each - confirm.
+run "security_groups" {
+  command = plan
+
+  assert {
+    condition     = length(aws_security_group_rule.cluster) > 0
+    error_message = "Security group rules were not created"
+  }
+}