diff --git a/.checkov.yml b/.checkov.yml deleted file mode 100644 index cc000299..00000000 --- a/.checkov.yml +++ /dev/null @@ -1,24 +0,0 @@ -branch: master -download-external-modules: true -evaluate-variables: true -external-checks-dir: - - security/custom_checks -framework: - - terraform - - kubernetes -output: - - cli - - json - - junitxml -skip-check: - - CKV_AWS_79 # Instance Metadata Service Version 1 - - CKV_AWS_130 # Ensure VPC subnets are not assigned public IP by default -quiet: true -compact: true -directory: - - . - - modules/* -secrets-scan-file-type: - - tf - - yaml - - json diff --git a/.github/platform-tg-infra.code-workspace b/.github/platform-tg-infra.code-workspace index d243b5d4..05b26aa5 100644 --- a/.github/platform-tg-infra.code-workspace +++ b/.github/platform-tg-infra.code-workspace @@ -8,6 +8,22 @@ "name": "tfmod-cert-mgr", "path": "../../tfmod-cert-mgr" }, + { + "name": "tfmod-config-job", + "path": "../../tfmod-config-job" + }, + { + "name": "tfmod-cribl", + "path": "../../tfmod-cribl" + }, + { + "name": "tfmod-custom-iam-role-for-service-account-eks", + "path": "../../tfmod-custom-iam-role-for-service-account-eks" + }, + { + "name": "tfmod-downloader", + "path": "../../tfmod-downloader" + }, { "name": "tfmod-eks", "path": "../../tfmod-eks" @@ -21,8 +37,12 @@ "path": "../../tfmod-eks-dns" }, { - "name": "tfmod-gogatekeeper", - "path": "../../tfmod-gogatekeeper" + "name": "tfmod-esri-arcgis", + "path": "../../tfmod-esri-arcgis" + }, + { + "name": "tfmod-gatekeeper", + "path": "../../tfmod-gatekeeper" }, { "name": "tfmod-grafana", @@ -65,23 +85,23 @@ "path": "../../tfmod-open-telemetry" }, { - "name": "tfmod-prometheus", - "path": "../../tfmod-prometheus" + "name": "tfmod-pipeline", + "path": "../../tfmod-pipeline" }, { - "name": "tfmod-tempo", - "path": "../../tfmod-tempo" + "name": "tfmod-prometheus", + "path": "../../tfmod-prometheus" }, { - "name": "terraform-aws-eks", - "path": "../../terraform-aws-eks" + "name": "tfmod-postgresql", + 
"path": "../../tfmod-postgresql" }, { - "name": "terragrunt", - "path": "../../terragrunt" + "name": "tfmod-tempo", + "path": "../../tfmod-tempo" }, { - "path": "../../tfmod-config-job" + "path": "../../repo-setup" } ] } diff --git a/Makefile b/Makefile index fc196a2c..83b9dcca 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,8 @@ -.PHONY: help init validate plan fmt check clean +.PHONY: help config init validate plan fmt check clean deploy-to-pipeline tail parse +# Defaults that select the lab/ENV/REGION_DIR/vpc/CLUSTER_DIR config files; ?= lets the environment or command line override them +ENV ?= development +REGION_DIR ?= us-gov-east-1 +CLUSTER_DIR ?= csvd-platform-lab-mcm help: @echo "Available targets:" @@ -8,7 +12,62 @@ help: @echo " fmt - Format HCL files" @echo " check - Run all checks (format, validate, plan)" @echo " clean - Clean up Terragrunt cache and temporary files" + @echo " deploy-to-pipeline - Zip and upload to S3 to trigger CodePipeline" + @echo " tail - Tail the logs of the CodeBuild project" +# Shared configuration target: its recipe defines and exports make variables via eval, so only targets that list config as a prerequisite see them +config: + @echo "Loading configuration..." + +# Build the account, region and cluster HCL paths from ENV, REGION_DIR and CLUSTER_DIR, then fail if any file is missing + $(eval ACCOUNT_HCL=lab/$(ENV)/account.hcl) + $(eval REGION_HCL=lab/$(ENV)/$(REGION_DIR)/region.hcl) + $(eval CLUSTER_HCL=lab/$(ENV)/$(REGION_DIR)/vpc/$(CLUSTER_DIR)/cluster.hcl) + + @if [ ! -f "$(ACCOUNT_HCL)" ]; then echo "Error: $(ACCOUNT_HCL) not found"; exit 1; fi + @if [ ! -f "$(REGION_HCL)" ]; then echo "Error: $(REGION_HCL) not found"; exit 1; fi + @if [ ! 
-f "$(CLUSTER_HCL)" ]; then echo "Error: $(CLUSTER_HCL) not found"; exit 1; fi + +# Pull the quoted values out of the HCL files (grep -oP, i.e. Perl-regex matching); AWS_PROFILE is account id, a dash, then the account name with -ew replaced by -gov + $(eval AWS_ACCOUNT_ID=$(shell grep -oP 'aws_account_id\s*=\s*"\K[^"]+' $(ACCOUNT_HCL))) + $(eval ACCOUNT_NAME=$(shell grep -oP 'account_name\s*=\s*"\K[^"]+' $(ACCOUNT_HCL))) + $(eval AWS_PROFILE=$(shell echo $(AWS_ACCOUNT_ID)-$(shell echo $(ACCOUNT_NAME) | sed 's/-ew/-gov/'))) + $(eval AWS_REGION=$(shell grep -oP 'aws_region\s*=\s*"\K[^"]+' $(REGION_HCL))) + $(eval CLUSTER_NAME=$(shell grep -oP 'cluster_name\s*=\s*"\K[^"]+' $(CLUSTER_HCL))) + +# Derive bucket, object key and pipeline names; REGION_SHORT keeps the first letter of each dash-terminated word (us-gov-east-1 becomes uge1) + $(eval REGION_SHORT=$(shell echo $(AWS_REGION) | sed 's/\([a-z]\)[a-z]*-/\1/g')) + $(eval S3_BUCKET=v-s3-eks-$(CLUSTER_NAME)-artifacts-$(AWS_ACCOUNT_ID)-$(REGION_SHORT)) + $(eval OBJECT_KEY=clusters/$(CLUSTER_NAME)/platform-tg-infra.zip) + $(eval PIPELINE_NAME=eks-$(CLUSTER_NAME)-codepipeline) + $(eval PIPELINE_URL=https://console.amazonaws-us-gov.com/codesuite/codepipeline/pipelines/$(PIPELINE_NAME)/view?region=$(AWS_REGION)) + + @echo "Using configuration:" + @echo " AWS_ACCOUNT_ID: $(AWS_ACCOUNT_ID)" + @echo " ACCOUNT_NAME: $(ACCOUNT_NAME)" + @echo " AWS_PROFILE: $(AWS_PROFILE)" + @echo " AWS_REGION: $(AWS_REGION)" + @echo " CLUSTER_NAME: $(CLUSTER_NAME)" + @echo " S3_BUCKET: $(S3_BUCKET)" + + @if [ -z "$(AWS_ACCOUNT_ID)" ] || [ -z "$(AWS_PROFILE)" ] || [ -z "$(AWS_REGION)" ] || [ -z "$(CLUSTER_NAME)" ]; then \ + echo "Error: Failed to extract all required variables from HCL files"; \ + exit 1; \ + fi + +# Export the values to the environment of recipe shells and sub-makes + $(eval export AWS_ACCOUNT_ID) + $(eval export ACCOUNT_NAME) + $(eval export AWS_PROFILE) + $(eval export AWS_REGION) + $(eval export CLUSTER_NAME) + $(eval export REGION_SHORT) + $(eval export S3_BUCKET) + $(eval export OBJECT_KEY) + $(eval export PIPELINE_NAME) + $(eval export PIPELINE_URL) + +# Basic terragrunt operations init: @echo "Initializing Terragrunt configurations..." terragrunt run-all init @@ -33,3 +92,50 @@ clean: find .
-type d -name ".terragrunt-cache" -exec rm -rf {} + find . -type f -name ".terraform.lock.hcl" -delete find . -type f -name "terragrunt-debug.tfvars.json" -delete + +# deploy-to-pipeline: zip the repo plus the tfmod-pipeline buildspecs, abandon in-progress executions, upload the zip to S3, then tail the logs; needs config for names, region and profile +deploy-to-pipeline: config + @echo "Preparing to deploy to pipeline..." + + @echo "Copy buildspecs from tfmod-pipeline" + mkdir -p ./buildspecs + cp -r ../tfmod-pipeline/buildspecs/* ./buildspecs + + @echo "Creating zip file..." + rm -f platform-tg-infra.zip && zip -r platform-tg-infra.zip . -x "*.git*" "*.github*" "*.terragrunt-cache*" "*.terraform*" + + rm -rf ./buildspecs + + @echo "Stopping any active pipeline executions for $(PIPELINE_NAME)..." + $(eval PIPELINE_EXECUTIONS=$(shell aws codepipeline list-pipeline-executions --pipeline-name $(PIPELINE_NAME) --region $(AWS_REGION) --profile $(AWS_PROFILE) --query "pipelineExecutionSummaries[?status=='InProgress'].pipelineExecutionId" --output text)) + @if [ -n "$(PIPELINE_EXECUTIONS)" ]; then \ + echo "Found active pipeline executions: $(PIPELINE_EXECUTIONS)"; \ + for EXECUTION_ID in $(PIPELINE_EXECUTIONS); do \ + echo "Stopping execution $$EXECUTION_ID..."; \ + aws codepipeline stop-pipeline-execution --pipeline-name $(PIPELINE_NAME) --pipeline-execution-id $$EXECUTION_ID --region $(AWS_REGION) --profile $(AWS_PROFILE) --abandon || echo "Warning: Failed to stop execution $$EXECUTION_ID"; \ + done; \ + echo "Waiting for pipeline executions to stop (10 seconds)..."; \ + sleep 10; \ + else \ + echo "No active pipeline executions found."; \ + fi + + @echo "Uploading to S3 bucket $(S3_BUCKET)..." + aws s3 cp platform-tg-infra.zip s3://$(S3_BUCKET)/$(OBJECT_KEY) --region $(AWS_REGION) --profile $(AWS_PROFILE) --sse aws:kms + @echo "Upload complete. Pipeline should trigger automatically." + rm -f platform-tg-infra.zip + + @echo "Pipeline URL: $(PIPELINE_URL)" + @echo "You can access the pipeline directly at the URL above." + @echo "Logs will start once CodeBuild runs... this can take a few minutes..."
+ $(MAKE) tail + +# tail: follow the /aws/codebuild log group for CLUSTER_NAME, starting one minute back, with the region and profile from config +tail: config + @echo "Tailing Pipeline Logs for cluster $(CLUSTER_NAME):" + aws logs tail /aws/codebuild/$(CLUSTER_NAME) \ + --format short \ + --follow \ + --since 1m \ + --region $(AWS_REGION) \ + --profile $(AWS_PROFILE) diff --git a/README.md b/README.md index 454f6c3a..248d122a 100644 --- a/README.md +++ b/README.md @@ -1,55 +1,115 @@ -## How to setup and run terragrunt scripts for EKS related modules in a LAB account - -### Lab Account request and setup: - - Open a REMEDY ticket for creating an account in LAB environment, preferably with t3-admin role. - - LAB account url:https://pssvlab.tco.census.gov/PSS/ - - Make a note that the LAB account password is different from laptop password - - Once you have successful log proceed to next steps below - -### Access LAB jumphost (bromine): - Refer this page for additional help on sso credentials: https://github.e.it.census.gov/terraform/support/tree/master/docs/how-to/aws-sso - - 1. Goto LAB workspace:https://clients.amazonworkspaces.com/ - 2. Hit the web access login on the top right corner - 3. Enter the following registration code: FRosu+FMEXNZ and click Register - 4. Use your jbid and password (use lab password) - 5. Open reflection client and ssh connect to bromine.cto.census.gov - 6. On bromine, sso login to lab-gov as: - $ aws-sso-login.sh lab-gov - 7. On a browser goto auth-dev.census.gov - 8. Use PIV card option to login - 9. Copy the link from step 6 and paste it on the portal and authenticate - 10.
Go back to bromine and doubleclick at an empty space, you should get successful login lab env - -### Environment Setup: - - Set your profile as a default profile by exporting AWS_PROFILE variable - $ export AWS_PROFILE="224384469011-lab-dev-gov.inf-admin-t3" - $ aws sts get-caller-identity - - Run any aws commands to make sure you are getting responses from the account your profile is set to: - $ aws s3 ls - -### Terraform/Terragrunt binaries and versions: - The following binaries used: - - Terraform version: v1.7.5 - - Terragrunt version: v0.55.21 - These versions can be found on bromine.cto.census.gov host at: - /app/terraform/bin/terr* folder - -### Run the Terragrunt script: - - Cd to specifc folder (example: cd eks) and Run terragrunt plan - $ terragrunt plan - - Verify the plan output and make sure there are no errors - - Run terragrunt apply - $ terragrunt apply - - Verify apply completes successfully and verify the resources on AWS Console. - -### Accessing the cluster: - $ aws eks --region us-gov-east-1 update-kubeconfig --name platform-eng-eks-test - $ kubectl config use-context arn:aws-us-gov:eks:us-gov-east-1:224384469011:cluster/platform-eng-eks-test - $ kubectl config get-contexts - -### Run few kubectl commands to verify you are accessing the cluster - $ kubectl cluster-info - $ kubectl get pods -A - $ kubectl get ns +# Platform Infrastructure with Terragrunt +This repository helps manage cloud infrastructure using Terragrunt. It is designed to make managing infrastructure easier and more organized. Terragrunt is used to handle infrastructure as code for different environments. 
+ +## What's in This Repository + +- `/configs` - Files for setting up things like node groups and resource limits +- `/docs` - Guides and rules for how to set up and manage the infrastructure +- `/lab` - Settings for testing and development environments +- `/monitoring` - Tools for keeping an eye on the system +- `/tests` - Tests to make sure everything works as expected + +## Documentation + +You can find detailed guides in the `/docs` folder: + +- [Architecture](docs/ARCHITECTURE.md) - How the system is built +- [Documentation Standards](docs/DOCUMENTATION_STANDARDS.md) - How to write good documentation +- [Infrastructure Standards](docs/INFRASTRUCTURE_STANDARDS.md) - Rules for setting up infrastructure +- [Module Dependencies](docs/MODULE_DEPENDENCIES.md) - How different parts depend on each other +- [Module Standards](docs/MODULE_STANDARDS.md) - Rules for creating modules +- [Observability Standards](docs/OBSERVABILITY_STANDARDS.md) - How to monitor the system +- [Security Audit Checklist](docs/SECURITY_AUDIT_CHECKLIST.md) - Steps to check for security issues +- [Security Baseline](docs/SECURITY_BASELINE.md) - Basic security requirements +- [Testing Standards](docs/TESTING_STANDARDS.md) - Rules for testing +- [Version Control](docs/VERSION_CONTROL.md) - How to manage code versions + +## What You Need + +- Terraform v1.0.0 or newer +- Terragrunt v0.36.0 or newer +- AWS CLI set up with the right permissions +- Access to AWS resources + +## How to Get Started + +### Using the Makefile + +This repository has a Makefile with helpful commands: + +```bash +make help # See available commands +make init # Set up Terragrunt +make validate # Check if everything is set up correctly +make plan # Preview changes +make fmt # Format files +make check # Run all checks +make clean # Clean up temporary files +``` + +### Manual Terragrunt Commands + +Go to the folder with the Terragrunt configuration you want to use: + +```bash +cd lab/environment/component + +# Set up Terragrunt 
+terragrunt init + +# Preview changes +terragrunt plan + +# Apply changes +terragrunt apply + +# Remove resources +terragrunt destroy +``` + +### Running All Configurations + +Run commands for all Terragrunt configurations: + +```bash +# Set up everything +terragrunt run-all init + +# Preview all changes +terragrunt run-all plan + +# Apply all changes +terragrunt run-all apply +``` + +## Configuration + +Terragrunt configurations are organized like this: + +1. Main settings for each environment +2. Specific settings for different parts of the system +3. Overrides for special cases + +Check the environment folders for more details. + +## Testing + +The `/tests` folder has tools to check if everything works. To run tests: + +```bash +cd tests +./run_tests.sh +``` + +## How to Contribute + +1. Make a copy of this repository +2. Create a new branch for your changes +3. Make your updates +4. Run `make check` to ensure everything is correct +5. Submit a pull request + +## License + +Copyright © 2025 Your Organization. All rights reserved. 
diff --git a/configs/node-groups.yaml b/configs/node-groups.yaml deleted file mode 100644 index 11e09cad..00000000 --- a/configs/node-groups.yaml +++ /dev/null @@ -1,48 +0,0 @@ -nodeGroups: - - name: general-purpose - instanceTypes: - - m6i.xlarge - - m6a.xlarge - - m5.xlarge - minSize: 2 - maxSize: 10 - desiredSize: 2 - labels: - node-type: general - taints: [] - updateConfig: - maxUnavailable: 1 - - - name: compute-optimized - instanceTypes: - - c6i.2xlarge - - c6a.2xlarge - - c5.2xlarge - minSize: 1 - maxSize: 20 - desiredSize: 2 - labels: - node-type: compute - taints: - - key: workload - value: batch - effect: NoSchedule - updateConfig: - maxUnavailable: 2 - - - name: memory-optimized - instanceTypes: - - r6i.2xlarge - - r6a.2xlarge - - r5.2xlarge - minSize: 1 - maxSize: 10 - desiredSize: 2 - labels: - node-type: memory - taints: - - key: workload - value: memory-intensive - effect: NoSchedule - updateConfig: - maxUnavailable: 1 diff --git a/configs/resource-quotas.yml b/configs/resource-quotas.yml deleted file mode 100644 index 655595d0..00000000 --- a/configs/resource-quotas.yml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: v1 -kind: ResourceQuota -metadata: - name: default-quota -spec: - hard: - requests.cpu: "20" - requests.memory: 40Gi - limits.cpu: "40" - limits.memory: 80Gi - pods: "100" - services: "50" - secrets: "100" - configmaps: "100" - persistentvolumeclaims: "50" - ---- -apiVersion: v1 -kind: LimitRange -metadata: - name: default-limits -spec: - limits: - - type: Container - default: - cpu: 500m - memory: 512Mi - defaultRequest: - cpu: 100m - memory: 256Mi - max: - cpu: "4" - memory: 8Gi - min: - cpu: 50m - memory: 64Mi diff --git a/docs/Process.md b/docs/Process.md new file mode 100644 index 00000000..ebfdd154 --- /dev/null +++ b/docs/Process.md @@ -0,0 +1,334 @@ +# Terraform Module Execution Process + +This document outlines the step-by-step process flow of our Terraform modules, explaining what each module does and the resources it creates. 
+ +## 1. Pipeline Setup (tfmod-pipeline) + +**Purpose**: Creates the CI/CD infrastructure to build, plan, and apply the rest of the Terraform modules. + +**Resources Created**: +- AWS CodePipeline +- AWS CodeBuild projects +- IAM roles and policies for pipeline execution +- S3 buckets for artifacts +- CloudWatch event rules for pipeline triggers + +## 2. Core Infrastructure + +### 2.1 EKS Cluster (tfmod-eks) + +**Purpose**: Creates a managed Kubernetes cluster in AWS. + +**Resources Created**: +- EKS cluster +- EKS node groups +- VPC (if not using existing) +- Security groups +- IAM roles for EKS service and node groups + +### 2.2 EKS Configuration (tfmod-eks-configuration) + +**Purpose**: Configures the EKS cluster with essential settings. + +**Resources Created**: +- Kubernetes namespaces +- Service accounts +- RBAC configurations +- Add-on prerequisites + +### 2.3 Custom IAM Roles for Service Accounts (tfmod-custom-iam-role-for-service-account-eks) + +**Purpose**: Sets up IAM roles that can be assumed by Kubernetes service accounts via IRSA. + +**Resources Created**: +- IAM roles +- IAM policies +- Trust relationships + +### 2.4 EKS DNS Configuration (tfmod-eks-dns) + +**Purpose**: Configures DNS settings for the cluster. + +**Resources Created**: +- Route53 records +- DNS-related Kubernetes resources + +## 3. Cluster Monitoring & Observability + +### 3.1 Metrics Server (tfmod-metrics-server) + +**Purpose**: Deploys the Kubernetes Metrics Server for resource metrics. + +**Resources Created**: +- Metrics Server deployment +- Related service accounts and RBAC + +### 3.2 Prometheus (tfmod-prometheus) + +**Purpose**: Sets up Prometheus for metrics collection and alerting. + +**Resources Created**: +- Prometheus server +- Alert manager +- Service monitors +- Related Kubernetes resources + +### 3.3 Grafana (tfmod-grafana) + +**Purpose**: Deploys Grafana for metrics visualization. 
+ +**Resources Created**: +- Grafana deployment +- Dashboards +- Data sources configuration + +### 3.4 Loki (tfmod-loki) + +**Purpose**: Implements log aggregation for the cluster. + +**Resources Created**: +- Loki deployment +- Log aggregation components +- Storage configuration + +### 3.5 Tempo (tfmod-tempo) + +**Purpose**: Provides distributed tracing capabilities. + +**Resources Created**: +- Tempo deployment +- Tracing components +- Storage configuration + +### 3.6 Open Telemetry (tfmod-open-telemetry) + +**Purpose**: Implements the OpenTelemetry collector for observability data. + +**Resources Created**: +- OpenTelemetry collector +- Configuration for metrics, logs, and traces + +### 3.7 Cribl (tfmod-cribl) + +**Purpose**: Deploys Cribl for log processing and forwarding. + +**Resources Created**: +- Cribl deployment +- Processing rules +- Output destinations + +## 4. Service Mesh & API Management + +### 4.1 Istio (tfmod-istio) + +**Purpose**: Implements a service mesh for the cluster. + +**Resources Created**: +- Istio control plane +- Istio gateways +- CRDs and operators + +### 4.2 Istio Service Ingress (tfmod-istio-service-ingress) + +**Purpose**: Configures ingress resources using Istio. + +**Resources Created**: +- Virtual services +- Gateways +- Service entries + +### 4.3 Kiali (tfmod-kiali) + +**Purpose**: Deploys Kiali for visualizing the service mesh. + +**Resources Created**: +- Kiali deployment +- Service +- Dashboard configuration + +## 5. Security & Compliance + +### 5.1 Cert Manager (tfmod-cert-mgr) + +**Purpose**: Manages certificates within the Kubernetes cluster. + +**Resources Created**: +- Cert-manager deployment +- CRDs for certificate resources +- Issuers/ClusterIssuers + +### 5.2 Gatekeeper (tfmod-gatekeeper) + +**Purpose**: Implements policy enforcement and governance. + +**Resources Created**: +- OPA Gatekeeper deployment +- Constraint templates +- Constraints + +## 6. 
Database & Persistent Storage + +### 6.1 PostgreSQL (tfmod-postgresql) + +**Purpose**: Deploys PostgreSQL database instances. + +**Resources Created**: +- PostgreSQL deployment or AWS RDS instances +- Storage configuration +- Network policies + +## 7. Application-Specific Modules + +### 7.1 Config Jobs (tfmod-config-job) + +**Purpose**: Creates Kubernetes jobs for configuration tasks. + +**Resources Created**: +- Kubernetes jobs +- ConfigMaps +- Secrets + +### 7.2 Keycloak (tfmod-keycloak) + +**Purpose**: Deploys Keycloak for identity and access management. + +**Resources Created**: +- Keycloak deployment +- Persistent storage +- Ingress configuration + +### 7.3 Kubernetes Dashboard (tfmod-k8s-dashboard) + +**Purpose**: Provides a web UI for the Kubernetes cluster. + +**Resources Created**: +- Dashboard deployment +- Service account +- RBAC configuration + +### 7.4 ArcGIS (tfmod-esri-arcgis) + +**Purpose**: Deploys ArcGIS services on the cluster. + +**Resources Created**: +- ArcGIS deployments +- Services +- Storage configuration + +### 7.5 Karpenter (tfmod-karpenter) + +**Purpose**: Implements Karpenter for Kubernetes node provisioning. + +**Resources Created**: +- Karpenter controller +- Provisioner CRDs +- Node templates + +## Execution Flow + +The modules are typically executed in the order outlined above, with the pipeline module orchestrating the process: + +1. The pipeline is created first (manually or by another automation) + - **Consolidated Image Security Processing**: A single security stage processes all container images used across modules: + - Image inventory collection from all module configurations + - Batch processing of vulnerability scanning + - Central SBOM (Software Bill of Materials) repository + - Single signing authority for all images + - Creation of a security compliance registry +2. Core infrastructure is established +3. Monitoring and observability tools are deployed +4. Service mesh and security components are added +5. 
Database and application-specific modules are deployed + +Each module depends on resources created by previous modules, forming a dependency chain that ensures proper infrastructure creation. + +## Image Security Implementation + +Container image security is implemented as a consolidated phase within the tfmod-pipeline module: + +- **Image Inventory**: Extracts container image references from all module configurations before deployment begins +- **Centralized Processing**: Processes all unique images in parallel rather than per-module +- **Security Registry**: Creates a compliance database that tracks security status of each image +- **Verification API**: Provides a lightweight API for modules to verify image compliance at deployment time +- **Policy Enforcement**: Blocks deployment of any module referencing non-compliant images + +### Image Inventory Collection Implementation + +The image inventory collection is implemented as part of the existing security stage in the pipeline: + +1. **Security Stage Enhancement**: + - The current security stage is expanded to include image security processing + - This maintains the existing pipeline structure (source → build → security → approve → deploy) + - No additional pipeline stages are required + +2. **Security Stage Sub-steps**: + - **Infrastructure Security**: Original security checks for IAC (continues as is) + - **Image Inventory**: Runs `terragrunt plan -json` for all modules to extract image references + - **Image Security Processing**: Scans, generates SBOMs, and signs images + - All vulnerabilities are automatically reported to AWS Security Hub + - Findings include image details, CVE IDs, severity levels, and remediation guidance + - **Security Registry Update**: Records compliance status of all images + - Security Hub findings are linked to the compliance database + - Security Hub integration enables centralized vulnerability management + +3. 
**Implementation Process**: + - CodeBuild job in the security stage collects Terraform plans + - Parsing script extracts image references from plan outputs + - Each unique image undergoes security checks in parallel + - Results are stored in a central compliance database + - Security Hub receives all vulnerability findings with proper resource tagging + - Final step generates a compliance report showing pass/fail status for all images + +4. **Pre-approval Check**: + - Before the approval stage, a validation step confirms all images are compliant + - Non-compliant images trigger pipeline warnings or failures based on policy settings + - Compliance summary is included in the approval notification + - Links to Security Hub findings are provided in notifications + +## Image Security Tooling + +The following tools are recommended for each phase of the image security process: + +### 1. Image Inventory Collection +- **Terragrunt/Terraform**: Using `terragrunt plan -json` output +- **jq/Python**: For parsing plan outputs to extract image references +- **AWS CodeBuild**: Custom build step with extraction script + +### 2. Vulnerability Scanning +- **AWS ECR Enhanced Scanning**: Primary scanning engine for ECR images +- **Trivy**: Open-source scanner for comprehensive vulnerability detection +- **Amazon Inspector**: For deeper AWS-integrated scanning and compliance reporting + +### 3. SBOM Generation +- **AWS SBOM Generator**: Native AWS tool for ECR images +- **CycloneDX/SPDX**: Standard formats for storing SBOM data + +### 4. Image Signing +- **Cosign**: For signing container images with simple keys or KMS +- **AWS Signer**: For AWS-managed signing workflows + +### 5. 
Security Data Management +- **AWS Security Hub**: Primary repository for all vulnerability findings + - Serves as the source of truth for security findings + - Provides organization-wide visibility and reporting + - Enables centralized policy management and alerting + +- **Pipeline-specific Database** (DynamoDB): + - Lightweight lookup table for CI/CD processes only + - Maps images to modules for deployment decisions + - Stores pipeline-specific metadata not relevant to Security Hub + - Contains links to Security Hub findings rather than duplicating data + - Enables fast deployment-time checks without querying Security Hub APIs + +This separation ensures Security Hub remains the authoritative source for security data while the pipeline database only stores what's needed for efficient CI/CD operations. + +### 6. Security Hub Integration +- **AWS Security Hub Custom Findings**: For publishing vulnerability data +- **AWS EventBridge**: For automating notifications and remediation +- **AWS Lambda**: For findings enrichment and customized reporting + +The security stage configures these tools as needed and orchestrates their execution in the proper sequence to provide a comprehensive security posture for all container images. + +This approach leverages the existing pipeline structure while ensuring all container images are properly secured before deployment. + +Extracting image references from `terragrunt plan -json` output is more reliable than static code parsing, since the plan contains the exact resolved values that Terraform will use during deployment, including all variable substitutions and dynamic values.
diff --git a/docs/enterprise_ecr_v1.drawio 1.png b/docs/enterprise_ecr_v1.drawio 1.png new file mode 100644 index 00000000..460d3dfc Binary files /dev/null and b/docs/enterprise_ecr_v1.drawio 1.png differ diff --git a/docs/terragrunt.stack.hcl.off b/docs/terragrunt.stack.hcl.off new file mode 100644 index 00000000..69d52333 --- /dev/null +++ b/docs/terragrunt.stack.hcl.off @@ -0,0 +1,380 @@ +locals { + environment = "development" + region = "us-gov-east-1" + project_name = "csvd-platform-lab-mcm" + base_source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-" + +} + +# Define the EKS cluster unit +unit "eks" { + source = format("%v%v", local.base_source, "eks") + path = "eks" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = include.root.inputs.cluster_name + cluster_version = include.root.inputs.cluster_version + + # Additional Configuration + tags = include.root.inputs.tags + } +} + +unit "metrics" { + source = format("%v%v", local.base_source, "metrics-server") + path = "metrics-server" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + + # Metrics Server Configuration + metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart + metrics_server_tag = include.root.inputs.metrics_server_tag + namespace = include.root.inputs.namespaces["metrics-server"] + } +} + +unit "karpenter" { + source = format("%v%v", local.base_source, "karpenter") + path = "karpenter" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_endpoint 
= dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + vpc_id = dependency.eks.outputs.vpc_id + + # Karpenter Configuration + karpenter_tag = include.root.inputs.karpenter_tag + karpenter_helm_chart = include.root.inputs.karpenter_helm_chart + karpenter_node_group_name = dependency.eks.outputs.node_group_name + namespace = include.root.inputs.namespaces["karpenter"] + } +} + +# Add other components as needed (node groups, addons, etc.) +unit "config" { + source = format("%v%v", local.base_source, "eks-config") + path = "eks-config" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + subnets = dependency.eks.outputs.subnets + vpc_id = dependency.eks.outputs.vpc_id + operators_ns = include.root.inputs.operator_namespace + telemetry_ns = include.root.inputs.telemetry_namespace + } +} + +# Add other components as needed (node groups, addons, etc.) 
+unit "cert_manager" { + source = format("%v%v", local.base_source, "cert-manager") + path = "cert-manager" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = include.root.inputs.cluster_mailing_list + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Cert Manager Configuration + cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart + cluster_issuer_name = include.root.inputs.cluster_issuer_name + namespace = include.root.inputs.namespaces["cert-manager"] + + # Version Tags + cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag + cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag + cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag + cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag + } +} + +# Add other components as needed (node groups, addons, etc.) +unit "loki" { + source = format("%v%v", local.base_source, "loki") + path = "loki" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Loki Configuration + loki_chart_version = include.root.inputs.loki_chart_version + loki_tag = include.root.inputs.loki_tag + namespace = include.root.inputs.namespaces["loki"] + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class + } +} + +# Add modules for monitoring, logging, etc. 
+unit "prometheus" { + source = format("%v%v", local.base_source, "prometheus") + path = "prometheus" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration + prometheus_chart_version = include.root.inputs.prometheus_chart_version + prometheus_server_tag = include.root.inputs.prometheus_server_tag + prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag + alertmanager_tag = include.root.inputs.alertmanager_tag + kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag + namespace = include.root.inputs.namespaces["prometheus"] + node_exporter_tag = include.root.inputs.node_exporter_tag + pushgateway_tag = include.root.inputs.pushgateway_tag + rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class + } +} + +# Add other components as needed (node groups, addons, etc.) +unit "istio" { + source = format("%v%v", local.base_source, "istio") + path = "istio" + + values = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Istio Configuration + namespace = include.root.inputs.namespaces["istio"] + istio_version = include.root.inputs.istio_version + } +} + +# Add other components as needed (node groups, addons, etc.) 
+unit "tempo" {
+  source = format("%v%v", local.base_source, "tempo") # local.base_source with the module folder name appended
+  path = "tempo"
+
+  values = {
+    # AWS Configuration
+    account_id = include.root.inputs.aws_account_id
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Cluster Configuration
+    cluster_name = dependency.eks.outputs.cluster_name
+    oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn
+
+    # Prometheus Configuration. NOTE(review): the grafana and kiali units in this file reference this dependency as eks_prometheus (underscore) - confirm which label is actually declared.
+    prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace
+    prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number
+
+    # Tempo Configuration
+    tempo_chart_version = include.root.inputs.tempo_chart_version
+    tempo_tag = include.root.inputs.tempo_tag
+    namespace = include.root.inputs.namespaces["tempo"]
+  }
+}
+
+# DNS unit: wires the Istio ingress load balancer and the VPC/Route 53 settings from the root inputs.
+unit "dns" {
+  source = format("%v%v", local.base_source, "dns")
+  path = "dns"
+
+  values = {
+    # AWS Configuration
+    account_id = include.root.inputs.aws_account_id
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Cluster Configuration (taken from the root inputs here, not from the eks dependency outputs as in the other units)
+    cluster_name = include.root.inputs.cluster_name
+
+    # Network Configuration. NOTE(review): hyphenated dependency label (eks-istio); other units in this file use underscore labels - confirm.
+    istio_ingress_lb = dependency.eks-istio.outputs.istio_ingress_lb
+    route53_endpoints = include.root.inputs.route53_endpoints
+    vpc_domain_name = include.root.inputs.vpc_domain_name
+    vpc_name = include.root.inputs.vpc_name
+
+    # Additional Configuration
+    tags = include.root.inputs.tags
+  }
+}
+
+# OpenTelemetry unit: receives the Loki gateway and Tempo OTLP endpoint URLs from their dependency outputs.
+unit "open_telemetry" {
+  source = format("%v%v", local.base_source, "open-telemetry") # local.base_source with the module folder name appended
+  path = "otel"
+
+  values = {
+    profile = include.root.inputs.aws_profile
+    cluster_name = dependency.eks.outputs.cluster_name
+    region = include.root.inputs.aws_region
+    namespace = include.root.inputs.namespaces["otel"]
+    loki_endpoint = dependency.eks-loki.outputs.gateway_internal_endpoint.url # NOTE(review): the grafana unit below uses eks_loki / eks_tempo (underscore) - confirm which label is declared
+    tempo_endpoint = dependency.eks-tempo.outputs.tempo_otlp_endpoint.url
+  }
+}
+
+unit "grafana" {
+  source = format("%v%v", local.base_source, "grafana")
+  path = "grafana"
+
+  values = {
+    # AWS Configuration
+    account_id = include.root.inputs.aws_account_id
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Cluster Configuration
+    cluster_name = dependency.eks.outputs.cluster_name
+    cluster_domain = dependency.eks_dns.outputs.cluster_domain
+    oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn
+
+    # Storage Configuration. NOTE(review): the loki, prometheus and keycloak units read this from eks_config - confirm eks_loki really exposes rwo_storage_class.
+    rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class
+
+    # Grafana Configuration
+    grafana_operator_chart_version = include.root.inputs.grafana_operator_chart_version
+    grafana_operator_tag = include.root.inputs.grafana_operator_tag
+    grafana_tag = include.root.inputs.grafana_tag
+    namespace = include.root.inputs.namespaces["grafana"]
+    os_shell_image_tag = include.root.inputs.os_shell_image_tag
+    service_name = "grafana"
+    loki_endpoint = dependency.eks_loki.outputs.gateway_internal_endpoint.url
+    prometheus_endpoint = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url
+    tempo_endpoint = dependency.eks_tempo.outputs.tempo_internal_endpoint.url
+  }
+}
+
+unit "dashboard" {
+  source = format("%v%v", local.base_source, "k8s-dashboard")
+  path = "k8s-dashboard"
+
+  values = {
+    # AWS Configuration
+    account_id = include.root.inputs.aws_account_id
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Cluster Configuration
+    cluster_domain = dependency.eks_dns.outputs.cluster_domain
+    cluster_name = dependency.eks.outputs.cluster_name
+
+    # Dashboard Configuration
+    service_name = include.root.inputs.dashboard_hostname
+    k8s_dashboard_version = include.root.inputs.k8s_dashboard_version
+    namespace = include.root.inputs.namespaces["k8s-dashboard"]
+  }
+}
+
+unit "keycloak" {
+  source = format("%v%v", local.base_source, "keycloak")
+  path = "keycloak"
+
+  values = {
+    cluster_domain = dependency.eks_dns.outputs.cluster_domain
+    cluster_name = dependency.eks.outputs.cluster_name
+    namespace = include.root.inputs.namespaces["keycloak"]
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Keycloak config. NOTE(review): keycloak_hostname, keycloak_database, keycloak_username and keycloak_password are deleted from lab/_envcommon/default-versions.hcl in this same change - confirm the root inputs still provide them.
+    default_storage_class = dependency.eks_config.outputs.rwo_storage_class
+    keycloak_chart_version = include.root.inputs.keycloak_chart_version
+    keycloak_hostname = include.root.inputs.keycloak_hostname
+    keycloak_tag = include.root.inputs.keycloak_tag
+    realm_email = include.root.inputs.cluster_mailing_list
+    realm_name = "master"
+    realm_password = include.root.inputs.keycloak_password
+    realm_username = include.root.inputs.keycloak_username
+    service_name = "keycloak"
+    telemetry_namespace = include.root.inputs.telemetry_namespace
+
+    # Database configuration (reuses the same username/password inputs as the realm admin above)
+    keycloak_database = include.root.inputs.keycloak_database
+    keycloak_user = include.root.inputs.keycloak_username
+    keycloak_password = include.root.inputs.keycloak_password
+  }
+}
+
+unit "kiali" {
+  source = format("%v%v", local.base_source, "kiali")
+  path = "kiali"
+
+  values = {
+    # AWS Configuration
+    account_id = include.root.inputs.aws_account_id
+    profile = include.root.inputs.aws_profile
+    region = include.root.inputs.aws_region
+
+    # Cluster Configuration
+    cluster_domain = dependency.eks_dns.outputs.cluster_domain
+    cluster_name = dependency.eks.outputs.cluster_name
+    certificate_issuer = dependency.eks_cert_manager.outputs.cluster_issuer_name
+
+    # Kiali Configuration
+    service_name = "kiali"
+    namespace = include.root.inputs.namespaces["kiali"]
+    istio_namespace = include.root.inputs.namespaces["istio"]
+    kiali_application_version = include.root.inputs.kiali_application_version
+    kiali_operator_version = include.root.inputs.kiali_operator_version
+
+    # Grafana Integration (each key is defined exactly once; all come from the eks_grafana outputs)
+    grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url
+    grafana_namespace = dependency.eks_grafana.outputs.namespace
+    grafana_public_url = dependency.eks_grafana.outputs.public_endpoint
+    grafana_secret_name = dependency.eks_grafana.outputs.secret_name
+    tempo_datasource_id = dependency.eks_grafana.outputs.tempo_datasource_id
+
+    # Prometheus Integration
+    prometheus_internal_url = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url
+
+    # Tempo Integration
+    tempo_internal_url = dependency.eks_tempo.outputs.tempo_internal_endpoint.url
+  }
+}
diff --git a/input_vars.hcl b/input_vars.hcl
new file mode 100644
index 00000000..dfeec7ad
--- /dev/null
+++ b/input_vars.hcl
@@ -0,0 +1,26 @@
+locals {
+  account_name = "lab-dev-ew"
+  aws_account_id = "224384469011"
+  aws_region = "us-gov-east-1"
+  cluster_mailing_list = "matthew.c.morgan@census.gov"
+  cluster_name = "csvd-platform-lab-mcm"
+  eks_instance_disk_size = 100
+  eks_ng_desired_size = 2
+  eks_ng_max_size = 10
+  eks_ng_min_size = 2
+  environment = "development"
+  environment_abbr = "dev"
+  organization = "census:ocio:csvd"
+  finops_project_name = "csvd_platformbaseline"
+  finops_project_number = "fs0000000078"
+  finops_project_role = "csvd_platformbaseline_app"
+  vpc_domain_name = "dev.lab.csp2.census.gov"
+  vpc_name = "vpc3-lab-dev"
+  tags = {
+    "slim:schedule" = "8:00-17:00"
+  }
+  module_enablement_overrides = {
+    "eks-arcgis" = false
+    "eks-postgresql" = false
+  }
+}
diff --git
a/lab/_envcommon/common-variables.hcl b/lab/_envcommon/common-variables.hcl
index 38cb4c92..89c502a7 100644
--- a/lab/_envcommon/common-variables.hcl
+++ b/lab/_envcommon/common-variables.hcl
@@ -6,10 +6,6 @@
 # that are common across all environments/accounts.
 # ---------------------------------------------------------------------------------------------------------------------
 locals {
-  organization = "census:ocio:csvd"
-  project_name = "csvd_platformbaseline"
-  project_number = "fs0000000078"
-  project_role = "csvd_platformbaseline_app"
   state_bucket_prefix = "inf-tfstate"
   state_table_name = "tf_remote_state"
   route53_endpoints = {
@@ -20,4 +16,24 @@ locals {
       "us-gov-west-1" = "vpc-08b7b4db6a5ddf9c1"
     }
   }
+  enterprise_ecr_account = { # registry account details, keyed by tier (lab / prod)
+    lab = {
+      "account_id" = "269222635945"
+      "alias" = "lab-gov-shared-nonprod"
+      "profile" = "269222635945-lab-gov-shared-nonprod"
+      "region" = "us-gov-east-1"
+    }
+    prod = {
+      "account_id" = "067074201825"
+      "alias" = "ent-gov-shared-prod"
+      "profile" = "067074201825-ent-gov-shared-prod"
+      "region" = "us-gov-east-1"
+    }
+  }
+  eecr_info = { # the entry handed to modules; every field is read from the "lab" entry above
+    account_id = local.enterprise_ecr_account.lab["account_id"]
+    alias = local.enterprise_ecr_account.lab["alias"]
+    profile = local.enterprise_ecr_account.lab["profile"]
+    region = local.enterprise_ecr_account.lab["region"]
+  }
 }
diff --git a/lab/_envcommon/default-versions.hcl b/lab/_envcommon/default-versions.hcl
index 478dc672..f2f9e2ae 100644
--- a/lab/_envcommon/default-versions.hcl
+++ b/lab/_envcommon/default-versions.hcl
@@ -1,14 +1,80 @@
 # lab/_envcommon/default-versions.hcl
 locals {
+  module_name = basename(get_original_terragrunt_dir()) # last path element of the directory of the terragrunt config being run; presumably one of the module_versions keys below, e.g. "eks-istio"
+  release_version = local.module_versions["2025.20.04"][local.module_name] # per-module ref looked up in the pinned release set below
+
   #####################
   # Module Versions
   #####################
-  cluster_version = "1.31"
-  custom_service_eks_account = "${local.release_version}"
-  eks_module_version = "20.33.1"
-  istio_ingress_version = "${local.release_version}"
-  release_version = "main" # "main" # change to main when testing updated modules
+  cluster_version = "1.32"
+  custom_service_eks_account = "1.0.0"
+  eks_module_version = "20.36.0"
+  istio_ingress_version = "0.1.3"
+
+  module_versions = { # release set name => { module directory name => version }
+    "2025.20.04" = {
+      "eks-arcgis" = false # NOTE(review): boolean rather than a version string - presumably "nothing pinned"; confirm it is never interpolated into a ?ref= source
+      "eks-cert-manager" = "0.1.9"
+      "eks-config" = "1.0.5"
+      "eks-cribl" = "initial"
+      "eks-dns" = "0.1.3"
+      "eks-gatekeeper" = "0.0.3"
+      "eks-grafana" = "0.1.5"
+      "eks-istio" = "1.0.9"
+      "tfmod-istio-service-ingress" = "0.1.6" # NOTE(review): also listed in submodule_versions below with the same value
+      "eks-k8s-dashboard" = "0.1.4"
+      "eks-karpenter" = "0.1.6"
+      "eks-keycloak" = "0.0.8"
+      "eks-kiali" = "0.1.4"
+      "eks-loki" = "0.1.4"
+      "eks-metrics-server" = "0.1.4"
+      "eks-otel" = "0.0.4"
+      "eks-pipeline" = "initial"
+      "eks-postgresql" = false # NOTE(review): no version pinned, yet enabled_modules below defaults this module to true - confirm
+      "eks-prometheus" = "0.1.4"
+      "eks-tempo" = "0.1.4"
+      "eks" = "1.0.9"
+    }
+  }
+
+  submodule_versions = {
+    "tfmod-istio-service-ingress" = "0.1.6"
+    "tfmod-config-job" = "0.1.8"
+
+  }
+
+  #####################
+  # Module Enablement
+  #####################
+
+  # Core modules that should always be enabled (cannot be disabled)
+  core_modules = [
+    "eks",
+    "eks-metrics-server",
+    "eks-karpenter",
+    "eks-config",
+    "eks-cert-manager",
+    "eks-istio",
+    "eks-dns",
+  ]
+
+  # Optional modules with their default enablement state
+  enabled_modules = {
+    "eks-arcgis" = false
+    "eks-cribl" = false
+    "eks-gatekeeper" = true
+    "eks-grafana" = true
+    "eks-k8s-dashboard" = true
+    "eks-keycloak" = true
+    "eks-kiali" = true
+    "eks-loki" = true
+    "eks-otel" = true
+    "eks-pipeline" = false
+    "eks-postgresql" = true # NOTE(review): module_versions above maps this module to false - confirm the intended default
+    "eks-prometheus" = true
+    "eks-tempo" = true
+  }
   #####################
   # TF Providers
@@ -24,21 +90,23 @@ locals {
   #####################
   # Namespaces Config
   #####################
-  operator_namespace = "aoperator"
-  telemetry_namespace = "atelemetry"
+  operator_namespace = "operator"
+  telemetry_namespace = "telemetry"
   namespaces = {
+    arcgis = "arcgis"
     cert-manager = "kube-system"
+    cribl = "cribl"
+    gatekeeper = "keycloak" # gatekeeper shares the keycloak namespace
+    grafana = local.telemetry_namespace
+    istio = "istio-system"
+    k8s-dashboard = local.telemetry_namespace
     karpenter = "karpenter"
-    metrics-server = "kube-system"
-    postgresql = "kube-system"
     keycloak = "keycloak"
-    gogatekeeper = "kube-system"
-    istio = "istio-system"
     kiali = "istio-system"
-    grafana = local.telemetry_namespace
-    k8s-dashboard = local.telemetry_namespace
     loki = local.telemetry_namespace
+    metrics-server = "kube-system"
     otel = local.telemetry_namespace
+    postgresql = "keycloak" # postgresql moves from kube-system into the keycloak namespace
     prometheus = local.telemetry_namespace
     tempo = local.telemetry_namespace
   }
@@ -58,11 +126,19 @@ locals {
   cert_manager_version = "1.17.1"
   cert_manager_webhook_tag = "v${local.cert_manager_version}"
+  #####################
+  # Cribl
+  #####################
+  cribl_chart_version = "4.11.1"
+  cribl_app_version = "4.11.1"
+
+
   ################
   # GoGatekeeper
   ################
-  gogatekeeper_tag = "3.2.1"
-  gogatekeeper_chart_version = "0.1.53"
+  gatekeeper_tag = "3.3.0"
+  gatekeeper_chart_version = "0.1.54"
+  gatekeeper_service_name = "gatekeeper"
   ################
   # Grafana
@@ -71,7 +147,7 @@ locals {
   grafana_operator_chart_version = "4.9.8"
   grafana_operator_tag = "5.16.0"
   grafana_tag = "11.5.2"
-  os_shell_image_tag = "12"
+  os_shell_image_tag = local.utilities_tag # utilities_tag is defined in the Keycloak section of this file
   ################
   # Istio
@@ -83,25 +159,28 @@ locals {
   # k8s-dashboard
   ################
   dashboard_hostname = "dashboard"
-  k8s_dashboard_metrics_scraper = "1.0.8"
-  k8s_dashboard_version = "6.0.6"
+  k8s_dashboard_version = "v2.7.0"
+  k8s_dashboard_metrics_scraper = "v1.0.9"
+  # dashboard_api_tag = "1.11.1"
+  # dashboard_auth_tag = "1.2.4"
+  # dashboard_metrics_tag = "1.2.2"
+  # dashboard_web_tag = "1.6.2"
+  # dashboard_kong_tag = "3.8"
   ################
   # Karpenter
   ################
-  karpenter_helm_chart = "1.3.1"
-  karpenter_tag = "1.3.1"
+  karpenter_helm_chart = "1.4.0"
+  karpenter_tag = "1.4.0"
   ################
   # Keycloak
   ################
-  keycloak_chart_version = "24.4.11"
-  keycloak_tag = "26.1.3"
-  keycloak_hostname = "keycloak"
-  keycloak_database = "keycloak"
-  keycloak_username = "keycloak"
-  keycloak_password = "this is my very secure and totally random password horse battery staple now"
-  postgresql_tag = "17.4.0-debian-12-r2"
+  keycloak_chart_version = "7.0.1"
+  keycloak_tag = "26.0.7"
+  postgresql_tag = "17.4.0-debian-12-r4"
+  postgres_exporter_tag = "0.17.1-debian-12-r0"
+  utilities_tag = "1.0.3"
   ################
   # Kiali
@@ -114,8 +193,8 @@ locals {
   ################
   loki_chart_version = "6.27.0"
   loki_tag = "3.4.2"
-  enterprise_logs_provisioner_tag = "v1.7.0"
-  gateway_tag = "1.27-alpine"
+  enterprise_logs_provisioner_tag = "3.4.2"
+  gateway_tag = "1.26.3"
   memcached_tag = "1.6.37"
   exporter_tag = "v0.15.0"
   sidecar_tag = "1.27.4"
@@ -124,7 +203,27 @@ locals {
   # Metrics Server
   ################
   metrics_server_helm_chart = "3.12.2"
-  metrics_server_tag = "0.7.2"
+  metrics_server_tag = "v0.7.2"
+
+  ################
+  # Open Telemetry
+  ################
+  auto_instrumentation_java_version = "2.9.0"
+  collector_contrib_version = "0.113.0-amd64"
+  collector_version = "0.111.0-amd64"
+  otel_helm_version = "0.71.2"
+  otel_version = "0.110.0"
+  rbac_proxy_version = "v0.19.0"
+
+  ################
+  # PostgreSQL
+  ################
+
+  # os_shell_tag = local.utilities_tag
+  # # postgres_exporter_tag = local.postgres_exporter_tag
+  # postgresql_repmgr_tag = "17.4.0-alpine"
+  # pgpool_tag = "4.5.5"
+  postgresql_chart_version = "16.5.0"
   ################
   # Prometheus
@@ -141,5 +240,5 @@ locals {
   # Tempo
   ################
   tempo_chart_version = "1.18.2"
-  tempo_tag = "2.7.1"
+  tempo_tag = "2.7.0"
 }
diff --git a/lab/_envcommon/prefixes.hcl b/lab/_envcommon/prefixes.hcl
new file mode 100644
index 00000000..d46f6bb6
--- /dev/null
+++ b/lab/_envcommon/prefixes.hcl
@@ -0,0 +1,37 @@
+locals {
+  prefixes = { # resource kind => name prefix; an empty string means no prefix
+    "ebs" = "v-ebs-"
+    "efs" = "v-efs-"
+    "group" = "g-"
+    "kms" = "k-kms-"
+    "policy" = "p-"
+    "role" = "r-"
+    "s3" = "v-s3-"
+    "security-group" = "" # "sg-"
+    # VPC
+    "customer-gateway" = "cgw-"
+    "dhcp-options" = ""
+    "elastic-ip" = "eip-"
+    "internet-gateway" = "igw-"
+    "log-group" = "lg-"
+    "log-stream" = "lgs-"
+    "nat-gateway" = "nat-"
+    "network-acl" = "nacl-"
+    "route-table" = "route-"
+    "subnet" = ""
+    "vpc-endpoint" = "vpce-"
+    "vpc-peer" = "vpcp-"
+    "vpc" = ""
+    "vpn-connection" = "vpn_" # NOTE(review): the only prefix ending in "_" instead of "-" - confirm this is intended
+    "vpn-gateway" = "vpcg-"
+    # EKS
+    "eks-policy" = "p-eks-"
+    "eks-queue" = "eks-q-"
+    "eks-role" = "r-eks-"
+    "eks-s3" = "v-s3-eks-"
+    "eks-security-group" = "eks-sg-" # "sg-eks-"
+    "eks-user" = "s-eks-"
+    "eks" = "eks-"
+    "eks-event" = "eks-ev-"
+  }
+}
diff --git a/lab/development/account.hcl b/lab/development/account.hcl
index 80a8b3a0..a78efbf6 100644
--- a/lab/development/account.hcl
+++ b/lab/development/account.hcl
@@ -7,7 +7,7 @@ locals {
   account_name = "lab-dev-ew"
   aws_account_id = "224384469011"
-  aws_profile = "224384469011-lab-dev-gov"
+  aws_profile = format("%v-%v", local.aws_account_id, replace(local.account_name, "-ew", "-gov")) # evaluates to "224384469011-lab-dev-gov", the same value as the literal it replaces
   environment = "development"
   environment_abbr = "dev"
 }
diff --git a/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/cluster.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/cluster.hcl
new file mode 100644
index 00000000..0f1f989c
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/cluster.hcl
@@ -0,0 +1,22 @@
+locals {
+  # Cluster-specific configuration. NOTE(review): most of these values are also defined in the new top-level input_vars.hcl - confirm which file is authoritative.
+  cluster_name = "csvd-platform-lab-mcm"
+  cluster_mailing_list = "matthew.c.morgan@census.gov"
+  eks_instance_disk_size = 100
+  eks_ng_desired_size = 2
+  eks_ng_max_size = 10
+  eks_ng_min_size = 2
+  organization = "census:ocio:csvd"
+  finops_project_name = "csvd_platformbaseline"
+  finops_project_number = "fs0000000078"
+  finops_project_role = "csvd_platformbaseline_app"
+
+  tags = {
+    "slim:schedule" = "8:00-17:00"
+    "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" # with the sizes above: "min:2-max:10-desired:2"
+  }
+  module_enablement_overrides = { # per-cluster overrides; keys match those of enabled_modules in lab/_envcommon/default-versions.hcl
+    "eks-arcgis" = false
+    "eks-postgresql" = false
+  }
+}
diff --git
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-arcgis/terragrunt.hcl
similarity index 55%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl
rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-arcgis/terragrunt.hcl
index 4429d04a..38cf455e 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-postgresql/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-arcgis/terragrunt.hcl
@@ -4,29 +4,30 @@ include "root" {
   expose = true
 }
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
+}
+
 terraform {
-  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-postgresql.git?ref=main"
+  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-esri-arcgis.git?ref=${include.root.inputs.release_version}" # repository name spelled "esri", matching the tfmod-esri-arcgis workspace entry
   extra_arguments "retry_lock" {
     commands = get_terraform_commands_that_need_locking()
     arguments = ["-lock-timeout=20s"]
   }
 }
-dependencies {
-  paths = [
-    "../eks",
-    "../eks-config",
-    "../eks-dns",
-    "../eks-prometheus",
-  ]
-}
-
 dependency "eks" {
   config_path = "../eks"
   mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
   mock_outputs = {
-    cluster_name = include.root.inputs.cluster_name
-    oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock"
+    cluster_name = "mock-cluster"
   }
 }
@@ -42,35 +43,44 @@ dependency "eks_dns" {
   config_path = "../eks-dns"
   mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
   mock_outputs = {
-    cluster_domain = "mock.example.com"
-    oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock"
+    cluster_domain = "mock.domain.example.com"
   }
 }
+dependencies {
+  paths = [
+    "../eks",
+    "../eks-config",
+    "../eks-dns",
+    "../eks-kiali",
+  ]
+}
+
 inputs = {
   # AWS Configuration
   account_id = include.root.inputs.aws_account_id
   profile = include.root.inputs.aws_profile
   region = include.root.inputs.aws_region
+  eecr_info = include.root.inputs.eecr_info
   # Cluster Configuration
   cluster_domain = dependency.eks_dns.outputs.cluster_domain
   cluster_name = dependency.eks.outputs.cluster_name
+  namespace = include.root.inputs.namespaces["arcgis"] # read from the shared namespaces map (arcgis = "arcgis") like the sibling modules
   rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class
-  # PostgreSQL Configuration
-  namespace = include.root.inputs.namespaces["postgresql"]
-  os_shell_tag = include.root.inputs.os_shell_tag
-  pgpool_tag = include.root.inputs.pgpool_tag
-  postgres_exporter_tag = include.root.inputs.postgres_exporter_tag
-  postgresql_repmgr_tag = include.root.inputs.postgresql_repmgr_tag
-  postgresql_tag = include.root.inputs.postgresql_tag
-  service_name = "postgresql"
-  telemetry_namespace = include.root.inputs.telemetry_namespace
-
-  # Database Consumer Configuration
-  postgresql_database = include.root.inputs.postgresql_database
-  postgresql_username = include.root.inputs.postgresql_username
-  postgresql_password = include.root.inputs.postgresql_password
+  # Dockerhub Creds (left empty here)
+  dockerhub_username = ""
+  dockerhub_password = ""
+  # ArcGIS Config
+  ersi_image_tag = "11.4.0.6285" # NOTE(review): "ersi" looks like a typo for "esri"; left unchanged because it must match the module's variable name
+  arcgis_license_json = ""
+  arcgis_admin_username = "admin"
+  arcgis_admin_password = "password" # NOTE(review): placeholder credential committed in plain text - supply it from a secret source before enabling this module
+  arcgis_admin_email = include.root.inputs.cluster_mailing_list
+  arcgis_admin_firstname = "admin"
+  arcgis_admin_lastname = "admin"
+  arcgis_security_question_index = 1
+  arcgis_security_question_answer = "Las Vegas" # NOTE(review): security answer committed in plain text - same concern as the password above
 }
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cert-manager/terragrunt.hcl
similarity index 86%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl
rename to
lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cert-manager/terragrunt.hcl
index 5e03cd4a..569a3554 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cert-manager/terragrunt.hcl
@@ -4,6 +4,17 @@ include "root" {
   expose = true
 }
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
+}
+
 terraform {
   source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}"
@@ -39,6 +50,7 @@ inputs = {
   account_id = include.root.inputs.aws_account_id
   profile = include.root.inputs.aws_profile
   region = include.root.inputs.aws_region
+  eecr_info = include.root.inputs.eecr_info
   # Cluster Configuration
   cluster_name = dependency.eks.outputs.cluster_name
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-config/terragrunt.hcl
similarity index 89%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl
rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-config/terragrunt.hcl
index 4a6a659f..49e0ea2f 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-config/terragrunt.hcl
@@ -4,12 +4,15 @@ include "root" {
   expose = true
 }
-dependencies {
-  paths = [
-    "../eks",
-    "../eks-karpenter",
-    "../eks-metrics-server",
-  ]
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
 }
 terraform {
@@ -37,6 +40,14 @@ dependency "eks" {
   }
 }
+dependencies {
+  paths = [
+    "../eks",
+    "../eks-karpenter",
+    "../eks-metrics-server",
+  ]
+}
+
 inputs = {
   # AWS Configuration
   account_id = include.root.inputs.aws_account_id
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cribl/terragrunt.hcl
similarity index 62%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl
rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cribl/terragrunt.hcl
index c1328ee7..d18b1808 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-config/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-cribl/terragrunt.hcl
@@ -4,15 +4,19 @@ include "root" {
   expose = true
 }
-dependencies {
-  paths = [
-    "../eks",
-    "../eks-karpenter"
-  ]
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
 }
 terraform {
-  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}"
+  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cribl.git?ref=${include.root.inputs.release_version}"
   extra_arguments "retry_lock" {
     commands = get_terraform_commands_that_need_locking()
@@ -36,6 +40,31 @@ dependency "eks" {
   }
 }
+dependency "eks_config" {
+  config_path = "../eks-config"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    rwo_storage_class = "gp3-mock"
+  }
+}
+
+dependency "eks_dns" {
+  config_path = "../eks-dns"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    cluster_domain = "mock.example.com"
+  }
+}
+
+dependencies {
+  paths = [
+    "../eks",
+    "../eks-config",
+    "../eks-dns",
+    "../eks-gatekeeper",
+  ]
+}
+
 inputs = {
   # AWS Configuration
   account_id = include.root.inputs.aws_account_id
@@ -43,12 +72,19 @@ inputs = {
   region = include.root.inputs.aws_region
   # Core Cluster Configuration
+  cluster_domain = dependency.eks_dns.outputs.cluster_domain
   cluster_name = dependency.eks.outputs.cluster_name
   eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names
   oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn
+  operators_ns = include.root.inputs.operator_namespace
+  rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class
   security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id
   subnets = dependency.eks.outputs.subnets
-  vpc_id = dependency.eks.outputs.vpc_id
-  operators_ns = include.root.inputs.operator_namespace
   telemetry_ns = include.root.inputs.telemetry_namespace
+  vpc_id = dependency.eks.outputs.vpc_id
+
+  # Cribl configuration (the image tag is taken from cribl_app_version in the root inputs)
+  cribl_tag = include.root.inputs.cribl_app_version
+  namespace = include.root.inputs.namespaces["cribl"]
+  service_name = "cribl-leader"
 }
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-dns/terragrunt.hcl
similarity index 86%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl
rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-dns/terragrunt.hcl
index 6ab98584..feecb987 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-dns/terragrunt.hcl
@@ -4,6 +4,17 @@ include "root" {
   expose = true
 }
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
+}
+
terraform {
   source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}"
   extra_arguments "retry_lock" {
diff --git a/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-gatekeeper/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-gatekeeper/terragrunt.hcl
new file mode 100644
index 00000000..971dd2e9
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-gatekeeper/terragrunt.hcl
@@ -0,0 +1,140 @@
+include "root" {
+  path = find_in_parent_folders("root.hcl")
+  merge_strategy = "deep"
+  expose = true
+}
+
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"] # skipped for every action except output
+  exclude_dependencies = false # dependencies of this unit are not excluded along with it
+}
+
+terraform {
+  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-gatekeeper.git?ref=${include.root.inputs.release_version}"
+  extra_arguments "retry_lock" {
+    commands = get_terraform_commands_that_need_locking()
+    arguments = ["-lock-timeout=20s"]
+  }
+}
+
+dependency "eks" {
+  config_path = "../eks"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    cluster_name = "mock-cluster"
+    oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock"
+  }
+}
+
+dependency "eks_dns" {
+  config_path = "../eks-dns"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    cluster_domain = "mock.example.com"
+  }
+}
+
+dependency "eks_keycloak" {
+  config_path = "../eks-keycloak"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    user_auth_realm = "mock.keycloak.example.com/auth"
+    client_id = "mock-client-id"
+    client_secret = "mock-client-secret"
+    namespace = "keycloak"
+    user_secret = "user-sso"
+  }
+}
+
+dependency "eks_k8s_dashboard" {
+  config_path = "../eks-k8s-dashboard"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    namespace = "telemetry"
+    internal_endpoint = {
+      hostname = "kubernetes-dashboard.telemetry.svc.cluster.local"
+      port_number = 80
+      url = "http://kubernetes-dashboard.telemetry.svc.cluster.local:80/"
+    }
+    dashboard-user-token = "Iamanextremelylongstring"
+  }
+}
+
+dependency "eks_grafana" {
+  config_path = "../eks-grafana"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    namespace = "telemetry"
+    internal_endpoint = {
+      hostname = "grafana.telemetry.svc.cluster.local"
+      port_number = 80
+      url = "http://grafana.telemetry.svc.cluster.local:80/"
+    }
+  }
+}
+
+dependency "eks_kiali" {
+  config_path = "../eks-kiali"
+  mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
+  mock_outputs = {
+    namespace = "istio-system"
+    internal_endpoint = {
+      hostname = "kiali.istio-system.svc.cluster.local"
+      port_number = 80
+      url = "http://kiali.istio-system.svc.cluster.local:80/"
+    }
+  }
+}
+
+dependencies {
+  paths = [
+    "../eks",
+    "../eks-dns",
+    "../eks-keycloak",
+    "../eks-k8s-dashboard",
+    "../eks-grafana",
+    "../eks-kiali",
+  ]
+}
+
+inputs = {
+  # AWS Configuration
+  account_id = include.root.inputs.aws_account_id
+  eecr_info = include.root.inputs.eecr_info
+  profile = include.root.inputs.aws_profile
+  region = include.root.inputs.aws_region
+
+  # Cluster Configuration
+  cluster_domain = dependency.eks_dns.outputs.cluster_domain
+  cluster_name = dependency.eks.outputs.cluster_name
+
+  # Gatekeeper Standard Config
+  gatekeeper_chart_version = include.root.inputs.gatekeeper_chart_version
+  gatekeeper_tag = include.root.inputs.gatekeeper_tag
+  keycloak_client_id = dependency.eks_keycloak.outputs.client_id
+  keycloak_client_secret = dependency.eks_keycloak.outputs.client_secret
+  keycloak_fqdn = dependency.eks_keycloak.outputs.user_auth_realm
+  user_secret = dependency.eks_keycloak.outputs.user_secret
+
+  # Dashboard Gatekeeper Config
+  dashboard_ns = dependency.eks_k8s_dashboard.outputs.namespace
+  dashboard_service_name = "dashboard"
+  dashboard_url = dependency.eks_k8s_dashboard.outputs.internal_endpoint.url
+  dashboard_user_token = dependency.eks_k8s_dashboard.outputs.dashboard-user-token # hyphenated output name, as declared in the mock_outputs above
+
+  # Grafana Gatekeeper Config
+  grafana_ns = dependency.eks_grafana.outputs.namespace
+  grafana_service_name = "grafana"
+  grafana_url = dependency.eks_grafana.outputs.internal_endpoint.url
+
+  # Kiali Gatekeeper Config
+  kiali_ns = dependency.eks_kiali.outputs.namespace
+  kiali_service_name = "kiali"
+  kiali_url = dependency.eks_kiali.outputs.internal_endpoint.url
+}
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-grafana/terragrunt.hcl
similarity index 82%
rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl
rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-grafana/terragrunt.hcl
index 7830797b..07cc34d2 100644
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl
+++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-grafana/terragrunt.hcl
@@ -4,6 +4,17 @@ include "root" {
   expose = true
 }
+locals {
+  # Skip this module when the root's is_module_enabled map marks it false; a module missing from the map defaults to enabled.
+  skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true)
+}
+
+exclude {
+  if = local.skip
+  actions = ["all_except_output"]
+  exclude_dependencies = false
+}
+
 terraform {
   source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}"
   extra_arguments "retry_lock" {
@@ -16,8 +27,7 @@ dependency "eks" {
   config_path = "../eks"
   mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"]
   mock_outputs = {
-    cluster_name = include.root.inputs.cluster_name
-
oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + cluster_name = include.root.inputs.cluster_name } } @@ -44,9 +54,10 @@ dependency "eks_prometheus" { config_path = "../eks-prometheus" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - rwo_storage_class = "gp3-mocked" prometheus_server_internal_endpoint = { - url = "mock.prometheus.enpoint.example.com" + hostname = "prometheus.mock.svc.cluster.local" + port_number = "80" + url = "https://prometheus.mock.svc.cluster.local:80/" } } } @@ -75,13 +86,13 @@ dependencies { inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks_dns.outputs.cluster_domain - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = dependency.eks_dns.outputs.cluster_domain # Storage Configuration rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-istio/terragrunt.hcl similarity index 69% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-istio/terragrunt.hcl index 0cd1e1f9..9f10168c 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-istio/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + 
if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -16,6 +27,7 @@ dependencies { paths = [ "../eks", "../eks-cert-manager", + "../eks-otel" ] } @@ -23,20 +35,19 @@ dependency "eks" { config_path = "../eks" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + cluster_name = include.root.inputs.cluster_name } } inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + cluster_name = dependency.eks.outputs.cluster_name # Istio Configuration namespace = include.root.inputs.namespaces["istio"] diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-k8s-dashboard/terragrunt.hcl similarity index 84% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-k8s-dashboard/terragrunt.hcl index 1d02df66..9527e5f7 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-k8s-dashboard/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = 
local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -42,6 +53,7 @@ inputs = { account_id = include.root.inputs.aws_account_id profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region + eecr_info = include.root.inputs.eecr_info # Cluster Configuration cluster_domain = dependency.eks_dns.outputs.cluster_domain diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-karpenter/terragrunt.hcl similarity index 84% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-karpenter/terragrunt.hcl index 25c22d7c..92332552 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-karpenter/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" @@ -38,6 +49,7 @@ inputs = { account_id = include.root.inputs.aws_account_id profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region + eecr_info = include.root.inputs.eecr_info # Cluster Configuration cluster_endpoint = dependency.eks.outputs.cluster_endpoint diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-keycloak/terragrunt.hcl similarity index 79% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-keycloak/terragrunt.hcl index 74132d72..f17489ea 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-keycloak/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-keycloak/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-keycloak.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -49,6 +60,7 @@ dependencies { inputs = { cluster_domain = dependency.eks_dns.outputs.cluster_domain cluster_name = dependency.eks.outputs.cluster_name + eecr_info = include.root.inputs.eecr_info namespace = include.root.inputs.namespaces["keycloak"] profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region @@ -56,21 +68,11 @@ inputs = { # keycloak config default_storage_class = dependency.eks_config.outputs.rwo_storage_class keycloak_chart_version = include.root.inputs.keycloak_chart_version - keycloak_hostname = include.root.inputs.keycloak_hostname keycloak_tag = include.root.inputs.keycloak_tag realm_email = include.root.inputs.cluster_mailing_list realm_name = "master" - realm_password = include.root.inputs.keycloak_password - realm_username = include.root.inputs.keycloak_username service_name = "keycloak" telemetry_namespace = include.root.inputs.telemetry_namespace + admin_email = include.root.inputs.cluster_mailing_list - # # Database configuration - keycloak_database = 
include.root.inputs.keycloak_database - keycloak_user = include.root.inputs.keycloak_username - keycloak_password = include.root.inputs.keycloak_password - - # Project information - project_name = include.root.inputs.project_name - tags = include.root.inputs.tags } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-kiali/terragrunt.hcl similarity index 79% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-kiali/terragrunt.hcl index f1c9bdcb..8f19b76d 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-kiali/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -33,7 +44,7 @@ dependency "eks_dns" { config_path = "../eks-dns" mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_domain = "mock.example.com" + cluster_domain = "mock.example.com" } } @@ -46,14 +57,9 @@ dependency "eks_grafana" { port_number = "80" url = "https://grafana.mock.svc.cluster.local:80/" } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.mock.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.mock.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - tempo_datasource_id = "mock-tempo-datasource-id" + namespace = "grafana" + secret_name = "grafana" + tempo_datasource_id = 
"mock-tempo-datasource-id" } } @@ -96,31 +102,30 @@ dependencies { inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region # Cluster Configuration - cluster_domain = dependency.eks_dns.outputs.cluster_domain - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks_cert_manager.outputs.cluster_issuer_name + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks_cert_manager.outputs.cluster_issuer_name # Kiali Configuration service_name = "kiali" - namespace = include.root.inputs.namespaces["kiali"] - istio_namespace = include.root.inputs.namespaces["istio"] + namespace = include.root.inputs.namespaces["kiali"] + istio_namespace = include.root.inputs.namespaces["istio"] grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url grafana_namespace = dependency.eks_grafana.outputs.namespace grafana_secret_name = dependency.eks_grafana.outputs.secret_name - grafana_public_url = dependency.eks_grafana.outputs.public_endpoint kiali_application_version = include.root.inputs.kiali_application_version - kiali_operator_version = include.root.inputs.kiali_operator_version + kiali_operator_version = include.root.inputs.kiali_operator_version prometheus_internal_url = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url grafana_namespace = dependency.eks_grafana.outputs.namespace grafana_secret_name = dependency.eks_grafana.outputs.secret_name grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url - grafana_public_url = dependency.eks_grafana.outputs.public_endpoint tempo_datasource_id = dependency.eks_grafana.outputs.tempo_datasource_id tempo_internal_url = dependency.eks_tempo.outputs.tempo_internal_endpoint.url } diff --git 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-loki/terragrunt.hcl similarity index 83% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-loki/terragrunt.hcl index 55d3830e..54586f19 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-loki/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-loki/terragrunt.hcl @@ -4,13 +4,23 @@ include "root" { expose = true } -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-metrics-server", - "../eks-dns" - ] +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } } dependency "eks" { @@ -30,17 +40,18 @@ dependency "eks_config" { } } -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-metrics-server", + ] } inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-metrics-server/terragrunt.hcl similarity index 79% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-metrics-server/terragrunt.hcl index 5e520aad..241bbc5d 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-metrics-server/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-metrics-server/terragrunt.hcl @@ -4,19 +4,15 @@ include "root" { expose = true } -dependencies { - paths = [ - "../eks", - "../eks-config" - ] +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) } -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - } +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false } terraform { @@ -27,9 +23,24 @@ terraform { } } +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + } +} + +dependencies { + paths = [ + "../eks", + ] +} + inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-otel/terragrunt.hcl similarity index 58% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-otel/terragrunt.hcl index 2c93211d..a8a7d7c4 100644 --- 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-otel/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-otel/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-open-telemetry.git?ref=${include.root.inputs.release_version}" # source = "../../../../../../../tfmod-open-telemetry" @@ -13,15 +24,6 @@ terraform { } } -dependencies { - paths = [ - "../eks", - "../eks-loki", - "../eks-prometheus", - "../eks-tempo" - ] -} - dependency "eks" { config_path = "../eks" mock_outputs = { @@ -51,11 +53,33 @@ dependency "eks-tempo" { } } +dependencies { + paths = [ + "../eks", + "../eks-loki", + "../eks-prometheus", + "../eks-tempo" + ] +} + inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Config + cluster_name = dependency.eks.outputs.cluster_name + + # OTEL Configuration namespace = include.root.inputs.namespaces["otel"] loki_endpoint = dependency.eks-loki.outputs.gateway_internal_endpoint.url tempo_endpoint = dependency.eks-tempo.outputs.tempo_otlp_endpoint.url + # Image Version + auto_instrumentation_java_version = include.root.inputs.auto_instrumentation_java_version + collector_contrib_version = include.root.inputs.collector_contrib_version + collector_version = include.root.inputs.collector_version + otel_helm_version = include.root.inputs.otel_helm_version + rbac_proxy_version = 
include.root.inputs.rbac_proxy_version } diff --git a/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-pipeline/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-pipeline/terragrunt.hcl new file mode 100644 index 00000000..8d705a73 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-pipeline/terragrunt.hcl @@ -0,0 +1,100 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +locals { + # Skip this module if disabled OR if running in CodeBuild (to avoid circular dependency) + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) || get_env("CODEBUILD_BUILD_ID", "") != "" + + artifact_bucket = format("%v%v-%v-%v-%v", + include.root.inputs.prefixes["eks-s3"], + include.root.inputs.cluster_name, + "artifacts", + include.root.inputs.aws_account_id, + join("", [for c in split("-", include.root.inputs.aws_region) : substr(c, 0, 1)])) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-pipeline.git?ref=${include.root.inputs.release_version}" + + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +inputs = { + account_id = include.root.inputs.aws_account_id + cluster_name = include.root.inputs.cluster_name + environment = include.root.inputs.environment_abbr + region = include.root.inputs.aws_region + state_bucket_prefix = include.root.inputs.state_bucket_prefix + + # VPC Configuration + vpc_name = include.root.inputs.vpc_name + subnet_filter = "*-container-*" # or any specific pattern you want to use + + is_infrastructure_pipeline = true + + # Updated to use buildspecs from the platform-tg-infra repository + # made deploy-to-pipeline will update them from tfmod-pipeline module + buildspec_template_path = 
"buildspecs" + + build_configuration = { + compute_type = "BUILD_GENERAL1_LARGE" + image = "aws/codebuild/amazonlinux-x86_64-standard:5.0" + buildspec_path = "build.yml" + privileged_mode = true + environment_variables = { + ARTIFACT_BUCKET = local.artifact_bucket + TERRAGRUNT_PATH = "lab/${include.root.inputs.environment}/${include.root.inputs.aws_region}/vpc/${include.root.inputs.cluster_name}" + REGION = include.root.inputs.aws_region + ENVIRONMENT = include.root.inputs.environment_abbr + AWS_ACCOUNT_ID = include.root.inputs.aws_account_id + PROXY_CONFIG = "http://vlab-proxy.tco.census.gov:3128" + } + } + + security_scan_configuration = { + compute_type = "BUILD_GENERAL1_MEDIUM" + image = "aws/codebuild/amazonlinux-x86_64-standard:5.0" + buildspec_path = "security.yml" + environment_variables = { + ARTIFACT_BUCKET = local.artifact_bucket + TERRAGRUNT_PATH = "lab/${include.root.inputs.environment}/${include.root.inputs.aws_region}/vpc/${include.root.inputs.cluster_name}" + REGION = include.root.inputs.aws_region + ENVIRONMENT = include.root.inputs.environment_abbr + AWS_ACCOUNT_ID = include.root.inputs.aws_account_id + PROXY_CONFIG = "http://vlab-proxy.tco.census.gov:3128" + } + } + + approval_configuration = { + enabled = true + notify_emails = [include.root.inputs.cluster_mailing_list] + custom_message = "Please review and approve infrastructure changes to the CSVD platform" + } + + deployment_configuration = { + target_type = "Build" + compute_type = "BUILD_GENERAL1_MEDIUM" + image = "aws/codebuild/amazonlinux-x86_64-standard:5.0" + buildspec_path = "deploy.yml" + environment_variables = { + ARTIFACT_BUCKET = local.artifact_bucket + TERRAGRUNT_PATH = "lab/${include.root.inputs.environment}/${include.root.inputs.aws_region}/vpc/${include.root.inputs.cluster_name}" + REGION = include.root.inputs.aws_region + ENVIRONMENT = include.root.inputs.environment_abbr + AWS_ACCOUNT_ID = include.root.inputs.aws_account_id + PROXY_CONFIG = 
"http://vlab-proxy.tco.census.gov:3128" + } + } +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-prometheus/README.md similarity index 100% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/README.md rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-prometheus/README.md diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-prometheus/terragrunt.hcl similarity index 86% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-prometheus/terragrunt.hcl index 80e24e8f..1cb7f81d 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-prometheus/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -16,6 +27,7 @@ dependencies { paths = [ "../eks", "../eks-config", + "../eks-karpenter", "../eks-metrics-server", ] } @@ -40,6 +52,7 @@ dependency "eks_config" { inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-tempo/terragrunt.hcl similarity index 86% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-tempo/terragrunt.hcl index e94ad7f0..71dd0a10 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks-tempo/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { @@ -44,6 +55,7 @@ dependencies { inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id + eecr_info = include.root.inputs.eecr_info profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks/terragrunt.hcl similarity index 70% rename from lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl rename to lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks/terragrunt.hcl index 9eca1de2..13ed5d01 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/csvd-platform-lab-mcm/eks/terragrunt.hcl @@ -4,6 +4,17 @@ include "root" { expose = true } +locals { + # Skip this module if disabled + skip = !lookup(include.root.locals.is_module_enabled, basename(get_terragrunt_dir()), true) +} + +exclude { + if = local.skip + actions = ["all_except_output"] + 
exclude_dependencies = false +} + terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" @@ -13,6 +24,12 @@ terraform { } } +dependencies { + paths = [ + "../eks-pipeline", + ] +} + inputs = { # AWS Configuration account_id = include.root.inputs.aws_account_id diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl deleted file mode 100644 index e52f9d23..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl +++ /dev/null @@ -1,28 +0,0 @@ -locals { - # Cluster specific configuration - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - cluster_mailing_list = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 2 - enable_cluster_creator_admin_permissions = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } - - # Common configuration - common_retry_args = { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } - - common_dependencies = ["../eks", "../eks-config"] - - common_mock_eks = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off deleted file mode 100644 index 119537e6..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-gogatekeeper/terragrunt.hcl.off +++ /dev/null @@ -1,80 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-gogatekeeper.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_dns" { - config_path = "../eks-dns" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_domain = "mock.example.com" - } -} - -dependency "eks_grafana" { - config_path = "../eks-grafana" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - public_endpoint = "mock.grafaba.example.com" - } -} - -dependency "eks_keycloak" { - config_path = "../eks-keycloak" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - public_endpoint = "mock.keycloak.example.com" - discovery_url = "mock.keycloak.example.com/auth" - client_id = "mock-client-id" - client_secret = "mock-client-secret" - } -} - -dependencies { - paths = [ - "../eks", - "../eks-dns", - "../eks-grafana", - "../eks-keycloak", - "../eks-prometheus", - ] -} - -inputs = { - # Base Cluster Config - cluster_domain = dependency.eks_dns.outputs.cluster_domain - namespace = include.root.inputs.namespaces["gogatekeeper"] - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Gatekeeper Config - gogatekeeper_tag = include.root.inputs.gogatekeeper_tag - gogatekeeper_chart_version = include.root.inputs.gogatekeeper_chart_version - keycloak_discovery_url = dependency.eks_keycloak.outputs.discovery_url - - # Service Behind Gatekeeper Config - service_name = "test-gc" - upstream_url = 
dependency.eks_grafana.outputs.public_endpoint - redirection_url = dependency.eks_grafana.outputs.public_endpoint - client_id = dependency.eks_keycloak.outputs.client_id - client_secret = dependency.eks_keycloak.outputs.client_secret - keycloak_public_url = dependency.eks_keycloak.outputs.public_endpoint -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl deleted file mode 100644 index e126331b..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,55 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-metrics-server", - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - rwo_storage_class = "gp3-mock" - } -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Loki Configuration - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = 
include.root.inputs.loki_tag - namespace = include.root.inputs.namespaces["loki"] - rwo_storage_class = dependency.eks_config.outputs.rwo_storage_class -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl deleted file mode 100644 index fd02a7ac..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -dependencies { - paths = [ - "../eks", - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - } -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - - # Metrics Server Configuration - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag - namespace = include.root.inputs.namespaces["metrics-server"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl deleted file mode 100644 index 656de00e..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/cluster.hcl +++ /dev/null @@ -1,28 +0,0 @@ -locals { - # Cluster specific configuration - 
cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-srn" - cluster_mailing_list = "srinivasa.nangunuri@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 2 - enable_cluster_creator_admin_permissions = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } - - # Common configuration - common_retry_args = { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } - - common_dependencies = ["../eks", "../eks-config"] - - common_mock_eks = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index d1e69d00..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,57 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-karpenter" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" - cluster_version = 
include.root.inputs.cluster_version - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = include.root.inputs.cluster_mailing_list - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Cert Manager Configuration - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cluster_issuer_name = include.root.inputs.cluster_issuer_name - namespace = include.root.inputs.namespaces["cert-manager"] - - # Version Tags - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl deleted file mode 100644 index 2bf9b72f..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,60 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - subnets = ["subnet-mock1", "subnet-mock2", "subnet-mock3"] - } -} - -dependency 
"eks-istio" { - config_path = "../eks-istio" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - istio_ingress_lb = { - dns_name = "mock-${include.root.inputs.cluster_name}.elb.amazonaws.com" - zone_id = "MOCKZONEID" - } - } -} - -dependencies { - paths = [ - "../eks-config", - "../eks-istio", - "../eks-karpenter" - ] -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = include.root.inputs.cluster_name - - # Network Configuration - istio_ingress_lb = dependency.eks-istio.outputs.istio_ingress_lb - route53_endpoints = include.root.inputs.route53_endpoints - vpc_domain_name = include.root.inputs.vpc_domain_name - vpc_name = include.root.inputs.vpc_name - - # Additional Configuration - tags = include.root.inputs.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl deleted file mode 100644 index 2bc7484b..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,63 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_loki" { - config_path = 
"../eks-loki" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - rwo_storage_class = "gp3-mocked" - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns", - "../eks-karpenter", - "../eks-loki" - ] -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = include.root.inputs.vpc_domain_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Storage Configuration - rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class - - # Grafana Configuration - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag - namespace = include.root.inputs.namespaces["grafana"] - service_name = "grafana" -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl deleted file mode 100644 index 1c312166..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config" - ] -} - -dependency "eks" { - config_path = "../eks" - 
mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Istio Configuration - namespace = include.root.inputs.namespaces["istio"] - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index c32546cd..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-k8s-dashboard/terragrunt.hcl +++ /dev/null @@ -1,55 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=mcmCluster" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_dns" { - config_path = "../eks-dns" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_domain = "mock.example.com" - oidc_provider_arn = 
"arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_domain = dependency.eks_dns.outputs.cluster_domain - cluster_name = dependency.eks.outputs.cluster_name - - # Dashboard Configuration - service_name = include.root.inputs.dashboard_hostname - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - namespace = include.root.inputs.namespaces["k8s-dashboard"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl deleted file mode 100644 index 7c2ff2db..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,50 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = ["../eks"] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - - mock_outputs = { - cluster_name = "mock-cluster" - cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - node_group_name = "mock-node-group" - vpc_id = "vpc-mock" - subnets = ["subnet-mock1", "subnet-mock2"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration 
- cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Karpenter Configuration - karpenter_tag = include.root.inputs.karpenter_tag - karpenter_helm_chart = include.root.inputs.karpenter_helm_chart - karpenter_node_group_name = dependency.eks.outputs.node_group_name - namespace = include.root.inputs.namespaces["karpenter"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl deleted file mode 100644 index 248432dd..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-keycloak/terragrunt.hcl +++ /dev/null @@ -1,87 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-keycloak.git?ref=standards" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - rwo_storage_class = "gp3-mock" - } -} - -dependency "eks_dns" { - config_path = "../eks-dns" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_domain = "mock.example.com" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_postgresql" { - config_path = "../eks-postgresql" - 
mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - internal_endpoint = { - url = "mock-internal-endpoint-url" - } - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns", - "../eks-karpenter", - "../eks-postgresql", - "../eks-prometheus", - ] -} - -inputs = { - admin_email = include.root.inputs.cluster_mailing_list - cluster_domain = dependency.eks_dns.outputs.cluster_domain - cluster_name = dependency.eks.outputs.cluster_name - namespace = include.root.inputs.namespaces["keycloak"] - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # keycloak config - default_storage_class = dependency.eks_config.outputs.rwo_storage_class - keycloak_chart_version = include.root.inputs.keycloak_chart_version - keycloak_hostname = include.root.inputs.keycloak_hostname - keycloak_tag = include.root.inputs.keycloak_tag - service_name = "keycloak" - telemetry_namespace = include.root.inputs.telemetry_namespace - - # Database configuration - db_host = dependency.eks_postgresql.outputs.internal_endpoint.url - db_name = include.root.inputs.postgresql_database - db_password = include.root.inputs.postgresql_password - db_user = include.root.inputs.postgresql_username - - # Project information - project_name = include.root.inputs.project_name - tags = include.root.inputs.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl deleted file mode 100644 index c36c773c..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl +++ /dev/null @@ -1,113 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" - extra_arguments "retry_lock" { - commands = 
get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns", - "../eks-grafana", - "../eks-istio", - "../eks-prometheus" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - operators_namespace = "mock-namespace" - } -} - -dependency "eks_dns" { - config_path = "../eks-dns" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_domain = "mock.example.com" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_grafana" { - config_path = "../eks-grafana" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - internal_endpoint = { - hostname = "grafana.mock.svc.cluster.local" - port_number = "80" - url = "https://grafana.mock.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.mock.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.mock.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -dependency "eks_istio" { - config_path = "../eks-istio" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - namespace = "mock-namespace-istio" - } -} - -dependency "eks_prometheus" { - config_path = "../eks-prometheus" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus.mock.svc.cluster.local" - port_number = "80" - 
url = "https://prometheus.mock.svc.cluster.local:80/" - } - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_domain = dependency.eks_dns.outputs.cluster_domain - cluster_name = dependency.eks.outputs.cluster_name - - # Kiali Configuration - service_name = "kiali" - namespace = include.root.inputs.namespaces["kiali"] - grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks_grafana.outputs.namespace - grafana_secret_name = dependency.eks_grafana.outputs.secret_name - grafana_public_url = dependency.eks_grafana.outputs.public_endpoint - - kiali_operator_version = include.root.inputs.kiali_operator_version - - prometheus_internal_url = dependency.eks_prometheus.outputs.prometheus_server_internal_endpoint.url - # jager_internal_url = dependency.eks_prometheus.outputs.jager_internal_url -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl.disabled b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl.disabled deleted file mode 100644 index a06c6e68..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-kiali/terragrunt.hcl.disabled +++ /dev/null @@ -1,108 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns", - "../eks-grafana", - "../eks-istio", - "../eks-prometheus" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = 
["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks-config" { - config_path = "../eks-config" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - operators_namespace = "mock-namespace" - } -} - -dependency "eks_dns" { - config_path = "../eks-dns" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_domain = "mock.example.com" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks_grafana" { - config_path = "../eks-grafana" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - internal_endpoint = { - hostname = "grafana.mock.svc.cluster.local" - port_number = "80" - url = "https://grafana.mock.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.mock.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.mock.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -dependency "eks_istio" { - config_path = "../eks-istio" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - namespace = "mock-namespace-istio" - } -} - -dependency "eks_prometheus" { - config_path = "../eks-prometheus" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - prometheus_internal_url = "mock-internal-url" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_domain = dependency.eks_dns.outputs.cluster_domain - cluster_name = dependency.eks.outputs.cluster_name - - # Kiali Configuration - grafana_internal_url = 
dependency.eks_grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks_grafana.outputs.namespace - grafana_secret_name = dependency.eks_grafana.outputs.secret_name - grafana_public_url = dependency.eks_grafana.outputs.public_endpoint.url - - kiali_operator_version = include.root.inputs.kiali_operator_version - operators_namespace = dependency.eks-config.outputs.operators_namespace - - prometheus_internal_url = dependency.eks_prometheus.outputs.internal_endpoint - jager_internal_url = dependency.eks_prometheus.outputs.jager_internal_url -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/README.md deleted file mode 100644 index bbbffb2a..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/README.md +++ /dev/null @@ -1,198 +0,0 @@ -## eks-prometheus -This module deploys EKS kubeenetes prometheus inside existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. -This module consisits of 4 components. It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. - 1. prometheus-alert-manager - 2. prometheus-node-exporter - 3. prometheus-pushgateway - 4. prometheus-server - -### Dependencies -This module is dependent on EKS module (eks). The cluster should exist already for this module to work. - -### Inputs - cluster_name - profile - prometheus_chart_version - prometheus_server_tag - prometheus_config_reloader_tag - alertmanager_tag - kube_state_metrics_tag - node_exporter_tag - pushgateway_tag - rwo_storage_class - -### Outputs - alertmanager_internal_endpoint - alertmanager_headless_internal_endpoint - pushgateway_internal_endpoint - prometheus_server_internal_endpoint - -### Issues observed/fixed -1. 
The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" -2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" -3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" -4. The alertmanager_tag value had to be updated from -5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: - - ``` - set { - name = "kube-state-metrics.image.registry" - value = module.images.images[local.ksm_key].dest_registry - } - set { - name = "kube-state-metrics.image.repository" - value = module.images.images[local.ksm_key].dest_repository - } - ``` - -6. In some other cases the image ecr repository had to be split by the colon separatory (:) - - ``` - set { - name = "alertmanager.configmapReload.image.repository" - value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] - } - ``` - -### Chart Notes - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl port-forward $POD_NAME 9091 - echo "Visit http://127.0.0.1:9091 to use your application" - ``` - - The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: - prometheus-server.prometheus.svc.cluster.local - - - Get the Prometheus server URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9090 - ``` - - The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: - 
`prometheus-alertmanager.prometheus.svc.cluster.local` - - - Get the Alertmanager URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9093 - ``` - - ################################################################################# - ###### WARNING: Pod Security Policy has been disabled by default since ##### - ###### it deprecated after k8s 1.25+. use ##### - ###### (index .Values "prometheus-node-exporter" "rbac" ##### - ###### "pspEnabled") with (index .Values ##### - ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### - ###### in case you still need it. ##### - ################################################################################# - - - The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: - `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` - - - Get the PushGateway URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9091 - ``` - - For more information on running Prometheus, visit: - https://prometheus.io/ - - kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. - The exposed metrics can be found here: - https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics - - The metrics are exported on the HTTP endpoint /metrics on the listening port. 
- In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` - - They are served either as plaintext or protobuf depending on the Accept header. - They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9093 to use your application" - kubectl --namespace prometheus port-forward $POD_NAME 9093:80 - ``` - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9100 to use your application" - kubectl port-forward --namespace prometheus $POD_NAME 9100 - ``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | -| [helm](#requirement\_helm) | >= 2.11.0 | -| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | -| [null](#requirement\_null) | >= 3.2.1 | - -## Providers - -| Name | Version | -|------|---------| -| [helm](#provider\_helm) | >= 2.11.0 | -| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | - -## Resources - -| Name | Type | -|------|------| -| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| 
[kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | -| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | -| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. | `string` | n/a | yes | -| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | -| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | -| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | -| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | -| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | -| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | -| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | -| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | -| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. 
| `string` | `"v1.9.0"` | no | -| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | -| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | -| [module\_name](#output\_module\_name) | The name of this module. | -| [module\_version](#output\_module\_version) | The version of this module. | -| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | -| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | -| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | - diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl deleted file mode 100644 index 76650e5e..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,61 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=mcmCluster" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-metrics-server", - "../eks-dns" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks-config" { - config_path = 
"../eks-config" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - rwo_storage_class = "gp3-encyrpted" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Prometheus Configuration - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag - alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - namespace = include.root.inputs.namespaces["prometheus"] - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag - rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl deleted file mode 100644 index e1b17d6a..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,66 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=keycloak" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - 
oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - prometheus_svc = "prometheus-server" - prometheus_namespace = "prometheus" - prometheus_port = 80 - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} - -dependencies { - paths = [ - "../eks", - "../eks-dns", - "../eks-prometheus" - ] -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Prometheus Configuration - prometheus_svc = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.hostname - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - - # Tempo Configuration - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - namespace = include.root.inputs.namespaces["tempo"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl deleted file mode 100644 index 9eca1de2..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-srn/eks/terragrunt.hcl +++ /dev/null @@ -1,28 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20s"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Core Cluster Configuration - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - - # Additional Configuration - tags = include.root.inputs.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl deleted file mode 100644 index 8d2831cf..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. 
-locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index 35e355aa..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = dependency.eks.inputs.creator - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - 
cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl deleted file mode 100644 index d4a60dbc..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - token = [{ token = "THISISTHETOKENTHATDOESNTEXISTTHEREAREMANYLIKEITBUTHISONEISFORACLUSTER" }] - vpc_id = "a-vpc-id" - } -} - 
-inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - vpc_id = dependency.eks.outputs.vpc_id - cluster_name = dependency.eks.outputs.cluster_name - subnets = dependency.eks.outputs.subnets - security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id - eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - kubectl_image_tag = include.root.inputs.kubectl_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl deleted file mode 100644 index 6e28781b..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] - } -} - -dependency "istio" { - config_path = "../eks-istio" - mock_outputs = { - istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" - } - } -} - -inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = 
dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name - route53_endpoints = include.root.inputs.route53_endpoints -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl deleted file mode 100644 index 65ab33fe..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - mock_outputs = { - rwo_storage_class = "gp3-encrypted" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.grafana_hostname - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl deleted file mode 100644 index c7c22c81..00000000 --- 
a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,32 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index cd1961b6..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl +++ /dev/null @@ -1,36 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = 
include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.dashboard_hostname - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl deleted file mode 100644 index 6b1a862f..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,43 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = "node_group_a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" - } -} - -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - karpenter_node_group_name = dependency.eks.outputs.node_group_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - vpc_id = dependency.eks.outputs.vpc_id - karpenter_helm_chart = 
include.root.inputs.karpenter_helm_chart - karpenter_tag = include.root.inputs.karpenter_tag - kubectl_tag = include.root.inputs.kubectl_image_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable deleted file mode 100644 index 1e04fe0d..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable +++ /dev/null @@ -1,81 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - # source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = "acmpca-clusterissuer" - } -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.dev.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.dev.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -inputs = { - kiali_operator_version = include.root.inputs.kiali_operator_version - 
kiali_application_version = include.root.inputs.kiali_application_version - - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - operators_namespace = "operators" - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url - grafana_secret_name = "grafana" - # grafana_secret_name = dependency.eks-grafana.outputs.secret_name - jaeger_internal_url = "" - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl deleted file mode 100644 index 2c6b6be5..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = 
"arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl deleted file mode 100644 index 387653b9..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,33 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = 
dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md deleted file mode 100644 index bbbffb2a..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md +++ /dev/null @@ -1,198 +0,0 @@ -## eks-prometheus -This module deploys EKS kubeenetes prometheus inside existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. -This module consisits of 4 components. It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. - 1. prometheus-alert-manager - 2. prometheus-node-exporter - 3. prometheus-pushgateway - 4. prometheus-server - -### Dependencies -This module is dependent on EKS module (eks). The cluster should exist already for this module to work. - -### Inputs - cluster_name - profile - prometheus_chart_version - prometheus_server_tag - prometheus_config_reloader_tag - alertmanager_tag - kube_state_metrics_tag - node_exporter_tag - pushgateway_tag - rwo_storage_class - -### Outputs - alertmanager_internal_endpoint - alertmanager_headless_internal_endpoint - pushgateway_internal_endpoint - prometheus_server_internal_endpoint - -### Issues observed/fixed -1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" -2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" -3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" -4. The alertmanager_tag value had to be updated from -5. 
The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: - - ``` - set { - name = "kube-state-metrics.image.registry" - value = module.images.images[local.ksm_key].dest_registry - } - set { - name = "kube-state-metrics.image.repository" - value = module.images.images[local.ksm_key].dest_repository - } - ``` - -6. In some other cases the image ecr repository had to be split by the colon separatory (:) - - ``` - set { - name = "alertmanager.configmapReload.image.repository" - value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] - } - ``` - -### Chart Notes - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl port-forward $POD_NAME 9091 - echo "Visit http://127.0.0.1:9091 to use your application" - ``` - - The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: - prometheus-server.prometheus.svc.cluster.local - - - Get the Prometheus server URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9090 - ``` - - The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: - `prometheus-alertmanager.prometheus.svc.cluster.local` - - - Get the Alertmanager URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl 
--namespace prometheus port-forward $POD_NAME 9093 - ``` - - ################################################################################# - ###### WARNING: Pod Security Policy has been disabled by default since ##### - ###### it deprecated after k8s 1.25+. use ##### - ###### (index .Values "prometheus-node-exporter" "rbac" ##### - ###### "pspEnabled") with (index .Values ##### - ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### - ###### in case you still need it. ##### - ################################################################################# - - - The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: - `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` - - - Get the PushGateway URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9091 - ``` - - For more information on running Prometheus, visit: - https://prometheus.io/ - - kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. - The exposed metrics can be found here: - https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics - - The metrics are exported on the HTTP endpoint /metrics on the listening port. - In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` - - They are served either as plaintext or protobuf depending on the Accept header. - They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. - - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9093 to use your application" - kubectl --namespace prometheus port-forward $POD_NAME 9093:80 - ``` - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9100 to use your application" - kubectl port-forward --namespace prometheus $POD_NAME 9100 - ``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | -| [helm](#requirement\_helm) | >= 2.11.0 | -| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | -| [null](#requirement\_null) | >= 3.2.1 | - -## Providers - -| Name | Version | -|------|---------| -| [helm](#provider\_helm) | >= 2.11.0 | -| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | - -## Resources - -| Name | Type | -|------|------| -| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | -| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| 
[alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | -| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. | `string` | n/a | yes | -| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | -| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | -| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | -| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | -| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | -| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | -| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | -| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | -| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | -| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. 
| `string` | `"gp3-encrypted"` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | -| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | -| [module\_name](#output\_module\_name) | The name of this module. | -| [module\_version](#output\_module\_version) | The version of this module. | -| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | -| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | -| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | - diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl deleted file mode 100644 index e6c54b16..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,38 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag 
- alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl deleted file mode 100644 index e9ebd485..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,46 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - prometheus_namespace = "prometheus" - } -} - -inputs = { - account_id = include.root.locals.account_id - profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - 
tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl deleted file mode 100644 index cc7c8935..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl +++ /dev/null @@ -1,56 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -locals { - # Set cluster/platform specific variables, or extract from the hierarchy. - account_id = include.root.inputs.aws_account_id - cluster_endpoint_public_access = include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - organization = include.root.inputs.organization - profile = include.root.inputs.aws_profile - project_name = include.root.inputs.project_name - project_number = include.root.inputs.project_number - project_role = include.root.inputs.project_role - region = include.root.inputs.aws_region - tags = include.root.inputs.tags - terraform = include.root.inputs.terraform - terragrunt = include.root.inputs.terragrunt - vpc_domain_name = include.root.inputs.vpc_domain_name -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - 
commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -inputs = { - aws_account_id = local.account_id - cluster_endpoint_public_access = local.cluster_endpoint_public_access - cluster_name = local.cluster_name - cluster_version = local.cluster_version - creator = local.creator - eks_instance_disk_size = local.eks_instance_disk_size - eks_ng_desired_size = local.eks_ng_desired_size - eks_ng_max_size = local.eks_ng_max_size - eks_ng_min_size = local.eks_ng_min_size - eks_vpc_name = local.eks_vpc_name - enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions - os_username = local.creator - shared_vpc_label = local.environment_abbr - tags = local.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl deleted file mode 100644 index 8d2831cf..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. 
-locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl deleted file mode 100644 index 740c1ad9..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl +++ /dev/null @@ -1,21 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. -locals { - cluster_endpoint_public_access = true - cluster_name = "platform-test-z" - created_reason = "Terragrunt Development for CICD Delivered EKS Platform" - creator = "luther.coleman.mcginty@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 3 - eks_ng_max_size = 10 - eks_ng_min_size = 1 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable deleted file mode 100644 index 97aa66fd..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable +++ /dev/null @@ -1,27 +0,0 @@ -include "root" { - path = find_in_parent_folders() - 
expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-alloy.git?ref=main" - source = "../../../../../../../tfmod-alloy" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - cluster_domain = dependency.eks.inputs.vpc_domain_name -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index 2522e07a..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,57 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-cert-mgr" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-karpenter" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" - cluster_version = include.root.inputs.cluster_version - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = 
include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = include.root.inputs.cluster_mailing_list - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Cert Manager Configuration - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cluster_issuer_name = include.root.inputs.cluster_issuer_name - - # Version Tags - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - namespace = include.root.inputs.namespaces["cert-manager"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl deleted file mode 100644 index eefbf272..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl +++ /dev/null @@ -1,54 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -dependencies { - paths = [ - "../eks", - # "../eks-karpenter" - ] -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-eks-configuration" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - - mock_outputs = { - cluster_name = "mock-cluster" - cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" - cluster_certificate_authority_data = [{ data = 
"mock-cert-data" }] - eks_managed_node_groups_autoscaling_group_names = ["mock-asg-name"] - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - security_group_all_worker_mgmt_id = "sg-mock" - subnets = ["subnet-mock1", "subnet-mock2"] - vpc_id = "vpc-mock" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Core Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id - subnets = dependency.eks.outputs.subnets - vpc_id = dependency.eks.outputs.vpc_id - operators_ns = include.root.inputs.operator_namespace - telemetry_ns = include.root.inputs.telemetry_namespace -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl deleted file mode 100644 index 83eb25fb..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,61 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-eks-dns" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - subnets = ["subnet-mock1", 
"subnet-mock2", "subnet-mock3"] - } -} - -dependency "eks-istio" { - config_path = "../eks-istio" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - istio_ingress_lb = { - dns_name = "mock-${include.root.inputs.cluster_name}.elb.amazonaws.com" - zone_id = "MOCKZONEID" - } - } -} - -dependencies { - paths = [ - "../eks-config", - "../eks-istio", - "../eks-karpenter" - ] -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = include.root.inputs.cluster_name - - # Network Configuration - istio_ingress_lb = dependency.eks-istio.outputs.istio_ingress_lb - route53_endpoints = include.root.inputs.route53_endpoints - vpc_domain_name = include.root.inputs.vpc_domain_name - vpc_name = include.root.inputs.vpc_name - - # Additional Configuration - tags = include.root.inputs.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl deleted file mode 100644 index dda8453f..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,81 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-grafana" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-loki", - "../eks-prometheus", - "../eks-tempo" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - 
config_path = "../eks-loki" - mock_outputs = { - rwo_storage_class = "gp3-encrypted" - gateway_internal_endpoint = { - hostname = "loki-gateway.telemetry.svc.cluster.local" - portNumber = "80" - url = "http://loki-gateway.telemetry.svc.cluster.local:80/" - } - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} - -dependency "eks-tempo" { - config_path = "../eks-tempo" - mock_outputs = { - tempo_internal_endpoint = { - hostname = "tempo.telemetry.svc.cluster.local" - port_number = 4317 - url = "http://tempo.telemetry.svc.cluster.local:4317/" - } - } -} - -inputs = { - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag - profile = include.root.inputs.aws_profile - public_hostname = include.root.inputs.grafana_hostname - region = include.root.inputs.aws_region - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class - loki_endpoint = dependency.eks-loki.outputs.gateway_internal_endpoint.url - prometheus_endpoint = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - tempo_endpoint = dependency.eks-tempo.outputs.tempo_internal_endpoint.url - namespace = include.root.inputs.namespaces["grafana"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl deleted file mode 100644 index dff8a76c..00000000 --- 
a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,45 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-istio" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Istio Configuration - namespace = include.root.inputs.namespaces["istio"] - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index 7bccdc3f..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl +++ /dev/null @@ -1,46 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-k8s-dashboard" - extra_arguments 
"retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-config", - "../eks-dns" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = include.root.inputs.cluster_name - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_domain = include.root.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name - - # Dashboard Configuration - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - namespace = include.root.inputs.namespaces["k8s-dashboard"] -} \ No newline at end of file diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl deleted file mode 100644 index a713f4d9..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,49 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-karpenter" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} -dependencies { - paths = ["../eks"] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - - mock_outputs = { - cluster_name = "mock-cluster" - cluster_endpoint = 
"https://mock-endpoint.eks.amazonaws.com" - oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" - node_group_name = "mock-node-group" - vpc_id = "vpc-mock" - subnets = ["subnet-mock1", "subnet-mock2"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - - # Karpenter Configuration - karpenter_tag = include.root.inputs.karpenter_tag - karpenter_helm_chart = include.root.inputs.karpenter_helm_chart - karpenter_node_group_name = dependency.eks.outputs.node_group_name - namespace = include.root.inputs.namespaces["karpenter"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl deleted file mode 100644 index d0494ec1..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl +++ /dev/null @@ -1,91 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" - source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = "acmpca-clusterissuer" - } -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = 
"prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} -dependency "eks-tempo" { - config_path = "../eks-tempo" - mock_outputs = { - tempo_internal_endpoint = { - hostname = "tempo.tempo.svc.cluster.local" - port_number = 3100 - url = "http://tempo.tempo.svc.cluster.local:3100/" - } - } -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = "https://grafana.dev.lab.csp2.census.gov:80/" - secret_name = "grafana" - tempo_datasource_id = "tempo" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - - kiali_application_version = include.root.inputs.kiali_application_version - - namespace = include.root.inputs.namespaces["kiali"] - istio_namespace = include.root.inputs.namespaces["istio"] - - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_secret_name = dependency.eks-grafana.outputs.secret_name - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint - tempo_datasource_id = dependency.eks-grafana.outputs.tempo_datasource_id - tempo_internal_url = dependency.eks-tempo.outputs.tempo_internal_endpoint.url - - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = 
var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl deleted file mode 100644 index 4c4de2fd..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,48 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-loki-x" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true -} - -# dependency "eks-prometheus" { -# config_path = "../eks-prometheus" -# skip_outputs = true -# } - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag - namespace = 
include.root.inputs.namespaces["loki"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl deleted file mode 100644 index 06817cc0..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -dependencies { - paths = [ - "../eks", - "../eks-config" - ] -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-metrics-server" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - mock_outputs = { - cluster_name = "mock-cluster" - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Cluster Configuration - cluster_name = dependency.eks.outputs.cluster_name - - # Metrics Server Configuration - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag - namespace = include.root.inputs.namespaces["metrics-server"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl deleted file mode 100644 index 2b4ce337..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl +++ /dev/null @@ -1,61 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - 
merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-open-telemetry.git?ref=main" - source = "../../../../../../../tfmod-open-telemetry" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependencies { - paths = [ - "../eks", - "../eks-loki", - "../eks-prometheus", - "../eks-tempo" - ] -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - mock_outputs = { - gateway_internal_endpoint = { - hostname = "loki-gateway.telemetry.svc.cluster.local" - portNumber = "80" - url = "http://loki-gateway.telemetry.svc.cluster.local:80/" - } - } -} - -dependency "eks-tempo" { - config_path = "../eks-tempo" - mock_outputs = { - tempo_otlp_endpoint = { - hostname = "tempo.telemetry.svc.cluster.local" - portNumber = 4317 - url = "http://tempo.telemetry.svc.cluster.local:4317/" - } - } -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - namespace = include.root.inputs.namespaces["otel"] - loki_endpoint = dependency.eks-loki.outputs.gateway_internal_endpoint.url - tempo_endpoint = dependency.eks-tempo.outputs.tempo_otlp_endpoint.url -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md deleted file mode 100644 index bbbffb2a..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md +++ /dev/null @@ -1,198 +0,0 @@ -## eks-prometheus -This module deploys EKS kubeenetes prometheus inside existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. -This module consisits of 4 components. 
It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. - 1. prometheus-alert-manager - 2. prometheus-node-exporter - 3. prometheus-pushgateway - 4. prometheus-server - -### Dependencies -This module is dependent on EKS module (eks). The cluster should exist already for this module to work. - -### Inputs - cluster_name - profile - prometheus_chart_version - prometheus_server_tag - prometheus_config_reloader_tag - alertmanager_tag - kube_state_metrics_tag - node_exporter_tag - pushgateway_tag - rwo_storage_class - -### Outputs - alertmanager_internal_endpoint - alertmanager_headless_internal_endpoint - pushgateway_internal_endpoint - prometheus_server_internal_endpoint - -### Issues observed/fixed -1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" -2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" -3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" -4. The alertmanager_tag value had to be updated from -5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: - - ``` - set { - name = "kube-state-metrics.image.registry" - value = module.images.images[local.ksm_key].dest_registry - } - set { - name = "kube-state-metrics.image.repository" - value = module.images.images[local.ksm_key].dest_repository - } - ``` - -6. In some other cases the image ecr repository had to be split by the colon separatory (:) - - ``` - set { - name = "alertmanager.configmapReload.image.repository" - value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] - } - ``` - -### Chart Notes - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl port-forward $POD_NAME 9091 - echo "Visit http://127.0.0.1:9091 to use your application" - ``` - - The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: - prometheus-server.prometheus.svc.cluster.local - - - Get the Prometheus server URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9090 - ``` - - The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: - `prometheus-alertmanager.prometheus.svc.cluster.local` - - - Get the Alertmanager URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9093 - ``` - - ################################################################################# - ###### WARNING: Pod Security Policy has been disabled by default since ##### - ###### it deprecated after k8s 1.25+. use ##### - ###### (index .Values "prometheus-node-exporter" "rbac" ##### - ###### "pspEnabled") with (index .Values ##### - ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### - ###### in case you still need it. 
##### - ################################################################################# - - - The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: - `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` - - - Get the PushGateway URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9091 - ``` - - For more information on running Prometheus, visit: - https://prometheus.io/ - - kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. - The exposed metrics can be found here: - https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics - - The metrics are exported on the HTTP endpoint /metrics on the listening port. - In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` - - They are served either as plaintext or protobuf depending on the Accept header. - They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9093 to use your application" - kubectl --namespace prometheus port-forward $POD_NAME 9093:80 - ``` - - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9100 to use your application" - kubectl port-forward --namespace prometheus $POD_NAME 9100 - ``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | -| [helm](#requirement\_helm) | >= 2.11.0 | -| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | -| [null](#requirement\_null) | >= 3.2.1 | - -## Providers - -| Name | Version | -|------|---------| -| [helm](#provider\_helm) | >= 2.11.0 | -| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | - -## Resources - -| Name | Type | -|------|------| -| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | -| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | -| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | -| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | -| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | -| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | -| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | -| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | -| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | -| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | -| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | -| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | -| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | -| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | -| [module\_name](#output\_module\_name) | The name of this module. | -| [module\_version](#output\_module\_version) | The version of this module. 
| -| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | -| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | -| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | - diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl deleted file mode 100644 index 030dd33c..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-prometheus" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag - alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag - namespace = include.root.inputs.namespaces["prometheus"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl deleted file mode 100644 index d14c8a1e..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,47 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-tempo" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - prometheus_namespace = "prometheus" - } -} - -inputs = { - account_id = include.root.locals.account_id - profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - namespace = include.root.inputs.namespaces["tempo"] -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl deleted file mode 100644 index c77be43b..00000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl +++ /dev/null @@ -1,28 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - source = "../../../../../../../tfmod-eks" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -inputs = { - # AWS Configuration - account_id = include.root.inputs.aws_account_id - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - - # Core Cluster Configuration - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - - # Additional Configuration - tags = include.root.inputs.tags -} diff --git a/lab/root.hcl b/lab/root.hcl index 10706ffd..b0666374 100644 --- a/lab/root.hcl +++ b/lab/root.hcl @@ -15,6 +15,9 @@ locals { # Automatically load _envcommon, cross account and environment common variables common_vars = read_terragrunt_config(find_in_parent_folders("./_envcommon/common-variables.hcl")) + # Automatically load naming prefixes + prefix_vars = read_terragrunt_config(find_in_parent_folders("./_envcommon/prefixes.hcl")) + # Automatically load region-level variables region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl")) @@ -24,21 +27,36 @@ locals { # Automatically load vpc-level variables vpc_vars = read_terragrunt_config(find_in_parent_folders("vpc.hcl")) + # Add any other locals you want to expose + # only expose things not already included via local.xxx_vars.locals.* + root_locals_for_inputs = { + is_module_enabled = local.is_module_enabled + module_name = local.module_name + } + # Extract the variables we need for easy 
access - account_id = local.account_vars.locals.aws_account_id - aws_profile = local.account_vars.locals.aws_profile - aws_region = local.region_vars.locals.aws_region - cluster_name = local.cluster_vars.locals.cluster_name - environment_abbr = local.account_vars.locals.environment_abbr - organization = local.common_vars.locals.organization - project_name = local.common_vars.locals.project_name - project_number = local.common_vars.locals.project_number - project_role = local.common_vars.locals.project_role + account_id = local.account_vars.locals.aws_account_id + account_name = local.account_vars.locals.account_name + aws_profile = local.account_vars.locals.aws_profile + aws_region = local.region_vars.locals.aws_region + cluster_name = local.cluster_vars.locals.cluster_name + eecr_info = local.common_vars.locals.eecr_info + environment_abbr = local.account_vars.locals.environment_abbr + finops_project_name = local.cluster_vars.locals.finops_project_name + finops_project_number = local.cluster_vars.locals.finops_project_number + finops_project_role = local.cluster_vars.locals.finops_project_role + is_eks_module = local.module_name == "eks" + prefixes = local.prefix_vars.locals.prefixes + is_module_enabled = merge( + { for module in local.versions.locals.core_modules : module => true }, + local.versions.locals.enabled_modules, + local.module_overrides + ) + module_name = basename(get_original_terragrunt_dir()) + module_overrides = local.cluster_vars.locals.module_enablement_overrides + organization = local.cluster_vars.locals.organization state_bucket_prefix = local.common_vars.locals.state_bucket_prefix state_table_name = local.common_vars.locals.state_table_name - # Check if current module is the EKS module - module_name = basename(get_original_terragrunt_dir()) - is_eks_module = local.module_name == "eks" } # Only generate providers for non-EKS modules @@ -82,7 +100,7 @@ generate "helm_provider" { if_exists = "overwrite_terragrunt" contents = local.is_eks_module ? 
"" : <<-EOF provider "helm" { - kubernetes { + kubernetes = { host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null token = local.cluster_exists ? data.aws_eks_cluster_auth.this[0].token : "dummy" @@ -94,24 +112,19 @@ generate "helm_provider" { # Configure Terragrunt to automatically store tfstate files in an S3 bucket remote_state { - backend = "s3" + disable_init = tobool(get_env("TG_DISABLE_INIT", "false")) + backend = "s3" generate = { path = "remote_state.backend.tf" if_exists = "overwrite_terragrunt" } config = { - bucket = "${local.state_bucket_prefix}-${local.account_id}" - dynamodb_table = "${local.state_table_name}" - key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}/terraform.tfstate" - profile = "${local.aws_profile}" - region = "${local.aws_region}" - disable_bucket_update = true - skip_bucket_enforced_tls = true # use only if you need to access the S3 bucket without TLS being enforced - skip_bucket_public_access_blocking = true - skip_bucket_root_access = true # use only if the AWS account root user should not have access to the remote state bucket for some reason - skip_bucket_ssencryption = true # use only if non-encrypted OpenTofu/Terraform State is required and/or the object store does not support server-side encryption - skip_bucket_versioning = false # use only if the object store does not support versioning - enable_lock_table_ssencryption = false # use only if non-encrypted DynamoDB Lock Table for the OpenTofu/Terraform State is required and/or the NoSQL database service does not support server-side encryption + bucket = "${local.state_bucket_prefix}-${local.account_id}" + use_lockfile = true + key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), 
"/")}/terraform.tfstate" + profile = "${local.aws_profile}" + region = "${local.aws_region}" + disable_bucket_update = true } } @@ -128,9 +141,9 @@ generate "aws-provider" { cluster_name = "${local.cluster_name}" "boc:module_name" = "${local.module_name}" environment = "${local.environment_abbr}" - finops_project_name = "${local.project_name}" - finops_project_number = "${local.project_number}" - finops_project_role = "${local.project_role}" + finops_project_name = "${local.finops_project_name}" + finops_project_number = "${local.finops_project_number}" + finops_project_role = "${local.finops_project_role}" organization = "${local.organization}" } } @@ -152,7 +165,9 @@ inputs = merge( local.account_vars.locals, local.cluster_vars.locals, local.common_vars.locals, + local.prefix_vars.locals, local.region_vars.locals, local.versions.locals, - local.vpc_vars.locals + local.vpc_vars.locals, + local.root_locals_for_inputs ) diff --git a/notes.md b/notes.md deleted file mode 100644 index 55a5ffc3..00000000 --- a/notes.md +++ /dev/null @@ -1,78 +0,0 @@ -I really like these suggestions, but I want to help shape your suggestions with some prime directives for these tasks: -1. security is paramount. we operate in govcloud and handle titled data. security is the most important consideration. -2. cost control - this is a base cluster for a customer to build on top of for their apps. It is anticipated there will be significant time between initial provisioning and first use. The cheapest possible configuration for secure operations in govcloud. -3. simplicity. ideally, I want to be able to add a single file to an exising git repository (which represents an aws account), and have it spawn this entire cluster definition. -4. maintainability. As in, a minimum amount of effort to maintain,, prioritizing future-proofing in decisions. -5. extensibility. try to keep things modular and able to be glued together as easy as possible. -6. best practices. 
should probably be higher in this list, but at all times, we should endevour to follow/encourage best practices. -7. testability. we are dealing with eks clusters in aws here. by nature, these are expensive resources. anything we can do to test without creation of resources, or rapid creation and destruction, is encouraged. -8. documentation - including the 5 W's (who, what, when, where, why, and how) - -Given those guidelines, does that change your suggestions? Should we start the code review over with those in mind? - -Improvement: Consider adding validation blocks for required variables -Improvement: Add more detailed comments explaining configuration choices -Improvement: Consider tagging strategy for cost allocation -Improvement: Add lifecycle policies for node groups -Warning: Public endpoint access enabled - consider restricting CIDR ranges -Improvement: Add explicit IAM role configurations -Improvement: Implement network policies -# Add to cluster configuration -cluster_security_group_additional_rules = { - ingress_nodes_ephemeral_ports = { - description = "Node to node ephemeral ports" - protocol = "tcp" - from_port = 1025 - to_port = 65535 - type = "ingress" - source_node_security_group = true - } -} -Add CloudWatch logging configuration -Implement proper metrics collection -Set up alerts for cluster health -Improvement: Add more detailed documentation -Improvement: Consider adding test environments -Add README files in each major directory -Document deployment procedures -Add troubleshooting guides -Document network architecture - -resource "aws_eks_cluster" "main" { - # ...existing code... 
- vpc_config { - endpoint_private_access = true - endpoint_public_access = false # Force private endpoint only - security_group_ids = [aws_security_group.cluster.id] - subnet_ids = var.private_subnet_ids - } - - encryption_config { - provider { - key_arn = aws_kms_key.eks.arn - } - resources = ["secrets"] - } -} - -24m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API -24m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/loki-write invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API -22m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: did not receive metrics for targeted pods (pods might be unready) -2 -29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) -29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) -29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) -29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed 
to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) -2 -* Failed to execute "terraform_current apply -lock-timeout=20m -auto-approve -input=false -auto-approve" in ./.terragrunt-cache/jrM5TqaHxjlphT8vQ1DicmFp6eM/1NbRS_ankC8AcxKegXNWAnjyQEg - ╷ - │ Error: Unable to continue with install: Certificate "platform-eng-eks-mcm" in namespace "istio-system" exists and cannot be imported into the current release: invalid ownership metadata; annotation validation error: key "meta.helm.sh/release-name" must equal "grafana-grafana-ingress": current value is "k8s-dashboard-k8s-dashboard-ingress"; annotation validation error: key "meta.helm.sh/release-namespace" must equal "grafana": current value is "k8s-dashboard" - │ - │ with module.ingress_resources.helm_release.ingress, - │ on .terraform/modules/ingress_resources/main.tf line 6, in resource "helm_release" "ingress": - │ 6: resource "helm_release" "ingress" { - │ - ╵ - - exit status 1 - \ No newline at end of file diff --git a/plan.md b/plan.md deleted file mode 100644 index bd058fd3..00000000 --- a/plan.md +++ /dev/null @@ -1,271 +0,0 @@ -Project Plan: EKS Infrastructure Codebase Improvements -1. 
Documentation Standardization - - Create centralized documentation standards guide - - Implement standardized README structure across all modules: - * Overview and purpose - * Prerequisites and dependencies - * Usage examples with variables - * Architecture diagrams - * Operations guide - - Establish changelog format using Commitizen convention - - Create architecture diagrams: - * High-level system architecture - * Module relationships - * Network flow diagrams - * Security group configurations - - Develop consistent module examples: - * Basic usage patterns - * Advanced configurations - * Migration guides - * Troubleshooting guides - - Implementation timeline: - * Week 1: Standards guide creation - * Week 2-3: README updates - * Week 4: Diagram creation - * Week 5: Example development - * Week 6: Review and refinement - -2. Security Enhancements - - EKS Security Group Configurations: - * Implement least-privilege access rules - * Restrict node group communication - * Define approved ingress/egress patterns - * Document security group dependencies - - - AWS GovCloud Security Implementation: - * Enable FIPS 140-2 compliant endpoints - * Implement NIST 800-53 controls - * Configure AWS KMS for all sensitive data - * Enable AWS Organizations SCPs - - - Encryption Configurations: - * Enable envelope encryption for secrets - * Implement at-rest encryption for EBS volumes - * Configure TLS for all service communications - * Rotate encryption keys automatically - - - Network Security Policies: - * Define default deny policies - * Create application-specific network policies - * Implement pod security policies - * Configure service mesh security - - - Implementation Timeline: - * Week 1: Security audit and gap analysis - * Week 2: Security group updates - * Week 3: Encryption improvements - * Week 4: Network policy implementation - * Week 5: Testing and validation - * Week 6: Documentation and training - -3. 
Observability Improvements - - Prometheus Configuration Standardization: - * Define standard metric collection rules - * Implement consistent recording rules - * Set up unified alerting rules - * Configure HA architecture - - - Metrics Collection Strategy: - * Define golden signals metrics - * Implement custom metric collectors - * Set up SLO/SLI tracking - * Configure cost metrics collection - - - Logging Framework: - * Implement structured logging - * Configure log aggregation - * Set up log retention policies - * Enable audit logging - - - Grafana Dashboards: - * Create cluster health dashboards - * Implement cost monitoring views - * Set up performance dashboards - * Configure security monitoring panels - - - Implementation Timeline: - * Week 1: Metrics standardization - * Week 2: Logging implementation - * Week 3: Dashboard creation - * Week 4: Alert configuration - * Week 5: Testing and validation - * Week 6: Documentation and training - -4. Infrastructure Optimization - - Node Group Configuration: - * Implement right-sized instance types - * Configure optimal scaling thresholds - * Set up mixed-instance policies - * Define node taints and labels - - - Auto-scaling Strategy: - * Configure Cluster Autoscaler settings - * Implement Karpenter provisioners - * Set up pod disruption budgets - * Define scaling policies - - - Storage Optimization: - * Define storage class specifications - * Implement volume encryption - * Configure backup policies - * Set up snapshot schedules - - - Resource Management: - * Implement namespace quotas - * Define limit ranges - * Configure resource requests/limits - * Set up cost allocation tags - - - Implementation Timeline: - * Week 1: Node group optimization - * Week 2: Auto-scaling implementation - * Week 3: Storage configuration - * Week 4: Resource quotas setup - * Week 5: Testing and validation - * Week 6: Documentation and training - -5. 
Module Organization - - Module Standardization: - * Create consistent module structure - * Implement standard naming conventions - * Define input/output patterns - * Establish version constraints - - - Variable Management: - * Create shared variable definitions - * Implement variable validation rules - * Define default value standards - * Document variable dependencies - - - Version Control: - * Implement semantic versioning - * Create version compatibility matrix - * Define upgrade paths - * Document breaking changes - - - Dependencies: - * Map module relationships - * Document cross-module dependencies - * Define initialization order - * Create dependency graphs - - - Implementation Timeline: - * Week 1: Module structure standardization - * Week 2: Variable management - * Week 3: Version control implementation - * Week 4: Dependency documentation - * Week 5: Testing and validation - * Week 6: Documentation and training - -6. Testing Framework - - Terraform Validation: - * Implement pre-commit hooks - * Configure format checking - * Add variable validation - * Set up static analysis - - - Integration Testing: - * Create test environments - * Implement end-to-end tests - * Configure smoke tests - * Set up regression testing - - - Security Testing: - * Implement security scanners - * Configure compliance checks - * Add vulnerability scanning - * Set up secret detection - - - Test Automation: - * Configure CI/CD pipelines - * Implement test reporting - * Set up coverage tracking - * Create automated rollbacks - - - Implementation Timeline: - * Week 1: Validation framework setup - * Week 2: Integration test development - * Week 3: Security scanning implementation - * Week 4: Automation configuration - * Week 5: Testing and validation - * Week 6: Documentation and training - -Implementation Priority: - - Security Enhancements (Critical) - - Observability Improvements (High) - - Infrastructure Optimization (High) - - Documentation Standardization (Medium) - - Module 
Organization (Medium) - - Testing Framework (Medium) - -Key Metrics: - - Security compliance score - - Resource utilization efficiency - - Documentation coverage - - Test coverage - - Code duplication reduction - - Deployment success rate - -Next Steps: - -1. Security Audit (Week 1-2) - - Perform comprehensive security assessment - * Review IAM roles and permissions - * Audit security group configurations - * Analyze network policies - * Review encryption settings - - Generate security findings report - - Prioritize security improvements - - Create remediation timeline - -2. Implementation Planning (Week 2-3) - - Create detailed project timeline - * Break down tasks by module - * Identify dependencies - * Assign ownership - * Set milestones - - Establish success criteria - - Define rollback procedures - - Create risk mitigation strategies - -3. Testing Pipeline Setup (Week 3-4) - - Configure CI/CD infrastructure - * Set up test environments - * Implement automated testing - * Configure quality gates - * Enable security scanning - - Create test data sets - - Develop test scenarios - - Implement monitoring for test environments - -4. Documentation Enhancement (Week 4-5) - - Audit existing documentation - - Create documentation templates - - Update README files - - Generate architecture diagrams - - Create operational runbooks - - Document emergency procedures - -5. Module Consolidation (Week 5-6) - - Analyze current module structure - - Identify consolidation opportunities - - Create module dependency map - - Plan refactoring phases - - Document migration steps - - Create validation checklist - -6. Validation and Review (Week 6-7) - - Conduct peer reviews - - Perform security validation - - Test documentation accuracy - - Validate monitoring setup - - Review automation effectiveness - - Gather stakeholder feedback - -7. 
Training and Handover (Week 7-8) - - Prepare training materials - - Schedule training sessions - - Document operational procedures - - Create troubleshooting guides - - Set up support channels - - Plan knowledge transfer sessions