diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..180d125 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,113 @@ +# GitHub Copilot Instructions — terraform-eks-deployment + +## Project Context + +This is the **Terraform workspace** that creates EKS cluster GitHub repositories as part of the +EKS Cluster Automation (ECA) system. It is run by a CodeBuild project (`eks-terragrunt-repo-creator`) +triggered at runtime by the `eks-terragrunt-repo-gen-template-automation` Lambda function. + +--- + +## Architecture: Lambda → CodeBuild → Terraform + +EKS cluster repos are created via this chain: + +``` +SC Console → CFN Stack → Custom::GitHubRepository + → Lambda (eks-terragrunt-repo-gen-template-automation) + → CodeBuild project (eks-terragrunt-repo-creator) ← this repo runs here + → git clone terraform-eks-deployment (REPO_BRANCH) + → terraform init (providers from registry.terraform.io via Census proxy) + → terraform apply -auto-approve + → CSVD/terraform-github-repo module: + → Creates GHE repo from template-eks-cluster + → Writes 8 rendered Terragrunt HCL files via managed_extra_files + → Opens pull request (repo-init → main) + → Lambda polls build → fetches PR URL → cfn-response SUCCESS +``` + +### CodeBuild environment + +CodeBuild runs on `aws/codebuild/amazonlinux2-x86_64-standard:3.0` (Amazon Linux 2). +Key env vars injected by the Lambda at build-start time: + +| Variable | Source | Purpose | +|----------|--------|---------| +| `GITHUB_TOKEN` | Secrets Manager `ghe-runner/github-token` (PAT, `ghp_`) | Terraform GitHub provider auth; git clone | +| `TF_VAR_name` | CFN input `project_name` | Repo name | +| `TF_VAR_environment` | CFN input `environment` | e.g. `dev` | +| `TF_VAR_region` | CFN input `aws_region` | e.g. `us-gov-west-1` | +| `TF_VAR_cluster_config` | JSON-encoded EKS fields | `vpc_name`, `vpc_domain_name`, `cluster_name`, etc. 
| +| `TF_VAR_finops` | JSON-encoded FinOps fields | project name/number | +| `GITHUB_OWNER` | `SCT-Engineering` | GitHub org for provider | +| `GITHUB_BASE_URL` | `https://github.e.it.census.gov` | GHE API base URL | + +The buildspec is inlined from this repo's `buildspec.yml` into the +`aws_codebuild_project.eks_repo_creator` resource in `lambda-template-repo-generator/deploy/main.tf`. +After editing `buildspec.yml`, run `tf apply` in `lambda-template-repo-generator/deploy/` to update. + +--- + +## Key Files + +| File | Purpose | +|------|--------| +| `buildspec.yml` | CodeBuild build steps: install Terraform + Census CA cert, clone repo, tf init + apply | +| `main.tf` | Module call to `CSVD/terraform-github-repo`; `managed_extra_files` with rendered HCL | +| `providers.tf` | GitHub provider config (`>= 6.11.0`, `insecure = true`) + AWS provider | +| `variables.tf` | All TF_VAR_* inputs from CodeBuild env (name, environment, region, cluster_config, finops) | +| `locals.tf` | Decodes `cluster_config` JSON; resolves HCL template content | +| `defaults.tf` | Default values for template repo org, template name, org name | +| `callnotes.md` | Session notes and fix log | + +--- + +## SC Product Deployment Methods + +See `lambda-template-repo-generator/.github/copilot-instructions.md` for full details. +Both methods use the same CFN product template; they must stay in sync. + +- **Method 1** (testing): `cd lambda-template-repo-generator/deploy && tf apply` +- **Method 2** (production): `cd terraform-service-catalog-census/non-prod/csvd-dev/west/service-catalog && tf apply` + +Always verify Method 1 works first when debugging a census deployment issue. 
+ +--- + +## Key Resources + +| Resource | Location | Purpose | +|----------|----------|---------| +| Lambda | `eks-terragrunt-repo-gen-template-automation` (us-gov-west-1, 229685449397) | CFN Custom Resource handler; starts CodeBuild | +| CodeBuild (repo creation) | `eks-terragrunt-repo-creator` | Runs this workspace via tf apply | +| CodeBuild (image build) | `eks-terragrunt-repo-generator-builder` | Builds Lambda container via packer | +| S3 assets bucket | `csvd-packer-pipeline-assets` | Terraform binary, Census CA cert, Packer binary | +| GitHub token (Terraform) | Secrets Manager `ghe-runner/github-token` | PAT (`ghp_`) for Terraform GitHub provider | +| GitHub token (Lambda) | Secrets Manager `/eks-cluster-deployment/github_token` | App token (`ghs_`) for Lambda Python API calls | +| CSVD TF module | `https://github.e.it.census.gov/CSVD/terraform-github-repo` | Creates repo + files + PR | +| Census SC product template | `terraform-service-catalog-census/templates/products/eks-terragrunt-repo/2-0-0.yaml` | Live SC product CFN template | +| Canonical SC product template | `lambda-template-repo-generator/service-catalog/product-template.yaml` | Reference/source of truth | + +--- + +## Important Runtime Notes + +- **Terraform binary** is installed from `s3://csvd-packer-pipeline-assets/terraform/terraform_1.9.1_linux_amd64.zip` in the INSTALL phase — `releases.hashicorp.com` is blocked on the Census network +- **Census CA cert** is installed from `s3://csvd-packer-pipeline-assets/certs/census-ca.pem` via `update-ca-trust` — required for TLS to `github.e.it.census.gov` +- **Census proxy** `http://proxy.tco.census.gov:3128` must be set as `HTTPS_PROXY`/`HTTP_PROXY` — required for `registry.terraform.io` provider downloads +- **`github.e.it.census.gov`** must be in `NO_PROXY` — direct connection (not via proxy) +- **GitHub provider version** must be `>= 6.11.0` — required by `CSVD/terraform-github-repo` module's `~> 6.11` constraint +- **`provider "github" { 
insecure = true }`** in `providers.tf` — belt-and-suspenders for TLS
+- Pass `vpc_name` (string) — **not** `vpc_id` — or `is_eks_deployment` in the Lambda returns `False`
+
+---
+
+## What NOT to Do
+
+- ❌ Do not switch back to a Lambda-Python-only repo creation approach — all repo creation must run through CodeBuild + this Terraform workspace (single maintenance point)
+- ❌ Do not use `HappyPathway/terraform-github-repo` **public** module — pins `github ~> 6.0`, conflicts with `>= 6.6.0` requirement
+- ✅ DO use `CSVD/terraform-github-repo` (https://github.e.it.census.gov/CSVD/terraform-github-repo) — internal, supports `template_repo` + `managed_extra_files`
+- ❌ Do not use SSH-based remote module sources (`git::ssh://`) — Census proxy blocks SSH host key exchange; use HTTPS
+- ❌ Do not add `vpc_id` as a parameter to SC product templates — use `vpc_name`
+- ❌ Do not write temp files or command output to `/tmp` — use `~/tmp` (i.e. `/home/a/arnol377/tmp`) instead
+- ❌ Do not use the `terraform` command directly — always use the `tf` alias (e.g. `tf plan`, `tf apply`, `tf init`)
diff --git a/buildspec.yml b/buildspec.yml
new file mode 100644
index 0000000..d37f165
--- /dev/null
+++ b/buildspec.yml
@@ -0,0 +1,91 @@
+version: 0.2
+# buildspec.yml — terraform-eks-deployment / eks-terragrunt-repo-creator
+#
+# This buildspec is used by the CodeBuild project that is triggered by the
+# Lambda function (eks-terragrunt-repo-gen-template-automation) to create an
+# EKS cluster GitHub repository.
+#
+# Required environment variables (injected by the Lambda as overrides):
+#   TF_VAR_name — cluster / repo name
+#   TF_VAR_environment — environment (dev / nonprod / prod)
+#   TF_VAR_region — AWS region (e.g. us-gov-west-1)
+#   TF_VAR_cluster_config — JSON object with account_name, aws_account_id, etc.
+#   TF_VAR_finops — JSON object with finops project_name / project_number
+#   GITHUB_TOKEN — GitHub PAT (passed from Lambda's Secrets Manager read)
+#   GITHUB_OWNER — GitHub org (default: SCT-Engineering)
+#   GITHUB_BASE_URL — GHE base URL (e.g. https://github.e.it.census.gov)
+
+env:
+  variables:
+    TF_VERSION: "1.9.1"
+    ASSETS_BUCKET: "csvd-packer-pipeline-assets"
+    REPO_HOST: "github.e.it.census.gov"
+    REPO_ORG: "SCT-Engineering"
+    REPO_NAME: "terraform-eks-deployment"
+    REPO_BRANCH: "fix/eca-copilot-instructions-and-callnotes" # update to main once merged
+    # Disable TLS verification for Census GHE (Census CA cert not trusted by default)
+    GIT_SSL_NO_VERIFY: "true"
+    TF_VAR_run_in_codebuild: "true"
+    TF_CLI_ARGS: "-no-color"
+    # Census proxy — required for registry.terraform.io provider downloads
+    HTTPS_PROXY: "http://proxy.tco.census.gov:3128"
+    HTTP_PROXY: "http://proxy.tco.census.gov:3128"
+    # Exclude AWS-internal endpoints and Census GHE from the proxy
+    NO_PROXY: "169.254.169.254,169.254.170.2,s3.us-gov-west-1.amazonaws.com,s3.amazonaws.com,.amazonaws.com,.us-gov-west-1.amazonaws.com,github.e.it.census.gov"
+
+phases:
+  install:
+    commands:
+      # ── Install Census Bureau CA certificate ──────────────────────────────
+      # The Census GHE TLS cert is issued by the Census Bureau CA which is not
+      # trusted by the CodeBuild Amazon Linux 2 trust store by default.
+      - |
+        aws s3 cp "s3://${ASSETS_BUCKET}/certs/census-ca.pem" \
+          /etc/pki/ca-trust/source/anchors/census-ca.pem 2>/dev/null \
+          && update-ca-trust \
+          && echo "Census CA cert installed" \
+          || echo "WARNING: could not install Census CA cert (continuing anyway)"
+
+      # ── Install Terraform ─────────────────────────────────────────────────
+      - |
+        if ! command -v terraform &>/dev/null; then
+          TF_ZIP="terraform_${TF_VERSION}_linux_amd64.zip"
+          echo "Installing Terraform ${TF_VERSION}..."
+          aws s3 cp "s3://${ASSETS_BUCKET}/terraform/${TF_ZIP}" "/tmp/${TF_ZIP}" 2>/dev/null \
+            || curl -fsSL "https://releases.hashicorp.com/terraform/${TF_VERSION}/${TF_ZIP}" -o "/tmp/${TF_ZIP}"
+          unzip -oq "/tmp/${TF_ZIP}" -d /usr/local/bin/
+          chmod +x /usr/local/bin/terraform
+          rm "/tmp/${TF_ZIP}"
+        fi
+      - terraform version
+
+      # ── Install Python dependencies for post-apply scripts ───────────────
+      - pip3 install --quiet httpx rich
+
+      # ── Clone terraform-eks-deployment ───────────────────────────────────
+      # The credential helper is single-quoted so ${GITHUB_TOKEN} is expanded when
+      # git runs the helper, instead of being written in clear text to ~/.gitconfig.
+      - |
+        git config --global credential.helper \
+          '!f() { echo username=x-access-token; echo "password=${GITHUB_TOKEN}"; }; f'
+        git clone --depth 1 --branch "${REPO_BRANCH}" \
+          "https://${REPO_HOST}/${REPO_ORG}/${REPO_NAME}.git" \
+          /tmp/eks-deploy
+      - echo "Cloned ${REPO_ORG}/${REPO_NAME} @ $(git -C /tmp/eks-deploy rev-parse --short HEAD)"
+
+  build:
+    commands:
+      - cd /tmp/eks-deploy
+      - echo "=== terraform init ==="
+      - terraform init -no-color
+      - echo "=== terraform apply ==="
+      - terraform apply -auto-approve -no-color
+
+  post_build:
+    commands:
+      - |
+        if [ "${CODEBUILD_BUILD_SUCCEEDING}" = "0" ]; then
+          echo "Build FAILED — check logs above"
+        else
+          echo "Build SUCCEEDED — repository created"
+        fi
diff --git a/callnotes.md b/callnotes.md
new file mode 100644
index 0000000..d00f967
--- /dev/null
+++ b/callnotes.md
@@ -0,0 +1,160 @@
+Action Plan from Matt Sync
+
+1. Clean up test resources in CSVD Dev
+   - Remove or reset any test/demo items in the CSVD Dev environment.
+   - Prep the environment for a clean demo.
+ - STATUS: ✅ CLEANED UP (2026-04-06) + - Terminated 5 SC provisioned products: `daves-real-test`, `EKS_Cluster_GitHub_Repository-01222158`, `test-enterprise-eks-fix`, `test-tf-init-fix`, `test-full-run` + - Deleted 3 SC admin products: `EKS Cluster GitHub Repository` (prod-a2v6d2aecpy7o), `eks-terragrunt-eks-repo-creator` (prod-4tee6zssmvf7a), `EKS Terragrunt Repository Creator - dev` (prod-4gchyoxp2wh74) + - Deleted 3 SC portfolios: `eks-cluster-automation`, `eks-terragrunt-github-automation`, `EKS Terragrunt Repository Creator Portfolio` + - Deleted GHE repo: `SCT-Engineering/daves-lambda-test` (`daves-real-test` was never created in GHE) + +2. Verify EKS Automation (ECA) Works + - Ensure the EKS automation pipeline is functional end-to-end. + - No need for a polished demo, just confirm it works. + - STATUS: ✅ WORKING (2026-04-02) + - Pivoted from CodeBuild+Terraform approach to direct Lambda invocation + - Lambda `eks-terragrunt-repo-gen-template-automation` successfully: + - Created repo: https://github.e.it.census.gov/SCT-Engineering/daves-lambda-test + - Rendered 8 Terragrunt HCL files via Jinja2 (root.hcl, account.hcl, region.hcl, vpc.hcl, cluster.hcl, README.md, common-variables.hcl, default-versions.hcl) + - Created PR: https://github.e.it.census.gov/SCT-Engineering/daves-lambda-test/pull/1 + - Fix applied: set VERIFY_SSL=false on Lambda (Census CA cert not in container's certifi bundle) + - No CodeBuild, no Terraform providers, no SSH keys needed + - STATUS: ✅ SC PRODUCT TEMPLATES SYNCED (2026-04-02) + - `2-0-0.yaml` (census live) was diverged from canonical `product-template.yaml`: + - Had `VpcId: AWS::EC2::VPC::Id` (dropdown) — Lambda needs `vpc_name` string → EKS path never triggered + - Missing `AccountName`, `AWSAccountId`, `AwsRegion`, `OrganizationPath`, `FinOps*` params + - Had a comment claiming DynamoDB lookup for missing params — that code does not exist + - Fixed `2-0-0.yaml` to match Lambda interface exactly (vpc_name, all required fields) + 
- Updated `product-template.yaml` to use `!Sub "arn:..."` for ServiceToken (dropped LambdaFunctionArn param) + - Both templates now functionally identical + - STATUS: ✅ COPILOT INSTRUCTIONS CREATED (2026-04-02) + - `lambda-template-repo-generator/.github/copilot-instructions.md` — comprehensive Lambda-first guidance + - `terraform-eks-deployment/.github/copilot-instructions.md` — deployment workspace context + - Both explicitly document the abandoned CodeBuild approach and why it was replaced + +3. Demo ECA Workflow + - Prepare to walk through the ECA (EKS Cluster Automation) process. + - Target is an internal demo, not a full CSVD stakeholder presentation. + - STATUS: ✅ SC PRODUCT REDEPLOYED VIA TERRAFORM (2026-04-06) + - Two deployment methods established and documented in copilot instructions: + 1. **Direct Terraform** (`lambda-template-repo-generator/deploy/`) — canonical, use for testing/debugging + 2. **terraform-service-catalog-census Terragrunt** — production path + - `tf apply` in `deploy/` recreated 6 SC resources (portfolio, product, association, principal, 2 constraints) + - New IDs: portfolio `port-h5qd63hw5yagq`, product `prod-lmua4oknugafg` + - Rule: always verify Method 1 works before debugging Method 2 (census pipeline) + + - STATUS: ✅ PATH STRUCTURE FIXED AND LAMBDA REBUILT (2026-04-06) + - Matt feedback: "The modules in cluster folder should be in the dave-demo-test-eks/ folder" + - Template repo was being cloned verbatim — placeholder paths (`environment/region/vpc/cluster/eks-*/`) + were landing in the generated repo instead of dynamic paths (`{env}/{region}/{vpc}/{cluster}/eks-*/`) + - Fix: added `path_mapper` parameter to `clone_repository_contents()` in `github_provider.py` + - Added `build_eks_path_mapper(cfg)` in `app.py` — remaps `environment/region/vpc/cluster/{rest}` + → `{env}/{region}/{vpc}/{cluster}/{rest}` and excludes the 4 placeholder HCL files + (replaced by Jinja2-rendered versions: account.hcl, region.hcl, vpc.hcl, cluster.hcl) 
+ - Lambda rebuilt via packer-pipeline (build #6 SUCCEEDED), Lambda updated: + `eks-terragrunt-repo-gen-template-automation` @ `lambda:latest` (sha256:af0b5...) + - PR #1 on dave-demo-test-eks now shows correct structure: + https://github.e.it.census.gov/SCT-Engineering/dave-demo-test-eks/pull/1 + +4. Reference Matt's tfmod-pipeline + - Review the initial branch of https://github.e.it.census.gov/SCT-Engineering/tfmod-pipeline. + - Look for CodePipeline/CodeBuild setup, S3 triggers, and artifact download logic. + - Use as a reference for your own pipeline/code. + +5. Pivot: Use terraform-eks-deployment via CodeBuild (single maintenance point) + - PROBLEM IDENTIFIED (2026-04-06): Current Lambda Python code duplicates repo-creation logic + that `terraform-eks-deployment` already implements as a Terraform workspace. + Matt's feedback: "If you do it in Lambda Python code, we have two places to maintain for the + same process. If you can make it run the same TF, we have a single place to maintain." + - DECISION: Commit current state as a safe revert baseline, then pivot. 
+ - KEY ENABLER: Internal module `CSVD/terraform-github-repo` + - https://github.e.it.census.gov/CSVD/terraform-github-repo + - Uses `github provider 6.6.0` — satisfies internal `>= 6.6.0` requirement + - Supports `template_repo` + `template_repo_org` — clones template-eks-cluster directly + - Supports `managed_extra_files` (`path` + `content`) — writes rendered HCL files + - Contributors: arnol377 + morga471 (Matt) — maintainable internally + - ARCHITECTURE (target): + ``` + SC Console → CFN Stack → Custom::GitHubRepository → Lambda + → Lambda triggers CodeBuild job + → CodeBuild runs terraform-eks-deployment workspace (tf apply) + → Terraform (via CSVD/terraform-github-repo) creates GHE repo + writes HCL files + → Lambda polls CodeBuild → reports SUCCESS/FAILED to CFN + ``` + - WHY THE EARLIER CODEBUILD+TERRAFORM ATTEMPT FAILED (context): + - That attempt used `HappyPathway/terraform-github-repo` **public** module + — pinned `github ~> 6.0`, conflicted with internal modules requiring `>= 6.6.0` + - SSH host key failures downloading remote Terraform modules + - AWS credential proxy issues inside CodeBuild + - `CSVD/terraform-github-repo` (internal) uses `github 6.6.0` — provider conflict RESOLVED + - `terraform-eks-deployment` doesn't pull SSH remote modules — those blockers don't apply + - STEP 1: Commit lambda-template-repo-generator to GHE (safe revert point) + - STATUS: ✅ DONE (2026-04-06) — commit a79cee4 on fix/eca-lambda-approach-and-copilot-docs + - Current state: fully functional Lambda Python path, path_mapper applied, built + deployed + - STEP 2: Refactor Lambda to be a thin orchestrator + - STATUS: ✅ DONE (2026-04-06) — commit ec54b54 + - Added `start_codebuild_build()` and `poll_codebuild_build()` to app.py + - EKS deployment path now: validate params → start CodeBuild → poll → cfn-response + - Non-EKS path unchanged (still writes config.json via Python GitHub API) + - Lambda env var `CODEBUILD_PROJECT_NAME` = `eks-terragrunt-repo-creator` + - STEP 3: 
Update terraform-eks-deployment — module source + file paths + - STATUS: ✅ DONE (2026-04-06) — commit 91202ff on fix/eca-copilot-instructions-and-callnotes + - Already uses `CSVD/terraform-github-repo`; changed SSH → HTTPS source URL + - Fixed placeholder paths in `managed_extra_files`: + - `"environment/account.hcl"` → `"${var.environment}/account.hcl"` + - `"environment/region/region.hcl"` → `"${var.environment}/${var.region}/region.hcl"` + - `"environment/region/vpc/vpc.hcl"` → `"${var.environment}/${var.region}/${var.cluster_config.vpc_name}/vpc.hcl"` + - `"environment/region/vpc/cluster/cluster.hcl"` → dynamic path with all four vars + - STEP 4: Wire CodeBuild buildspec (`tf init → tf apply`) + - STATUS: ✅ DONE (2026-04-06) — commit ec4d861 + - `buildspec.yml` added to terraform-eks-deployment root + - Installs Terraform from S3 assets bucket (csvd-packer-pipeline-assets) + - Gets GitHub token via `GITHUB_TOKEN` env var (passed by Lambda from Secrets Manager) + - Clones this repo and runs `terraform init + apply -auto-approve` + - `aws_codebuild_project.eks_repo_creator` added to lambda-template-repo-generator/deploy/ + - NEXT: Run `tf apply` in lambda-template-repo-generator/deploy/ to create the CodeBuild project + and update the Lambda env var. Then rebuild Lambda image (packer-pipeline) + end-to-end test. + - Pass rendered HCL content via `managed_extra_files` + - Ensure module accepts all EKS params as variables (driven from CodeBuild env vars) + - STEP 4 (original plan entry — superseded by the completed STEP 4 above): Wire CodeBuild project to terraform-eks-deployment workspace + - CodeBuild buildspec: tf init → tf apply (with env var → tfvar mapping) + - STATUS (at planning time): 🔄 PENDING — commit current state first, then begin refactor +6. 
CodeBuild e2e Debugging (2026-04-07) — Chain of fixes to get first green test + - STATUS: ✅ ALL FIXED — e2e test sc-e2e-test-20260407-1402 PASSED (207s) on 2026-04-07 + - Fix 1: packer YAML_FILE_ERROR in csvd_config_packer.hcl + - `additional_post_build_commands` had `"- docker push ..."` — the template already wraps with `- ` + - Fixed: removed the `- ` prefix. Commit eb18463 on lambda-template-repo-generator. + - Fix 2: `terraform_1.9.0_linux_amd64.zip` not in S3; releases.hashicorp.com blocked + - Uploaded `terraform_1.9.1_linux_amd64.zip` from local tfenv cache to s3://csvd-packer-pipeline-assets/terraform/ + - Updated TF_VERSION=1.9.1 in buildspec.yml. Commit 5e50d7b. + - Fix 3: `registry.terraform.io` blocked by Census network + - Added `HTTPS_PROXY=http://proxy.tco.census.gov:3128` + `HTTP_PROXY` to buildspec env vars. Commit 0ada33a. + - Fix 4: GitHub provider version conflict — workspace `>= 6.6.0, < 6.7.0` vs CSVD module `~> 6.11` + - Changed providers.tf to `>= 6.11.0`, deleted stale .terraform.lock.hcl. Commit 05d6103. + - Fix 5: x509 cert error on github.e.it.census.gov — Census CA not in CodeBuild Amazon Linux 2 trust store + - Extracted Census CA (`US Census Bureau CA 1`) and uploaded to s3://csvd-packer-pipeline-assets/certs/census-ca.pem + - Added INSTALL phase to buildspec: downloads cert, runs update-ca-trust + - Added github.e.it.census.gov to NO_PROXY (bypass proxy for direct connection) + - Added `provider "github" { insecure = true }` as belt-and-suspenders in providers.tf + - Commits d490e1f, 20f9681. 
+ - Fix 6: GitHub App token (ghs_) cannot call /api/v3/user — required by CSVD module's data.github_user.current + - Root cause: /eks-cluster-deployment/github_token is a GitHub App installation token (ghs_) + - Solution: use ghe-runner/github-token (a ghp_ PAT, login: arnol377) for Terraform / CodeBuild + - Lambda app.py: reads TF_GITHUB_TOKEN_SECRET_NAME first, falls back to GITHUB_TOKEN_SECRET_NAME + - deploy/main.tf: added TF_GITHUB_TOKEN_SECRET_NAME=ghe-runner/github-token env var + IAM policy + - Lambda rebuilt via packer (build #10 SUCCEEDED). Commit 5d3ff19 on lambda-template-repo-generator. + - Fix 7: DELETE /vulnerability-alerts: 404 on GHE 3.13 + - CSVD module had `vulnerability_alerts = false` (default) → GitHub provider calls DELETE → GHE 3.13 returns 404 + - Fixed CSVD/terraform-github-repo directly (admin access): + - variables.tf: changed `default = false` → `default = null, nullable = true` + - github_repo.tf: added `vulnerability_alerts` to lifecycle `ignore_changes` + - Pushed commits d7d39cb + 7e088f3 to CSVD/terraform-github-repo main branch + - Fix 8: CFN Outputs require pull_request_url + branch_name but Lambda EKS path wasn't returning them + - Root error: `Vendor response doesn't contain pull_request_url attribute` + - Fixed app.py: after CodeBuild SUCCEEDED, query GitHub API /repos/{org}/{repo}/pulls?state=open + to fetch real PR URL and branch name, include in cfn-response Data. Commit 26c6fe9. 
+ - FINAL RESULT: sc-e2e-test-20260407-1402 → ✔ PASS (207s) + - Repo created: https://github.e.it.census.gov/SCT-Engineering/sc-e2e-test-20260407-1402 + - GitHub Verification: public, default_branch=main + - All CFN outputs populated correctly \ No newline at end of file diff --git a/main.tf b/main.tf index ec257da..c1c8aff 100644 --- a/main.tf +++ b/main.tf @@ -63,13 +63,13 @@ locals { aws_region = var.region, environment = var.environment }), - "${var.environment}/${var.region}/vpc/vpc.hcl" : templatefile("${path.module}/templates/vpc.hcl.tf.tpl", { + "${var.environment}/${var.region}/${var.cluster_config.vpc_name}/vpc.hcl" : templatefile("${path.module}/templates/vpc.hcl.tf.tpl", { vpc_name = var.cluster_config.vpc_name, vpc_domain_name = var.cluster_config.vpc_domain_name, environment = var.environment, aws_region = var.region }), - "${var.environment}/${var.region}/vpc/cluster/cluster.hcl" : templatefile("${path.module}/templates/cluster.hcl.tf.tpl", { + "${var.environment}/${var.region}/${var.cluster_config.vpc_name}/${var.name}/cluster.hcl" : templatefile("${path.module}/templates/cluster.hcl.tf.tpl", { cluster_name = var.name, cluster_mailing_list = var.cluster_config.cluster_mailing_list, aws_profile = local.aws_profile, @@ -197,7 +197,7 @@ locals { } module "github_repo" { - source = "git::https://github.e.it.census.gov/CSVD/terraform-github-repo.git?ref=main" + source = "git::https://github.e.it.census.gov/CSVD/terraform-github-repo.git" name = var.name repo_org = var.organization @@ -234,6 +234,38 @@ module "github_repo" { } +# Move the template's placeholder environment/ dirs to the computed paths via the GitHub API (scripts/rename_template_dirs.py). +# Only runs in CodeBuild (var.run_in_codebuild = true, set by TF_VAR_run_in_codebuild in buildspec). +resource "null_resource" "rename_template_dirs" { + count = var.run_in_codebuild ? 
1 : 0 + + triggers = { + repo_name = var.name + environment = var.environment + region = var.region + vpc_name = var.cluster_config.vpc_name + cluster_name = var.name + } + + provisioner "local-exec" { + interpreter = ["python3"] + command = "${path.module}/scripts/rename_template_dirs.py" + environment = { + GHE_BASE_URL = var.github_server_url + REPO_ORG = var.organization + REPO_NAME = var.name + ENVIRONMENT = var.environment + REGION = var.region + VPC_NAME = var.cluster_config.vpc_name + CLUSTER_NAME = var.name + PR_BRANCH = "repo-init" + # GITHUB_TOKEN is intentionally not listed: the script reads it from the CodeBuild environment, where the Lambda injects it + } + } + + depends_on = [module.github_repo] +} + # The EKS deployment logic will go here, and will be skipped if create_repository is true. output "repository_url" { diff --git a/providers.tf b/providers.tf index 1dc6ac9..67d3555 100644 --- a/providers.tf +++ b/providers.tf @@ -8,5 +8,16 @@ terraform { source = "hashicorp/aws" version = "~> 6.0" } + null = { + source = "hashicorp/null" + version = ">= 3.0" + } } } + +# GITHUB_TOKEN and GITHUB_BASE_URL are injected as env vars by the Lambda. +# insecure = true is required because the Census GHE TLS certificate is signed +# by the Census internal CA, which is not in the CodeBuild container trust store by default. 
+provider "github" {
+  insecure = true
+}
diff --git a/scripts/rename_template_dirs.py b/scripts/rename_template_dirs.py
new file mode 100644
index 0000000..d5a6128
--- /dev/null
+++ b/scripts/rename_template_dirs.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""
+rename_template_dirs.py
+
+After CSVD/terraform-github-repo clones template-eks-cluster and writes
+managed_extra_files, the repo contains placeholder paths from the template:
+
+  environment/region/vpc/cluster/eks-*/terragrunt.hcl
+  environment/region/vpc/cluster/eks/terragrunt.hcl
+  environment/account.hcl
+  environment/region/region.hcl
+  environment/region/vpc/vpc.hcl
+  environment/region/vpc/cluster/cluster.hcl
+
+This script uses the GitHub API to:
+  1. Delete all files under environment/ from the repo-init PR branch.
+  2. Re-add the eks-* files at their correct computed paths:
+       ${ENVIRONMENT}/${REGION}/${VPC_NAME}/${CLUSTER_NAME}/eks-*/terragrunt.hcl
+
+The non-eks files (account.hcl, region.hcl, vpc.hcl, cluster.hcl) are already
+written by managed_extra_files with real rendered content, so they are not
+re-added here.
+
+Required environment variables:
+  GITHUB_TOKEN — GitHub PAT (already set by Lambda / buildspec)
+  GHE_BASE_URL — e.g. https://github.e.it.census.gov
+  REPO_ORG — GitHub org (e.g. SCT-Engineering)
+  REPO_NAME — Repository name (e.g. my-eks-cluster)
+  ENVIRONMENT — e.g. dev
+  REGION — e.g. us-gov-west-1
+  VPC_NAME — e.g. my-vpc
+  CLUSTER_NAME — e.g. my-eks-cluster
+  PR_BRANCH — Branch to modify (default: repo-init)
+"""
+
+import os
+import sys
+
+import httpx
+from rich.console import Console
+from rich.panel import Panel
+
+console = Console()
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+REQUIRED_VARS = [
+    "GITHUB_TOKEN",
+    "GHE_BASE_URL",
+    "REPO_ORG",
+    "REPO_NAME",
+    "ENVIRONMENT",
+    "REGION",
+    "VPC_NAME",
+    "CLUSTER_NAME",
+]
+
+# Template placeholder prefix that the CSVD module clones verbatim
+TEMPLATE_ENV_PREFIX = "environment/region/vpc/cluster/"
+
+# These files under the cluster dir are rendered by managed_extra_files with
+# real values — do NOT re-add them (just delete the placeholder versions).
+MANAGED_BY_TERRAFORM = {"cluster.hcl"}
+
+
+def load_env() -> dict:
+    """Collect settings from the environment; exit with status 1 if a required variable is unset or empty."""
+    missing = [v for v in REQUIRED_VARS if not os.environ.get(v)]
+    if missing:
+        console.print(f"[bold red]Missing required env vars: {', '.join(missing)}[/]")
+        sys.exit(1)
+
+    return {
+        "token": os.environ["GITHUB_TOKEN"],
+        "base_url": os.environ["GHE_BASE_URL"].rstrip("/"),
+        "org": os.environ["REPO_ORG"],
+        "repo": os.environ["REPO_NAME"],
+        "environment": os.environ["ENVIRONMENT"],
+        "region": os.environ["REGION"],
+        "vpc_name": os.environ["VPC_NAME"],
+        "cluster_name": os.environ["CLUSTER_NAME"],
+        "pr_branch": os.environ.get("PR_BRANCH", "repo-init"),
+    }
+
+
+def api_client(cfg: dict) -> httpx.Client:
+    """Return an httpx client configured for the Census GHE API."""
+    return httpx.Client(
+        base_url=f"{cfg['base_url']}/api/v3",
+        headers={
+            "Authorization": f"token {cfg['token']}",
+            "Accept": "application/vnd.github.v3+json",
+        },
+        verify=False, # Census CA cert not in CodeBuild trust store
+        timeout=30,
+    )
+
+
+def get_branch_commit(client: httpx.Client, org: str, repo: str, branch: str) -> tuple[str, str]:
+    """Return (commit_sha, tree_sha) for the tip of branch."""
+    r = client.get(f"/repos/{org}/{repo}/git/ref/heads/{branch}")
+    r.raise_for_status()
+    commit_sha = r.json()["object"]["sha"]
+
+    r = client.get(f"/repos/{org}/{repo}/git/commits/{commit_sha}")
+    r.raise_for_status()
+    tree_sha = r.json()["tree"]["sha"]
+
+    return commit_sha, tree_sha
+
+
+def get_tree(client: httpx.Client, org: str, repo: str, tree_sha: str) -> list[dict]:
+    """Return the full recursive tree as a list of entry dicts."""
+    r = client.get(
+        f"/repos/{org}/{repo}/git/trees/{tree_sha}",
+        params={"recursive": "1"},
+    )
+    r.raise_for_status()
+    data = r.json()
+    if data.get("truncated"):
+        console.print("[yellow]Warning: tree is truncated — repo may have too many files.[/]")
+    return data["tree"]
+
+
+def build_new_tree(
+    entries: list[dict], cfg: dict
+) -> tuple[list[dict], list[str], list[str], list[str]]:
+    """
+    Compute the tree changes to pass to the GitHub Create Tree API.
+
+    Strategy (using base_tree, so we only need to express changes):
+      - For every file under environment/: set sha=null (delete)
+      - For every file under environment/region/vpc/cluster/ that starts with eks:
+        also add it at the correct computed path (preserve sha)
+
+    Returns a 4-tuple (updates, moved, deleted, skipped):
+      - updates: tree entry dicts for the Create Tree request
+      - moved: "old → new" descriptions of the files re-added at computed paths
+      - deleted: every placeholder path removed (includes the moved sources)
+      - skipped: cluster-dir placeholders matched by MANAGED_BY_TERRAFORM
+    """
+    env = cfg["environment"]
+    region = cfg["region"]
+    vpc = cfg["vpc_name"]
+    cluster = cfg["cluster_name"]
+    correct_prefix = f"{env}/{region}/{vpc}/{cluster}/"
+
+    updates: list[dict] = []
+    moved: list[str] = []
+    deleted: list[str] = []
+    skipped: list[str] = []
+
+    for entry in entries:
+        path: str = entry["path"]
+        if entry["type"] != "blob":
+            continue
+
+        if not path.startswith("environment/"):
+            continue
+
+        # Delete the placeholder file
+        updates.append({"path": path, "mode": entry["mode"], "type": "blob", "sha": None})
+        deleted.append(path)
+
+        # Is it under environment/region/vpc/cluster/?
+        if not path.startswith(TEMPLATE_ENV_PREFIX):
+            continue
+
+        rel = path[len(TEMPLATE_ENV_PREFIX):] # e.g. "eks-config/terragrunt.hcl"
+        top_dir = rel.split("/")[0] # e.g. "eks-config"
+
+        if top_dir in MANAGED_BY_TERRAFORM or rel in MANAGED_BY_TERRAFORM:
+            skipped.append(path)
+            continue
+
+        # Move it: eks-* and eks/ dirs only
+        if top_dir.startswith("eks"):
+            new_path = correct_prefix + rel
+            updates.append({"path": new_path, "mode": entry["mode"], "type": "blob", "sha": entry["sha"]})
+            moved.append(f"{path} → {new_path}")
+
+    return updates, moved, deleted, skipped
+
+
+def create_tree(client: httpx.Client, org: str, repo: str, base_tree_sha: str, updates: list[dict]) -> str:
+    """POST a new tree and return its SHA."""
+    r = client.post(
+        f"/repos/{org}/{repo}/git/trees",
+        json={"base_tree": base_tree_sha, "tree": updates},
+    )
+    r.raise_for_status()
+    return r.json()["sha"]
+
+
+def create_commit(client: httpx.Client, org: str, repo: str, parent_sha: str, tree_sha: str, message: str) -> str:
+    """POST a new commit and return its SHA."""
+    r = client.post(
+        f"/repos/{org}/{repo}/git/commits",
+        json={"message": message, "tree": tree_sha, "parents": [parent_sha]},
+    )
+    r.raise_for_status()
+    return r.json()["sha"]
+
+
+def update_ref(client: httpx.Client, org: str, repo: str, branch: str, commit_sha: str) -> None:
+    """Force-update the branch ref to point to commit_sha."""
+    r = client.patch(
+        f"/repos/{org}/{repo}/git/refs/heads/{branch}",
+        json={"sha": commit_sha, "force": True},
+    )
+    r.raise_for_status()
+
+
+def main() -> None:
+    """Rewrite the PR branch in one commit: drop the environment/ placeholders and re-add the eks* files at computed paths."""
+    cfg = load_env()
+
+    console.print(Panel(
+        f"[bold]rename_template_dirs[/]\n"
+        f"Repo: [cyan]{cfg['org']}/{cfg['repo']}[/]\n"
+        f"Branch: [cyan]{cfg['pr_branch']}[/]\n"
+        f"Target: [cyan]{cfg['environment']}/{cfg['region']}/{cfg['vpc_name']}/{cfg['cluster_name']}/[/]",
+        title="EKS Template Dir Rename",
+    ))
+
+    with api_client(cfg) as client:
+        org, repo, branch = cfg["org"], cfg["repo"], cfg["pr_branch"]
+
+        console.print(f"[dim]Fetching branch tip for [bold]{branch}[/]…")
+        commit_sha, tree_sha = get_branch_commit(client, org, repo, branch)
+        console.print(f"[dim]Commit: {commit_sha} Tree: {tree_sha}")
+
+        console.print("[dim]Fetching recursive tree…")
+        entries = get_tree(client, org, repo, tree_sha)
+
+        env_files = [e for e in entries if e["type"] == "blob" and e["path"].startswith("environment/")]
+        if not env_files:
+            console.print("[green]No placeholder environment/ files found — nothing to do.[/]")
+            return
+
+        updates, moved, deleted, skipped = build_new_tree(entries, cfg)
+
+        console.print("\n[bold]Changes to apply:[/]")
+        for m in moved:
+            console.print(f" [green]MOVE[/] {m}")
+        # Derive the moved source paths once, rather than rebuilding the list
+        # for every deleted path.
+        moved_sources = {m.split(" → ")[0] for m in moved}
+        for d in deleted:
+            if d not in moved_sources:
+                console.print(f" [red]DELETE[/] {d} (managed by Terraform)")
+        console.print()
+
+        if not updates:
+            console.print("[yellow]No changes needed.[/]")
+            return
+
+        console.print("[dim]Creating new tree…")
+        new_tree_sha = create_tree(client, org, repo, tree_sha, updates)
+
+        message = (
+            f"chore: rename template placeholder dirs to computed paths\n\n"
+            f"environment/region/vpc/cluster/ → "
+            f"{cfg['environment']}/{cfg['region']}/{cfg['vpc_name']}/{cfg['cluster_name']}/\n\n"
+            f"Moved {len(moved)} eks-module file(s). "
+            f"Deleted {len(deleted) - len(moved)} file(s) already handled by managed_extra_files."
+        )
+        console.print("[dim]Creating commit…")
+        new_commit_sha = create_commit(client, org, repo, commit_sha, new_tree_sha, message)
+
+        console.print(f"[dim]Updating [bold]{branch}[/] → {new_commit_sha}…")
+        update_ref(client, org, repo, branch, new_commit_sha)
+
+    console.print(f"\n[bold green]Done.[/] {len(moved)} file(s) moved, {len(skipped)} skipped (managed by Terraform).")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/variables.tf b/variables.tf
index 2f2bc5f..158eb1a 100644
--- a/variables.tf
+++ b/variables.tf
@@ -71,6 +71,12 @@ variable "force_name" {
   default = true
 }
 
+variable "run_in_codebuild" {
+  description = "Set to true when running inside CodeBuild. Enables the post-apply script that renames placeholder template dirs to computed paths via the GitHub API."
+ type = bool + default = false +} + # Internal variables - these are kept for backward compatibility but should not be exposed to users in examples variable "common_variables" { description = "Common variables across all environments (internal use)"