diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..7dc335e
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,26 @@
+{
+  // Tell the YAML language server about CloudFormation intrinsic function tags
+  // so it doesn't report "Unresolved tag" errors in CFN templates.
+  "yaml.customTags": [
+    "!And sequence",
+    "!Base64 scalar",
+    "!Cidr sequence",
+    "!Condition scalar",
+    "!Equals sequence",
+    "!FindInMap sequence",
+    "!GetAtt scalar",
+    "!GetAZs scalar",
+    "!If sequence",
+    "!ImportValue scalar",
+    "!Join sequence",
+    "!Not sequence",
+    "!Or sequence",
+    "!Ref scalar",
+    "!Select sequence",
+    "!Split sequence",
+    "!Sub scalar",
+    "!Sub sequence",
+    "!Transform mapping",
+    "!Value scalar"
+  ]
+}
diff --git a/buildspec.yml b/buildspec.yml
new file mode 100644
index 0000000..23e9778
--- /dev/null
+++ b/buildspec.yml
@@ -0,0 +1,132 @@
+version: 0.2
+
+# ---------------------------------------------------------------------------
+# tf-run-executor buildspec
+#
+# Required env-var overrides per build (supplied by Lambda or manual CLI):
+#   ACCOUNT_REPO  - account repo name, e.g. 229685449397-csvd-dev-platform-dev-gov
+#   LAYER         - terraform layer: common | infrastructure | vpc
+#   REGION_DIR    - region directory: east | west
+#   GITHUB_TOKEN  - GHE PAT (type PLAINTEXT, value from Secrets Manager)
+#
+# Optional env-var overrides:
+#   GIT_BRANCH        - branch to commit/PR from (default: repo-init)
+#   TF_RUN_START_TAG  - tf-run.data TAG label to start from (default: empty = from top)
+#   EXTRA_FILES       - JSON map {"relative/path": "content"} written before tf-run
+#   DRY_RUN           - "true" = tf plan only, no apply (default: "false")
+# ---------------------------------------------------------------------------
+
+env:
+  variables:
+    GITHUB_ORG: "SCT-Engineering"
+    TF_BINARY_S3: "s3://csvd-packer-pipeline-assets/terraform/terraform_1.9.1_linux_amd64.zip"
+    CENSUS_CA_S3: "s3://csvd-packer-pipeline-assets/certs/census-ca.pem"
+    GH_CLI_S3: "s3://csvd-packer-pipeline-assets/tools/gh_2.49.0_linux_amd64.tar.gz"
+    HTTPS_PROXY: "http://proxy.tco.census.gov:3128"
+    NO_PROXY: "github.e.it.census.gov,169.254.169.254"
+    # Per-build defaults (overridden via environmentVariablesOverride in Lambda)
+    GIT_BRANCH: "repo-init"
+    DRY_RUN: "false"
+    TF_RUN_START_TAG: ""
+    EXTRA_FILES: "{}"
+
+phases:
+  install:
+    commands:
+      # --- Terraform binary (registry.terraform.io is blocked; pull from S3) ---
+      - aws s3 cp "$TF_BINARY_S3" /tmp/terraform.zip
+      - unzip -o /tmp/terraform.zip -d /usr/local/bin/ && chmod +x /usr/local/bin/terraform
+      - ln -sf /usr/local/bin/terraform /usr/local/bin/tf
+
+      # --- Census CA certificate (GHE TLS) ---
+      - aws s3 cp "$CENSUS_CA_S3" /etc/pki/ca-trust/source/anchors/census-ca.pem
+      - update-ca-trust extract
+
+      # --- tf-run toolchain (sourced from this repo's scripts/) ---
+      - cp "$CODEBUILD_SRC_DIR/scripts/tf-run" /usr/local/bin/tf-run
+      - cp "$CODEBUILD_SRC_DIR/scripts/tf-control.sh" /usr/local/bin/tf-control.sh
+      - cp "$CODEBUILD_SRC_DIR/scripts/tf-directory-setup.py" /usr/local/bin/tf-directory-setup.py
+      - chmod +x /usr/local/bin/tf-run /usr/local/bin/tf-control.sh /usr/local/bin/tf-directory-setup.py
+      # Create tf-{action} symlinks expected by tf-run and account repo steps
+      - >
+        for action in init plan apply destroy refresh output validate import state fmt taint console; do
+          ln -sf /usr/local/bin/tf-control.sh /usr/local/bin/tf-${action};
+        done
+
+      # --- Python deps for tf-directory-setup.py ---
+      - pip3 install --quiet jinja2 python-dateutil pyyaml
+
+      # --- gh CLI ---
+      - aws s3 cp "$GH_CLI_S3" /tmp/gh.tar.gz
+      - mkdir -p /tmp/gh-cli
+      - tar -xzf /tmp/gh.tar.gz -C /tmp/gh-cli --strip-components=1
+      - cp /tmp/gh-cli/bin/gh /usr/local/bin/gh && chmod +x /usr/local/bin/gh
+
+  build:
+    commands:
+      # --- Clone account repo over HTTPS (SSH is blocked by Census proxy) ---
+      - git clone "https://${GITHUB_TOKEN}@github.e.it.census.gov/${GITHUB_ORG}/${ACCOUNT_REPO}.git" repo
+      - cd repo
+      - git checkout -B "${GIT_BRANCH}"
+
+      # --- Write extra config files passed in from Lambda (JSON map path -> content) ---
+      - |
+        python3 -c "
+        import json, os, pathlib
+        root = pathlib.Path.cwd().resolve()
+        files = json.loads(os.environ.get('EXTRA_FILES', '{}'))
+        for path, content in files.items():
+            # Resolve against the repo root and refuse anything that escapes it
+            # (absolute paths, ../ traversal) or that lands inside .git/.
+            p = (root / path).resolve()
+            if root not in p.parents or '.git' in p.relative_to(root).parts:
+                raise SystemExit(f'Refusing to write outside the repo work tree: {path}')
+            p.parent.mkdir(parents=True, exist_ok=True)
+            p.write_text(content)
+        print(f'Wrote {len(files)} extra file(s)')
+        "
+
+      # --- Commit and push (--allow-empty handles no-change case) ---
+      - git add -A
+      - |
+        git -c user.email="sc-automation@census.gov" \
+            -c user.name="SC Automation" \
+            commit -m "SC automation: ${LAYER}/${REGION_DIR} [${ACCOUNT_REPO}]" \
+            --allow-empty
+      - git push origin "${GIT_BRANCH}"
+
+      # --- Run Terraform in target layer/region directory ---
+      # tf-run auto-proceeds on non-TTY stdin (read -t timeout defaults to "y")
+      - cd "${LAYER}/${REGION_DIR}"
+      - |
+        if [ "${DRY_RUN}" = "true" ]; then
+          tf-plan -no-color
+        elif [ -n "${TF_RUN_START_TAG}" ]; then
+          TFARGS="-auto-approve" tf-run apply "tag:${TF_RUN_START_TAG}"
+        else
+          TFARGS="-auto-approve" tf-run apply
+        fi
+
+      # --- Open PR (idempotent: skip if PR already exists) ---
+      - |
+        GH_HOST=github.e.it.census.gov \
+        GH_TOKEN="${GITHUB_TOKEN}" \
+        gh pr create \
+          --title "SC automation: ${LAYER}/${REGION_DIR} [${ACCOUNT_REPO}]" \
+          --body "Triggered by Service Catalog provisioning of **${ACCOUNT_REPO}**." \
+          --base main \
+          --head "${GIT_BRANCH}" \
+          || echo "PR already exists or create failed, continuing"
+
+  post_build:
+    commands:
+      - echo "BUILD_RESULT=${CODEBUILD_BUILD_SUCCEEDING}"
+      # Emit PR_URL so Lambda can parse it from the build output
+      - |
+        PR_URL=$(GH_HOST=github.e.it.census.gov \
+          GH_TOKEN="${GITHUB_TOKEN}" \
+          gh pr view \
+          --repo "${GITHUB_ORG}/${ACCOUNT_REPO}" \
+          "${GIT_BRANCH}" \
+          --json url -q .url 2>/dev/null || echo "")
+        echo "PR_URL=${PR_URL}"
diff --git a/deploy/codebuild.tf b/deploy/codebuild.tf
new file mode 100644
index 0000000..d65d38a
--- /dev/null
+++ b/deploy/codebuild.tf
@@ -0,0 +1,116 @@
+data "aws_partition" "current" {}
+data "aws_caller_identity" "current" {}
+data "aws_region" "current" {}
+
+# GHE source credential — one per account per server_type per region.
+# If a credential for GITHUB_ENTERPRISE already exists in this account,
+# import it: terraform import aws_codebuild_source_credential.ghe <credential-arn>
+resource "aws_codebuild_source_credential" "ghe" {
+  auth_type   = "PERSONAL_ACCESS_TOKEN"
+  server_type = "GITHUB_ENTERPRISE"
+  token       = data.aws_secretsmanager_secret_version.ghe_token.secret_string
+}
+
+data "aws_secretsmanager_secret_version" "ghe_token" {
+  secret_id = var.github_token_secret_name
+}
+
+resource "aws_codebuild_project" "tf_run_executor" {
+  name          = "tf-run-executor"
+  description   = "Clones account repo, writes config files, runs tf-run, opens PR"
+  build_timeout = 60 # minutes
+  service_role  = aws_iam_role.codebuild_exec.arn
+
+  artifacts {
+    type = "NO_ARTIFACTS"
+  }
+
+  environment {
+    compute_type    = "BUILD_GENERAL1_SMALL"
+    image           = "aws/codebuild/amazonlinux2-x86_64-standard:3.0"
+    type            = "LINUX_CONTAINER"
+    privileged_mode = false
+
+    # --- Static defaults (overridden per-build via environmentVariablesOverride) ---
+    environment_variable {
+      name  = "GITHUB_ORG"
+      value = var.github_org
+    }
+    environment_variable {
+      name  = "TF_BINARY_S3"
+      value = var.tf_binary_s3
+    }
+    environment_variable {
+      name  = "CENSUS_CA_S3"
+      value = var.census_ca_s3
+    }
+    environment_variable {
+      name  = "GH_CLI_S3"
+      value = var.gh_cli_s3
+    }
+    environment_variable {
+      name  = "HTTPS_PROXY"
+      value = var.https_proxy
+    }
+    environment_variable {
+      name  = "NO_PROXY"
+      value = "github.e.it.census.gov,169.254.169.254"
+    }
+    # Placeholder values — always overridden by Lambda per-build
+    environment_variable {
+      name  = "ACCOUNT_REPO"
+      value = "OVERRIDE_PER_BUILD"
+    }
+    environment_variable {
+      name  = "LAYER"
+      value = "OVERRIDE_PER_BUILD"
+    }
+    environment_variable {
+      name  = "REGION_DIR"
+      value = "OVERRIDE_PER_BUILD"
+    }
+    environment_variable {
+      name  = "GITHUB_TOKEN"
+      type  = "SECRETS_MANAGER"
+      value = var.github_token_secret_name
+    }
+    environment_variable {
+      name  = "GIT_BRANCH"
+      value = "repo-init"
+    }
+    environment_variable {
+      name  = "DRY_RUN"
+      value = "false"
+    }
+    environment_variable {
+      name  = "TF_RUN_START_TAG"
+      value = ""
+    }
+    environment_variable {
+      name  = "EXTRA_FILES"
+      value = "{}"
+    }
+  }
+
+  source {
+    type            = "GITHUB_ENTERPRISE"
+    location        = var.source_repo_url
+    buildspec       = "buildspec.yml"
+    git_clone_depth = 1
+  }
+
+  logs_config {
+    cloudwatch_logs {
+      group_name  = "/aws/codebuild/tf-run-executor"
+      stream_name = ""
+      status      = "ENABLED"
+    }
+  }
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+
+  depends_on = [aws_codebuild_source_credential.ghe]
+}
diff --git a/deploy/iam.tf b/deploy/iam.tf
new file mode 100644
index 0000000..0398048
--- /dev/null
+++ b/deploy/iam.tf
@@ -0,0 +1,166 @@
+# ---------------------------------------------------------------------------
+# Lambda execution role
+# ---------------------------------------------------------------------------
+
+data "aws_iam_policy_document" "lambda_trust" {
+  statement {
+    sid     = "LambdaAssumeRole"
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "Service"
+      identifiers = ["lambda.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "lambda_exec" {
+  name               = "tf-run-executor-lambda"
+  description        = "Execution role for the tf-run-executor-trigger Lambda"
+  assume_role_policy = data.aws_iam_policy_document.lambda_trust.json
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+}
+
+data "aws_iam_policy_document" "lambda_exec" {
+  # CloudWatch Logs
+  statement {
+    sid    = "CloudWatchLogs"
+    effect = "Allow"
+    actions = [
+      "logs:CreateLogGroup",
+      "logs:CreateLogStream",
+      "logs:PutLogEvents",
+    ]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/tf-run-executor-trigger",
+      "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/tf-run-executor-trigger:*",
+    ]
+  }
+
+  # Secrets Manager: read GHE token
+  statement {
+    sid    = "SecretsManagerReadGheToken"
+    effect = "Allow"
+    actions = [
+      "secretsmanager:GetSecretValue",
+      "secretsmanager:DescribeSecret",
+    ]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:secretsmanager:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:secret:${var.github_token_secret_name}*",
+    ]
+  }
+
+  # CodeBuild: start builds and poll status
+  statement {
+    sid    = "CodeBuildStartAndPoll"
+    effect = "Allow"
+    actions = [
+      "codebuild:StartBuild",
+      "codebuild:BatchGetBuilds",
+    ]
+    resources = [
+      aws_codebuild_project.tf_run_executor.arn,
+    ]
+  }
+}
+
+resource "aws_iam_role_policy" "lambda_exec" {
+  name   = "tf-run-executor-lambda"
+  role   = aws_iam_role.lambda_exec.id
+  policy = data.aws_iam_policy_document.lambda_exec.json
+}
+
+# ---------------------------------------------------------------------------
+# CodeBuild service role
+# ---------------------------------------------------------------------------
+
+data "aws_iam_policy_document" "codebuild_trust" {
+  statement {
+    sid     = "CodeBuildAssumeRole"
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "Service"
+      identifiers = ["codebuild.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "codebuild_exec" {
+  name               = "tf-run-executor-codebuild"
+  description        = "Service role for the tf-run-executor CodeBuild project"
+  assume_role_policy = data.aws_iam_policy_document.codebuild_trust.json
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+}
+
+data "aws_iam_policy_document" "codebuild_exec" {
+  # S3: read Terraform binary, Census CA cert, and gh CLI from packer-pipeline assets bucket
+  statement {
+    sid     = "S3ReadPackerAssets"
+    effect  = "Allow"
+    actions = ["s3:GetObject"]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:s3:::csvd-packer-pipeline-assets/*",
+    ]
+  }
+
+  # Secrets Manager: read the GHE PAT at runtime (GITHUB_TOKEN env var).
+  # The project declares GITHUB_TOKEN with type SECRETS_MANAGER, so the service
+  # role must be able to read var.github_token_secret_name when a build starts.
+  statement {
+    sid    = "SecretsManagerReadGheToken"
+    effect = "Allow"
+    actions = [
+      "secretsmanager:GetSecretValue",
+      "secretsmanager:DescribeSecret",
+    ]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:secretsmanager:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:secret:${var.github_token_secret_name}*",
+    ]
+  }
+
+  # CloudWatch Logs: write build output
+  statement {
+    sid    = "CloudWatchLogsWrite"
+    effect = "Allow"
+    actions = [
+      "logs:CreateLogGroup",
+      "logs:CreateLogStream",
+      "logs:PutLogEvents",
+    ]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/codebuild/tf-run-executor",
+      "arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/codebuild/tf-run-executor:*",
+    ]
+  }
+
+  # CodeBuild report groups (required by the standard AL2 image)
+  statement {
+    sid    = "CodeBuildReports"
+    effect = "Allow"
+    actions = [
+      "codebuild:CreateReportGroup",
+      "codebuild:CreateReport",
+      "codebuild:UpdateReport",
+      "codebuild:BatchPutTestCases",
+      "codebuild:BatchPutCodeCoverages",
+    ]
+    resources = [
+      "arn:${data.aws_partition.current.partition}:codebuild:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:report-group/tf-run-executor-*",
+    ]
+  }
+}
+
+resource "aws_iam_role_policy" "codebuild_exec" {
+  name   = "tf-run-executor-codebuild"
+  role   = aws_iam_role.codebuild_exec.id
+  policy = data.aws_iam_policy_document.codebuild_exec.json
+}
diff --git a/deploy/lambda.tf
b/deploy/lambda.tf
new file mode 100644
index 0000000..602a46d
--- /dev/null
+++ b/deploy/lambda.tf
@@ -0,0 +1,74 @@
+resource "aws_ecr_repository" "lambda" {
+  name                 = "tf-run-executor/lambda"
+  image_tag_mutability = "MUTABLE"
+
+  image_scanning_configuration {
+    scan_on_push = true
+  }
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+}
+
+resource "aws_ecr_lifecycle_policy" "lambda" {
+  repository = aws_ecr_repository.lambda.name
+
+  policy = jsonencode({
+    rules = [{
+      rulePriority = 1
+      description  = "Keep last 10 images"
+      selection = {
+        tagStatus   = "any"
+        countType   = "imageCountMoreThan"
+        countNumber = 10
+      }
+      action = { type = "expire" }
+    }]
+  })
+}
+
+resource "aws_lambda_function" "tf_run_trigger" {
+  function_name = "tf-run-executor-trigger"
+  description   = "CFN Custom Resource handler: validates inputs, starts tf-run-executor CodeBuild, polls, returns PR URL"
+
+  package_type = "Image"
+  image_uri    = "${aws_ecr_repository.lambda.repository_url}:${var.lambda_image_tag}"
+
+  role    = aws_iam_role.lambda_exec.arn
+  timeout = 900 # 15 min — must exceed CodeBuild poll window
+
+  memory_size = 256
+
+  environment {
+    variables = {
+      CODEBUILD_PROJECT_NAME   = aws_codebuild_project.tf_run_executor.name
+      GITHUB_TOKEN_SECRET_NAME = var.github_token_secret_name
+      GITHUB_API               = var.github_api
+      GITHUB_ORG_NAME          = var.github_org
+    }
+  }
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+
+  depends_on = [aws_ecr_repository.lambda]
+
+  lifecycle {
+    ignore_changes = [image_uri]
+  }
+}
+
+# Allow CloudFormation (in any account in the org) to invoke this Lambda
+resource "aws_lambda_permission" "cfn_invoke" {
+  statement_id  = "AllowOrgCloudFormation"
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.tf_run_trigger.function_name
+  principal     = "cloudformation.amazonaws.com"
+
+  # Restrict to the org so only accounts in this org can invoke cross-account
+  principal_org_id = var.org_id
+}
diff --git
a/deploy/provider.tf b/deploy/provider.tf
new file mode 100644
index 0000000..a55d2c7
--- /dev/null
+++ b/deploy/provider.tf
@@ -0,0 +1,13 @@
+terraform {
+  required_version = ">= 1.3"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region = "us-gov-west-1"
+}
diff --git a/deploy/service_catalog.tf b/deploy/service_catalog.tf
new file mode 100644
index 0000000..0f55721
--- /dev/null
+++ b/deploy/service_catalog.tf
@@ -0,0 +1,162 @@
+locals {
+  product_s3_key = "tf-run-executor/v${var.product_version}/product-template.yaml"
+  template_url   = "https://${var.artifacts_bucket_name}.s3.${data.aws_region.current.name}.amazonaws.com/${local.product_s3_key}"
+}
+
+# ---------------------------------------------------------------------------
+# Upload product template to the centrally-managed SC artifacts bucket
+# ---------------------------------------------------------------------------
+resource "aws_s3_object" "product_template" {
+  bucket = var.artifacts_bucket_name
+  key    = local.product_s3_key
+  source = "${path.module}/../service-catalog/product-template.yaml"
+  etag   = filemd5("${path.module}/../service-catalog/product-template.yaml")
+
+  tags = {
+    "servicecatalog:provisioning" = "true"
+    Project                       = "sc-automation"
+    ManagedBy                     = "terraform"
+  }
+}
+
+# ---------------------------------------------------------------------------
+# Portfolio
+# ---------------------------------------------------------------------------
+resource "aws_servicecatalog_portfolio" "this" {
+  name          = "${var.portfolio_name_prefix}-tf-run"
+  description   = "Service Catalog portfolio for SC → CodeBuild tf-run automation"
+  provider_name = "CSVD Platform Engineering"
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+}
+
+# ---------------------------------------------------------------------------
+# Product
+# ---------------------------------------------------------------------------
+resource "aws_servicecatalog_product" "tf_run" {
+  name        = "${var.portfolio_name_prefix}-tf-run-executor"
+  owner       = "CSVD Platform Engineering"
+  description = "Trigger tf-run in an account repo layer via CodeBuild. Writes extra config files, applies Terraform, and opens a PR."
+  type        = "CLOUD_FORMATION_TEMPLATE"
+
+  provisioning_artifact_parameters {
+    name                        = "v${var.product_version}"
+    description                 = "Version ${var.product_version}"
+    template_url                = local.template_url
+    type                        = "CLOUD_FORMATION_TEMPLATE"
+    disable_template_validation = false
+  }
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+
+  depends_on = [aws_s3_object.product_template]
+}
+
+# ---------------------------------------------------------------------------
+# Associate product with portfolio
+# ---------------------------------------------------------------------------
+resource "aws_servicecatalog_product_portfolio_association" "this" {
+  portfolio_id = aws_servicecatalog_portfolio.this.id
+  product_id   = aws_servicecatalog_product.tf_run.id
+}
+
+# ---------------------------------------------------------------------------
+# Portfolio access — IAM principals that can provision this product
+# ---------------------------------------------------------------------------
+resource "aws_servicecatalog_principal_portfolio_association" "this" {
+  for_each = toset(var.principal_arns)
+
+  portfolio_id   = aws_servicecatalog_portfolio.this.id
+  principal_arn  = each.value
+  principal_type = "IAM"
+}
+
+# ---------------------------------------------------------------------------
+# Launch constraint role — assumed by CFN when launching the product
+# ---------------------------------------------------------------------------
+resource "aws_iam_role" "sc_launch" {
+  name        = "${var.portfolio_name_prefix}-sc-launch-role"
+  description = "Role assumed by Service Catalog when launching tf-run-executor product"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Principal = { Service = "servicecatalog.amazonaws.com" }
+      Action    = "sts:AssumeRole"
+    }]
+  })
+
+  tags = {
+    Project   = "sc-automation"
+    ManagedBy = "terraform"
+  }
+}
+
+resource "aws_iam_role_policy" "sc_launch" {
+  name = "invoke-lambda-and-cfn"
+  role = aws_iam_role.sc_launch.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Sid      = "InvokeLambda"
+        Effect   = "Allow"
+        Action   = ["lambda:InvokeFunction"]
+        Resource = aws_lambda_function.tf_run_trigger.arn
+      },
+      {
+        Sid    = "CloudFormationOperations"
+        Effect = "Allow"
+        Action = [
+          "cloudformation:CreateStack",
+          "cloudformation:DeleteStack",
+          "cloudformation:DescribeStacks",
+          "cloudformation:DescribeStackEvents",
+          "cloudformation:GetTemplate",
+          "cloudformation:GetTemplateSummary",
+          "cloudformation:ValidateTemplate",
+          "cloudformation:UpdateStack",
+          "cloudformation:SetStackPolicy",
+        ]
+        Resource = "*"
+      },
+      {
+        Sid      = "S3ReadTemplate"
+        Effect   = "Allow"
+        Action   = ["s3:GetObject"]
+        Resource = "*"
+        Condition = {
+          StringEquals = {
+            "s3:ExistingObjectTag/servicecatalog:provisioning" = ["true"]
+          }
+        }
+      },
+      {
+        Sid      = "S3ListBucket"
+        Effect   = "Allow"
+        Action   = ["s3:ListBucket", "s3:GetBucketLocation"]
+        Resource = "arn:${data.aws_partition.current.partition}:s3:::${var.artifacts_bucket_name}"
+      },
+    ]
+  })
+}
+
+resource "aws_servicecatalog_constraint" "launch" {
+  portfolio_id = aws_servicecatalog_portfolio.this.id
+  product_id   = aws_servicecatalog_product.tf_run.id
+  type         = "LAUNCH"
+
+  parameters = jsonencode({
+    RoleArn = aws_iam_role.sc_launch.arn
+  })
+
+  description = "Launch constraint — uses a dedicated role to invoke the Lambda"
+}
diff --git a/deploy/variables.tf b/deploy/variables.tf
new file mode 100644
index 0000000..ea0c197
--- /dev/null
+++ b/deploy/variables.tf
@@ -0,0 +1,82 @@
+variable "github_org" {
+  description = "GitHub Enterprise organization that owns the account repos"
+  type        = string
+  default     = "SCT-Engineering"
+}
+
+variable "source_repo_url" {
+  description = "GHE HTTPS URL for this repo (sc-lambda-ghactions), used as CodeBuild source"
+  type        = string
+  # e.g. "https://github.e.it.census.gov/SCT-Engineering/sc-lambda-ghactions"
+}
+
+variable "tf_binary_s3" {
+  description = "S3 URI for the Terraform Linux AMD64 zip (registry.terraform.io is blocked)"
+  type        = string
+  default     = "s3://csvd-packer-pipeline-assets/terraform/terraform_1.9.1_linux_amd64.zip"
+}
+
+variable "census_ca_s3" {
+  description = "S3 URI for the Census CA certificate PEM file (needed for GHE TLS)"
+  type        = string
+  default     = "s3://csvd-packer-pipeline-assets/certs/census-ca.pem"
+}
+
+variable "gh_cli_s3" {
+  description = "S3 URI for the gh CLI Linux AMD64 tarball"
+  type        = string
+  default     = "s3://csvd-packer-pipeline-assets/tools/gh_2.49.0_linux_amd64.tar.gz"
+}
+
+variable "https_proxy" {
+  description = "HTTPS proxy for outbound connections (provider downloads, etc.)"
+  type        = string
+  default     = "http://proxy.tco.census.gov:3128"
+}
+
+variable "github_token_secret_name" {
+  description = "Secrets Manager secret name for the GHE PAT; reused from the existing EKS automation secret"
+  type        = string
+  default     = "ghe-runner/github-token"
+}
+
+variable "lambda_image_tag" {
+  description = "ECR image tag to deploy for the Lambda function"
+  type        = string
+  default     = "latest"
+}
+
+variable "github_api" {
+  description = "GitHub Enterprise API base URL"
+  type        = string
+  default     = "https://github.e.it.census.gov/api/v3"
+}
+
+variable "org_id" {
+  description = "AWS Organizations ID — used to restrict cross-account Lambda invocation to org members only"
+  type        = string
+  # e.g. "o-abc123def4"
+}
+
+variable "artifacts_bucket_name" {
+  description = "S3 bucket name for Service Catalog product template artifacts"
+  type        = string
+}
+
+variable "product_version" {
+  description = "Version string for the SC product provisioning artifact"
+  type        = string
+  default     = "1.0.0"
+}
+
+variable "portfolio_name_prefix" {
+  description = "Prefix for the SC portfolio and product names"
+  type        = string
+  default     = "sc-automation"
+}
+
+variable "principal_arns" {
+  description = "List of IAM principal ARNs to grant portfolio access"
+  type        = list(string)
+  default     = []
+}
diff --git a/lambda/Dockerfile b/lambda/Dockerfile
new file mode 100644
index 0000000..96d7c79
--- /dev/null
+++ b/lambda/Dockerfile
@@ -0,0 +1,10 @@
+FROM public.ecr.aws/lambda/python:3.12
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy Lambda handler
+COPY app.py ${LAMBDA_TASK_ROOT}/
+
+CMD ["app.lambda_handler"]
diff --git a/lambda/app.py b/lambda/app.py
new file mode 100644
index 0000000..79c674f
--- /dev/null
+++ b/lambda/app.py
@@ -0,0 +1,339 @@
+"""Lambda function: SC → CodeBuild tf-run-executor trigger.
+
+Handles CloudFormation Custom Resource events (Create/Update/Delete).
+Validates inputs via Pydantic, starts the tf-run-executor CodeBuild project
+with per-build env-var overrides, polls until completion, and signals
+CloudFormation SUCCESS/FAILED with the PR URL.
+ +Architecture: + CFN Custom Resource → Lambda (tf-run-executor-trigger) + → CodeBuild (tf-run-executor) + → git clone account repo + → write EXTRA_FILES + → tf-run apply [tag:START_TAG] + → gh pr create + Lambda polls build → returns PR URL → cfn-response SUCCESS/FAILED +""" + +import json +import logging +import os +import re +import ssl +import time +import traceback +import urllib.request +from typing import Any, Literal, Optional +from urllib.request import Request, urlopen + +import boto3 +from pydantic import BaseModel, Field, field_validator + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s", +) +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Input model +# --------------------------------------------------------------------------- + +class TfRunRequest(BaseModel): + """Validated input for a tf-run-executor CodeBuild invocation.""" + + account_repo: str = Field(..., description="Account repo name, e.g. 229685449397-csvd-dev-platform-dev-gov") + layer: Literal["common", "infrastructure", "vpc"] = Field(..., description="Terraform layer") + region_dir: Literal["east", "west"] = Field(..., description="Region directory") + tf_run_start_tag: str = Field(default="", description="tf-run.data TAG label to start from; empty = from beginning") + extra_files: dict = Field(default_factory=dict, description='JSON map {"relative/path": "content"} written before tf-run') + git_branch: str = Field(default="repo-init", description="Branch to commit and open PR from") + dry_run: bool = Field(default=False, description="true = tf plan only, no apply") + + @field_validator("extra_files", mode="before") + @classmethod + def parse_extra_files(cls, v: Any) -> Any: + """Accept a JSON string or a dict for extra_files. 
+ + CFN parameters are always strings, so '{}' or '{"path": "content"}' + must be parsed before Pydantic validates the dict type. + """ + if isinstance(v, str): + try: + return json.loads(v) + except json.JSONDecodeError as exc: + raise ValueError(f"extra_files must be a valid JSON object string; got: {v!r}") from exc + return v + + class Config: + extra = "allow" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def get_secret(secret_name: str) -> str: + """Retrieve a secret string from AWS Secrets Manager.""" + client = boto3.client("secretsmanager") + response = client.get_secret_value(SecretId=secret_name) + raw = response["SecretString"] + try: + data = json.loads(raw) + if isinstance(data, dict): + for key in ("token", "access_token", "api_token", "github_token"): + if key in data: + return data[key] + return next(iter(data.values())) + return str(data) + except json.JSONDecodeError: + return raw.strip() + + +def send_cfn_response( + event: dict, + context, + status: str, + response_data: dict, + physical_resource_id: Optional[str] = None, + reason: Optional[str] = None, +) -> None: + """PUT a CloudFormation Custom Resource response to the pre-signed S3 URL.""" + response_url = event.get("ResponseURL") + if not response_url: + logger.warning("No ResponseURL in event — skipping CloudFormation response") + return + + body = json.dumps({ + "Status": status, + "Reason": reason or f"See CloudWatch Log Stream: {context.log_stream_name}", + "PhysicalResourceId": physical_resource_id or context.log_stream_name, + "StackId": event.get("StackId"), + "RequestId": event.get("RequestId"), + "LogicalResourceId": event.get("LogicalResourceId"), + "Data": response_data, + }) + + req = Request( + response_url, + data=body.encode(), + headers={"Content-Type": "", "Content-Length": str(len(body))}, + method="PUT", + ) + try: + urlopen(req) + except 
Exception as exc: + logger.error(f"Failed to send CloudFormation response: {exc}") + + +def start_codebuild_build( + tf_req: TfRunRequest, + github_token: str, + request_id: str, +) -> str: + """Start the tf-run-executor CodeBuild project with per-build env-var overrides. + + Returns the CodeBuild build ID. + """ + project_name = os.environ.get("CODEBUILD_PROJECT_NAME", "tf-run-executor") + region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-gov-west-1")) + cb = boto3.client("codebuild", region_name=region) + + logger.info( + f"[{request_id}] Starting CodeBuild '{project_name}' for " + f"repo={tf_req.account_repo} layer={tf_req.layer}/{tf_req.region_dir}" + ) + + response = cb.start_build( + projectName=project_name, + environmentVariablesOverride=[ + {"name": "ACCOUNT_REPO", "value": tf_req.account_repo, "type": "PLAINTEXT"}, + {"name": "LAYER", "value": tf_req.layer, "type": "PLAINTEXT"}, + {"name": "REGION_DIR", "value": tf_req.region_dir, "type": "PLAINTEXT"}, + {"name": "GIT_BRANCH", "value": tf_req.git_branch, "type": "PLAINTEXT"}, + {"name": "TF_RUN_START_TAG", "value": tf_req.tf_run_start_tag, "type": "PLAINTEXT"}, + {"name": "EXTRA_FILES", "value": json.dumps(tf_req.extra_files), "type": "PLAINTEXT"}, + {"name": "DRY_RUN", "value": str(tf_req.dry_run).lower(), "type": "PLAINTEXT"}, + {"name": "GITHUB_TOKEN", "value": github_token, "type": "PLAINTEXT"}, + ], + ) + build_id = response["build"]["id"] + logger.info(f"[{request_id}] CodeBuild build started: {build_id}") + return build_id + + +def poll_codebuild_build( + build_id: str, + request_id: str, + timeout_minutes: int = 12, +) -> tuple: + """Poll a CodeBuild build until it completes or the Lambda deadline approaches. + + Returns (status, logs_url) where status is one of: + SUCCEEDED, FAILED, FAULT, TIMED_OUT, STOPPED, or LAMBDA_TIMEOUT. 
+ """ + region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-gov-west-1")) + cb = boto3.client("codebuild", region_name=region) + deadline = time.time() + timeout_minutes * 60 + + while time.time() < deadline: + build = cb.batch_get_builds(ids=[build_id])["builds"][0] + status = build["buildStatus"] + logs_url = build.get("logs", {}).get("deepLink", "N/A") + logger.info( + f"[{request_id}] CodeBuild status: {status} " + f"(phase: {build.get('currentPhase', '?')})" + ) + if status != "IN_PROGRESS": + logger.info(f"[{request_id}] Build complete: {status}. Logs: {logs_url}") + return status, logs_url + time.sleep(20) + + logger.warning( + f"[{request_id}] CodeBuild poll timed out after {timeout_minutes} minutes" + ) + return "LAMBDA_TIMEOUT", "" + + +def fetch_pr_url( + github_token: str, + account_repo: str, + git_branch: str, + request_id: str, +) -> str: + """Fetch the open PR URL from the GitHub API for the given branch.""" + github_api = os.environ.get("GITHUB_API", "https://github.e.it.census.gov/api/v3") + github_org = os.environ.get("GITHUB_ORG_NAME", "SCT-Engineering") + api_base = github_api.rstrip("/") + + prs_url = f"{api_base}/repos/{github_org}/{account_repo}/pulls?state=open&head={github_org}:{git_branch}" + req = urllib.request.Request( + prs_url, + headers={"Authorization": f"token {github_token}"}, + ) + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + try: + with urllib.request.urlopen(req, context=ctx, timeout=10) as resp: + prs = json.loads(resp.read()) + if prs: + pr_url = prs[0].get("html_url", "N/A") + logger.info(f"[{request_id}] PR URL: {pr_url}") + return pr_url + else: + logger.warning(f"[{request_id}] No open PRs found for branch {git_branch} on {account_repo}") + return "N/A" + except Exception as exc: + logger.warning(f"[{request_id}] Could not fetch PR URL: {exc}") + return "N/A" + + +# 
+# ---------------------------------------------------------------------------
+# Parameter normalization
+# ---------------------------------------------------------------------------
+
+# Optional snake_case -> field-name overrides applied after conversion.
+_KEY_ALIASES: dict = {}
+
+
+def _normalize_params(resource_properties: dict) -> dict:
+    """Convert CloudFormation PascalCase property names to snake_case.
+
+    Keys already in snake_case (contain '_' or are fully lower-case) are kept
+    as-is to avoid double-conversion. "ServiceToken" is dropped, and any name
+    present in _KEY_ALIASES is replaced by its alias.
+
+    Args:
+        resource_properties: The event's ResourceProperties mapping.
+
+    Returns:
+        A new dict with normalized keys; values are passed through untouched.
+    """
+    normalized: dict = {}
+    for key, value in resource_properties.items():
+        if key == "ServiceToken":
+            continue
+        if "_" in key or key.islower():
+            snake = key
+        else:
+            # First split acronym runs ("TFRun" -> "TF_Run"), then the
+            # remaining lower/digit-to-upper boundaries.
+            s1 = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", key)
+            snake = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
+        normalized[_KEY_ALIASES.get(snake, snake)] = value
+    return normalized
+
+
+# ---------------------------------------------------------------------------
+# Lambda entry point
+# ---------------------------------------------------------------------------
+
+def lambda_handler(event: dict, context) -> dict:
+    """Process CloudFormation Custom Resource events to trigger tf-run in account repos.
+
+    Delete requests are acknowledged without action. For other request types
+    the handler validates ResourceProperties, starts the CodeBuild build, polls
+    it, and reports SUCCESS or FAILED back to CloudFormation.
+
+    Returns:
+        A dict with "statusCode" (200 or 500) and a JSON-encoded "body".
+    """
+    request_id = getattr(context, "aws_request_id", "unknown")
+    # ResponseURL is a presigned URL that accepts the response for this
+    # request, so it is kept out of the logs.
+    loggable_event = {
+        key: ("REDACTED" if key == "ResponseURL" else value)
+        for key, value in event.items()
+    }
+    logger.info(f"[{request_id}] Event: {json.dumps(loggable_event, default=str)}")
+
+    request_type = event.get("RequestType", "Unknown")
+
+    # Delete: no action — tf-run runs are not reversible automatically
+    if request_type == "Delete":
+        logger.info(f"[{request_id}] Delete request — no action taken")
+        send_cfn_response(
+            event, context, "SUCCESS",
+            {"Message": "Delete acknowledged — no tf-run action taken"},
+            physical_resource_id=event.get("PhysicalResourceId", "none"),
+        )
+        return {"statusCode": 200, "body": json.dumps({"message": "Delete acknowledged"})}
+
+    try:
+        if "ResourceProperties" not in event:
+            raise ValueError("Event missing 'ResourceProperties' — not a valid CFN Custom Resource event")
+
+        normalized = _normalize_params(event["ResourceProperties"])
+        logger.info(f"[{request_id}] Normalized params: {json.dumps(normalized, default=str)}")
+
+        tf_req = TfRunRequest(**normalized)
+        logger.info(
+            f"[{request_id}] repo={tf_req.account_repo} "
+            f"layer={tf_req.layer}/{tf_req.region_dir} "
+            f"branch={tf_req.git_branch} dry_run={tf_req.dry_run}"
+        )
+
+        github_token_secret = os.environ["GITHUB_TOKEN_SECRET_NAME"]
+        logger.info(f"[{request_id}] Fetching GitHub token from secret: {github_token_secret}")
+        github_token = get_secret(github_token_secret)
+
+        build_id = start_codebuild_build(tf_req, github_token, request_id)
+
+        # Poll — leave 60s buffer before Lambda timeout for cfn-response PUT
+        lambda_timeout_s = context.get_remaining_time_in_millis() / 1000
+        poll_budget_min = max(1, int((lambda_timeout_s - 60) / 60))
+        build_status, logs_url = poll_codebuild_build(build_id, request_id, poll_budget_min)
+
+        if build_status == "SUCCEEDED":
+            pr_url = fetch_pr_url(github_token, tf_req.account_repo, tf_req.git_branch, request_id)
+            # Derive the web base URL by stripping the API suffix.
+            github_base = os.environ.get("GITHUB_API", "https://github.e.it.census.gov/api/v3").rstrip("/").removesuffix("/api/v3")
+            github_org = os.environ.get("GITHUB_ORG_NAME", "SCT-Engineering")
+            repo_url = f"{github_base}/{github_org}/{tf_req.account_repo}"
+
+            # Each value is exposed under both PascalCase and snake_case so
+            # templates can use either spelling with Fn::GetAtt.
+            response_data = {
+                "PullRequestUrl": pr_url,
+                "pull_request_url": pr_url,
+                "RepositoryUrl": repo_url,
+                "repository_url": repo_url,
+                "BranchName": tf_req.git_branch,
+                "branch_name": tf_req.git_branch,
+                "CodeBuildBuildId": build_id,
+            }
+            send_cfn_response(
+                event, context, "SUCCESS", response_data,
+                physical_resource_id=f"{tf_req.account_repo}-{tf_req.layer}-{tf_req.region_dir}",
+            )
+            return {"statusCode": 200, "body": json.dumps(response_data)}
+
+        else:
+            reason = (
+                f"CodeBuild build {build_status}. "
+                f"Build ID: {build_id}. Logs: {logs_url}"
+            )
+            logger.error(f"[{request_id}] {reason}")
+            # NOTE(review): FAILED responses rely on send_cfn_response's default
+            # physical resource ID — confirm it is stable for Update events.
+            send_cfn_response(event, context, "FAILED", {}, reason=reason)
+            return {"statusCode": 500, "body": json.dumps({"error": reason})}
+
+    except Exception as exc:
+        logger.error(f"[{request_id}] Error: {exc}\n{traceback.format_exc()}")
+        send_cfn_response(event, context, "FAILED", {}, reason=f"Failed: {exc}")
+        return {"statusCode": 500, "body": json.dumps({"error": str(exc)})}
diff --git a/lambda/requirements.txt b/lambda/requirements.txt
new file mode 100644
index 0000000..93bd6c3
--- /dev/null
+++ b/lambda/requirements.txt
@@ -0,0 +1,2 @@
+boto3>=1.34
+pydantic>=2.0
diff --git a/service-catalog/product-template.yaml b/service-catalog/product-template.yaml
new file mode 100644
index 0000000..a137a5f
--- /dev/null
+++ b/service-catalog/product-template.yaml
@@ -0,0 +1,138 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: 'Service Catalog Product: Run tf-run in an account repo layer via CodeBuild'
+
+Metadata:
+  AWS::CloudFormation::Interface:
+    ParameterGroups:
+      - Label:
+          default: "Target Repository"
+        Parameters:
+          - AccountRepo
+          - Layer
+          - RegionDir
+      - Label:
+          default: "Execution Options"
+        Parameters:
+          - GitBranch
+          - TfRunStartTag
+          - DryRun
+      - Label:
+          default: "Extra Files (optional)"
+        Parameters:
+          - ExtraFiles
+
+    ParameterLabels:
+      AccountRepo:
+        default: "Account Repo Name"
+      Layer:
+        default: "Terraform Layer"
+      RegionDir:
+        default: "Region Directory"
+      GitBranch:
+        default: "Git Branch"
+      TfRunStartTag:
+        default: "tf-run Start Tag (optional)"
+      DryRun:
+        default: "Dry Run (plan only)"
+      ExtraFiles:
+        default: "Extra Config Files (JSON)"
+
+Parameters:
+  AccountRepo:
+    Type: String
+    Description: >-
+      Account repo name, e.g. 229685449397-csvd-dev-platform-dev-gov.
+      Must already exist in the SCT-Engineering GitHub org.
+    # Must start and end with a lowercase letter or digit; hyphens are allowed
+    # only in between.
+    AllowedPattern: '^[a-z0-9][a-z0-9-]*[a-z0-9]$'
+    ConstraintDescription: 'Lowercase letters, numbers, and hyphens only; must start and end with a letter or number'
+    MinLength: 3
+    MaxLength: 100
+
+  Layer:
+    Type: String
+    Description: Terraform layer to run tf-run in
+    AllowedValues:
+      - common
+      - infrastructure
+      - vpc
+
+  RegionDir:
+    Type: String
+    Description: Region directory within the layer
+    AllowedValues:
+      - east
+      - west
+
+  GitBranch:
+    Type: String
+    Description: Branch to commit extra files to and open the PR from
+    Default: repo-init
+    MinLength: 1
+    MaxLength: 100
+
+  TfRunStartTag:
+    Type: String
+    Description: >-
+      tf-run.data TAG label to start execution from.
+      Leave blank to run all steps from the beginning.
+    Default: ""
+    MaxLength: 100
+
+  # Kept as a quoted string (not a YAML boolean) so the value reaches the
+  # custom resource as the literal text "true" or "false".
+  DryRun:
+    Type: String
+    Description: >-
+      Set to 'true' to run tf plan only (no apply, no PR).
+      Useful for validating configuration before committing.
+    AllowedValues:
+      - "true"
+      - "false"
+    Default: "false"
+
+  ExtraFiles:
+    Type: String
+    Description: >-
+      JSON object mapping relative repo paths to file contents.
+      These files are written into the account repo before tf-run executes.
+      Example: {"vpc/west/my-config.tf": "# placeholder"}
+    Default: "{}"
+
+Resources:
+  TerraformRunResource:
+    Type: Custom::TerraformRun
+    Properties:
+      ServiceToken: !Sub "arn:${AWS::Partition}:lambda:${AWS::Region}:${AWS::AccountId}:function:tf-run-executor-trigger"
+      # Property names are snake_case to match Pydantic model field names directly.
+      # The Lambda normalizer handles PascalCase->snake_case but passing snake_case
+      # avoids any ambiguity with acronyms.
+      account_repo: !Ref AccountRepo
+      layer: !Ref Layer
+      region_dir: !Ref RegionDir
+      git_branch: !Ref GitBranch
+      tf_run_start_tag: !Ref TfRunStartTag
+      dry_run: !Ref DryRun
+      extra_files: !Ref ExtraFiles
+
+# Attribute names below must match keys in the Lambda's response data. The
+# build ID is only returned under its PascalCase key.
+Outputs:
+  PullRequestUrl:
+    Description: 'URL of the open pull request for the branch (N/A when none is found, e.g. for dry runs)'
+    Value: !GetAtt TerraformRunResource.pull_request_url
+    Export:
+      Name: !Sub '${AWS::StackName}-PullRequestUrl'
+
+  RepositoryUrl:
+    Description: URL of the account repository
+    Value: !GetAtt TerraformRunResource.repository_url
+    Export:
+      Name: !Sub '${AWS::StackName}-RepositoryUrl'
+
+  BranchName:
+    Description: Branch that was committed to
+    Value: !GetAtt TerraformRunResource.branch_name
+    Export:
+      Name: !Sub '${AWS::StackName}-BranchName'
+
+  CodeBuildBuildId:
+    Description: ID of the CodeBuild build that ran tf-run
+    Value: !GetAtt TerraformRunResource.CodeBuildBuildId
+    Export:
+      Name: !Sub '${AWS::StackName}-CodeBuildBuildId'