diff --git a/service-catalog/product-template.yaml b/service-catalog/product-template.yaml
index 5b74f3d..6d0b374 100644
--- a/service-catalog/product-template.yaml
+++ b/service-catalog/product-template.yaml
@@ -1,36 +1,66 @@
 AWSTemplateFormatVersion: '2010-09-09'
-Description: 'Service Catalog Product: Create GitHub Repository from Template'
+Description: 'Service Catalog Product: Create EKS Cluster GitHub Repository from Template'
 
 Metadata:
   AWS::CloudFormation::Interface:
     ParameterGroups:
       - Label:
-          default: "Repository Configuration"
+          default: "Cluster Configuration"
         Parameters:
           - ProjectName
-          - OwningTeam
-      - Label:
-          default: "Environment Settings"
-        Parameters:
+          - ClusterName
           - Environment
           - AwsRegion
       - Label:
-          default: "Optional Metadata"
+          default: "Account Configuration"
         Parameters:
+          - AccountName
+          - AWSAccountId
+          - EnvironmentAbbr
+      - Label:
+          default: "VPC Configuration"
+        Parameters:
+          - VpcName
+          - VpcDomainName
+      - Label:
+          default: "Contact & Organization"
+        Parameters:
+          - OwningTeam
+          - ClusterMailingList
           - OrganizationPath
+      - Label:
+          default: "FinOps"
+        Parameters:
           - FinOpsProjectName
           - FinOpsProjectNumber
+      - Label:
+          default: "Optional Metadata"
+        Parameters:
           - AdditionalTags
 
     ParameterLabels:
       ProjectName:
         default: "Repository Name"
+      ClusterName:
+        default: "EKS Cluster Name"
       OwningTeam:
         default: "Owning Team"
       Environment:
         default: "Environment"
       AwsRegion:
         default: "AWS Region"
+      AccountName:
+        default: "AWS Account Name"
+      AWSAccountId:
+        default: "AWS Account ID"
+      EnvironmentAbbr:
+        default: "Environment Abbreviation"
+      VpcName:
+        default: "VPC Name"
+      VpcDomainName:
+        default: "VPC Domain Name"
+      ClusterMailingList:
+        default: "Cluster Mailing List"
       OrganizationPath:
         default: "Organization Path"
       FinOpsProjectName:
@@ -43,12 +73,23 @@ Metadata:
 Parameters:
   ProjectName:
     Type: String
-    Description: Name of the GitHub repository to create (lowercase, hyphens only)
+    Description: >-
+      Name of the GitHub repository to create (lowercase, hyphens only).
+      If ClusterName is left blank this also becomes the EKS cluster name.
     AllowedPattern: '^[a-z0-9][a-z0-9-]*[a-z0-9]$'
     ConstraintDescription: Must be lowercase letters, numbers, and hyphens only
     MinLength: 3
     MaxLength: 100
 
+  ClusterName:
+    Type: String
+    Description: >-
+      EKS cluster name. Defaults to ProjectName if left blank.
+    Default: ""
+    AllowedPattern: '^$|^[0-9A-Za-z][A-Za-z0-9_-]*$'
+    ConstraintDescription: Leave blank, or start with a letter or number and use only letters, numbers, hyphens, and underscores
+    MaxLength: 63
+
   OwningTeam:
     Type: String
     Description: GitHub team that should have admin access to the repository
@@ -57,17 +98,16 @@ Parameters:
 
   Environment:
     Type: String
-    Description: Environment for this repository/project
-    Default: development
+    Description: Deployment environment
+    Default: dev
     AllowedValues:
-      - development
-      - staging
-      - production
-      - sandbox
+      - dev
+      - test
+      - prod
 
   AwsRegion:
     Type: String
-    Description: Primary AWS region for this project
+    Description: Primary AWS region for this EKS cluster
     Default: us-gov-west-1
     AllowedValues:
       - us-gov-west-1
@@ -75,11 +115,46 @@ Parameters:
       - us-east-1
       - us-west-2
 
-  OrganizationPath:
+  AccountName:
+    Type: String
+    Description: "AWS account name (e.g., csvd-dev-ew)"
+    AllowedPattern: '^[a-z0-9-]+$'
+    ConstraintDescription: Must contain only lowercase letters, numbers, and hyphens
+
+  AWSAccountId:
+    Type: String
+    Description: "AWS Account ID (12 digits)"
+    AllowedPattern: '^\d{12}$'
+    ConstraintDescription: Must be a valid 12-digit AWS Account ID
+
+  EnvironmentAbbr:
+    Type: String
+    Description: "Environment abbreviation (e.g., dev, prod)"
+    AllowedPattern: '^[a-z]+$'
+    MaxLength: 10
+
+  VpcName:
+    Type: String
+    Description: "Name of the VPC for the cluster"
+    AllowedPattern: '^[a-z0-9-]+$'
+    ConstraintDescription: Must contain only lowercase letters, numbers, and hyphens
+
+  VpcDomainName:
+    Type: String
+    Description: "VPC domain name (e.g., dev.inf.csp1.census.gov)"
+    AllowedPattern: '^[a-z0-9.-]+$'
+    ConstraintDescription: Must be a valid domain name
+
+  ClusterMailingList:
     Type: String
-    Description: Organization path (e.g., dept:team:subteam)
+    Description: Email address for cluster notifications
     Default: ""
 
+  OrganizationPath:
+    Type: String
+    Description: "Organization path (e.g., census:ocio:csvd)"
+    Default: "census:ocio:csvd"
+
   FinOpsProjectName:
     Type: String
     Description: FinOps project name for cost allocation
@@ -92,7 +167,7 @@ Parameters:
 
   AdditionalTags:
     Type: String
-    Description: Additional tags as JSON object (e.g., {"key1":"value1","key2":"value2"})
+    Description: 'Additional tags as JSON object (e.g., {"key1":"value1"})'
     Default: "{}"
 
   # Hidden parameter - the Lambda ARN is passed in from the Service Catalog product definition
@@ -101,17 +176,38 @@ Parameters:
     Description: ARN of the Lambda function that creates repositories
     Default: "arn:aws-us-gov:lambda:us-gov-west-1:229685449397:function:service-catalog-repo-gen-template-automation"
 
+Conditions:
+  ClusterNameProvided: !Not
+    - !Equals
+      - !Ref ClusterName
+      - ""
+
 Resources:
   # Custom Resource that invokes the Lambda function
+  # NOTE: Property names use snake_case to match Pydantic model field names.
+  # The Lambda keeps keys that already contain '_' unchanged, so passing
+  # snake_case directly bypasses PascalCase→snake_case conversion and its
+  # acronym/compound-word pitfalls (e.g. AWSAccountId, FinOpsProjectName).
   RepositoryCreator:
     Type: Custom::GitHubRepository
     Properties:
       ServiceToken: !Ref LambdaFunctionArn
-      # These properties are passed to the Lambda function
+      # Core repo parameters
       project_name: !Ref ProjectName
       owning_team: !Ref OwningTeam
+      # EKS-specific parameters – these trigger the EKS rendering path in the Lambda
+      cluster_name: !If
+        - ClusterNameProvided
+        - !Ref ClusterName
+        - !Ref ProjectName
       environment: !Ref Environment
       aws_region: !Ref AwsRegion
+      account_name: !Ref AccountName
+      aws_account_id: !Ref AWSAccountId
+      environment_abbr: !Ref EnvironmentAbbr
+      vpc_name: !Ref VpcName
+      vpc_domain_name: !Ref VpcDomainName
+      cluster_mailing_list: !Ref ClusterMailingList
       organization_path: !Ref OrganizationPath
       finops_project_name: !Ref FinOpsProjectName
       finops_project_number: !Ref FinOpsProjectNumber
@@ -142,6 +238,15 @@ Outputs:
     Export:
       Name: !Sub '${AWS::StackName}-ConfigBranch'
 
+  ClusterName:
+    Description: EKS cluster name
+    Value: !If
+      - ClusterNameProvided
+      - !Ref ClusterName
+      - !Ref ProjectName
+
   Message:
     Description: Status message
-    Value: !Sub 'Successfully created repository ${ProjectName} and opened configuration pull request. Review and merge the PR to complete setup.'
+    Value: !Sub >-
+      Successfully created repository ${ProjectName} with EKS cluster
+      configuration. Review and merge the PR to complete setup.
diff --git a/template_automation/app.py b/template_automation/app.py index b5ee214..78d9702 100644 --- a/template_automation/app.py +++ b/template_automation/app.py @@ -21,6 +21,7 @@ from .repository_provider import MergeRequestSettings, FileContent, RepositorySettings from .github_provider import GitHubProvider from .gitlab_provider import GitLabProvider +from .eks_config import EKSDeploymentConfig, ClusterConfig, render_eks_config # Set up enhanced logging with more detailed format logging.basicConfig( @@ -42,10 +43,60 @@ class CloudFormationResourceInput(BaseModel): """Input validation model for CloudFormation Custom Resource parameters.""" project_name: str = Field(..., description="Name for the new repository") owning_team: Optional[str] = Field(default="tf-module-admins", description="Team that should own the repository") + + # EKS-specific fields (present when this is an EKS cluster deployment) + cluster_name: Optional[str] = Field(default=None, description="EKS cluster name") + environment: Optional[str] = Field(default=None, description="Environment (dev/test/prod)") + aws_region: Optional[str] = Field(default=None, description="AWS region") + account_name: Optional[str] = Field(default=None, description="AWS account name") + aws_account_id: Optional[str] = Field(default=None, description="12-digit AWS account ID") + environment_abbr: Optional[str] = Field(default=None, description="Environment abbreviation") + vpc_name: Optional[str] = Field(default=None, description="VPC name") + vpc_domain_name: Optional[str] = Field(default=None, description="VPC domain name") + cluster_mailing_list: Optional[str] = Field(default=None, description="Cluster contact email") + organization_path: Optional[str] = Field(default=None, description="Org path") + finops_project_name: Optional[str] = Field(default=None, description="FinOps project name") + finops_project_number: Optional[str] = Field(default=None, description="FinOps project number") + + class Config: + extra = 
"allow" + + @property + def is_eks_deployment(self) -> bool: + """Return True when the incoming parameters contain the required EKS fields.""" + return bool( + self.cluster_name + and self.account_name + and self.aws_account_id + and self.vpc_name + and self.vpc_domain_name + ) + + def to_eks_deployment_config(self) -> "EKSDeploymentConfig": + """Build a fully-hydrated ``EKSDeploymentConfig`` from the CFN params. + + Fields that are not supplied fall back to the defaults defined in + ``eks_config.py`` (which mirror ``terraform-eks-deployment/variables.tf``). + """ + return EKSDeploymentConfig( + name=self.project_name, + environment=self.environment or "dev", + region=self.aws_region or os.environ.get("AWS_REGION", "us-gov-west-1"), + cluster_config=ClusterConfig( + cluster_name=self.cluster_name or self.project_name, + account_name=self.account_name or "", + aws_account_id=self.aws_account_id or "", + environment_abbr=self.environment_abbr or (self.environment or "dev"), + vpc_name=self.vpc_name or "", + vpc_domain_name=self.vpc_domain_name or "", + cluster_mailing_list=self.cluster_mailing_list or "", + organization=self.organization_path or "census:ocio:csvd", + finops_project_name=self.finops_project_name or "", + finops_project_number=self.finops_project_number or "", + ), + ) # Allow any additional parameters from CloudFormation - model_config = {"extra": "allow"} # Pydantic v2 syntax for allowing extra fields - def to_template_settings(self) -> Dict[str, Any]: """Convert CloudFormation parameters to template settings format.""" # Extract all fields except the known top-level ones @@ -55,8 +106,8 @@ def to_template_settings(self) -> Dict[str, Any]: attrs = {} tags = {} - # Get all model fields including extra ones (Pydantic v2) - all_fields = self.model_dump() + # Get all model fields including extra ones (Pydantic v1) + all_fields = self.dict() for field_name, field_value in all_fields.items(): if field_name not in exclude_fields: @@ -481,10 +532,26 @@ def 
lambda_handler(event: dict, context) -> dict: logger.info(f"[{request_id}] Resource properties: {json.dumps(resource_params, default=str)}") # Normalize parameter names (CloudFormation uses PascalCase, we need snake_case) + # Uses regex to correctly handle consecutive uppercase (e.g. AWSAccountId → aws_account_id) + import re + # Canonical key aliases – the regex normalizer may split compound words + # differently from what the Pydantic model expects (e.g. FinOps → fin_ops + # instead of finops). Map to the canonical Pydantic field names. + _ALIASES: Dict[str, str] = { + "fin_ops_project_name": "finops_project_name", + "fin_ops_project_number": "finops_project_number", + } normalized_params = {} for key, value in resource_params.items(): - # Convert PascalCase to snake_case - snake_key = ''.join(['_' + c.lower() if c.isupper() else c for c in key]).lstrip('_') + # If the key is already snake_case, keep it as-is + if '_' in key or key.islower(): + snake_key = key + else: + # Insert _ before transitions: UC→LC and LC/digit→UC + s1 = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', key) + snake_key = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + # Apply alias remapping + snake_key = _ALIASES.get(snake_key, snake_key) normalized_params[snake_key] = value logger.info(f"[{request_id}] Normalized parameters: {json.dumps(normalized_params, default=str)}") @@ -615,17 +682,42 @@ def lambda_handler(event: dict, context) -> dict: logger.info(f"[{request_id}] Target branch: {config_branch}") try: - file_result = provider.write_file( - cfn_input.project_name, - file=FileContent( - path=config_file, - content=config_content - ), - branch=config_branch, - message="Add repository configuration from CloudFormation" - ) - logger.info(f"[{request_id}] Configuration file written successfully") - logger.info(f"[{request_id}] Write file result: {json.dumps(file_result, default=str)}") + if cfn_input.is_eks_deployment: + # ── EKS deployment: render full Terragrunt file hierarchy ── + 
logger.info(f"[{request_id}] EKS deployment detected – rendering Terragrunt config files") + eks_cfg = cfn_input.to_eks_deployment_config() + rendered_files = render_eks_config(eks_cfg) + logger.info(f"[{request_id}] Rendered {len(rendered_files)} files: {[f.path for f in rendered_files]}") + + # Also include the legacy config.json for backwards compatibility + rendered_files_as_fc = [ + FileContent(path=rf.path, content=rf.content) + for rf in rendered_files + ] + rendered_files_as_fc.append( + FileContent(path=config_file, content=config_content) + ) + + file_result = provider.write_files_atomic( + cfn_input.project_name, + files=rendered_files_as_fc, + branch=config_branch, + message=f"Initialize EKS cluster config for {cfn_input.cluster_name or cfn_input.project_name}", + ) + logger.info(f"[{request_id}] Atomic write result: {json.dumps(file_result, default=str)}") + else: + # ── Generic deployment: write single config.json ── + file_result = provider.write_file( + cfn_input.project_name, + file=FileContent( + path=config_file, + content=config_content + ), + branch=config_branch, + message="Add repository configuration from CloudFormation" + ) + logger.info(f"[{request_id}] Configuration file written successfully") + logger.info(f"[{request_id}] Write file result: {json.dumps(file_result, default=str)}") except Exception as e: logger.error(f"[{request_id}] Failed to write configuration file: {str(e)}") logger.error(f"[{request_id}] Exception type: {type(e).__name__}") diff --git a/template_automation/eks_config.py b/template_automation/eks_config.py new file mode 100644 index 0000000..36926b3 --- /dev/null +++ b/template_automation/eks_config.py @@ -0,0 +1,416 @@ +"""EKS cluster configuration renderer. + +This module renders Terragrunt/HCL configuration files for EKS cluster +deployments, mirroring the terraform-eks-deployment module's template +rendering pipeline. 
It is invoked by the Lambda handler to produce the +set of files that will be committed to the new repository. + +The data model intentionally duplicates the Terraform variable structure +from terraform-eks-deployment so that values can flow unchanged from a +Service Catalog CloudFormation Custom Resource → Lambda → rendered files. +""" + +import json +import logging +import os +from typing import Any, Dict, List, Optional + +from jinja2 import Environment, FileSystemLoader +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Pydantic models – mirrors terraform-eks-deployment/variables.tf +# --------------------------------------------------------------------------- + +class VersionsCertManager(BaseModel): + version: str = "1.17.1" + chart_version: str = "1.17.1" + cluster_issuer_name: str = "cert-manager" + +class VersionsGoGatekeeper(BaseModel): + tag: str = "3.2.1" + chart_version: str = "0.1.53" + +class VersionsGrafana(BaseModel): + hostname: str = "grafana" + operator_chart_version: str = "4.9.8" + operator_tag: str = "5.16.0" + tag: str = "11.5.2" + os_shell_image_tag: str = "12" + +class VersionsIstio(BaseModel): + version: str = "1.25.0" + namespace: str = "istio-system" + +class VersionsK8sDashboard(BaseModel): + hostname: str = "dashboard" + metrics_scraper: str = "1.0.8" + version: str = "6.0.6" + +class VersionsKarpenter(BaseModel): + helm_chart: str = "1.3.1" + tag: str = "1.3.1" + +class VersionsKeycloak(BaseModel): + chart_version: str = "24.4.11" + tag: str = "26.1.3" + hostname: str = "keycloak" + database: str = "keycloak" + username: str = "keycloak" + password: str = "this is my very secure and totally random password horse battery staple now" + postgresql_tag: str = "17.4.0-debian-12-r2" + +class VersionsKiali(BaseModel): + operator_version: str = "2.2.0" + +class VersionsLoki(BaseModel): + chart_version: str = "6.27.0" + tag: str = 
"3.4.2" + enterprise_logs_provisioner_tag: str = "v1.7.0" + gateway_tag: str = "1.27-alpine" + memcached_tag: str = "1.6.37" + exporter_tag: str = "v0.15.0" + sidecar_tag: str = "1.27.4" + +class VersionsMetricsServer(BaseModel): + helm_chart: str = "3.12.2" + tag: str = "0.7.2" + +class VersionsPrometheus(BaseModel): + chart_version: str = "27.5.1" + server_tag: str = "v3.2.1" + config_reloader_tag: str = "v0.75.2" + alertmanager_tag: str = "v0.28.0" + kube_state_metrics_tag: str = "v2.15.0" + node_exporter_tag: str = "v1.9.0" + pushgateway_tag: str = "v1.11.0" + +class VersionsTempo(BaseModel): + chart_version: str = "1.18.2" + tag: str = "2.7.1" + +class Versions(BaseModel): + """All version pins – mirrors terraform-eks-deployment ``versions`` variable.""" + cluster_version: str = "1.31" + eks_module_version: str = "20.33.1" + release_version: str = "main" + aws_version: str = "5.84.0" + helm_version: str = "2.11.0" + kubernetes_version: str = "2.33.0" + null_version: str = "3.2.1" + random_version: str = "3.5.1" + template_version: str = "2.2.0" + tf_version: str = "1.5.5" + cert_manager: VersionsCertManager = Field(default_factory=VersionsCertManager) + gogatekeeper: VersionsGoGatekeeper = Field(default_factory=VersionsGoGatekeeper) + grafana: VersionsGrafana = Field(default_factory=VersionsGrafana) + istio: VersionsIstio = Field(default_factory=VersionsIstio) + k8s_dashboard: VersionsK8sDashboard = Field(default_factory=VersionsK8sDashboard) + karpenter: VersionsKarpenter = Field(default_factory=VersionsKarpenter) + keycloak: VersionsKeycloak = Field(default_factory=VersionsKeycloak) + kiali: VersionsKiali = Field(default_factory=VersionsKiali) + loki: VersionsLoki = Field(default_factory=VersionsLoki) + metrics_server: VersionsMetricsServer = Field(default_factory=VersionsMetricsServer) + prometheus: VersionsPrometheus = Field(default_factory=VersionsPrometheus) + tempo: VersionsTempo = Field(default_factory=VersionsTempo) + + +class Namespaces(BaseModel): + 
operator_namespace: str = "aoperator" + telemetry_namespace: str = "atelemetry" + custom_namespaces: Dict[str, str] = Field(default_factory=lambda: { + "cert-manager": "kube-system", + "karpenter": "karpenter", + "metrics-server": "kube-system", + "postgresql": "kube-system", + "keycloak": "keycloak", + "gogatekeeper": "kube-system", + "istio": "istio-system", + "kiali": "istio-system", + }) + + +class CommonVariables(BaseModel): + organization: str = "census:ocio:csvd" + project_name: str = "csvd_platformbaseline" + project_number: str = "fs0000000078" + project_role: str = "csvd_platformbaseline_app" + state_bucket_prefix: str = "inf-tfstate" + state_table_name: str = "tf_remote_state" + route53_endpoints: Dict[str, Any] = Field(default_factory=dict) + + +class ClusterConfig(BaseModel): + """Core cluster parameters – the required values the user *must* provide.""" + cluster_name: str + account_name: str + aws_account_id: str + environment_abbr: str + vpc_name: str + vpc_domain_name: str + cluster_mailing_list: str = "" + eks_instance_disk_size: int = 200 + eks_ng_desired_size: int = 3 + eks_ng_max_size: int = 10 + eks_ng_min_size: int = 3 + organization: str = "census:ocio:csvd" + finops_project_name: str = "" + finops_project_number: str = "" + finops_project_role: str = "" + tags: Dict[str, str] = Field(default_factory=dict) + module_enablement_overrides: Dict[str, bool] = Field(default_factory=dict) + + +class EKSDeploymentConfig(BaseModel): + """Top-level config that drives the entire rendering pipeline. + + Maps 1:1 to terraform-eks-deployment/variables.tf so that a single JSON + blob from Service Catalog can hydrate the full model. 
+ """ + # -- required -- + name: str = Field(..., description="Repository / deployment name") + environment: str = Field(..., description="dev | test | prod") + region: str = Field(default="us-gov-west-1") + cluster_config: ClusterConfig + + # -- optional, with sane defaults -- + versions: Versions = Field(default_factory=Versions) + namespaces: Namespaces = Field(default_factory=Namespaces) + common_variables: CommonVariables = Field(default_factory=CommonVariables) + + +# --------------------------------------------------------------------------- +# Renderer +# --------------------------------------------------------------------------- + +class RenderedFile(BaseModel): + """A single file to be committed to the new repository.""" + path: str + content: str + + +def _build_default_versions_context(cfg: EKSDeploymentConfig) -> Dict[str, Any]: + """Flatten the ``Versions`` + ``Namespaces`` objects into the flat dict + expected by ``default-versions.hcl.j2`` (matching locals.tf logic).""" + v = cfg.versions + ns = cfg.namespaces + + # Base namespaces + base_ns = { + "cert-manager": "kube-system", + "karpenter": "karpenter", + "metrics-server": "kube-system", + "postgresql": "kube-system", + "keycloak": "keycloak", + "gogatekeeper": "kube-system", + "istio": "istio-system", + "kiali": "istio-system", + } + telemetry_ns = { + "grafana": ns.telemetry_namespace, + "k8s-dashboard": ns.telemetry_namespace, + "loki": ns.telemetry_namespace, + "otel": ns.telemetry_namespace, + "prometheus": ns.telemetry_namespace, + "tempo": ns.telemetry_namespace, + } + all_namespaces = {**base_ns, **telemetry_ns, **ns.custom_namespaces} + + return { + # Module versions + "cluster_version": v.cluster_version, + "custom_service_eks_account": v.release_version, + "eks_module_version": v.eks_module_version, + "istio_ingress_version": v.release_version, + "release_version": v.release_version, + # Provider versions + "aws_version": v.aws_version, + "helm_version": v.helm_version, + 
"kubernetes_version": v.kubernetes_version, + "null_version": v.null_version, + "random_version": v.random_version, + "template_version": v.template_version, + "tf_version": v.tf_version, + # Cert-Manager + "cert_manager_version": v.cert_manager.version, + "cert_manager_helm_chart": v.cert_manager.chart_version, + "cluster_issuer_name": v.cert_manager.cluster_issuer_name, + # GoGatekeeper + "gogatekeeper_tag": v.gogatekeeper.tag, + "gogatekeeper_chart_version": v.gogatekeeper.chart_version, + # Grafana + "grafana_hostname": v.grafana.hostname, + "grafana_operator_chart_version": v.grafana.operator_chart_version, + "grafana_operator_tag": v.grafana.operator_tag, + "grafana_tag": v.grafana.tag, + "os_shell_image_tag": v.grafana.os_shell_image_tag, + # Istio + "istio_namespace": v.istio.namespace, + "istio_version": v.istio.version, + # k8s-dashboard + "dashboard_hostname": v.k8s_dashboard.hostname, + "k8s_dashboard_metrics_scraper": v.k8s_dashboard.metrics_scraper, + "k8s_dashboard_version": v.k8s_dashboard.version, + # Karpenter + "karpenter_helm_chart": v.karpenter.helm_chart, + "karpenter_tag": v.karpenter.tag, + # Keycloak + "keycloak_chart_version": v.keycloak.chart_version, + "keycloak_tag": v.keycloak.tag, + "keycloak_hostname": v.keycloak.hostname, + "keycloak_database": v.keycloak.database, + "keycloak_username": v.keycloak.username, + "keycloak_password": v.keycloak.password, + "postgresql_tag": v.keycloak.postgresql_tag, + # Kiali + "kiali_operator_version": v.kiali.operator_version, + "kiali_application_version": f"v{v.kiali.operator_version}", + # Loki + "loki_chart_version": v.loki.chart_version, + "loki_tag": v.loki.tag, + "enterprise_logs_provisioner_tag": v.loki.enterprise_logs_provisioner_tag, + "gateway_tag": v.loki.gateway_tag, + "memcached_tag": v.loki.memcached_tag, + "exporter_tag": v.loki.exporter_tag, + "sidecar_tag": v.loki.sidecar_tag, + # Metrics Server + "metrics_server_helm_chart": v.metrics_server.helm_chart, + "metrics_server_tag": 
v.metrics_server.tag, + # Prometheus + "prometheus_chart_version": v.prometheus.chart_version, + "prometheus_server_tag": v.prometheus.server_tag, + "prometheus_config_reloader_tag": v.prometheus.config_reloader_tag, + "alertmanager_tag": v.prometheus.alertmanager_tag, + "kube_state_metrics_tag": v.prometheus.kube_state_metrics_tag, + "node_exporter_tag": v.prometheus.node_exporter_tag, + "pushgateway_tag": v.prometheus.pushgateway_tag, + # Tempo + "tempo_chart_version": v.tempo.chart_version, + "tempo_tag": v.tempo.tag, + # Namespaces + "operator_namespace": ns.operator_namespace, + "telemetry_namespace": ns.telemetry_namespace, + "namespaces": all_namespaces, + } + + +def render_eks_config(cfg: EKSDeploymentConfig) -> List[RenderedFile]: + """Render the complete set of Terragrunt configuration files for an EKS + cluster deployment. + + This mirrors ``locals.rendered_files`` + ``locals.managed_extra_files`` from + ``terraform-eks-deployment/locals.tf``, producing the same directory layout + that the Terraform ``terraform-github-repo`` module would commit via + ``managed_extra_files``. + + Args: + cfg: Fully-hydrated EKS deployment configuration. + + Returns: + List of ``RenderedFile`` objects ready to be committed to the repo. 
+ """ + template_dir = os.path.join(os.path.dirname(__file__), "templates", "eks") + env = Environment( + loader=FileSystemLoader(template_dir), + trim_blocks=True, + lstrip_blocks=True, + keep_trailing_newline=True, + ) + + cc = cfg.cluster_config + rendered: List[RenderedFile] = [] + + # ── Hierarchy config files (rendered_files in terraform-eks-deployment) ── + + rendered.append(RenderedFile( + path="root.hcl", + content=env.get_template("root.hcl.j2").render(environment=cfg.environment), + )) + + rendered.append(RenderedFile( + path=f"{cfg.environment}/account.hcl", + content=env.get_template("account.hcl.j2").render( + account_name=cc.account_name, + aws_account_id=cc.aws_account_id, + environment=cfg.environment, + environment_abbr=cc.environment_abbr, + ), + )) + + rendered.append(RenderedFile( + path=f"{cfg.environment}/{cfg.region}/region.hcl", + content=env.get_template("region.hcl.j2").render( + aws_region=cfg.region, + environment=cfg.environment, + ), + )) + + rendered.append(RenderedFile( + path=f"{cfg.environment}/{cfg.region}/{cc.vpc_name}/vpc.hcl", + content=env.get_template("vpc.hcl.j2").render( + vpc_name=cc.vpc_name, + vpc_domain_name=cc.vpc_domain_name, + environment=cfg.environment, + aws_region=cfg.region, + ), + )) + + rendered.append(RenderedFile( + path=f"{cfg.environment}/{cfg.region}/{cc.vpc_name}/{cc.cluster_name}/cluster.hcl", + content=env.get_template("cluster.hcl.j2").render( + cluster_name=cc.cluster_name, + cluster_mailing_list=cc.cluster_mailing_list, + eks_instance_disk_size=cc.eks_instance_disk_size, + eks_ng_desired_size=cc.eks_ng_desired_size, + eks_ng_max_size=cc.eks_ng_max_size, + eks_ng_min_size=cc.eks_ng_min_size, + organization=cc.organization, + finops_project_name=cc.finops_project_name, + finops_project_number=cc.finops_project_number, + finops_project_role=cc.finops_project_role, + tags=cc.tags, + module_enablement_overrides=cc.module_enablement_overrides, + ), + )) + + rendered.append(RenderedFile( + 
path="README.md", + content=env.get_template("README.md.j2").render( + environment=cfg.environment, + aws_region=cfg.region, + cluster_name=cc.cluster_name, + vpc_name=cc.vpc_name, + ), + )) + + # ── Managed extra files (_envcommon/) ── + + cv = cfg.common_variables + rendered.append(RenderedFile( + path="_envcommon/common-variables.hcl", + content=env.get_template("common-variables.hcl.j2").render( + organization=cv.organization, + project_name=cv.project_name, + project_number=cv.project_number, + project_role=cv.project_role, + state_bucket_prefix=cv.state_bucket_prefix, + state_table_name=cv.state_table_name, + route53_endpoints=cv.route53_endpoints, + ), + )) + + rendered.append(RenderedFile( + path="_envcommon/default-versions.hcl", + content=env.get_template("default-versions.hcl.j2").render( + **_build_default_versions_context(cfg), + ), + )) + + logger.info( + "Rendered %d EKS config files: %s", + len(rendered), + [f.path for f in rendered], + ) + return rendered diff --git a/template_automation/github_provider.py b/template_automation/github_provider.py index 6a49e3d..dec544d 100644 --- a/template_automation/github_provider.py +++ b/template_automation/github_provider.py @@ -396,6 +396,85 @@ def write_file( return self._request(method, url, json=data) + def write_files_atomic( + self, + repo_name: str, + files: List[FileContent], + branch: str = "main", + message: str = "Add configuration files", + ) -> Dict[str, Any]: + """Write multiple files in a single atomic commit using the Git tree API. + + This is much more efficient than N individual ``write_file`` calls for + large file sets (e.g. rendered EKS config), and produces a clean single + commit in the repository history. + + Args: + repo_name: Repository name. + files: List of ``FileContent`` objects to write. + branch: Target branch. + message: Commit message. + + Returns: + Dict with ``commit_sha`` and ``tree_sha`` of the new commit. 
+ """ + logger.info(f"write_files_atomic: committing {len(files)} files to {repo_name}@{branch}") + + # 1. Get the current HEAD SHA of the target branch + branch_data = self.get_branch(repo_name, branch) + base_sha = branch_data['commit']['sha'] + logger.info(f" Base commit: {base_sha}") + + # 2. Build tree entries – for text content we let the API create blobs + # inline by using 'content' instead of 'sha'. + tree_entries = [] + for f in files: + if isinstance(f.content, bytes): + content_str = base64.b64decode( + base64.b64encode(f.content) + ).decode('utf-8', errors='replace') + else: + content_str = f.content + + tree_entries.append({ + 'path': f.path, + 'mode': '100644', + 'type': 'blob', + 'content': content_str, + }) + + # 3. Create a new tree (with base_tree so untouched files are preserved) + tree_resp = self._request( + 'POST', + f'/repos/{self.organization}/{repo_name}/git/trees', + json={'base_tree': base_sha, 'tree': tree_entries}, + ) + tree_sha = tree_resp['sha'] + logger.info(f" New tree: {tree_sha}") + + # 4. Create a commit pointing to the new tree + commit_resp = self._request( + 'POST', + f'/repos/{self.organization}/{repo_name}/git/commits', + json={ + 'message': message, + 'tree': tree_sha, + 'parents': [base_sha], + }, + ) + commit_sha = commit_resp['sha'] + logger.info(f" New commit: {commit_sha}") + + # 5. 
Fast-forward the branch ref to the new commit + self._request( + 'PATCH', + f'/repos/{self.organization}/{repo_name}/git/refs/heads/{branch}', + json={'sha': commit_sha, 'force': True}, + ) + logger.info(f" Branch {branch} updated to {commit_sha}") + + return {'commit_sha': commit_sha, 'tree_sha': tree_sha} + def create_pull_request( self, repo_name: str, diff --git a/template_automation/repository_provider.py b/template_automation/repository_provider.py index e39a3a0..1b827c2 100644 --- a/template_automation/repository_provider.py +++ b/template_automation/repository_provider.py @@ -112,6 +112,33 @@ def clone_repository_contents( """Clone contents from one repository to another.""" pass + def write_files_atomic( + self, + repo_name: str, + files: List[FileContent], + branch: str = "main", + message: str = "Add configuration files", + ) -> Dict[str, Any]: + """Write multiple files in a single atomic commit. + + The default implementation falls back to writing files one at a time. + Providers that support batch operations (e.g. Git tree API) should + override this for efficiency. + + Args: + repo_name: Repository name. + files: List of files to write. + branch: Target branch. + message: Commit message. + + Returns: + Result data (provider-specific). + """ + result: Dict[str, Any] = {} + for f in files: + result[f.path] = self.write_file(repo_name, f, branch=branch, message=message) + return result + def create_merge_request( self, repo_name: str, diff --git a/template_automation/templates/eks/README.md.j2 b/template_automation/templates/eks/README.md.j2 new file mode 100644 index 0000000..c0824ff --- /dev/null +++ b/template_automation/templates/eks/README.md.j2 @@ -0,0 +1,50 @@ +# EKS Cluster Configuration - {{ environment | upper }} + +This EKS cluster configuration was generated using the Service Catalog Repository Generator. 
+ +## Environment Details + +- **Environment**: {{ environment }} +- **Region**: {{ aws_region }} +- **Cluster Name**: {{ cluster_name }} + +## Directory Structure + +``` +{{ environment }}/ +├── account.hcl +└── {{ aws_region }}/ + ├── region.hcl + └── {{ vpc_name }}/ + ├── vpc.hcl + └── {{ cluster_name }}/ + └── cluster.hcl +``` + +## Getting Started + +To apply this configuration: + +1. Change to the directory of the module you want to deploy: + ``` + cd {{ environment }}/{{ aws_region }}/{{ vpc_name }}/{{ cluster_name }}/eks + ``` + +2. Initialize and apply the Terragrunt configuration: + ``` + terragrunt init + terragrunt plan + terragrunt apply + ``` + +3. Deploy additional modules as needed: + ``` + cd ../eks-cert-manager + terragrunt init + terragrunt plan + terragrunt apply + ``` + +## Customization + +Each module can be deployed independently using Terragrunt. diff --git a/template_automation/templates/eks/account.hcl.j2 b/template_automation/templates/eks/account.hcl.j2 new file mode 100644 index 0000000..cad12cf --- /dev/null +++ b/template_automation/templates/eks/account.hcl.j2 @@ -0,0 +1,11 @@ +# {{ environment }}/account.hcl + +# Set account-wide variables. These are automatically pulled in to configure the remote state bucket in the root +# Terragrunt configuration (root.hcl). 
+locals { + account_name = "{{ account_name }}" + aws_account_id = "{{ aws_account_id }}" + aws_profile = format("%v-%v", local.aws_account_id, replace(local.account_name, "-ew", "-gov")) + environment = "{{ environment }}" + environment_abbr = "{{ environment_abbr }}" +} diff --git a/template_automation/templates/eks/cluster.hcl.j2 b/template_automation/templates/eks/cluster.hcl.j2 new file mode 100644 index 0000000..7ea6181 --- /dev/null +++ b/template_automation/templates/eks/cluster.hcl.j2 @@ -0,0 +1,15 @@ +locals { + # Cluster specific configuration + cluster_name = "{{ cluster_name }}" + cluster_mailing_list = "{{ cluster_mailing_list }}" + eks_instance_disk_size = {{ eks_instance_disk_size }} + eks_ng_desired_size = {{ eks_ng_desired_size }} + eks_ng_max_size = {{ eks_ng_max_size }} + eks_ng_min_size = {{ eks_ng_min_size }} + organization = "{{ organization }}" + finops_project_name = "{{ finops_project_name }}" + finops_project_number = "{{ finops_project_number }}" + finops_project_role = "{{ finops_project_role }}" + tags = {{ tags | tojson }} + module_enablement_overrides = {{ module_enablement_overrides | tojson }} +} diff --git a/template_automation/templates/eks/common-variables.hcl.j2 b/template_automation/templates/eks/common-variables.hcl.j2 new file mode 100644 index 0000000..e814f8a --- /dev/null +++ b/template_automation/templates/eks/common-variables.hcl.j2 @@ -0,0 +1,10 @@ +locals { + organization = "{{ organization }}" + project_name = "{{ project_name }}" + project_number = "{{ project_number }}" + project_role = "{{ project_role }}" + state_bucket_prefix = "{{ state_bucket_prefix }}" + state_table_name = "{{ state_table_name }}" + + route53_endpoints = {{ route53_endpoints | tojson }} +} diff --git a/template_automation/templates/eks/default-versions.hcl.j2 b/template_automation/templates/eks/default-versions.hcl.j2 new file mode 100644 index 0000000..f9e3458 --- /dev/null +++ b/template_automation/templates/eks/default-versions.hcl.j2 @@ 
-0,0 +1,124 @@ +locals { + ##################### + # Module Versions + ##################### + cluster_version = "{{ cluster_version }}" + custom_service_eks_account = "{{ custom_service_eks_account }}" + eks_module_version = "{{ eks_module_version }}" + istio_ingress_version = "{{ istio_ingress_version }}" + release_version = "{{ release_version }}" + + ##################### + # TF Providers + ##################### + aws_version = "{{ aws_version }}" + helm_version = "{{ helm_version }}" + kubernetes_version = "{{ kubernetes_version }}" + null_version = "{{ null_version }}" + random_version = "{{ random_version }}" + template_version = "{{ template_version }}" + tf_version = "{{ tf_version }}" + + ##################### + # Component Versions + ##################### + + ################ + # Cert-Manager + ################ + cluster_issuer_name = "{{ cluster_issuer_name }}" + cert_manager_version = "{{ cert_manager_version }}" + cert_manager_helm_chart = "{{ cert_manager_helm_chart }}" + + ################ + # GoGatekeeper + ################ + gogatekeeper_tag = "{{ gogatekeeper_tag }}" + gogatekeeper_chart_version = "{{ gogatekeeper_chart_version }}" + + ################ + # Grafana + ################ + grafana_hostname = "{{ grafana_hostname }}" + grafana_operator_chart_version = "{{ grafana_operator_chart_version }}" + grafana_operator_tag = "{{ grafana_operator_tag }}" + grafana_tag = "{{ grafana_tag }}" + os_shell_image_tag = "{{ os_shell_image_tag }}" + + ################ + # Istio + ################ + istio_namespace = "{{ istio_namespace }}" + istio_version = "{{ istio_version }}" + + ################ + # k8s-dashboard + ################ + dashboard_hostname = "{{ dashboard_hostname }}" + k8s_dashboard_metrics_scraper = "{{ k8s_dashboard_metrics_scraper }}" + k8s_dashboard_version = "{{ k8s_dashboard_version }}" + + ################ + # Karpenter + ################ + karpenter_helm_chart = "{{ karpenter_helm_chart }}" + karpenter_tag = "{{ karpenter_tag }}" 
+ + ################ + # Keycloak + ################ + keycloak_chart_version = "{{ keycloak_chart_version }}" + keycloak_tag = "{{ keycloak_tag }}" + keycloak_hostname = "{{ keycloak_hostname }}" + keycloak_database = "{{ keycloak_database }}" + keycloak_username = "{{ keycloak_username }}" + keycloak_password = "{{ keycloak_password }}" + postgresql_tag = "{{ postgresql_tag }}" + + ################ + # Kiali + ################ + kiali_operator_version = "{{ kiali_operator_version }}" + kiali_application_version = "{{ kiali_application_version }}" + + ################ + # Loki + ################ + loki_chart_version = "{{ loki_chart_version }}" + loki_tag = "{{ loki_tag }}" + enterprise_logs_provisioner_tag = "{{ enterprise_logs_provisioner_tag }}" + gateway_tag = "{{ gateway_tag }}" + memcached_tag = "{{ memcached_tag }}" + exporter_tag = "{{ exporter_tag }}" + sidecar_tag = "{{ sidecar_tag }}" + + ################ + # Metrics Server + ################ + metrics_server_helm_chart = "{{ metrics_server_helm_chart }}" + metrics_server_tag = "{{ metrics_server_tag }}" + + ################ + # Prometheus + ################ + prometheus_chart_version = "{{ prometheus_chart_version }}" + prometheus_server_tag = "{{ prometheus_server_tag }}" + prometheus_config_reloader_tag = "{{ prometheus_config_reloader_tag }}" + alertmanager_tag = "{{ alertmanager_tag }}" + kube_state_metrics_tag = "{{ kube_state_metrics_tag }}" + node_exporter_tag = "{{ node_exporter_tag }}" + pushgateway_tag = "{{ pushgateway_tag }}" + + ################ + # Tempo + ################ + tempo_chart_version = "{{ tempo_chart_version }}" + tempo_tag = "{{ tempo_tag }}" + + ##################### + # Namespaces Config + ##################### + operator_namespace = "{{ operator_namespace }}" + telemetry_namespace = "{{ telemetry_namespace }}" + namespaces = {{ namespaces | tojson }} +} diff --git a/template_automation/templates/eks/region.hcl.j2 b/template_automation/templates/eks/region.hcl.j2 new file 
mode 100644 index 0000000..0748c90 --- /dev/null +++ b/template_automation/templates/eks/region.hcl.j2 @@ -0,0 +1,7 @@ +# {{ environment }}/{{ aws_region }}/region.hcl + +# Set common variables for the region. This is automatically pulled in by the root configuration (root.hcl) to +# configure the remote state bucket and pass forward to the child modules as inputs. +locals { + aws_region = "{{ aws_region }}" +} diff --git a/template_automation/templates/eks/root.hcl.j2 b/template_automation/templates/eks/root.hcl.j2 new file mode 100644 index 0000000..0811317 --- /dev/null +++ b/template_automation/templates/eks/root.hcl.j2 @@ -0,0 +1,171 @@ +# {{ environment }}/root.hcl +# --------------------------------------------------------------------------------------------------------------------- +# TERRAGRUNT CONFIGURATION +# Terragrunt is a thin wrapper for Terraform that provides extra tools for working with multiple Terraform modules, +# remote state, and locking: https://github.com/gruntwork-io/terragrunt +# --------------------------------------------------------------------------------------------------------------------- +locals { + # Automatically load account-level variables (NOTE: In our environment account = environment so there is no separate environment layer) + account_vars = read_terragrunt_config(find_in_parent_folders("account.hcl")) + + # Automatically load cluster-level variables + cluster_vars = read_terragrunt_config(find_in_parent_folders("cluster.hcl")) + + # Automatically load _envcommon, cross account and environment common variables + common_vars = read_terragrunt_config(find_in_parent_folders("./_envcommon/common-variables.hcl")) + + # Automatically load naming prefixes + prefix_vars = read_terragrunt_config(find_in_parent_folders("./_envcommon/prefixes.hcl")) + + # Automatically load region-level variables + region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl")) + + # Automatically load versions + versions = 
read_terragrunt_config(find_in_parent_folders("./_envcommon/default-versions.hcl")) + + # Automatically load vpc-level variables + vpc_vars = read_terragrunt_config(find_in_parent_folders("vpc.hcl")) + + # Add any other locals you want to expose + # only expose things not already included via local.xxx_vars.locals.* + root_locals_for_inputs = { + is_module_enabled = local.is_module_enabled + module_name = local.module_name + } + + # Extract the variables we need for easy access + account_id = local.account_vars.locals.aws_account_id + account_name = local.account_vars.locals.account_name + aws_profile = local.account_vars.locals.aws_profile + aws_region = local.region_vars.locals.aws_region + cluster_name = local.cluster_vars.locals.cluster_name + eecr_info = local.common_vars.locals.eecr_info + environment_abbr = local.account_vars.locals.environment_abbr + finops_project_name = local.cluster_vars.locals.finops_project_name + finops_project_number = local.cluster_vars.locals.finops_project_number + finops_project_role = local.cluster_vars.locals.finops_project_role + is_eks_module = local.module_name == "eks" + prefixes = local.prefix_vars.locals.prefixes + is_module_enabled = merge( + { for module in local.versions.locals.core_modules : module => true }, + local.versions.locals.enabled_modules, + local.module_overrides + ) + module_name = basename(get_original_terragrunt_dir()) + module_overrides = local.cluster_vars.locals.module_enablement_overrides + organization = local.cluster_vars.locals.organization + state_bucket_prefix = local.common_vars.locals.state_bucket_prefix + state_table_name = local.common_vars.locals.state_table_name +} + +# Only generate the cluster lookup data sources for non-EKS modules (cluster name is interpolated by Terragrunt) +generate "cluster_data" { + path = "cluster-data.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? 
"" : <<-EOF + data "aws_eks_clusters" "available" {} + + locals { + cluster_exists = contains(data.aws_eks_clusters.available.names, "${local.cluster_name}") + } + + data "aws_eks_cluster" "this" { + count = local.cluster_exists ? 1 : 0 + name = "${local.cluster_name}" + } + + data "aws_eks_cluster_auth" "this" { + count = local.cluster_exists ? 1 : 0 + name = "${local.cluster_name}" + } + EOF +} + +# Generate provider blocks only for non-EKS modules +generate "kube_provider" { + path = "kube-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "kubernetes" { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? data.aws_eks_cluster_auth.this[0].token : "dummy" + } + EOF +} + +generate "helm_provider" { + path = "helm-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "helm" { + kubernetes = { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? 
data.aws_eks_cluster_auth.this[0].token : "dummy" + } + } + EOF +} + +# Configure Terragrunt to automatically store tfstate files in an S3 bucket +remote_state { + disable_init = tobool(get_env("TG_DISABLE_INIT", "false")) + backend = "s3" + generate = { + path = "remote_state.backend.tf" + if_exists = "overwrite_terragrunt" + } + config = { + bucket = "${local.state_bucket_prefix}-${local.account_id}" + use_lockfile = true + key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}/terraform.tfstate" + profile = "${local.aws_profile}" + region = "${local.aws_region}" + disable_bucket_update = true + } +} + +# Generate an AWS provider block (the local.* values below are interpolated by Terragrunt when the file is generated) +generate "aws-provider" { + path = "aws-provider.tf" + if_exists = "overwrite" + contents = <<-EOF + provider "aws" { + region = "${local.aws_region}" + profile = "${local.aws_profile}" + default_tags { + tags = { + cluster_name = "${local.cluster_name}" + "boc:module_name" = "${local.module_name}" + environment = "${local.environment_abbr}" + finops_project_name = "${local.finops_project_name}" + finops_project_number = "${local.finops_project_number}" + finops_project_role = "${local.finops_project_role}" + organization = "${local.organization}" + } + } + # Only these AWS Account IDs may be operated on by this template + allowed_account_ids = ["${local.account_id}"] + } +EOF +} + +# --------------------------------------------------------------------------------------------------------------------- +# GLOBAL PARAMETERS +# These variables apply to all configurations in this subfolder. These are automatically merged into the child +# `terragrunt.hcl` config via the include block. +# --------------------------------------------------------------------------------------------------------------------- + +# Configure root level variables that all resources can inherit. 
This is especially helpful with multi-account configs +# where terraform_remote_state data sources are placed directly into the modules. +inputs = merge( + local.account_vars.locals, + local.cluster_vars.locals, + local.common_vars.locals, + local.prefix_vars.locals, + local.region_vars.locals, + local.versions.locals, + local.vpc_vars.locals, + local.root_locals_for_inputs +) diff --git a/template_automation/templates/eks/vpc.hcl.j2 b/template_automation/templates/eks/vpc.hcl.j2 new file mode 100644 index 0000000..b724bad --- /dev/null +++ b/template_automation/templates/eks/vpc.hcl.j2 @@ -0,0 +1,8 @@ +# {{ environment }}/{{ aws_region }}/{{ vpc_name }}/vpc.hcl + +# Set VPC specific variables. These are automatically pulled in by the root Terragrunt configuration (root.hcl) +# and passed forward to the child modules as inputs. +locals { + vpc_name = "{{ vpc_name }}" + vpc_domain_name = "{{ vpc_domain_name }}" +}