diff --git a/.github/platform-tg-infra.code-workspace b/.github/platform-tg-infra.code-workspace new file mode 100644 index 0000000..5047434 --- /dev/null +++ b/.github/platform-tg-infra.code-workspace @@ -0,0 +1,81 @@ +{ + "folders": [ + { + "name": "platform-tg-infra", + "path": "../" + }, + { + "name": "tfmod-cert-mgr", + "path": "../../tfmod-cert-mgr" + }, + { + "name": "tfmod-config-job", + "path": "../../tfmod-config-job" + }, + { + "name": "tfmod-custom-iam-role-for-service-account-eks", + "path": "../../tfmod-custom-iam-role-for-service-account-eks" + }, + { + "name": "tfmod-eks", + "path": "../../tfmod-eks" + }, + { + "name": "tfmod-eks-configuration", + "path": "../../tfmod-eks-configuration" + }, + { + "name": "tfmod-eks-dns", + "path": "../../tfmod-eks-dns" + }, + { + "name": "tfmod-grafana", + "path": "../../tfmod-grafana" + }, + { + "name": "tfmod-istio", + "path": "../../tfmod-istio" + }, + { + "name": "tfmod-istio-service-ingress", + "path": "../../tfmod-istio-service-ingress" + }, + { + "name": "tfmod-k8s-dashboard", + "path": "../../tfmod-k8s-dashboard" + }, + { + "name": "tfmod-karpenter", + "path": "../../tfmod-karpenter" + }, + { + "name": "tfmod-kiali", + "path": "../../tfmod-kiali" + }, + { + "name": "tfmod-loki", + "path": "../../tfmod-loki" + }, + { + "name": "tfmod-metrics-server", + "path": "../../tfmod-metrics-server" + }, + { + "name": "tfmod-prometheus", + "path": "../../tfmod-prometheus" + }, + { + "name": "tfmod-tempo", + "path": "../../tfmod-tempo" + }, + { + "path": "../../terraform-aws-eks" + }, + { + "path": "../../karpenter-provider-aws" + }, + { + "path": "../../terragrunt" + } + ] +} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fc196a2 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +.PHONY: help init validate plan fmt check clean + +help: + @echo "Available targets:" + @echo " init - Initialize Terragrunt configurations" + @echo " validate - Validate all Terragrunt configurations" + @echo " plan - Run 
plan in dry-run mode across all configurations" + @echo " fmt - Format HCL files" + @echo " check - Run all checks (format, validate, plan)" + @echo " clean - Clean up Terragrunt cache and temporary files" + +init: + @echo "Initializing Terragrunt configurations..." + terragrunt run-all init + +validate: + @echo "Validating Terragrunt configurations..." + terragrunt run-all validate + +plan: + @echo "Running plan in dry-run mode..." + terragrunt run-all plan --terragrunt-non-interactive + +fmt: + @echo "Formatting HCL files..." + find . -type f -name "*.hcl" -exec terragrunt hclfmt {} \; + +check: fmt validate plan + @echo "All checks completed" + +clean: + @echo "Cleaning Terragrunt cache..." + find . -type d -name ".terragrunt-cache" -exec rm -rf {} + + find . -type f -name ".terraform.lock.hcl" -delete + find . -type f -name "terragrunt-debug.tfvars.json" -delete diff --git a/lab/_envcommon/common-variables.hcl b/lab/_envcommon/common-variables.hcl index d2f73ef..38cb4c9 100644 --- a/lab/_envcommon/common-variables.hcl +++ b/lab/_envcommon/common-variables.hcl @@ -12,8 +12,6 @@ locals { project_role = "csvd_platformbaseline_app" state_bucket_prefix = "inf-tfstate" state_table_name = "tf_remote_state" - terraform = true - terragrunt = true route53_endpoints = { route53_main = { "account_id" = "269244441389" diff --git a/lab/_envcommon/default-versions.hcl b/lab/_envcommon/default-versions.hcl index dd0b36b..7b7df5b 100644 --- a/lab/_envcommon/default-versions.hcl +++ b/lab/_envcommon/default-versions.hcl @@ -6,9 +6,9 @@ locals { ##################### cluster_version = "1.31" custom_service_eks_account = "${local.release_version}" - eks_module_version = "20.31.1" + eks_module_version = "20.33.1" istio_ingress_version = "${local.release_version}" - release_version = "0.1.1" + release_version = "main" # change to main when testing updated modules ##################### # TF Providers @@ -47,7 +47,8 @@ locals { ################ # Istio ################ - istio_version 
= "1.24.2" + istio_namespace = "istio-system" + istio_version = "1.24.2" ################ # Grafana @@ -55,6 +56,7 @@ locals { download_dashboards_image_tag = "7.85.0" grafana_chart_version = "8.8.5" grafana_hostname = "grafana" + grafana_namespace = "grafana" grafana_tag = "11.4.0" init_chown_data_image_tag = "1.31.1" diff --git a/lab/_envcommon/helm-provider.hcl b/lab/_envcommon/helm-provider.hcl deleted file mode 100644 index 4323624..0000000 --- a/lab/_envcommon/helm-provider.hcl +++ /dev/null @@ -1,24 +0,0 @@ -generate "helm-provider" { - path = "helm-provider.tf" - if_exists = "overwrite" - contents = <<-EOF -%{ if startswith(local.module_name, "tfmod-eks-") ~} -provider "helm" { - kubernetes { - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", local.cluster_name, "--region", local.aws_region] - } - } -} - -data "aws_eks_cluster" "this" { - count = var.create_eks ? 
1 : 0 - name = local.cluster_name -} -%{ endif } -EOF -} \ No newline at end of file diff --git a/lab/_envcommon/kube-provider.hcl b/lab/_envcommon/kube-provider.hcl deleted file mode 100644 index 1805019..0000000 --- a/lab/_envcommon/kube-provider.hcl +++ /dev/null @@ -1,22 +0,0 @@ -generate "kube-provider" { - path = "kube-provider.tf" - if_exists = "overwrite" - contents = <<-EOF -%{ if startswith(local.module_name, "tfmod-eks-") ~} -provider "kubernetes" { - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", local.cluster_name, "--region", local.aws_region] - } -} - -data "aws_eks_cluster" "this" { - count = var.create_eks ? 1 : 0 - name = local.cluster_name -} -%{ endif } -EOF -} \ No newline at end of file diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl index e43148a..29eb18d 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl @@ -1,21 +1,28 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. 
locals { + # Cluster specific configuration cluster_endpoint_public_access = true cluster_name = "platform-eng-eks-mcm" - created_reason = "Terragrunt Development for CICD Delivered EKS Platform" - creator = "matthew.c.morgan@census.gov" + cluster_mailing_list = "matthew.c.morgan@census.gov" eks_instance_disk_size = 100 eks_ng_desired_size = 2 eks_ng_max_size = 10 eks_ng_min_size = 0 enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true tags = { "slim:schedule" = "8:00-17:00" "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" } + + # Common configuration + common_retry_args = { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } + + common_dependencies = ["../eks", "../eks-config"] + + common_mock_eks = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl index a2e6077..5ceaeae 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl @@ -6,36 +6,51 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20m"] } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-karpenter" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = 
"arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_version = include.root.inputs.cluster_version } } -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = include.root.inputs.cluster_mailing_list + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Cert Manager Configuration + cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart + cluster_issuer_name = include.root.inputs.cluster_issuer_name + + # Version Tags cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name - cluster_mailing_list = dependency.eks.inputs.creator - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = include.root.inputs.release_version } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl index ad0fbe2..fdbffa3 100644 
--- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl @@ -1,13 +1,19 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - include "root" { path = find_in_parent_folders("root.hcl") merge_strategy = "deep" expose = true } +dependencies { + paths = [ + "../eks", + "../eks-karpenter" + ] +} + terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=outputs" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20m"] @@ -15,51 +21,33 @@ terraform { } dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - vpc_id = "a-vpc-id" - } -} - -generate "kubectl-provider" { - path = "kubectl-provider.tf" - if_exists = "overwrite" - contents = <<-EOF - %{ if dependency.eks.outputs.cluster_name != "a-cluster-name" ~} - data "aws_eks_cluster" "kubectl" { - name = "${dependency.eks.outputs.cluster_name}" - } - provider "kubectl" { - apply_retry_count = 5 - host = data.aws_eks_cluster.kubectl.endpoint - 
cluster_ca_certificate = base64decode(data.aws_eks_cluster.kubectl.certificate_authority[0].data) - load_config_file = false + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", "${dependency.eks.outputs.cluster_name}", "--region", "${include.root.inputs.aws_region}"] - } + mock_outputs = { + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_certificate_authority_data = [{ data = "mock-cert-data" }] + eks_managed_node_groups_autoscaling_group_names = ["mock-asg-name"] + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + security_group_all_worker_mgmt_id = "sg-mock" + subnets = ["subnet-mock1", "subnet-mock2"] + vpc_id = "vpc-mock" } - %{ endif ~} - EOF } inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration cluster_name = dependency.eks.outputs.cluster_name eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = include.root.inputs.release_version security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id subnets = dependency.eks.outputs.subnets vpc_id = dependency.eks.outputs.vpc_id + } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl index 9b7c16f..b485d01 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl +++ 
b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl @@ -13,30 +13,48 @@ terraform { } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] + cluster_name = include.root.inputs.cluster_name + subnets = ["subnet-mock1", "subnet-mock2", "subnet-mock3"] } } -dependency "istio" { - config_path = "../eks-istio" +dependency "eks-istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" + dns_name = "mock-${include.root.inputs.cluster_name}.elb.amazonaws.com" + zone_id = "MOCKZONEID" } } } +dependencies { + paths = [ + "../eks-config", + "../eks-istio", + "../eks-karpenter" + ] +} + inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = include.root.inputs.cluster_name + + # Network Configuration + istio_ingress_lb = dependency.eks-istio.outputs.istio_ingress_lb route53_endpoints = include.root.inputs.route53_endpoints - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name + vpc_domain_name = include.root.inputs.vpc_domain_name + vpc_name = include.root.inputs.vpc_name + + # Additional Configuration + tags = include.root.inputs.tags } diff --git 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl index cf4f29a..c1093f3 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl @@ -13,28 +13,52 @@ terraform { } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-loki" { - config_path = "../eks-loki" +dependency "eks_loki" { + config_path = "../eks-loki" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - rwo_storage_class = "gp3-encrypted" + rwo_storage_class = "gp3-mocked" } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-loki" + ] +} + inputs = { - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = include.root.inputs.vpc_domain_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Storage Configuration + rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class + + # Grafana Configuration grafana_chart_version = include.root.inputs.grafana_chart_version grafana_tag = include.root.inputs.grafana_tag + download_dashboards_image_tag = 
include.root.inputs.download_dashboards_image_tag init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag - profile = include.root.inputs.aws_profile + namespace = include.root.inputs.grafana_namespace public_hostname = include.root.inputs.grafana_hostname - region = include.root.inputs.aws_region - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class } + diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl index 61ea560..b9712be 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl @@ -5,29 +5,40 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=outputs" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20m"] } } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - inputs = { - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = 
include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Istio Configuration + namespace = include.root.inputs.istio_namespace + istio_version = include.root.inputs.istio_version } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl index ded7ad0..6cbe233 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl @@ -12,25 +12,42 @@ terraform { } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true +dependency "eks-dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } } inputs = { - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = 
dependency.eks-dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + + # Dashboard Configuration k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - profile = include.root.inputs.aws_profile - public_hostname = include.root.inputs.dashboard_hostname - region = include.root.inputs.aws_region } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl index bbf116b..7d376f9 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl @@ -6,37 +6,44 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20m"] } } +dependencies { + paths = ["../eks"] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { - cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = "node_group_a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + node_group_name = "mock-node-group" + vpc_id = "vpc-mock" + subnets = ["subnet-mock1", "subnet-mock2"] } } -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - inputs = { - cluster_endpoint = 
dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Karpenter Configuration + karpenter_tag = include.root.inputs.karpenter_tag karpenter_helm_chart = include.root.inputs.karpenter_helm_chart karpenter_node_group_name = dependency.eks.outputs.node_group_name - karpenter_tag = include.root.inputs.karpenter_tag - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = include.root.inputs.release_version - vpc_id = dependency.eks.outputs.vpc_id } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable index 1e04fe0..27a255b 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable @@ -15,16 +15,19 @@ terraform { dependency "eks" { config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { cluster_name = "a-cluster-name" } } + dependency "eks-cert-manager" { config_path = "../eks-cert-manager" mock_outputs = { cluster_issuer_name = "acmpca-clusterissuer" } } + dependency "eks-prometheus" { config_path = "../eks-prometheus" mock_outputs = { @@ -34,6 +37,7 @@ dependency "eks-prometheus" { url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" } } + } dependency "eks-grafana" { config_path = "../eks-grafana" diff 
--git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled new file mode 100644 index 0000000..a06c6e6 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled @@ -0,0 +1,108 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-grafana", + "../eks-istio", + "../eks-prometheus" + ] +} + +dependency "eks" { + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + operators_namespace = "mock-namespace" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_grafana" { + config_path = "../eks-grafana" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = { + hostname = "grafana.mock.svc.cluster.local" + port_number = "80" + url = "https://grafana.mock.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname 
= "grafana.mock.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.mock.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +dependency "eks_istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + namespace = "mock-namespace-istio" + } +} + +dependency "eks_prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = "mock-internal-url" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = dependency.eks.outputs.cluster_name + + # Kiali Configuration + grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks_grafana.outputs.namespace + grafana_secret_name = dependency.eks_grafana.outputs.secret_name + grafana_public_url = dependency.eks_grafana.outputs.public_endpoint.url + + kiali_operator_version = include.root.inputs.kiali_operator_version + operators_namespace = dependency.eks-config.outputs.operators_namespace + + prometheus_internal_url = dependency.eks_prometheus.outputs.internal_endpoint + jager_internal_url = dependency.eks_prometheus.outputs.jager_internal_url +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl index 70b8b09..b9cff50 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl @@ -4,43 +4,50 @@ include "root" { expose = true } -terraform { - source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true +dependency "eks-config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } } -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } } inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - 
sidecar_tag = include.root.inputs.sidecar_tag + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Loki Configuration + loki_chart_version = include.root.inputs.loki_chart_version + loki_tag = include.root.inputs.loki_tag + rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl index 387653b..ef2851f 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl @@ -4,30 +4,39 @@ include "root" { expose = true } -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = "mock-cluster" } } -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } } inputs = { - profile = include.root.inputs.aws_profile - 
cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + + # Metrics Server Configuration metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart metrics_server_tag = include.root.inputs.metrics_server_tag } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl index e6c54b1..4a98ee6 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl @@ -12,22 +12,41 @@ terraform { } } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true +dependency "eks-config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } } inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name =
dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration prometheus_chart_version = include.root.inputs.prometheus_chart_version prometheus_server_tag = include.root.inputs.prometheus_server_tag prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag @@ -35,4 +54,5 @@ inputs = { kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag node_exporter_tag = include.root.inputs.node_exporter_tag pushgateway_tag = include.root.inputs.pushgateway_tag + rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl index e9ebd48..14f2267 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl @@ -13,34 +13,52 @@ terraform { } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } dependency "eks-prometheus" { - config_path = "../eks-prometheus" + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { + prometheus_namespace = "prometheus" prometheus_server_internal_endpoint = { hostname = "prometheus-server.prometheus.svc.cluster.local" port_number = 9090 url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" } - prometheus_namespace = "prometheus" } } 
+dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-prometheus" + ] +} + inputs = { - account_id = include.root.locals.account_id - profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + # Tempo Configuration + tempo_chart_version = include.root.inputs.tempo_chart_version + tempo_tag = include.root.inputs.tempo_tag } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl index ba46766..f12f202 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl @@ -6,6 +6,7 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() arguments = ["-lock-timeout=20m"] @@ -13,17 +14,15 @@ terraform { } inputs = { - cluster_endpoint_public_access = 
include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - tags = include.root.inputs.tags - vpc_name = include.root.inputs.vpc_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = include.root.inputs.cluster_name + cluster_version = include.root.inputs.cluster_version + + # Additional Configuration + tags = include.root.inputs.tags } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt deleted file mode 100644 index 71c3774..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt +++ /dev/null @@ -1,68 +0,0 @@ -Provider Configuration Changes and Cluster Lifecycle Management -========================================================== - -Problem: --------- -The original provider configuration in root.hcl had issues handling different cluster lifecycle states: -1. When no cluster exists - terragrunt run-all plan would fail -2. When cluster is being created - terragrunt run-all apply needed to work -3. 
When cluster is being destroyed - terragrunt run-all destroy needed to work - -The main issue was that the provider configurations were using data sources that required the cluster to exist, causing failures during planning when the cluster didn't exist. - -Solution: ---------- -1. Moved provider configurations to separate files in _envcommon/: - - helm-provider.hcl - - kube-provider.hcl - -2. Added conditional data source lookup using count: - data "aws_eks_cluster" "this" { - count = var.create_eks ? 1 : 0 - name = local.cluster_name - } - -3. Used try() function with empty fallback values: - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - -4. Added create_eks variable control: - - Added to root.hcl locals block - - Controlled via TERRAGRUNT_CREATE_EKS environment variable - - Defaults to "true" - - Generated as a variable in each module - -How it works: ------------- -1. No cluster exists: - - Set TERRAGRUNT_CREATE_EKS=false - - Data source won't be created (count = 0) - - Provider configurations fall back to empty values - - Plan succeeds as providers are configured but not used - -2. Creating cluster: - - TERRAGRUNT_CREATE_EKS=true (default) - - As soon as cluster exists, data source becomes available - - Provider configurations get real values - - Apply continues with working providers - -3. Destroying cluster: - - Set TERRAGRUNT_CREATE_EKS=false before destroy - - Providers fall back to empty values - - Resources can be destroyed without needing cluster access - -Usage: ------- -1. For initial plan with no cluster: - export TERRAGRUNT_CREATE_EKS=false - terragrunt run-all plan - -2. For creating cluster and resources: - export TERRAGRUNT_CREATE_EKS=true (or don't set it) - terragrunt run-all apply - -3. 
For destroying everything: - export TERRAGRUNT_CREATE_EKS=false - terragrunt run-all destroy - -This solution allows Terragrunt to handle the full lifecycle of the cluster and its dependent resources without failing on provider initialization when the cluster doesn't exist. \ No newline at end of file diff --git a/lab/root.hcl b/lab/root.hcl index a746b5f..10706ff 100644 --- a/lab/root.hcl +++ b/lab/root.hcl @@ -28,8 +28,7 @@ locals { account_id = local.account_vars.locals.aws_account_id aws_profile = local.account_vars.locals.aws_profile aws_region = local.region_vars.locals.aws_region - created_reason = local.cluster_vars.locals.created_reason - creator = local.cluster_vars.locals.creator + cluster_name = local.cluster_vars.locals.cluster_name environment_abbr = local.account_vars.locals.environment_abbr organization = local.common_vars.locals.organization project_name = local.common_vars.locals.project_name @@ -37,12 +36,62 @@ locals { project_role = local.common_vars.locals.project_role state_bucket_prefix = local.common_vars.locals.state_bucket_prefix state_table_name = local.common_vars.locals.state_table_name - terraform = local.cluster_vars.locals.terraform - terragrunt = local.cluster_vars.locals.terragrunt - module_name = get_terragrunt_dir() - create_eks = get_env("TERRAGRUNT_CREATE_EKS", "true") + # Check if current module is the EKS module + module_name = basename(get_original_terragrunt_dir()) + is_eks_module = local.module_name == "eks" } +# Only generate providers for non-EKS modules +generate "cluster_data" { + path = "cluster-data.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + data "aws_eks_clusters" "available" {} + + locals { + cluster_exists = contains(data.aws_eks_clusters.available.names, "${local.cluster_name}") + } + + data "aws_eks_cluster" "this" { + count = local.cluster_exists ? 1 : 0 + name = "${local.cluster_name}" + } + + data "aws_eks_cluster_auth" "this" { + count = local.cluster_exists ? 
1 : 0 + name = "${local.cluster_name}" + } + EOF +} + +# Generate provider blocks only for non-EKS modules +generate "kube_provider" { + path = "kube-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "kubernetes" { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? data.aws_eks_cluster_auth.this[0].token : "dummy" + } + EOF +} + +generate "helm_provider" { + path = "helm-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "helm" { + kubernetes { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? 
data.aws_eks_cluster_auth.this[0].token : "dummy" + } + } + EOF +} + + # Configure Terragrunt to automatically store tfstate files in an S3 bucket remote_state { backend = "s3" @@ -71,47 +120,23 @@ generate "aws-provider" { path = "aws-provider.tf" if_exists = "overwrite" contents = <<-EOF -provider "aws" { - region = "${local.aws_region}" - profile = "${local.aws_profile}" - default_tags { - tags = { - project_identifier = "${local.project_number}:${local.project_name}" - project_name = "${local.project_name}" - project_role = "${local.project_role}" - created_by = "${local.creator}" - created_for = "${local.creator}" - created_reason = "${local.created_reason}" - environment = "${local.environment_abbr}" - organization = "${local.organization}" - project_number = "${local.project_number}" - terraform = "${local.terraform}" - terragrunt = "${local.terragrunt}" + provider "aws" { + region = "${local.aws_region}" + profile = "${local.aws_profile}" + default_tags { + tags = { + cluster_name = "${local.cluster_name}" + "boc:module_name" = "${local.module_name}" + environment = "${local.environment_abbr}" + finops_project_name = "${local.project_name}" + finops_project_number = "${local.project_number}" + finops_project_role = "${local.project_role}" + organization = "${local.organization}" + } } + # Only these AWS Account IDs may be operated on by this template + allowed_account_ids = ["${local.account_id}"] } - # Only these AWS Account IDs may be operated on by this template - allowed_account_ids = ["${local.account_id}"] -} -EOF -} - -include "helm_provider" { - path = "${dirname(find_in_parent_folders())}/_envcommon/helm-provider.hcl" -} - -include "kube_provider" { - path = "${dirname(find_in_parent_folders())}/_envcommon/kube-provider.hcl" -} - -generate "variables" { - path = "variables.tf" - if_exists = "overwrite" - contents = <<-EOF -variable "create_eks" { - description = "Controls if EKS cluster should be created (affects all AWS resources)" - type = bool - 
default = true -} EOF } diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..55a5ffc --- /dev/null +++ b/notes.md @@ -0,0 +1,78 @@ +I really like these suggestions, but I want to help shape your suggestions with some prime directives for these tasks: +1. security is paramount. we operate in govcloud and handle titled data. security is the most important consideration. +2. cost control - this is a base cluster for a customer to build on top of for their apps. It is anticipated there will be significant time between initial provisioning and first use. The cheapest possible configuration for secure operations in govcloud. +3. simplicity. ideally, I want to be able to add a single file to an exising git repository (which represents an aws account), and have it spawn this entire cluster definition. +4. maintainability. As in, a minimum amount of effort to maintain,, prioritizing future-proofing in decisions. +5. extensibility. try to keep things modular and able to be glued together as easy as possible. +6. best practices. should probably be higher in this list, but at all times, we should endevour to follow/encourage best practices. +7. testability. we are dealing with eks clusters in aws here. by nature, these are expensive resources. anything we can do to test without creation of resources, or rapid creation and destruction, is encouraged. +8. documentation - including the 5 W's (who, what, when, where, why, and how) + +Given those guidelines, does that change your suggestions? Should we start the code review over with those in mind? 
+ +Improvement: Consider adding validation blocks for required variables +Improvement: Add more detailed comments explaining configuration choices +Improvement: Consider tagging strategy for cost allocation +Improvement: Add lifecycle policies for node groups +Warning: Public endpoint access enabled - consider restricting CIDR ranges +Improvement: Add explicit IAM role configurations +Improvement: Implement network policies +# Add to cluster configuration +cluster_security_group_additional_rules = { + ingress_nodes_ephemeral_ports = { + description = "Node to node ephemeral ports" + protocol = "tcp" + from_port = 1025 + to_port = 65535 + type = "ingress" + source_node_security_group = true + } +} +Add CloudWatch logging configuration +Implement proper metrics collection +Set up alerts for cluster health +Improvement: Add more detailed documentation +Improvement: Consider adding test environments +Add README files in each major directory +Document deployment procedures +Add troubleshooting guides +Document network architecture + +resource "aws_eks_cluster" "main" { + # ...existing code... 
+ vpc_config { + endpoint_private_access = true + endpoint_public_access = false # Force private endpoint only + security_group_ids = [aws_security_group.cluster.id] + subnet_ids = var.private_subnet_ids + } + + encryption_config { + provider { + key_arn = aws_kms_key.eks.arn + } + resources = ["secrets"] + } +} + +24m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API +24m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/loki-write invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API +22m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: did not receive metrics for targeted pods (pods might be unready) +2 +29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) +29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) +29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) +29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed 
to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) +2 +* Failed to execute "terraform_current apply -lock-timeout=20m -auto-approve -input=false -auto-approve" in ./.terragrunt-cache/jrM5TqaHxjlphT8vQ1DicmFp6eM/1NbRS_ankC8AcxKegXNWAnjyQEg + ╷ + │ Error: Unable to continue with install: Certificate "platform-eng-eks-mcm" in namespace "istio-system" exists and cannot be imported into the current release: invalid ownership metadata; annotation validation error: key "meta.helm.sh/release-name" must equal "grafana-grafana-ingress": current value is "k8s-dashboard-k8s-dashboard-ingress"; annotation validation error: key "meta.helm.sh/release-namespace" must equal "grafana": current value is "k8s-dashboard" + │ + │ with module.ingress_resources.helm_release.ingress, + │ on .terraform/modules/ingress_resources/main.tf line 6, in resource "helm_release" "ingress": + │ 6: resource "helm_release" "ingress" { + │ + ╵ + + exit status 1 + \ No newline at end of file diff --git a/platform-tg-infra.code-workspace b/platform-tg-infra.code-workspace deleted file mode 100644 index dec7709..0000000 --- a/platform-tg-infra.code-workspace +++ /dev/null @@ -1,29 +0,0 @@ -{ - "folders": [ - { - "path": "." - }, - { - "path": "../terraform-provider-github/website/docs/d", - "name": "provider/aws/data-sources" - }, - { - "path": "../terraform-provider-github/website/docs/r", - "name": "provider/aws/resources" - }, - { - "path": "../terraform/website/docs/language/tests", - "name": "terraform/tests" - }, - { - "path": "../terraform/website/docs/language/syntax", - "name": "terraform/syntax" - }, - { - "path": "../terragrunt" - } - ], - "settings": { - - } -}