diff --git a/.github/platform-tg-infra.code-workspace b/.github/platform-tg-infra.code-workspace new file mode 100644 index 0000000..5047434 --- /dev/null +++ b/.github/platform-tg-infra.code-workspace @@ -0,0 +1,81 @@ +{ + "folders": [ + { + "name": "platform-tg-infra", + "path": "../" + }, + { + "name": "tfmod-cert-mgr", + "path": "../../tfmod-cert-mgr" + }, + { + "name": "tfmod-config-job", + "path": "../../tfmod-config-job" + }, + { + "name": "tfmod-custom-iam-role-for-service-account-eks", + "path": "../../tfmod-custom-iam-role-for-service-account-eks" + }, + { + "name": "tfmod-eks", + "path": "../../tfmod-eks" + }, + { + "name": "tfmod-eks-configuration", + "path": "../../tfmod-eks-configuration" + }, + { + "name": "tfmod-eks-dns", + "path": "../../tfmod-eks-dns" + }, + { + "name": "tfmod-grafana", + "path": "../../tfmod-grafana" + }, + { + "name": "tfmod-istio", + "path": "../../tfmod-istio" + }, + { + "name": "tfmod-istio-service-ingress", + "path": "../../tfmod-istio-service-ingress" + }, + { + "name": "tfmod-k8s-dashboard", + "path": "../../tfmod-k8s-dashboard" + }, + { + "name": "tfmod-karpenter", + "path": "../../tfmod-karpenter" + }, + { + "name": "tfmod-kiali", + "path": "../../tfmod-kiali" + }, + { + "name": "tfmod-loki", + "path": "../../tfmod-loki" + }, + { + "name": "tfmod-metrics-server", + "path": "../../tfmod-metrics-server" + }, + { + "name": "tfmod-prometheus", + "path": "../../tfmod-prometheus" + }, + { + "name": "tfmod-tempo", + "path": "../../tfmod-tempo" + }, + { + "path": "../../terraform-aws-eks" + }, + { + "path": "../../karpenter-provider-aws" + }, + { + "path": "../../terragrunt" + } + ] +} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fc196a2 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +.PHONY: help init validate plan fmt check clean + +help: + @echo "Available targets:" + @echo " init - Initialize Terragrunt configurations" + @echo " validate - Validate all Terragrunt configurations" + @echo " plan - Run 
plan in dry-run mode across all configurations" + @echo " fmt - Format HCL files" + @echo " check - Run all checks (format, validate, plan)" + @echo " clean - Clean up Terragrunt cache and temporary files" + +init: + @echo "Initializing Terragrunt configurations..." + terragrunt run-all init + +validate: + @echo "Validating Terragrunt configurations..." + terragrunt run-all validate + +plan: + @echo "Running plan in dry-run mode..." + terragrunt run-all plan --terragrunt-non-interactive + +fmt: + @echo "Formatting HCL files..." + terragrunt hclfmt + +check: fmt validate plan + @echo "All checks completed" + +clean: + @echo "Cleaning Terragrunt cache..." + find . -type d -name ".terragrunt-cache" -exec rm -rf {} + + find . -type f -name ".terraform.lock.hcl" -delete + find . -type f -name "terragrunt-debug.tfvars.json" -delete diff --git a/lab/_envcommon/common-variables.hcl b/lab/_envcommon/common-variables.hcl index d2f73ef..38cb4c9 100644 --- a/lab/_envcommon/common-variables.hcl +++ b/lab/_envcommon/common-variables.hcl @@ -12,8 +12,6 @@ locals { project_role = "csvd_platformbaseline_app" state_bucket_prefix = "inf-tfstate" state_table_name = "tf_remote_state" - terraform = true - terragrunt = true route53_endpoints = { route53_main = { "account_id" = "269244441389" diff --git a/lab/_envcommon/default-versions.hcl b/lab/_envcommon/default-versions.hcl index dd0b36b..ddc4795 100644 --- a/lab/_envcommon/default-versions.hcl +++ b/lab/_envcommon/default-versions.hcl @@ -6,9 +6,9 @@ locals { ##################### cluster_version = "1.31" custom_service_eks_account = "${local.release_version}" - eks_module_version = "20.31.1" + eks_module_version = "20.33.1" istio_ingress_version = "${local.release_version}" - release_version = "0.1.1" + release_version = "main" # change to main when testing updated modules ##################### # TF Providers @@ -29,7 +29,7 @@ locals { ################ # k8s-dashboard ################ -
dashboard_hostname = "dashboard" + dashboard_hostname = "k8s-dashboard" k8s_dashboard_metrics_scraper = "1.0.8" k8s_dashboard_version = "6.0.6" @@ -47,7 +47,8 @@ locals { ################ # Istio ################ - istio_version = "1.24.2" + istio_namespace = "istio-system" + istio_version = "1.24.2" ################ # Grafana @@ -55,6 +56,7 @@ locals { download_dashboards_image_tag = "7.85.0" grafana_chart_version = "8.8.5" grafana_hostname = "grafana" + grafana_namespace = "grafana" grafana_tag = "11.4.0" init_chown_data_image_tag = "1.31.1" @@ -92,6 +94,7 @@ locals { # Prometheus ################ prometheus_chart_version = "25.26.0" + prometheus_namespace = "prometheus" prometheus_server_tag = "v2.54.0" prometheus_config_reloader_tag = "v0.75.2" alertmanager_tag = "v0.27.0" @@ -103,5 +106,6 @@ locals { # Tempo ################ tempo_chart_version = "1.18.1" + tempo_namespace = "tempo" tempo_tag = "2.7.0" } diff --git a/lab/_envcommon/helm-provider.hcl b/lab/_envcommon/helm-provider.hcl deleted file mode 100644 index 4323624..0000000 --- a/lab/_envcommon/helm-provider.hcl +++ /dev/null @@ -1,24 +0,0 @@ -generate "helm-provider" { - path = "helm-provider.tf" - if_exists = "overwrite" - contents = <<-EOF -%{ if startswith(local.module_name, "tfmod-eks-") ~} -provider "helm" { - kubernetes { - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", local.cluster_name, "--region", local.aws_region] - } - } -} - -data "aws_eks_cluster" "this" { - count = var.create_eks ? 
1 : 0 - name = local.cluster_name -} -%{ endif } -EOF -} \ No newline at end of file diff --git a/lab/_envcommon/kube-provider.hcl b/lab/_envcommon/kube-provider.hcl deleted file mode 100644 index 1805019..0000000 --- a/lab/_envcommon/kube-provider.hcl +++ /dev/null @@ -1,22 +0,0 @@ -generate "kube-provider" { - path = "kube-provider.tf" - if_exists = "overwrite" - contents = <<-EOF -%{ if startswith(local.module_name, "tfmod-eks-") ~} -provider "kubernetes" { - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", local.cluster_name, "--region", local.aws_region] - } -} - -data "aws_eks_cluster" "this" { - count = var.create_eks ? 1 : 0 - name = local.cluster_name -} -%{ endif } -EOF -} \ No newline at end of file diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl index e43148a..29eb18d 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl @@ -1,21 +1,28 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. 
locals { + # Cluster specific configuration cluster_endpoint_public_access = true cluster_name = "platform-eng-eks-mcm" - created_reason = "Terragrunt Development for CICD Delivered EKS Platform" - creator = "matthew.c.morgan@census.gov" + cluster_mailing_list = "matthew.c.morgan@census.gov" eks_instance_disk_size = 100 eks_ng_desired_size = 2 eks_ng_max_size = 10 eks_ng_min_size = 0 enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true tags = { "slim:schedule" = "8:00-17:00" "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" } + + # Common configuration + common_retry_args = { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } + + common_dependencies = ["../eks", "../eks-config"] + + common_mock_eks = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl index a2e6077..ea7cc82 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-cert-manager/terragrunt.hcl @@ -6,36 +6,51 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-karpenter" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { - cluster_name = "a-cluster-name" - 
oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_version = include.root.inputs.cluster_version } } -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = include.root.inputs.cluster_mailing_list + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Cert Manager Configuration + cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart + cluster_issuer_name = include.root.inputs.cluster_issuer_name + + # Version Tags cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name - cluster_mailing_list = dependency.eks.inputs.creator - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = include.root.inputs.release_version } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl index 
ad0fbe2..9798f19 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl @@ -1,64 +1,51 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - include "root" { path = find_in_parent_folders("root.hcl") merge_strategy = "deep" expose = true } +dependencies { + paths = [ + "../eks", + "../eks-karpenter" + ] +} + terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=outputs" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - vpc_id = "a-vpc-id" - } -} - -generate "kubectl-provider" { - path = "kubectl-provider.tf" - if_exists = "overwrite" - contents = <<-EOF - %{ if dependency.eks.outputs.cluster_name != "a-cluster-name" ~} - data "aws_eks_cluster" "kubectl" { - name = "${dependency.eks.outputs.cluster_name}" - } - provider "kubectl" { - apply_retry_count = 5 - host = 
data.aws_eks_cluster.kubectl.endpoint - cluster_ca_certificate = base64decode(data.aws_eks_cluster.kubectl.certificate_authority[0].data) - load_config_file = false + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] - exec { - api_version = "client.authentication.k8s.io/v1beta1" - command = "aws" - args = ["eks", "get-token", "--cluster-name", "${dependency.eks.outputs.cluster_name}", "--region", "${include.root.inputs.aws_region}"] - } + mock_outputs = { + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + cluster_certificate_authority_data = [{ data = "mock-cert-data" }] + eks_managed_node_groups_autoscaling_group_names = ["mock-asg-name"] + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + security_group_all_worker_mgmt_id = "sg-mock" + subnets = ["subnet-mock1", "subnet-mock2"] + vpc_id = "vpc-mock" } - %{ endif ~} - EOF } inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration cluster_name = dependency.eks.outputs.cluster_name eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = include.root.inputs.release_version security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id subnets = dependency.eks.outputs.subnets vpc_id = dependency.eks.outputs.vpc_id diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl index 9b7c16f..d1a51f5 100644 --- 
a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-dns/terragrunt.hcl @@ -5,38 +5,56 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=explictProvider" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] + cluster_name = include.root.inputs.cluster_name + subnets = ["subnet-mock1", "subnet-mock2", "subnet-mock3"] } } -dependency "istio" { - config_path = "../eks-istio" +dependency "eks-istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" + dns_name = "mock-${include.root.inputs.cluster_name}.elb.amazonaws.com" + zone_id = "MOCKZONEID" } } } +dependencies { + paths = [ + "../eks-config", + "../eks-istio", + "../eks-karpenter" + ] +} + inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = include.root.inputs.cluster_name + + # Network Configuration + istio_ingress_lb = 
dependency.eks-istio.outputs.istio_ingress_lb route53_endpoints = include.root.inputs.route53_endpoints - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name + vpc_domain_name = include.root.inputs.vpc_domain_name + vpc_name = include.root.inputs.vpc_name + + # Additional Configuration + tags = include.root.inputs.tags } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl index cf4f29a..ed53c15 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-grafana/terragrunt.hcl @@ -5,36 +5,58 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=cert_clash" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-loki" { - config_path = "../eks-loki" +dependency "eks_loki" { + config_path = "../eks-loki" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - rwo_storage_class = "gp3-encrypted" + rwo_storage_class = "gp3-mocked" } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-loki" + ] +} + 
inputs = { - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = include.root.inputs.vpc_domain_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Storage Configuration + rwo_storage_class = dependency.eks_loki.outputs.rwo_storage_class + + # Grafana Configuration grafana_chart_version = include.root.inputs.grafana_chart_version grafana_tag = include.root.inputs.grafana_tag + download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag - profile = include.root.inputs.aws_profile - public_hostname = include.root.inputs.grafana_hostname - region = include.root.inputs.aws_region - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class + namespace = include.root.inputs.grafana_namespace } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl index 61ea560..1acb519 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-istio/terragrunt.hcl @@ -5,29 +5,40 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=outputs" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = 
["-lock-timeout=20s"] } } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - inputs = { - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Istio Configuration + namespace = include.root.inputs.istio_namespace + istio_version = include.root.inputs.istio_version } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl index ded7ad0..f213603 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-k8s-dashboard/terragrunt.hcl @@ -5,32 +5,41 @@ include "root" { } terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=cert_clash" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = 
["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true -} - inputs = { - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint - cluster_domain = dependency.eks.inputs.vpc_domain_name - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = include.root.inputs.vpc_domain_name + cluster_name = dependency.eks.outputs.cluster_name + + # Dashboard Configuration k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - profile = include.root.inputs.aws_profile - public_hostname = include.root.inputs.dashboard_hostname - region = include.root.inputs.aws_region + namespace = include.root.inputs.dashboard_hostname } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl index bbf116b..1ec3a41 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-karpenter/terragrunt.hcl @@ -6,37 +6,44 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = 
get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } +dependencies { + paths = ["../eks"] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { - cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = "node_group_a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" + cluster_name = "mock-cluster" + cluster_endpoint = "https://mock-endpoint.eks.amazonaws.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + node_group_name = "mock-node-group" + vpc_id = "vpc-mock" + subnets = ["subnet-mock1", "subnet-mock2"] } } -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - inputs = { - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Karpenter Configuration + karpenter_tag = include.root.inputs.karpenter_tag karpenter_helm_chart = include.root.inputs.karpenter_helm_chart karpenter_node_group_name = dependency.eks.outputs.node_group_name - karpenter_tag = include.root.inputs.karpenter_tag - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - release_version = 
include.root.inputs.release_version - vpc_id = dependency.eks.outputs.vpc_id } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable index 1e04fe0..27a255b 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disable @@ -15,16 +15,19 @@ terraform { dependency "eks" { config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { cluster_name = "a-cluster-name" } } + dependency "eks-cert-manager" { config_path = "../eks-cert-manager" mock_outputs = { cluster_issuer_name = "acmpca-clusterissuer" } } + dependency "eks-prometheus" { config_path = "../eks-prometheus" mock_outputs = { @@ -34,6 +37,7 @@ dependency "eks-prometheus" { url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" } } + } dependency "eks-grafana" { config_path = "../eks-grafana" diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled new file mode 100644 index 0000000..a06c6e6 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-kiali/terragrunt.hcl.disabled @@ -0,0 +1,108 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } +} + +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-grafana", + "../eks-istio", + "../eks-prometheus" + ] +} + +dependency "eks" { 
+ config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + operators_namespace = "mock-namespace" + } +} + +dependency "eks_dns" { + config_path = "../eks-dns" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + cluster_domain = "mock.example.com" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" + } +} + +dependency "eks_grafana" { + config_path = "../eks-grafana" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + internal_endpoint = { + hostname = "grafana.mock.svc.cluster.local" + port_number = "80" + url = "https://grafana.mock.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.mock.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.mock.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +dependency "eks_istio" { + config_path = "../eks-istio" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + namespace = "mock-namespace-istio" + } +} + +dependency "eks_prometheus" { + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + prometheus_internal_url = "mock-internal-url" + } +} + +inputs = { + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_domain = dependency.eks_dns.outputs.cluster_domain + cluster_name = 
dependency.eks.outputs.cluster_name + + # Kiali Configuration + grafana_internal_url = dependency.eks_grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks_grafana.outputs.namespace + grafana_secret_name = dependency.eks_grafana.outputs.secret_name + grafana_public_url = dependency.eks_grafana.outputs.public_endpoint.url + + kiali_operator_version = include.root.inputs.kiali_operator_version + operators_namespace = dependency.eks-config.outputs.operators_namespace + + prometheus_internal_url = dependency.eks_prometheus.outputs.internal_endpoint + jager_internal_url = dependency.eks_prometheus.outputs.jager_internal_url +} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl index 70b8b09..e1ccdfd 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-loki/terragrunt.hcl @@ -4,43 +4,51 @@ include "root" { expose = true } -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-metrics-server" + ] } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = "mock-cluster" + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true +dependency "eks-config" { + 
config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } } -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=read_fix" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } } inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Loki Configuration + loki_chart_version = include.root.inputs.loki_chart_version + loki_tag = include.root.inputs.loki_tag + rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl index 387653b..4e4d198 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl +++ 
b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-metrics-server/terragrunt.hcl @@ -4,30 +4,39 @@ include "root" { expose = true } -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } +dependencies { + paths = [ + "../eks", + "../eks-config" + ] } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = "mock-cluster" } } -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20s"] + } } inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + + # Metrics Server Configuration metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart metrics_server_tag = include.root.inputs.metrics_server_tag } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl index e6c54b1..65a90e4 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl +++ 
b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-prometheus/terragrunt.hcl @@ -8,31 +8,53 @@ terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-metrics-server" + ] +} + dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true +dependency "eks-config" { + config_path = "../eks-config" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } } inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration prometheus_chart_version = include.root.inputs.prometheus_chart_version + prometheus_namespace = include.root.inputs.prometheus_namespace prometheus_server_tag = include.root.inputs.prometheus_server_tag prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag alertmanager_tag = include.root.inputs.alertmanager_tag kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag 
node_exporter_tag = include.root.inputs.node_exporter_tag pushgateway_tag = include.root.inputs.pushgateway_tag + rwo_storage_class = dependency.eks-config.outputs.rwo_storage_class } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl index e9ebd48..dc22271 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-tempo/terragrunt.hcl @@ -8,39 +8,58 @@ terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } dependency "eks" { - config_path = "../eks" + config_path = "../eks" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + cluster_name = include.root.inputs.cluster_name + oidc_provider_arn = "arn:aws-us-gov:iam::123456789012:oidc-provider/mock" } } dependency "eks-prometheus" { - config_path = "../eks-prometheus" + config_path = "../eks-prometheus" + mock_outputs_allowed_terraform_commands = ["init", "plan", "validate", "destroy"] mock_outputs = { + prometheus_namespace = "prometheus" prometheus_server_internal_endpoint = { hostname = "prometheus-server.prometheus.svc.cluster.local" port_number = 9090 url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" } - prometheus_namespace = "prometheus" } } +dependencies { + paths = [ + "../eks", + "../eks-config", + "../eks-dns", + "../eks-karpenter", + "../eks-prometheus" + ] +} + inputs = { - account_id = include.root.locals.account_id 
- profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Cluster Configuration + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + + # Prometheus Configuration prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + # Tempo Configuration + tempo_chart_version = include.root.inputs.tempo_chart_version + tempo_tag = include.root.inputs.tempo_tag + namespace = include.root.inputs.tempo_namespace } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl index ba46766..9eca1de 100644 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl +++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks/terragrunt.hcl @@ -6,24 +6,23 @@ include "root" { terraform { source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] + arguments = ["-lock-timeout=20s"] } } inputs = { - cluster_endpoint_public_access = include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = 
include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - tags = include.root.inputs.tags - vpc_name = include.root.inputs.vpc_name + # AWS Configuration + account_id = include.root.inputs.aws_account_id + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + + # Core Cluster Configuration + cluster_name = include.root.inputs.cluster_name + cluster_version = include.root.inputs.cluster_version + + # Additional Configuration + tags = include.root.inputs.tags } diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt deleted file mode 100644 index 71c3774..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt +++ /dev/null @@ -1,68 +0,0 @@ -Provider Configuration Changes and Cluster Lifecycle Management -========================================================== - -Problem: --------- -The original provider configuration in root.hcl had issues handling different cluster lifecycle states: -1. When no cluster exists - terragrunt run-all plan would fail -2. When cluster is being created - terragrunt run-all apply needed to work -3. When cluster is being destroyed - terragrunt run-all destroy needed to work - -The main issue was that the provider configurations were using data sources that required the cluster to exist, causing failures during planning when the cluster didn't exist. - -Solution: ---------- -1. 
Moved provider configurations to separate files in _envcommon/: - - helm-provider.hcl - - kube-provider.hcl - -2. Added conditional data source lookup using count: - data "aws_eks_cluster" "this" { - count = var.create_eks ? 1 : 0 - name = local.cluster_name - } - -3. Used try() function with empty fallback values: - host = try(data.aws_eks_cluster.this[0].endpoint, "") - cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data), "") - -4. Added create_eks variable control: - - Added to root.hcl locals block - - Controlled via TERRAGRUNT_CREATE_EKS environment variable - - Defaults to "true" - - Generated as a variable in each module - -How it works: ------------- -1. No cluster exists: - - Set TERRAGRUNT_CREATE_EKS=false - - Data source won't be created (count = 0) - - Provider configurations fall back to empty values - - Plan succeeds as providers are configured but not used - -2. Creating cluster: - - TERRAGRUNT_CREATE_EKS=true (default) - - As soon as cluster exists, data source becomes available - - Provider configurations get real values - - Apply continues with working providers - -3. Destroying cluster: - - Set TERRAGRUNT_CREATE_EKS=false before destroy - - Providers fall back to empty values - - Resources can be destroyed without needing cluster access - -Usage: ------- -1. For initial plan with no cluster: - export TERRAGRUNT_CREATE_EKS=false - terragrunt run-all plan - -2. For creating cluster and resources: - export TERRAGRUNT_CREATE_EKS=true (or don't set it) - terragrunt run-all apply - -3. For destroying everything: - export TERRAGRUNT_CREATE_EKS=false - terragrunt run-all destroy - -This solution allows Terragrunt to handle the full lifecycle of the cluster and its dependent resources without failing on provider initialization when the cluster doesn't exist. 
\ No newline at end of file diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl deleted file mode 100644 index 8d2831c..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. -locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index 35e355a..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = 
"arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = dependency.eks.inputs.creator - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl deleted file mode 100644 index d4a60db..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-config/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = 
"https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - token = [{ token = "THISISTHETOKENTHATDOESNTEXISTTHEREAREMANYLIKEITBUTHISONEISFORACLUSTER" }] - vpc_id = "a-vpc-id" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - vpc_id = dependency.eks.outputs.vpc_id - cluster_name = dependency.eks.outputs.cluster_name - subnets = dependency.eks.outputs.subnets - security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id - eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - kubectl_image_tag = include.root.inputs.kubectl_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl deleted file mode 100644 index 6e28781..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { 
- config_path = "../eks" - mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] - } -} - -dependency "istio" { - config_path = "../eks-istio" - mock_outputs = { - istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" - } - } -} - -inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name - route53_endpoints = include.root.inputs.route53_endpoints -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl deleted file mode 100644 index 65ab33f..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - mock_outputs = { - rwo_storage_class = "gp3-encrypted" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = 
include.root.inputs.grafana_hostname - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl deleted file mode 100644 index c7c22c8..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,32 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index cd1961b..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-k8s-dashboard/terragrunt.hcl +++ /dev/null @@ -1,36 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - 
merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.dashboard_hostname - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl deleted file mode 100644 index 6b1a862..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,43 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = "node_group_a-cluster-name" - oidc_provider_arn = 
"arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" - } -} - -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - karpenter_node_group_name = dependency.eks.outputs.node_group_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - vpc_id = dependency.eks.outputs.vpc_id - karpenter_helm_chart = include.root.inputs.karpenter_helm_chart - karpenter_tag = include.root.inputs.karpenter_tag - kubectl_tag = include.root.inputs.kubectl_image_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable deleted file mode 100644 index 1e04fe0..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-kiali/terragrunt.hcl.disable +++ /dev/null @@ -1,81 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - # source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = "acmpca-clusterissuer" - } -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = 
"prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.dev.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.dev.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -inputs = { - kiali_operator_version = include.root.inputs.kiali_operator_version - kiali_application_version = include.root.inputs.kiali_application_version - - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - operators_namespace = "operators" - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url - grafana_secret_name = "grafana" - # grafana_secret_name = dependency.eks-grafana.outputs.secret_name - jaeger_internal_url = "" - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl 
deleted file mode 100644 index 2c6b6be..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl deleted file mode 100644 index 387653b..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,33 +0,0 @@ -include "root" 
{ - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/README.md deleted file mode 100644 index bbbffb2..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/README.md +++ /dev/null @@ -1,198 +0,0 @@ -## eks-prometheus -This module deploys EKS kubeenetes prometheus inside existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. -This module consisits of 4 components. It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. - 1. prometheus-alert-manager - 2. prometheus-node-exporter - 3. prometheus-pushgateway - 4. prometheus-server - -### Dependencies -This module is dependent on EKS module (eks). The cluster should exist already for this module to work. 
- -### Inputs - cluster_name - profile - prometheus_chart_version - prometheus_server_tag - prometheus_config_reloader_tag - alertmanager_tag - kube_state_metrics_tag - node_exporter_tag - pushgateway_tag - rwo_storage_class - -### Outputs - alertmanager_internal_endpoint - alertmanager_headless_internal_endpoint - pushgateway_internal_endpoint - prometheus_server_internal_endpoint - -### Issues observed/fixed -1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" -2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" -3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" -4. The alertmanager_tag value had to be updated from -5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: - - ``` - set { - name = "kube-state-metrics.image.registry" - value = module.images.images[local.ksm_key].dest_registry - } - set { - name = "kube-state-metrics.image.repository" - value = module.images.images[local.ksm_key].dest_repository - } - ``` - -6. In some other cases the image ecr repository had to be split by the colon separatory (:) - - ``` - set { - name = "alertmanager.configmapReload.image.repository" - value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] - } - ``` - -### Chart Notes - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl port-forward $POD_NAME 9091 - echo "Visit http://127.0.0.1:9091 to use your application" - ``` - - The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: - prometheus-server.prometheus.svc.cluster.local - - - Get the Prometheus server URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9090 - ``` - - The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: - `prometheus-alertmanager.prometheus.svc.cluster.local` - - - Get the Alertmanager URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9093 - ``` - - ################################################################################# - ###### WARNING: Pod Security Policy has been disabled by default since ##### - ###### it deprecated after k8s 1.25+. use ##### - ###### (index .Values "prometheus-node-exporter" "rbac" ##### - ###### "pspEnabled") with (index .Values ##### - ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### - ###### in case you still need it. 
##### - ################################################################################# - - - The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: - `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` - - - Get the PushGateway URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9091 - ``` - - For more information on running Prometheus, visit: - https://prometheus.io/ - - kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. - The exposed metrics can be found here: - https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics - - The metrics are exported on the HTTP endpoint /metrics on the listening port. - In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` - - They are served either as plaintext or protobuf depending on the Accept header. - They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9093 to use your application" - kubectl --namespace prometheus port-forward $POD_NAME 9093:80 - ``` - - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9100 to use your application" - kubectl port-forward --namespace prometheus $POD_NAME 9100 - ``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | -| [helm](#requirement\_helm) | >= 2.11.0 | -| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | -| [null](#requirement\_null) | >= 3.2.1 | - -## Providers - -| Name | Version | -|------|---------| -| [helm](#provider\_helm) | >= 2.11.0 | -| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | - -## Resources - -| Name | Type | -|------|------| -| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | -| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | -| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | -| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | -| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | -| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | -| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | -| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | -| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | -| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | -| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | -| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | -| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | -| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | -| [module\_name](#output\_module\_name) | The name of this module. | -| [module\_version](#output\_module\_version) | The version of this module. 
| -| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | -| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | -| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | - diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl deleted file mode 100644 index e6c54b1..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,38 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag - alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl deleted file 
mode 100644 index e9ebd48..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,46 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - prometheus_namespace = "prometheus" - } -} - -inputs = { - account_id = include.root.locals.account_id - profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl deleted file mode 100644 index cc7c893..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-eng-eks-test/eks/terragrunt.hcl +++ /dev/null @@ -1,56 +0,0 @@ -include 
"root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -locals { - # Set cluster/platform specific variables, or extract from the hierarchy. - account_id = include.root.inputs.aws_account_id - cluster_endpoint_public_access = include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - organization = include.root.inputs.organization - profile = include.root.inputs.aws_profile - project_name = include.root.inputs.project_name - project_number = include.root.inputs.project_number - project_role = include.root.inputs.project_role - region = include.root.inputs.aws_region - tags = include.root.inputs.tags - terraform = include.root.inputs.terraform - terragrunt = include.root.inputs.terragrunt - vpc_domain_name = include.root.inputs.vpc_domain_name -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -inputs = { - aws_account_id = local.account_id - cluster_endpoint_public_access = local.cluster_endpoint_public_access - cluster_name = local.cluster_name - cluster_version = local.cluster_version - creator = local.creator - eks_instance_disk_size = local.eks_instance_disk_size - eks_ng_desired_size = local.eks_ng_desired_size - 
eks_ng_max_size = local.eks_ng_max_size - eks_ng_min_size = local.eks_ng_min_size - eks_vpc_name = local.eks_vpc_name - enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions - os_username = local.creator - shared_vpc_label = local.environment_abbr - tags = local.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl deleted file mode 100644 index 8d2831c..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. -locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl deleted file mode 100644 index 35e355a..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-cert-manager/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() 
- arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - cluster_name = dependency.eks.outputs.cluster_name - cluster_mailing_list = dependency.eks.inputs.creator - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart - cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag - cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag - cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag - cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag - cluster_issuer_name = include.root.inputs.cluster_issuer_name -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl deleted file mode 100644 index d4a60db..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-config/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl - -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - 
cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] - cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - security_group_all_worker_mgmt_id = "sg-00b0000000000000" - subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] - token = [{ token = "THISISTHETOKENTHATDOESNTEXISTTHEREAREMANYLIKEITBUTHISONEISFORACLUSTER" }] - vpc_id = "a-vpc-id" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - vpc_id = dependency.eks.outputs.vpc_id - cluster_name = dependency.eks.outputs.cluster_name - subnets = dependency.eks.outputs.subnets - security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id - eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - kubectl_image_tag = include.root.inputs.kubectl_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl deleted file mode 100644 index 6e28781..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-dns/terragrunt.hcl +++ /dev/null @@ -1,42 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - 
commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] - } -} - -dependency "istio" { - config_path = "../eks-istio" - mock_outputs = { - istio_ingress_lb = { - dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" - zone_id = "ZABC123456DEF" - } - } -} - -inputs = { - cluster_name = dependency.eks.inputs.cluster_name - istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - subnets = dependency.eks.outputs.subnets - tags = dependency.eks.inputs.tags - vpc_domain_name = dependency.eks.inputs.vpc_domain_name - vpc_name = dependency.eks.inputs.vpc_name - route53_endpoints = include.root.inputs.route53_endpoints -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl deleted file mode 100644 index 65ab33f..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-grafana/terragrunt.hcl +++ /dev/null @@ -1,40 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - mock_outputs = { - rwo_storage_class = "gp3-encrypted" - } -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = 
dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.grafana_hostname - rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class - grafana_chart_version = include.root.inputs.grafana_chart_version - grafana_tag = include.root.inputs.grafana_tag - download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag - init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl deleted file mode 100644 index c7c22c8..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-istio/terragrunt.hcl +++ /dev/null @@ -1,32 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-karpenter" { - config_path = "../eks-karpenter" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - istio_chart_version = include.root.inputs.istio_version - istio_version = include.root.inputs.istio_version -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl deleted file mode 100644 index cd1961b..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-k8s-dashboard/terragrunt.hcl +++ 
/dev/null @@ -1,36 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - vpc_domain_name = "example.com" - } -} - -dependency "eks-loki" { - config_path = "../eks-loki" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - cluster_domain = dependency.eks.inputs.vpc_domain_name - public_hostname = include.root.inputs.dashboard_hostname - k8s_dashboard_version = include.root.inputs.k8s_dashboard_version - # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl deleted file mode 100644 index 6b1a862..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-karpenter/terragrunt.hcl +++ /dev/null @@ -1,43 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" - cluster_name = "a-cluster-name" - node_group_name = 
"node_group_a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - vpc_id = "a-vpc-name" - } -} - -dependency "eks-config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_endpoint = dependency.eks.outputs.cluster_endpoint - cluster_name = dependency.eks.outputs.cluster_name - karpenter_node_group_name = dependency.eks.outputs.node_group_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - vpc_id = dependency.eks.outputs.vpc_id - karpenter_helm_chart = include.root.inputs.karpenter_helm_chart - karpenter_tag = include.root.inputs.karpenter_tag - kubectl_tag = include.root.inputs.kubectl_image_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable deleted file mode 100644 index 1e04fe0..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-kiali/terragrunt.hcl.disable +++ /dev/null @@ -1,81 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=${include.root.inputs.release_version}" - # source = "../../../../../../../tfmod-kiali" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} -dependency "eks-cert-manager" { - config_path = "../eks-cert-manager" - mock_outputs = { - cluster_issuer_name = "acmpca-clusterissuer" - } -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - 
prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - } -} -dependency "eks-grafana" { - config_path = "../eks-grafana" - mock_outputs = { - internal_endpoint = { - hostname = "grafana.grafana.svc.cluster.local" - port_number = "80" - url = "https://grafana.grafana.svc.cluster.local:80/" - } - namespace = "grafana" - public_endpoint = { - hostname = "grafana.dev.lab.csp2.census.gov" - port_number = "80" - url = "https://grafana.dev.lab.csp2.census.gov:80/" - } - secret_name = "grafana" - } -} - -inputs = { - kiali_operator_version = include.root.inputs.kiali_operator_version - kiali_application_version = include.root.inputs.kiali_application_version - - profile = include.root.inputs.aws_profile - cluster_domain = dependency.eks.inputs.vpc_domain_name - operators_namespace = "operators" - cluster_name = dependency.eks.outputs.cluster_name - certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name - prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url - grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url - grafana_namespace = dependency.eks-grafana.outputs.namespace - grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url - grafana_secret_name = "grafana" - # grafana_secret_name = dependency.eks-grafana.outputs.secret_name - jaeger_internal_url = "" - - - # client_id = var.sso_client_id - # client_secret = var.sso_client_secret - # keycloak_public_url = var.keycloak_public_url - # gogatekeeper_chart_version = var.gogatekeeper_chart_version - # gogatekeeper_registry = var.gogatekeeper_registry - # gogatekeeper_repository = var.gogatekeeper_repository - # gogatekeeper_tag = var.gogatekeeper_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl 
b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl deleted file mode 100644 index 2c6b6be..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-loki/terragrunt.hcl +++ /dev/null @@ -1,44 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} -dependency "eks-istio" { - config_path = "../eks-istio" - skip_outputs = true -} -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - loki_chart_version = include.root.inputs.loki_chart_version - loki_tag = include.root.inputs.loki_tag - canary_tag = include.root.inputs.canary_tag - enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag - gateway_tag = include.root.inputs.gateway_tag - memcached_tag = include.root.inputs.memcached_tag - exporter_tag = include.root.inputs.exporter_tag - sidecar_tag = include.root.inputs.sidecar_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl deleted file mode 100644 index 387653b..0000000 --- 
a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-metrics-server/terragrunt.hcl +++ /dev/null @@ -1,33 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks_config" { - config_path = "../eks-config" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - cluster_name = dependency.eks.outputs.cluster_name - region = include.root.inputs.aws_region - metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart - metrics_server_tag = include.root.inputs.metrics_server_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md deleted file mode 100644 index bbbffb2..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/README.md +++ /dev/null @@ -1,198 +0,0 @@ -## eks-prometheus -This module deploys EKS kubeenetes prometheus inside existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. -This module consisits of 4 components. It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. - 1. prometheus-alert-manager - 2. prometheus-node-exporter - 3. prometheus-pushgateway - 4. prometheus-server - -### Dependencies -This module is dependent on EKS module (eks). The cluster should exist already for this module to work. 
- -### Inputs - cluster_name - profile - prometheus_chart_version - prometheus_server_tag - prometheus_config_reloader_tag - alertmanager_tag - kube_state_metrics_tag - node_exporter_tag - pushgateway_tag - rwo_storage_class - -### Outputs - alertmanager_internal_endpoint - alertmanager_headless_internal_endpoint - pushgateway_internal_endpoint - prometheus_server_internal_endpoint - -### Issues observed/fixed -1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" -2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" -3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" -4. The alertmanager_tag value had to be updated from -5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: - - ``` - set { - name = "kube-state-metrics.image.registry" - value = module.images.images[local.ksm_key].dest_registry - } - set { - name = "kube-state-metrics.image.repository" - value = module.images.images[local.ksm_key].dest_repository - } - ``` - -6. In some other cases the image ecr repository had to be split by the colon separatory (:) - - ``` - set { - name = "alertmanager.configmapReload.image.repository" - value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] - } - ``` - -### Chart Notes - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl port-forward $POD_NAME 9091 - echo "Visit http://127.0.0.1:9091 to use your application" - ``` - - The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: - prometheus-server.prometheus.svc.cluster.local - - - Get the Prometheus server URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9090 - ``` - - The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: - `prometheus-alertmanager.prometheus.svc.cluster.local` - - - Get the Alertmanager URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9093 - ``` - - ################################################################################# - ###### WARNING: Pod Security Policy has been disabled by default since ##### - ###### it deprecated after k8s 1.25+. use ##### - ###### (index .Values "prometheus-node-exporter" "rbac" ##### - ###### "pspEnabled") with (index .Values ##### - ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### - ###### in case you still need it. 
##### - ################################################################################# - - - The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: - `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` - - - Get the PushGateway URL by running these commands in the same shell: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prometheus port-forward $POD_NAME 9091 - ``` - - For more information on running Prometheus, visit: - https://prometheus.io/ - - kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. - The exposed metrics can be found here: - https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics - - The metrics are exported on the HTTP endpoint /metrics on the listening port. - In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` - - They are served either as plaintext or protobuf depending on the Accept header. - They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. - - 1. Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9093 to use your application" - kubectl --namespace prometheus port-forward $POD_NAME 9093:80 - ``` - - 1. 
Get the application URL by running these commands: - - ```bash - export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") - echo "Visit http://127.0.0.1:9100 to use your application" - kubectl port-forward --namespace prometheus $POD_NAME 9100 - ``` - - -## Requirements - -| Name | Version | -|------|---------| -| [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | -| [helm](#requirement\_helm) | >= 2.11.0 | -| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | -| [null](#requirement\_null) | >= 3.2.1 | - -## Providers - -| Name | Version | -|------|---------| -| [helm](#provider\_helm) | >= 2.11.0 | -| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | - -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | - -## Resources - -| Name | Type | -|------|------| -| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | -| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | - -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | -| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | -| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | -| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | -| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | -| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | -| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | -| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | -| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | -| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | -| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | -| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | - -## Outputs - -| Name | Description | -|------|-------------| -| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | -| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | -| [module\_name](#output\_module\_name) | The name of this module. | -| [module\_version](#output\_module\_version) | The version of this module. 
| -| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | -| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | -| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | - diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl deleted file mode 100644 index e6c54b1..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-prometheus/terragrunt.hcl +++ /dev/null @@ -1,38 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - } -} - -dependency "eks-dns" { - config_path = "../eks-dns" - skip_outputs = true -} - -inputs = { - profile = include.root.inputs.aws_profile - region = include.root.inputs.aws_region - cluster_name = dependency.eks.outputs.cluster_name - prometheus_chart_version = include.root.inputs.prometheus_chart_version - prometheus_server_tag = include.root.inputs.prometheus_server_tag - prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag - alertmanager_tag = include.root.inputs.alertmanager_tag - kube_state_metrics_tag = include.root.inputs.kube_state_metrics_tag - node_exporter_tag = include.root.inputs.node_exporter_tag - pushgateway_tag = include.root.inputs.pushgateway_tag -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl deleted file mode 100644 index 
e9ebd48..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks-tempo/terragrunt.hcl +++ /dev/null @@ -1,46 +0,0 @@ -include "root" { - path = find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -dependency "eks" { - config_path = "../eks" - mock_outputs = { - cluster_name = "a-cluster-name" - oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" - } -} - -dependency "eks-prometheus" { - config_path = "../eks-prometheus" - mock_outputs = { - prometheus_server_internal_endpoint = { - hostname = "prometheus-server.prometheus.svc.cluster.local" - port_number = 9090 - url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" - } - prometheus_namespace = "prometheus" - } -} - -inputs = { - account_id = include.root.locals.account_id - profile = include.root.locals.aws_profile - region = include.root.locals.aws_region - cluster_name = dependency.eks.outputs.cluster_name - oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn - prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number - prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace - tempo_chart_version = include.root.inputs.tempo_chart_version - tempo_tag = include.root.inputs.tempo_tag - -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl deleted file mode 100644 index cc7c893..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-cicd/eks/terragrunt.hcl +++ /dev/null @@ -1,56 +0,0 @@ -include "root" { - path = 
find_in_parent_folders("root.hcl") - merge_strategy = "deep" - expose = true -} - -locals { - # Set cluster/platform specific variables, or extract from the hierarchy. - account_id = include.root.inputs.aws_account_id - cluster_endpoint_public_access = include.root.inputs.cluster_endpoint_public_access - cluster_name = include.root.inputs.cluster_name - cluster_version = include.root.inputs.cluster_version - creator = include.root.inputs.creator - eks_instance_disk_size = include.root.inputs.eks_instance_disk_size - eks_ng_desired_size = include.root.inputs.eks_ng_desired_size - eks_ng_max_size = include.root.inputs.eks_ng_max_size - eks_ng_min_size = include.root.inputs.eks_ng_min_size - eks_vpc_name = include.root.inputs.vpc_name - enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions - environment_abbr = include.root.inputs.environment_abbr - organization = include.root.inputs.organization - profile = include.root.inputs.aws_profile - project_name = include.root.inputs.project_name - project_number = include.root.inputs.project_number - project_role = include.root.inputs.project_role - region = include.root.inputs.aws_region - tags = include.root.inputs.tags - terraform = include.root.inputs.terraform - terragrunt = include.root.inputs.terragrunt - vpc_domain_name = include.root.inputs.vpc_domain_name -} - -terraform { - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}" - extra_arguments "retry_lock" { - commands = get_terraform_commands_that_need_locking() - arguments = ["-lock-timeout=20m"] - } -} - -inputs = { - aws_account_id = local.account_id - cluster_endpoint_public_access = local.cluster_endpoint_public_access - cluster_name = local.cluster_name - cluster_version = local.cluster_version - creator = local.creator - eks_instance_disk_size = local.eks_instance_disk_size - eks_ng_desired_size = local.eks_ng_desired_size - eks_ng_max_size = 
local.eks_ng_max_size - eks_ng_min_size = local.eks_ng_min_size - eks_vpc_name = local.eks_vpc_name - enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions - os_username = local.creator - shared_vpc_label = local.environment_abbr - tags = local.tags -} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl deleted file mode 100644 index 8d2831c..0000000 --- a/lab/development/us-gov-east-1/vpc/platform-test-x/cluster.hcl +++ /dev/null @@ -1,20 +0,0 @@ -# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/cluster.hcl - -# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root -# terragrunt.hcl configuration. -locals { - cluster_endpoint_public_access = true - cluster_name = "platform-eng-eks-mcm" - creator = "matthew.c.morgan@census.gov" - eks_instance_disk_size = 100 - eks_ng_desired_size = 2 - eks_ng_max_size = 10 - eks_ng_min_size = 0 - enable_cluster_creator_admin_permissions = true - terraform = true - terragrunt = true - tags = { - "slim:schedule" = "8:00-17:00" - "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" - } -} diff --git a/lab/root.hcl b/lab/root.hcl index a746b5f..10706ff 100644 --- a/lab/root.hcl +++ b/lab/root.hcl @@ -28,8 +28,7 @@ locals { account_id = local.account_vars.locals.aws_account_id aws_profile = local.account_vars.locals.aws_profile aws_region = local.region_vars.locals.aws_region - created_reason = local.cluster_vars.locals.created_reason - creator = local.cluster_vars.locals.creator + cluster_name = local.cluster_vars.locals.cluster_name environment_abbr = local.account_vars.locals.environment_abbr organization = local.common_vars.locals.organization project_name = local.common_vars.locals.project_name @@ -37,12 +36,62 @@ locals { project_role = local.common_vars.locals.project_role 
state_bucket_prefix = local.common_vars.locals.state_bucket_prefix state_table_name = local.common_vars.locals.state_table_name - terraform = local.cluster_vars.locals.terraform - terragrunt = local.cluster_vars.locals.terragrunt - module_name = get_terragrunt_dir() - create_eks = get_env("TERRAGRUNT_CREATE_EKS", "true") + # Check if current module is the EKS module + module_name = basename(get_original_terragrunt_dir()) + is_eks_module = local.module_name == "eks" } +# Emit the EKS cluster lookup data sources only for non-EKS modules (the EKS module gets an empty file) +generate "cluster_data" { + path = "cluster-data.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + data "aws_eks_clusters" "available" {} + + locals { + cluster_exists = contains(data.aws_eks_clusters.available.names, "${local.cluster_name}") + } + + data "aws_eks_cluster" "this" { + count = local.cluster_exists ? 1 : 0 + name = "${local.cluster_name}" + } + + data "aws_eks_cluster_auth" "this" { + count = local.cluster_exists ? 1 : 0 + name = "${local.cluster_name}" + } + EOF +} + +# Generate provider blocks only for non-EKS modules +generate "kube_provider" { + path = "kube-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "kubernetes" { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? data.aws_eks_cluster_auth.this[0].token : "dummy" + } + EOF +} + +generate "helm_provider" { + path = "helm-provider.tf" + if_exists = "overwrite_terragrunt" + contents = local.is_eks_module ? "" : <<-EOF + provider "helm" { + kubernetes { + host = local.cluster_exists ? data.aws_eks_cluster.this[0].endpoint : "https://dummy" + cluster_ca_certificate = local.cluster_exists ? 
base64decode(data.aws_eks_cluster.this[0].certificate_authority[0].data) : null + token = local.cluster_exists ? data.aws_eks_cluster_auth.this[0].token : "dummy" + } + } + EOF +} + + # Configure Terragrunt to automatically store tfstate files in an S3 bucket remote_state { backend = "s3" @@ -71,47 +120,23 @@ generate "aws-provider" { path = "aws-provider.tf" if_exists = "overwrite" contents = <<-EOF -provider "aws" { - region = "${local.aws_region}" - profile = "${local.aws_profile}" - default_tags { - tags = { - project_identifier = "${local.project_number}:${local.project_name}" - project_name = "${local.project_name}" - project_role = "${local.project_role}" - created_by = "${local.creator}" - created_for = "${local.creator}" - created_reason = "${local.created_reason}" - environment = "${local.environment_abbr}" - organization = "${local.organization}" - project_number = "${local.project_number}" - terraform = "${local.terraform}" - terragrunt = "${local.terragrunt}" + provider "aws" { + region = "${local.aws_region}" + profile = "${local.aws_profile}" + default_tags { + tags = { + cluster_name = "${local.cluster_name}" + "boc:module_name" = "${local.module_name}" + environment = "${local.environment_abbr}" + finops_project_name = "${local.project_name}" + finops_project_number = "${local.project_number}" + finops_project_role = "${local.project_role}" + organization = "${local.organization}" + } } + # Only these AWS Account IDs may be operated on by this template + allowed_account_ids = ["${local.account_id}"] } - # Only these AWS Account IDs may be operated on by this template - allowed_account_ids = ["${local.account_id}"] -} -EOF -} - -include "helm_provider" { - path = "${dirname(find_in_parent_folders())}/_envcommon/helm-provider.hcl" -} - -include "kube_provider" { - path = "${dirname(find_in_parent_folders())}/_envcommon/kube-provider.hcl" -} - -generate "variables" { - path = "variables.tf" - if_exists = "overwrite" - contents = <<-EOF -variable 
"create_eks" { - description = "Controls if EKS cluster should be created (affects all AWS resources)" - type = bool - default = true -} EOF } diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..55a5ffc --- /dev/null +++ b/notes.md @@ -0,0 +1,78 @@ +I really like these suggestions, but I want to help shape your suggestions with some prime directives for these tasks: +1. security is paramount. we operate in govcloud and handle titled data. security is the most important consideration. +2. cost control - this is a base cluster for a customer to build on top of for their apps. It is anticipated there will be significant time between initial provisioning and first use. The cheapest possible configuration for secure operations in govcloud. +3. simplicity. ideally, I want to be able to add a single file to an existing git repository (which represents an aws account), and have it spawn this entire cluster definition. +4. maintainability. As in, a minimum amount of effort to maintain, prioritizing future-proofing in decisions. +5. extensibility. try to keep things modular and able to be glued together as easily as possible. +6. best practices. should probably be higher in this list, but at all times, we should endeavour to follow/encourage best practices. +7. testability. we are dealing with eks clusters in aws here. by nature, these are expensive resources. anything we can do to test without creation of resources, or rapid creation and destruction, is encouraged. +8. documentation - including the 5 W's (who, what, when, where, why, and how) + +Given those guidelines, does that change your suggestions? Should we start the code review over with those in mind? 
+ +Improvement: Consider adding validation blocks for required variables +Improvement: Add more detailed comments explaining configuration choices +Improvement: Consider tagging strategy for cost allocation +Improvement: Add lifecycle policies for node groups +Warning: Public endpoint access enabled - consider restricting CIDR ranges +Improvement: Add explicit IAM role configurations +Improvement: Implement network policies +# Add to cluster configuration +cluster_security_group_additional_rules = { + ingress_nodes_ephemeral_ports = { + description = "Node to node ephemeral ports" + protocol = "tcp" + from_port = 1025 + to_port = 65535 + type = "ingress" + source_node_security_group = true + } +} +Add CloudWatch logging configuration +Implement proper metrics collection +Set up alerts for cluster health +Improvement: Add more detailed documentation +Improvement: Consider adding test environments +Add README files in each major directory +Document deployment procedures +Add troubleshooting guides +Document network architecture + +resource "aws_eks_cluster" "main" { + # ...existing code... 
+ vpc_config { + endpoint_private_access = true + endpoint_public_access = false # Force private endpoint only + security_group_ids = [aws_security_group.cluster.id] + subnet_ids = var.private_subnet_ids + } + + encryption_config { + provider { + key_arn = aws_kms_key.eks.arn + } + resources = ["secrets"] + } +} + +24m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API +24m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/loki-write invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: no metrics returned from resource metrics API +22m Warning FailedGetResourceMetric horizontalpodautoscaler/loki-write failed to get cpu utilization: did not receive metrics for targeted pods (pods might be unready) +2 +29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) +29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server could not find the requested resource (get pods.metrics.k8s.io) +29m Warning FailedGetResourceMetric horizontalpodautoscaler/istiod failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) +29m Warning FailedComputeMetricsReplicas horizontalpodautoscaler/istiod invalid metrics (1 invalid out of 1), first error is: failed 
to get cpu resource metric value: failed to get cpu utilization: unable to get metrics for resource cpu: unable to fetch metrics from resource metrics API: the server is currently unable to handle the request (get pods.metrics.k8s.io) +2 +* Failed to execute "terraform_current apply -lock-timeout=20m -auto-approve -input=false -auto-approve" in ./.terragrunt-cache/jrM5TqaHxjlphT8vQ1DicmFp6eM/1NbRS_ankC8AcxKegXNWAnjyQEg + ╷ + │ Error: Unable to continue with install: Certificate "platform-eng-eks-mcm" in namespace "istio-system" exists and cannot be imported into the current release: invalid ownership metadata; annotation validation error: key "meta.helm.sh/release-name" must equal "grafana-grafana-ingress": current value is "k8s-dashboard-k8s-dashboard-ingress"; annotation validation error: key "meta.helm.sh/release-namespace" must equal "grafana": current value is "k8s-dashboard" + │ + │ with module.ingress_resources.helm_release.ingress, + │ on .terraform/modules/ingress_resources/main.tf line 6, in resource "helm_release" "ingress": + │ 6: resource "helm_release" "ingress" { + │ + ╵ + + exit status 1 + \ No newline at end of file diff --git a/platform-tg-infra.code-workspace b/platform-tg-infra.code-workspace deleted file mode 100644 index dec7709..0000000 --- a/platform-tg-infra.code-workspace +++ /dev/null @@ -1,29 +0,0 @@ -{ - "folders": [ - { - "path": "." - }, - { - "path": "../terraform-provider-github/website/docs/d", - "name": "provider/aws/data-sources" - }, - { - "path": "../terraform-provider-github/website/docs/r", - "name": "provider/aws/resources" - }, - { - "path": "../terraform/website/docs/language/tests", - "name": "terraform/tests" - }, - { - "path": "../terraform/website/docs/language/syntax", - "name": "terraform/syntax" - }, - { - "path": "../terragrunt" - } - ], - "settings": { - - } -}