diff --git a/lab/_envcommon/default-versions.hcl b/lab/_envcommon/default-versions.hcl index 2b29ef6..76cc77b 100644 --- a/lab/_envcommon/default-versions.hcl +++ b/lab/_envcommon/default-versions.hcl @@ -8,7 +8,7 @@ locals { custom_service_eks_account = "${local.release_version}" eks_module_version = "20.33.1" istio_ingress_version = "${local.release_version}" - release_version = "main" # change to main when testing updated modules + release_version = "0.1.1" # "main" # change to main when testing updated modules ##################### # TF Providers @@ -21,9 +21,28 @@ locals { template_version = "2.2.0" tf_version = "1.5.5" + ##################### + # Namespaces Config (component key -> namespace; each key must appear only once) + ##################### + namespaces = { + cert-manager = "kube-system" + karpenter = "kube-system" + metrics-server = "kube-system" + istio = "istio-system" + grafana = local.telemetry_namespace + k8s-dashboard = local.telemetry_namespace + kiali = local.telemetry_namespace + loki = local.telemetry_namespace + otel = local.telemetry_namespace + prometheus = local.telemetry_namespace + tempo = local.telemetry_namespace + } + ##################### # EKS Config ##################### + operator_namespace = "operator" + telemetry_namespace = "telemetry" # kubectl_image_tag = "1.30.4" ################ diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl new file mode 100644 index 0000000..740c1ad --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl @@ -0,0 +1,21 @@ +# lab/development/us-gov-east-1/vpc/platform-test-z/cluster.hcl + +# Set cluster specific variables. These are automatically pulled in to configure the remote state bucket in the root +# terragrunt.hcl configuration. 
+locals { + cluster_endpoint_public_access = true + cluster_name = "platform-test-z" + created_reason = "Terragrunt Development for CICD Delivered EKS Platform" + creator = "luther.coleman.mcginty@census.gov" + eks_instance_disk_size = 100 + eks_ng_desired_size = 3 + eks_ng_max_size = 10 + eks_ng_min_size = 1 + enable_cluster_creator_admin_permissions = true + terraform = true + terragrunt = true + tags = { + "slim:schedule" = "8:00-17:00" + "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" + } +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable new file mode 100644 index 0000000..97aa66f --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-alloy-disable/terragrunt.hcl.disable @@ -0,0 +1,27 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-alloy.git?ref=main" + source = "../../../../../../../tfmod-alloy" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region + cluster_domain = dependency.eks.inputs.vpc_domain_name +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl new file mode 100644 index 0000000..926da7c --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-cert-manager/terragrunt.hcl @@ -0,0 +1,43 @@ +include "root" { + path = 
find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-cert-mgr" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + skip_outputs = true +} + +inputs = { + cert_manager_cainjector_tag = include.root.inputs.cert_manager_cainjector_tag + cert_manager_controller_tag = include.root.inputs.cert_manager_controller_tag + cert_manager_helm_chart = include.root.inputs.cert_manager_helm_chart + cert_manager_startupapicheck_tag = include.root.inputs.cert_manager_startupapicheck_tag + cert_manager_webhook_tag = include.root.inputs.cert_manager_webhook_tag + cluster_issuer_name = include.root.inputs.cluster_issuer_name + cluster_mailing_list = dependency.eks.inputs.creator + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + release_version = include.root.inputs.release_version + namespace = include.root.inputs.namespaces["cert-manager"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl new file mode 100644 index 0000000..c39d32e --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-config/terragrunt.hcl @@ -0,0 +1,68 @@ +# lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/eks-config/terragrunt.hcl + 
+include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-eks-configuration" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_certificate_authority_data = [{ data = "THISISAVERYLONGCERTSTRINGTHATGOESHEREFORSURENODYEP" }] + cluster_endpoint = "https://12345ABCDEE42BF9C24D4C362D1DC.sk1.us-gov-east-1.eks.amazonaws.com" + cluster_name = "a-cluster-name" + eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + security_group_all_worker_mgmt_id = "sg-00b0000000000000" + subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003"] + vpc_id = "a-vpc-id" + } +} + +generate "kubectl-provider" { + path = "kubectl-provider.tf" + if_exists = "overwrite" + contents = <<-EOF + %{if dependency.eks.outputs.cluster_name != "a-cluster-name"~} + data "aws_eks_cluster" "kubectl" { + name = "${dependency.eks.outputs.cluster_name}" + } + provider "kubectl" { + apply_retry_count = 5 + host = data.aws_eks_cluster.kubectl.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.kubectl.certificate_authority[0].data) + load_config_file = false + + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + args = ["eks", "get-token", "--cluster-name", "${dependency.eks.outputs.cluster_name}", "--region", "${include.root.inputs.aws_region}"] + } + } + %{endif~} + EOF +} + +inputs = { + cluster_name = 
dependency.eks.outputs.cluster_name + eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + release_version = include.root.inputs.release_version + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + subnets = dependency.eks.outputs.subnets + vpc_id = dependency.eks.outputs.vpc_id + operators_ns = include.root.inputs.operator_namespace + telemetry_ns = include.root.inputs.telemetry_namespace +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl new file mode 100644 index 0000000..9b7c16f --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-dns/terragrunt.hcl @@ -0,0 +1,42 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-dns.git?ref=${include.root.inputs.release_version}" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + subnets = ["subnet-abcdefgh", "subnet-12345678", "subnet-ab12cd34"] + } +} + +dependency "istio" { + config_path = "../eks-istio" + mock_outputs = { + istio_ingress_lb = { + dns_name = "a1111111111111111111111111111111-2bbbbbbbbbbbbbbb.elb.us-gov-east-1.amazonaws.com" + zone_id = "ZABC123456DEF" + } + } +} + +inputs = { + cluster_name = dependency.eks.inputs.cluster_name + istio_ingress_lb = dependency.istio.outputs.istio_ingress_lb + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + route53_endpoints = include.root.inputs.route53_endpoints + 
subnets = dependency.eks.outputs.subnets + tags = dependency.eks.inputs.tags + vpc_domain_name = dependency.eks.inputs.vpc_domain_name + vpc_name = dependency.eks.inputs.vpc_name +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl new file mode 100644 index 0000000..cd0b935 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-grafana/terragrunt.hcl @@ -0,0 +1,42 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-grafana" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks-loki" { + config_path = "../eks-loki" + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } +} + +inputs = { + cluster_domain = dependency.eks.inputs.vpc_domain_name + cluster_name = dependency.eks.outputs.cluster_name + download_dashboards_image_tag = include.root.inputs.download_dashboards_image_tag + grafana_chart_version = include.root.inputs.grafana_chart_version + grafana_tag = include.root.inputs.grafana_tag + init_chown_data_image_tag = include.root.inputs.init_chown_data_image_tag + profile = include.root.inputs.aws_profile + public_hostname = include.root.inputs.grafana_hostname + region = include.root.inputs.aws_region + rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class + namespace = include.root.inputs.namespaces["grafana"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl new 
file mode 100644 index 0000000..7a591c5 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-istio/terragrunt.hcl @@ -0,0 +1,35 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-istio" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks-karpenter" { + config_path = "../eks-karpenter" + skip_outputs = true +} + +inputs = { + cluster_name = dependency.eks.outputs.cluster_name + istio_chart_version = include.root.inputs.istio_version + istio_version = include.root.inputs.istio_version + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + namespace = include.root.inputs.namespaces["istio"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl new file mode 100644 index 0000000..e56658b --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-k8s-dashboard/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-k8s-dashboard.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-k8s-dashboard" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + vpc_domain_name = "example.com" 
+ } +} + +dependency "eks-loki" { + config_path = "../eks-loki" + skip_outputs = true +} + +inputs = { + # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint + cluster_domain = dependency.eks.inputs.vpc_domain_name + cluster_name = dependency.eks.outputs.cluster_name + k8s_dashboard_version = include.root.inputs.k8s_dashboard_version + profile = include.root.inputs.aws_profile + public_hostname = include.root.inputs.dashboard_hostname + region = include.root.inputs.aws_region + namespace = include.root.inputs.namespaces["k8s-dashboard"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl new file mode 100644 index 0000000..94d8489 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-karpenter/terragrunt.hcl @@ -0,0 +1,44 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-karpenter" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" + cluster_name = "a-cluster-name" + node_group_name = "node_group_a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + vpc_id = "a-vpc-name" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + skip_outputs = true +} + +inputs = { + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + karpenter_helm_chart = 
include.root.inputs.karpenter_helm_chart + karpenter_node_group_name = dependency.eks.outputs.node_group_name + karpenter_tag = include.root.inputs.karpenter_tag + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + release_version = include.root.inputs.release_version + vpc_id = dependency.eks.outputs.vpc_id + namespace = include.root.inputs.namespaces["karpenter"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl new file mode 100644 index 0000000..a2e2a0e --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-kiali/terragrunt.hcl @@ -0,0 +1,94 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" + source = "../../../../../../../tfmod-kiali" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-cert-manager" { + config_path = "../eks-cert-manager" + mock_outputs = { + cluster_issuer_name = "acmpca-clusterissuer" + } +} +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + } +} +dependency "eks-tempo" { + config_path = "../eks-tempo" + mock_outputs = { + tempo_internal_endpoint = { + hostname = "tempo.tempo.svc.cluster.local" + port_number = 3100 + url = "http://tempo.tempo.svc.cluster.local:3100/" + } + } +} +dependency "eks-grafana" { + config_path 
= "../eks-grafana" + mock_outputs = { + internal_endpoint = { + hostname = "grafana.grafana.svc.cluster.local" + port_number = "80" + url = "https://grafana.grafana.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.dev.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.dev.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_domain = dependency.eks.inputs.vpc_domain_name + operators_namespace = "operators" + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name + + # istio_namespace = + + prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url + grafana_namespace = dependency.eks-grafana.outputs.namespace + grafana_secret_name = "grafana" + grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url + + grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url + + # grafana_secret_name = dependency.eks-grafana.outputs.secret_name + tempo_internal_url = dependency.eks-tempo.outputs.tempo_internal_endpoint.url + namespace = include.root.inputs.namespaces["kiali"] + + + # client_id = var.sso_client_id + # client_secret = var.sso_client_secret + # keycloak_public_url = var.keycloak_public_url + # gogatekeeper_chart_version = var.gogatekeeper_chart_version + # gogatekeeper_registry = var.gogatekeeper_registry + # gogatekeeper_repository = var.gogatekeeper_repository + # gogatekeeper_tag = var.gogatekeeper_tag +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl new file mode 100644 index 0000000..1aa60a3 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-loki/terragrunt.hcl @@ -0,0 +1,48 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + 
merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-loki" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +# dependency "eks-prometheus" { +# config_path = "../eks-prometheus" +# skip_outputs = true +# } + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + loki_chart_version = include.root.inputs.loki_chart_version + loki_tag = include.root.inputs.loki_tag + canary_tag = include.root.inputs.canary_tag + enterprise_logs_provisioner_tag = include.root.inputs.enterprise_logs_provisioner_tag + gateway_tag = include.root.inputs.gateway_tag + memcached_tag = include.root.inputs.memcached_tag + exporter_tag = include.root.inputs.exporter_tag + sidecar_tag = include.root.inputs.sidecar_tag + namespace = include.root.inputs.namespaces["loki"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl new file mode 100644 index 0000000..a472aba --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-metrics-server/terragrunt.hcl @@ -0,0 +1,35 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = 
"git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=${include.root.inputs.release_version}" + source = "../../../../../../../tfmod-metrics-server" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks_config" { + config_path = "../eks-config" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region + metrics_server_helm_chart = include.root.inputs.metrics_server_helm_chart + metrics_server_tag = include.root.inputs.metrics_server_tag + namespace = include.root.inputs.namespaces["metrics-server"] +} diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl new file mode 100644 index 0000000..1f60ec6 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-open-telemetry/terragrunt.hcl @@ -0,0 +1,28 @@ +include "root" { + path = find_in_parent_folders("root.hcl") + merge_strategy = "deep" + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-open-telemetry.git?ref=main" + source = "../../../../../../../tfmod-open-telemetry" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region + namespace = include.root.inputs.namespaces["otel"] +} diff --git 
a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md new file mode 100644 index 0000000..bbbffb2 --- /dev/null +++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/README.md @@ -0,0 +1,198 @@ +## eks-prometheus +This module deploys Prometheus on Kubernetes inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. +This module consists of 4 components. It creates the prometheus namespace and copies image repositories for the following components from quay.io into the local account's ECR repository. It deploys these components with helm charts using the configured ECR repositories. + 1. prometheus-alert-manager + 2. prometheus-node-exporter + 3. prometheus-pushgateway + 4. prometheus-server + +### Dependencies +This module is dependent on the EKS module (eks). The cluster should exist already for this module to work. + +### Inputs + cluster_name + profile + prometheus_chart_version + prometheus_server_tag + prometheus_config_reloader_tag + alertmanager_tag + kube_state_metrics_tag + node_exporter_tag + pushgateway_tag + rwo_storage_class + +### Outputs + alertmanager_internal_endpoint + alertmanager_headless_internal_endpoint + pushgateway_internal_endpoint + prometheus_server_internal_endpoint + +### Issues observed/fixed +1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" +2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" +3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" +4. The alertmanager_tag value had to be updated from +5. 
The helm chart set config for the ecr image had to be split into 2 components, one for the registry and the other for the repository, as in the example below: + + ``` + set { + name = "kube-state-metrics.image.registry" + value = module.images.images[local.ksm_key].dest_registry + } + set { + name = "kube-state-metrics.image.repository" + value = module.images.images[local.ksm_key].dest_repository + } + ``` + +6. In some other cases the image ecr repository had to be split by the colon separator (:) + + ``` + set { + name = "alertmanager.configmapReload.image.repository" + value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] + } + ``` + +### Chart Notes + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl port-forward $POD_NAME 9091 + echo "Visit http://127.0.0.1:9091 to use your application" + ``` + + The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: + prometheus-server.prometheus.svc.cluster.local + + + Get the Prometheus server URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9090 + ``` + + The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: + `prometheus-alertmanager.prometheus.svc.cluster.local` + + + Get the Alertmanager URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl 
--namespace prometheus port-forward $POD_NAME 9093 + ``` + + ################################################################################# + ###### WARNING: Pod Security Policy has been disabled by default since ##### + ###### it deprecated after k8s 1.25+. use ##### + ###### (index .Values "prometheus-node-exporter" "rbac" ##### + ###### "pspEnabled") with (index .Values ##### + ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### + ###### in case you still need it. ##### + ################################################################################# + + + The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: + `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` + + + Get the PushGateway URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9091 + ``` + + For more information on running Prometheus, visit: + https://prometheus.io/ + + kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. + The exposed metrics can be found here: + https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics + + The metrics are exported on the HTTP endpoint /metrics on the listening port. + In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` + + They are served either as plaintext or protobuf depending on the Accept header. + They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. + + 1. 
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9093 to use your application" + kubectl --namespace prometheus port-forward $POD_NAME 9093:80 + ``` + + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace prometheus $POD_NAME 9100 + ``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | +| [aws](#requirement\_aws) | >= 5.14.0 | +| [helm](#requirement\_helm) | >= 2.11.0 | +| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | +| [null](#requirement\_null) | >= 3.2.1 | + +## Providers + +| Name | Version | +|------|---------| +| [helm](#provider\_helm) | >= 2.11.0 | +| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | + +## Resources + +| Name | Type | +|------|------| +| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | +| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| 
[alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | +| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. | `string` | n/a | yes | +| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | +| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | +| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | +| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | +| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | +| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | +| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | +| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | +| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | +| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. 
| `string` | `"gp3-encrypted"` | no |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a |
+| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a |
+| [module\_name](#output\_module\_name) | The name of this module. |
+| [module\_version](#output\_module\_version) | The version of this module. |
+| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a |
+| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a |
+| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a |
+
diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl
new file mode 100644
index 0000000..030dd33
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-prometheus/terragrunt.hcl
@@ -0,0 +1,46 @@
+include "root" {
+  path           = find_in_parent_folders("root.hcl")
+  merge_strategy = "deep"
+  expose         = true
+}
+
+terraform {
+  # NOTE(review): the active source climbs seven directories, i.e. one level above this repository's root,
+  # so it needs a sibling checkout; the pinned git source below is presumably the form meant for CI - confirm.
+  # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=${include.root.inputs.release_version}"
+  source = "../../../../../../../tfmod-prometheus"
+  extra_arguments "retry_lock" {
+    commands  = get_terraform_commands_that_need_locking()
+    arguments = ["-lock-timeout=20m"]
+  }
+}
+
+dependency "eks" {
+  config_path = "../eks"
+  # Placeholder outputs are only acceptable for commands that change nothing. Without this allow-list
+  # Terragrunt would also substitute the fake cluster name during apply when ../eks has no outputs.
+  mock_outputs_allowed_terraform_commands = ["init", "validate", "plan"]
+  mock_outputs = {
+    cluster_name = "a-cluster-name"
+  }
+}
+
+dependency "eks-dns" {
+  # Ordering-only dependency: none of its outputs are read in the inputs below.
+  config_path  = "../eks-dns"
+  skip_outputs = true
+}
+
+inputs = {
+  profile                        = include.root.inputs.aws_profile
+  region                         = include.root.inputs.aws_region
+  cluster_name                   = dependency.eks.outputs.cluster_name
+  prometheus_chart_version       = include.root.inputs.prometheus_chart_version
+  prometheus_server_tag          = include.root.inputs.prometheus_server_tag
+  prometheus_config_reloader_tag = include.root.inputs.prometheus_config_reloader_tag
+  alertmanager_tag               = include.root.inputs.alertmanager_tag
+  kube_state_metrics_tag         = include.root.inputs.kube_state_metrics_tag
+  node_exporter_tag              = include.root.inputs.node_exporter_tag
+  pushgateway_tag                = include.root.inputs.pushgateway_tag
+  namespace                      = include.root.inputs.namespaces["prometheus"]
+}
diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl
new file mode 100644
index 0000000..d14c8a1
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks-tempo/terragrunt.hcl
@@ -0,0 +1,56 @@
+include "root" {
+  path           = find_in_parent_folders("root.hcl")
+  merge_strategy = "deep"
+  expose         = true
+}
+
+terraform {
+  # NOTE(review): the active source climbs seven directories, i.e. one level above this repository's root,
+  # so it needs a sibling checkout; the pinned git source below is presumably the form meant for CI - confirm.
+  # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=${include.root.inputs.release_version}"
+  source = "../../../../../../../tfmod-tempo"
+  extra_arguments "retry_lock" {
+    commands  = get_terraform_commands_that_need_locking()
+    arguments = ["-lock-timeout=20m"]
+  }
+}
+
+dependency "eks" {
+  config_path = "../eks"
+  # Placeholder outputs are only acceptable for commands that change nothing. Without this allow-list
+  # Terragrunt would also substitute the fake cluster name and OIDC ARN during apply when ../eks has no outputs.
+  mock_outputs_allowed_terraform_commands = ["init", "validate", "plan"]
+  mock_outputs = {
+    cluster_name      = "a-cluster-name"
+    oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA"
+  }
+}
+
+dependency "eks-prometheus" {
+  config_path = "../eks-prometheus"
+  # Same restriction as above: the mock endpoint and namespace must never reach an apply.
+  mock_outputs_allowed_terraform_commands = ["init", "validate", "plan"]
+  mock_outputs = {
+    prometheus_server_internal_endpoint = {
+      hostname    = "prometheus-server.prometheus.svc.cluster.local"
+      port_number = 9090
+      url         = "http://prometheus-server.prometheus.svc.cluster.local:9090/"
+    }
+    prometheus_namespace = "prometheus"
+  }
+}
+
+inputs = {
+  # NOTE(review): account_id/profile/region are read from include.root.locals here, while the sibling
+  # eks-prometheus unit reads profile/region from include.root.inputs - confirm both resolve identically.
+  account_id           = include.root.locals.account_id
+  profile              = include.root.locals.aws_profile
+  region               = include.root.locals.aws_region
+  cluster_name         = dependency.eks.outputs.cluster_name
+  oidc_provider_arn    = dependency.eks.outputs.oidc_provider_arn
+  prometheus_port      = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number
+  prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace
+  tempo_chart_version  = include.root.inputs.tempo_chart_version
+  tempo_tag            = include.root.inputs.tempo_tag
+  namespace            = include.root.inputs.namespaces["tempo"]
+}
diff --git a/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl b/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl
new file mode 100644
index 0000000..cff2547
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/platform-test-z/eks/terragrunt.hcl
@@ -0,0 +1,32 @@
+include "root" {
+  path           = find_in_parent_folders("root.hcl")
+  merge_strategy = "deep"
+  expose         = true
+}
+
+terraform {
+  # NOTE(review): the active source climbs seven directories, i.e. one level above this repository's root,
+  # so it needs a sibling checkout; the pinned git source below is presumably the form meant for CI - confirm.
+  # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=${include.root.inputs.release_version}"
+  source = "../../../../../../../tfmod-eks"
+  extra_arguments "retry_lock" {
+    commands  = get_terraform_commands_that_need_locking()
+    arguments = ["-lock-timeout=20m"]
+  }
+}
+
+inputs = {
+  cluster_endpoint_public_access           = include.root.inputs.cluster_endpoint_public_access
+  cluster_name                             = include.root.inputs.cluster_name
+  cluster_version                          = include.root.inputs.cluster_version
+  creator                                  = include.root.inputs.creator
+  eks_instance_disk_size                   = include.root.inputs.eks_instance_disk_size
+  eks_ng_desired_size                      = include.root.inputs.eks_ng_desired_size
+  eks_ng_max_size                          = include.root.inputs.eks_ng_max_size
+  eks_ng_min_size                          = include.root.inputs.eks_ng_min_size
+  eks_vpc_name                             = include.root.inputs.vpc_name
+  enable_cluster_creator_admin_permissions = include.root.inputs.enable_cluster_creator_admin_permissions
+  environment_abbr                         = include.root.inputs.environment_abbr
+  tags                                     = include.root.inputs.tags
+  vpc_name                                 = include.root.inputs.vpc_name
+}