From 9086d270299af382699f3312a6974d093c7fc340 Mon Sep 17 00:00:00 2001 From: mcgin314 Date: Tue, 10 Sep 2024 17:04:55 -0400 Subject: [PATCH 1/7] Revisions related to standardized setup for a terragrunt-live repository. --- .../platform-test-2/common_vars.hcl.disable | 170 +++++++++++++++ .../_envcommon/common-variables.hcl | 11 + project-x-infra-live/development/account.hcl | 11 + .../development/us-gov-east-1/region.hcl | 5 + .../eks-cert-manager/terragrunt.hcl | 34 +++ .../platform-test-2/eks-config/terragrunt.hcl | 42 ++++ .../eks-grafana/terragrunt.hcl | 38 ++++ .../platform-test-2/eks-istio/terragrunt.hcl | 31 +++ .../eks-karpenter/terragrunt.hcl | 38 ++++ .../eks-kiali.disable/terragrunt.hcl.disable | 76 +++++++ .../platform-test-2/eks-loki/terragrunt.hcl | 31 +++ .../eks-metrics-server/terragrunt.hcl | 25 +++ .../platform-test-2/eks-prometheus/README.md | 198 ++++++++++++++++++ .../eks-prometheus/terragrunt.hcl | 30 +++ .../platform-test-2/eks-tempo/terragrunt.hcl | 41 ++++ .../vpc/platform-test-2/eks/terragrunt.hcl | 64 ++++++ .../development/us-gov-east-1/vpc/vpc.hcl | 6 + project-x-infra-live/terragrunt.hcl | 111 ++++++++++ 18 files changed, 962 insertions(+) create mode 100644 lab/us-gov-east-1/vpc/platform-test-2/common_vars.hcl.disable create mode 100644 project-x-infra-live/_envcommon/common-variables.hcl create mode 100644 project-x-infra-live/development/account.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/region.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-cert-manager/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-config/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-grafana/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-istio/terragrunt.hcl create mode 100644 
project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-karpenter/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-kiali.disable/terragrunt.hcl.disable create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-loki/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-metrics-server/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/README.md create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-tempo/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/vpc.hcl create mode 100644 project-x-infra-live/terragrunt.hcl diff --git a/lab/us-gov-east-1/vpc/platform-test-2/common_vars.hcl.disable b/lab/us-gov-east-1/vpc/platform-test-2/common_vars.hcl.disable new file mode 100644 index 0000000..fbbb5ef --- /dev/null +++ b/lab/us-gov-east-1/vpc/platform-test-2/common_vars.hcl.disable @@ -0,0 +1,170 @@ +locals { + # Automatically load account-level variables (NOTE: In our environment account = environment so there is not separate environment layer) + account_vars = read_terragrunt_config(find_in_parent_folders("account.hcl")) + + # Automatically load region-level variables + region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl")) + + # Which AWS_PROFILE to use to perform the operations + profile = local.account_vars.locals.aws_profile + + # In which AWS region are operations being performed + region = local.region_vars.locals.aws_region + + # Tags applied to AWS objects created + application_tags = { + "Project Name" = local.project_name + "ProjectNumber" = 
local.project_number + "CostAllocation" = local.organization + "Organization" = local.organization + "Environment" = local.account_vars.locals.environment + } + + #################################################### + ## VPC Information + #################################################### + # Information about the VPC in which artifacts are being created + + vpc_name = "vpc3-lab-dev" + + #################################################### + ## EKS Configuration + #################################################### + + # The name of the EKS cluster + cluster_name = "platform-eng-eks-mcm" + + # The kubernetes version to use for the cluster + cluster_version = "1.30" + + # The domain in which the cluster is being built + domain = local.account_vars.locals.vpc_domain_name + + # The size of the disk for the worker nodes, in GB + # Loki claims 60 GB - 10g claims - 2 services each with 3 replicas + # Prometheus claims 10g + # Tempo claims 10g x 3 replicas + eks_instance_disk_size = 120 + + # The VPC name in which the cluster will operate + eks_vpc_name = local.vpc_name + + # The types of instances to use for the worker nodes + eks_instance_types = ["t3.xlarge", "m4.xlarge", "m5.xlarge"] + + # How many worker nodes are desired + eks_ng_desired_size = 1 + + # What is the maximum number of worker nodes allowed. + eks_ng_max_size = 10 + + # What is the minimum number of worker nodes allowed. + eks_ng_min_size = 1 + + # Namespace to use for operator installation + operators_namespace = "operators" + + #################################################### + ## Cloudwatch Agent Configuration for EKS + #################################################### + # Uses cluster_name and region + + # The namespace that cloudwatch-agent and fluentbit will be installed. + cw_namespace = "cloudwatch" + + # How long the logs will be maintained within cloudwatch before deletion. 
+ cw_log_retention_days = "30" + + # Fluent Bit reads log files from the tail, and will capture only new + # logs after it is deployed. If you want the opposite, set + # fluent_bit_read_from_head='On' and it will collect all logs in the + # file system and set fluent_bit_read_from_tail='Off'. + cw_fluent_bit_read_from_head = "off" + cw_fluent_bit_read_from_tail = "on" + + # Fluent Bit can integrate with prometheus and serve metrics. If the + # metrics server is desired, set this to "on" else set it to "off" to + # disable the metrics server + cw_fluent_bit_http_server = "on" + + # When the metrics server is enabled, the port on which the server is to run. + cw_fluent_bit_http_port = "2020" + + #################################################### + ## cert-manager Configuration + #################################################### + # Uses cluster_name and region + + # The namespace that cert-manager will be installed. + cm_namespace = "cert-manager" + + # The name of the cluster issuer to install + cm_cluster_issuer_name = "clusterissuer" + + # How to issue certs: + # Intermediate CA - Request an intermediate CA from TCO and provide the + # name of the file here: + #cm_intermediate_ca_crt = "./certs/pki.adsd-cumulus-sandbox.dev.csp1.census.gov.bundle.crt" + #cm_intermediate_ca_key = "./certs/pki.adsd-cumulus-sandbox.dev.csp1.census.gov.key" + # + # ACM - Use aws-pca-issuer to request leaf certs from AWS ACM. + cm_acm = true + + #################################################### + ## Istio Configuration + #################################################### + # Uses cluster_name and region + + # The namespace that Istio will be installed. + istio_namespace = "istio-system" + + istio_enable_telemetry = true + + # Potentially allow istio to control traffic outbound from the cluster. 
+ istio_enable_egressgateway = true + + #################################################### + ## Keycloak Configuration + #################################################### + # Uses cluster_name and region + + keycloak_enabled = true + + # The namespace that Keycloak will be installed. + keycloak_namespace = "keycloak" + + # The email address of the person considered the keycloak administrator + keycloak_admin_email = "robel.t.fesshaye@census.gov" + + # Details about the database keycloak should use to store configuration data + keycloak_db_vendor = "postgresql" + keycloak_db_address = "adsd-rds-mft-sbox.c2tx3ocukdth.us-gov-east-1.rds.amazonaws.com" + keycloak_db_port = "5432" + keycloak_db_database = "keycloak" + keycloak_db_username = "keycloak" + keycloak_db_password = "a-secret-password" + + #################################################### + ## log-trace-monitor configuration + #################################################### + log_trace_monitor_namespace = "default" + log_trace_monitor_create_namespace = false + + #################################################### + ## Kubernetes Dashboard configuration + #################################################### + # Uses cluster_name and region + + # The namespace that kubernetes dashboard will be installed. + kube_dashboard_namespace = "kube-dashboard" + + #################################################### + ## Metrics Server configuration + #################################################### + # Uses cluster_name and region + + # The namespace that metrics-server will be installed. 
+ ms_namespace = "kube-system" + + +} diff --git a/project-x-infra-live/_envcommon/common-variables.hcl b/project-x-infra-live/_envcommon/common-variables.hcl new file mode 100644 index 0000000..c7252f3 --- /dev/null +++ b/project-x-infra-live/_envcommon/common-variables.hcl @@ -0,0 +1,11 @@ +# --------------------------------------------------------------------------------------------------------------------- +# GLOBAL PARAMETERS +# These are the variables we pass to use across modules regardless of environment, i.e. these are the parameters +# that are common across all environments/accounts. +# --------------------------------------------------------------------------------------------------------------------- +locals { + project_number = "fs0000000078" + project_name = "csvd_platformbaseline" + project_role = "csvd_platformbaseline_app" + organization = "census:ocio:csvd" +} \ No newline at end of file diff --git a/project-x-infra-live/development/account.hcl b/project-x-infra-live/development/account.hcl new file mode 100644 index 0000000..31ffcb3 --- /dev/null +++ b/project-x-infra-live/development/account.hcl @@ -0,0 +1,11 @@ +# Set account-wide variables. These are automatically pulled in to configure the remote state bucket in the root +# terragrunt.hcl configuration. Terragrunt often segments account and environment, but given our strategy is to +# leverage accounts as environment boundaries, there is an anticipated 1:1 account to environment model that +# combines these here. 
+locals { + account_name = "lab-dev-ew" + aws_account_id = "224384469011" + aws_profile = "224384469011-lab-dev-gov.inf-admin-t2" + environment = "development" + environment_abbr = "dev" +} \ No newline at end of file diff --git a/project-x-infra-live/development/us-gov-east-1/region.hcl b/project-x-infra-live/development/us-gov-east-1/region.hcl new file mode 100644 index 0000000..4adfaa3 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/region.hcl @@ -0,0 +1,5 @@ +# Set common variables for the region. This is automatically pulled in in the root terragrunt.hcl configuration to +# configure the remote state bucket and pass forward to the child modules as inputs. +locals { + aws_region = "us-gov-east-1" +} \ No newline at end of file diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-cert-manager/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-cert-manager/terragrunt.hcl new file mode 100644 index 0000000..1448ac8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-cert-manager/terragrunt.hcl @@ -0,0 +1,34 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = dependency.eks.inputs.creator + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + 
cert_manager_helm_chart = "1.15.1" + cert_manager_cainjector_tag = "v1.15.1" + cert_manager_controller_tag = "v1.15.1" + cert_manager_startupapicheck_tag = "v1.15.1" + cert_manager_webhook_tag = "v1.15.1" + cluster_issuer_name = "cert-manager" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-config/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-config/terragrunt.hcl new file mode 100644 index 0000000..84bb1ff --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-config/terragrunt.hcl @@ -0,0 +1,42 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +# locals { +# tag_costallocation = "census:csvd:platformbaseline" +# } + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + vpc_id = "a-vpc-id" + cluster_name = "a-cluster-name" + subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003", ] + security_group_all_worker_mgmt_id = "sg-00b0000000000000" + eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + vpc_id = dependency.eks.outputs.vpc_id + cluster_name = dependency.eks.outputs.cluster_name + subnets = dependency.eks.outputs.subnets + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + eks_managed_node_groups_autoscaling_group_names = 
dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + # tags = dependency.eks.inputs.tags + # tag_costallocation = local.tag_costallocation + # cluster_autoscaler_role_name = dependency.eks.outputs.cluster_autoscaler_role_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-grafana/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-grafana/terragrunt.hcl new file mode 100644 index 0000000..c2172e8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-grafana/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-loki" { + config_path = "../eks-loki" + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } +} +# dependency "eks-tempo" { +# config_path = "../eks-tempo" +# skip_outputs = true +# } + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = dependency.eks.inputs.vpc_domain_name + rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class + # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-istio/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-istio/terragrunt.hcl new file mode 100644 index 0000000..5a30c0e --- /dev/null +++ 
b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-istio/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-karpenter" { + config_path = "../eks-karpenter" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + istio_chart_version = "1.22.1" + istio_version = "1.22.1" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-karpenter/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-karpenter/terragrunt.hcl new file mode 100644 index 0000000..982e1d7 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-karpenter/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" + cluster_name = "a-cluster-name" + node_group_name = "node_group_a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + vpc_id = "a-vpc-name" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + 
skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + karpenter_node_group_name = dependency.eks.outputs.node_group_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + vpc_id = dependency.eks.outputs.vpc_id +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-kiali.disable/terragrunt.hcl.disable b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-kiali.disable/terragrunt.hcl.disable new file mode 100644 index 0000000..63e88a4 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-kiali.disable/terragrunt.hcl.disable @@ -0,0 +1,76 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" + source = "../../../../../../../tfmod-kiali" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-cert-manager" { + config_path = "../eks-cert-manager" + mock_outputs = { + cluster_issuer_name = "acmpca-clusterissuer" + } +} +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + } +} +dependency "eks-grafana" { + config_path = "../eks-grafana" + mock_outputs = { + internal_endpoint = { + hostname = "grafana.grafana.svc.cluster.local" + port_number = "80" + url = "https://grafana.grafana.svc.cluster.local:80/" + } + namespace = "grafana" + 
public_endpoint = { + hostname = "grafana.dev.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.dev.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_domain = dependency.eks.inputs.vpc_domain_name + operators_namespace = dependency.eks.inputs.operators_ns + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name + prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url + grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks-grafana.outputs.namespace + grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url + grafana_secret_name = dependency.eks-grafana.outputs.secret_name + jaeger_internal_url = "" + + + # client_id = var.sso_client_id + # client_secret = var.sso_client_secret + # keycloak_public_url = var.keycloak_public_url + # gogatekeeper_chart_version = var.gogatekeeper_chart_version + # gogatekeeper_registry = var.gogatekeeper_registry + # gogatekeeper_repository = var.gogatekeeper_repository + # gogatekeeper_tag = var.gogatekeeper_tag +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-loki/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-loki/terragrunt.hcl new file mode 100644 index 0000000..cc94f7f --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-loki/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = 
"a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-metrics-server/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-metrics-server/terragrunt.hcl new file mode 100644 index 0000000..5414a72 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-metrics-server/terragrunt.hcl @@ -0,0 +1,25 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/README.md b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/README.md new file mode 100644 index 0000000..bbbffb2 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/README.md @@ -0,0 +1,198 @@ +## eks-prometheus +This module deploys EKS kubernetes prometheus inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. 
+This module consists of 4 components. It creates the prometheus namespace and copies image repositories for the following components from quay.io into the local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. + 1. prometheus-alert-manager + 2. prometheus-node-exporter + 3. prometheus-pushgateway + 4. prometheus-server + +### Dependencies +This module is dependent on EKS module (eks). The cluster should exist already for this module to work. + +### Inputs + cluster_name + profile + prometheus_chart_version + prometheus_server_tag + prometheus_config_reloader_tag + alertmanager_tag + kube_state_metrics_tag + node_exporter_tag + pushgateway_tag + rwo_storage_class + +### Outputs + alertmanager_internal_endpoint + alertmanager_headless_internal_endpoint + pushgateway_internal_endpoint + prometheus_server_internal_endpoint + +### Issues observed/fixed +1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" +2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" +3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" +4. The alertmanager_tag value had to be updated from +5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: + + ``` + set { + name = "kube-state-metrics.image.registry" + value = module.images.images[local.ksm_key].dest_registry + } + set { + name = "kube-state-metrics.image.repository" + value = module.images.images[local.ksm_key].dest_repository + } + ``` + +6. In some other cases the image ecr repository had to be split by the colon separator (:) + + ``` + set { + name = "alertmanager.configmapReload.image.repository" + value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] + } + ``` + +### Chart Notes + 1. 
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl port-forward $POD_NAME 9091 + echo "Visit http://127.0.0.1:9091 to use your application" + ``` + + The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: + prometheus-server.prometheus.svc.cluster.local + + + Get the Prometheus server URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9090 + ``` + + The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: + `prometheus-alertmanager.prometheus.svc.cluster.local` + + + Get the Alertmanager URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9093 + ``` + + ################################################################################# + ###### WARNING: Pod Security Policy has been disabled by default since ##### + ###### it deprecated after k8s 1.25+. use ##### + ###### (index .Values "prometheus-node-exporter" "rbac" ##### + ###### "pspEnabled") with (index .Values ##### + ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### + ###### in case you still need it. 
##### + ################################################################################# + + + The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: + `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` + + + Get the PushGateway URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9091 + ``` + + For more information on running Prometheus, visit: + https://prometheus.io/ + + kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. + The exposed metrics can be found here: + https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics + + The metrics are exported on the HTTP endpoint /metrics on the listening port. + In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` + + They are served either as plaintext or protobuf depending on the Accept header. + They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. + + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9093 to use your application" + kubectl --namespace prometheus port-forward $POD_NAME 9093:80 + ``` + + 1. 
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace prometheus $POD_NAME 9100 + ``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | +| [aws](#requirement\_aws) | >= 5.14.0 | +| [helm](#requirement\_helm) | >= 2.11.0 | +| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | +| [null](#requirement\_null) | >= 3.2.1 | + +## Providers + +| Name | Version | +|------|---------| +| [helm](#provider\_helm) | >= 2.11.0 | +| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | + +## Resources + +| Name | Type | +|------|------| +| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | +| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | +| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | +| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | +| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | +| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | +| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | +| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | +| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | +| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | +| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | +| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | +| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | +| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | +| [module\_name](#output\_module\_name) | The name of this module. | +| [module\_version](#output\_module\_version) | The version of this module. 
| +| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | +| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | +| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | + diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/terragrunt.hcl new file mode 100644 index 0000000..62611b1 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-prometheus/terragrunt.hcl @@ -0,0 +1,30 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-tempo/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-tempo/terragrunt.hcl new file mode 100644 index 0000000..02fad53 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks-tempo/terragrunt.hcl @@ -0,0 +1,41 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + 
config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + prometheus_namespace = "prometheus" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl new file mode 100644 index 0000000..bb2fd63 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl @@ -0,0 +1,64 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +locals { + # Set cluster/platform specific variables, or extract from the hierarchy. 
+ account_id = include.root.inputs.aws_account_id + vpc_name = include.root.inputs.vpc_name + cluster_name = "platform-test-2" + cluster_version = "1.30" + vpc_domain_name = include.root.inputs.vpc_domain_name + eks_instance_disk_size = 100 + eks_vpc_name = include.root.inputs.vpc_name + eks_ng_desired_size = 2 + eks_ng_max_size = 10 + eks_ng_min_size = 2 + operators_ns = "operators" + enable_cluster_creator_admin_permissions = true + cluster_endpoint_public_access = true + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_mailing_list = "luther.coleman.mcginty@census.gov" + environment_abbr = include.root.inputs.environment_abbr + + # Tags applied to AWS objects created + tags = { + "Environment" = local.environment_abbr + "slim:schedule" = "8:00-17:00" + "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" + } + +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +inputs = { + aws_account_id = local.account_id + profile = local.profile + vpc_name = local.eks_vpc_name + cluster_name = local.cluster_name + cluster_version = local.cluster_version + eks_instance_disk_size = local.eks_instance_disk_size + eks_vpc_name = local.eks_vpc_name + # eks_instance_types = local.eks_instance_types + eks_ng_desired_size = local.eks_ng_desired_size + eks_ng_max_size = local.eks_ng_max_size + eks_ng_min_size = local.eks_ng_min_size + operators_ns = local.operators_ns + enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions + cluster_endpoint_public_access = local.cluster_endpoint_public_access + tags = local.tags + vcp_domain_name = local.vpc_domain_name + region = local.region + creator = local.cluster_mailing_list + os_username = local.cluster_mailing_list + 
shared_vpc_label = local.environment_abbr +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/vpc.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/vpc.hcl new file mode 100644 index 0000000..907ce6d --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/vpc.hcl @@ -0,0 +1,6 @@ +# Set VPC specific variables. These are automatically loaded by the root terragrunt.hcl configuration and merged +# into the inputs it passes to child configurations. +locals { + vpc_name = "vpc3-lab-dev" + vpc_domain_name = "dev.lab.csp2.census.gov" +} \ No newline at end of file diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl new file mode 100644 index 0000000..abd1da7 --- /dev/null +++ b/project-x-infra-live/terragrunt.hcl @@ -0,0 +1,111 @@ +# --------------------------------------------------------------------------------------------------------------------- +# TERRAGRUNT CONFIGURATION +# Terragrunt is a thin wrapper for Terraform that provides extra tools for working with multiple Terraform modules, +# remote state, and locking: https://github.com/gruntwork-io/terragrunt +# --------------------------------------------------------------------------------------------------------------------- + +locals { + # Automatically load _envcommon, cross account and environment common variables + common_vars = read_terragrunt_config("${dirname(find_in_parent_folders())}/_envcommon/common-variables.hcl", "skip-account-if-does-not-exist") + + # Automatically load account-level variables (NOTE: In our environment account = environment so there is no separate environment layer) + account_vars = read_terragrunt_config(find_in_parent_folders("account.hcl")) + + # Automatically load region-level variables + # region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl", find_in_parent_folders("empty.hcl"))) + region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl", "skip-account-if-does-not-exist")) + + # Automatically
load vpc-level variables + vpc_vars = read_terragrunt_config(find_in_parent_folders("vpc.hcl", "skip-account-if-does-not-exist")) +# vpc_vars = read_terragrunt_config(find_in_parent_folders("vpc.hcl", "skip-account-if-does-not-exist")) + + # Extract the variables we need for easy access + account_name = local.account_vars.locals.account_name + account_id = local.account_vars.locals.aws_account_id + aws_profile = local.account_vars.locals.aws_profile + aws_region = local.region_vars.locals.aws_region + organization = local.common_vars.locals.organization + project_number = local.common_vars.locals.project_number + project_name = local.common_vars.locals.project_name + project_role = local.common_vars.locals.project_role +} + +# Generate an AWS provider block +generate "provider" { + path = "provider.tf" + if_exists = "overwrite_terragrunt" + contents = < Date: Fri, 13 Sep 2024 16:08:30 -0400 Subject: [PATCH 2/7] Latest full plan execution --- project-x-infra-live/terragrunt.hcl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index abd1da7..fd1b88f 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -95,6 +95,27 @@ remote_state { } } +# remote_state { +# backend = "s3" +# generate = { +# path = "remote_state.backend.tf" +# if_exists = "overwrite_terragrunt" +# } +# config = { +# #--- +# # "${local.account_name}": "${get_path_from_repo_root()}" +# #--- +# bucket = "${local.state_bucket_prefix}-${local.account_id}" +# profile = "${local.profile}" +# key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}" +# region = "${local.region}" +# dynamodb_table = "${local.state_table_name}" +# skip_bucket_root_access = true +# skip_bucket_ssencryption = true +# skip_bucket_enforced_tls = true +# } +# } + # 
--------------------------------------------------------------------------------------------------------------------- # GLOBAL PARAMETERS # These variables apply to all configurations in this subfolder. These are automatically merged into the child From f45c8a8df8005c3ea1f037f3b42ba6779a2c2098 Mon Sep 17 00:00:00 2001 From: mcgin314 Date: Fri, 13 Sep 2024 16:17:39 -0400 Subject: [PATCH 3/7] Minor adjustments for Youssef --- project-x-infra-live/development/account.hcl | 2 +- .../us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl | 4 ++-- project-x-infra-live/terragrunt.hcl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/project-x-infra-live/development/account.hcl b/project-x-infra-live/development/account.hcl index 31ffcb3..1992080 100644 --- a/project-x-infra-live/development/account.hcl +++ b/project-x-infra-live/development/account.hcl @@ -5,7 +5,7 @@ locals { account_name = "lab-dev-ew" aws_account_id = "224384469011" - aws_profile = "224384469011-lab-dev-gov.inf-admin-t2" + aws_profile = "224384469011-lab-dev-gov" environment = "development" environment_abbr = "dev" } \ No newline at end of file diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl index bb2fd63..13a37b8 100644 --- a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-2/eks/terragrunt.hcl @@ -7,7 +7,7 @@ locals { # Set cluster/platform specific variables, or extract from the hierarchy. 
account_id = include.root.inputs.aws_account_id vpc_name = include.root.inputs.vpc_name - cluster_name = "platform-test-2" + cluster_name = "platform-test-cicd" cluster_version = "1.30" vpc_domain_name = include.root.inputs.vpc_domain_name eks_instance_disk_size = 100 @@ -20,7 +20,7 @@ locals { cluster_endpoint_public_access = true profile = include.root.inputs.aws_profile region = include.root.inputs.aws_region - cluster_mailing_list = "luther.coleman.mcginty@census.gov" + cluster_mailing_list = "ahmed.m.youssef@census.gov" environment_abbr = include.root.inputs.environment_abbr # Tags applied to AWS objects created diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index fd1b88f..af3abb0 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -80,7 +80,7 @@ remote_state { backend = "s3" config = { encrypt = true - bucket = "${get_env("TG_BUCKET_PREFIX", "")}tg-infrastructure-tf-state-${local.account_name}-us-gov-east-1" + bucket = "${get_env("TG_BUCKET_PREFIX", "")}cicd-tg-infrastructure-tf-state-${local.account_name}-us-gov-east-1" //bucket = "inf-tfstate-224384469011" key = "${path_relative_to_include()}/terraform.tfstate" //key = "${trimprefix(replace(run_cmd("realpath",get_original_terragrunt_dir()),dirname(get_repo_root()),""),"/")}" From 81a5db71b78dab1f5223fb29ab2fda7170052509 Mon Sep 17 00:00:00 2001 From: Luther Coleman McGinty Date: Mon, 16 Sep 2024 12:40:49 -0400 Subject: [PATCH 4/7] Update terragrunt.hcl Try to fix lock collision --- project-x-infra-live/terragrunt.hcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index af3abb0..1a347f0 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -86,7 +86,7 @@ remote_state { //key = "${trimprefix(replace(run_cmd("realpath",get_original_terragrunt_dir()),dirname(get_repo_root()),""),"/")}" region = 
"us-gov-east-1" //dynamodb_table = "tf_remote_state" - dynamodb_table = "terraform-locks" + dynamodb_table = "terraform-locks-cidcd" // REVERT DON'S CHANGES TEMPORARILY WHILE FINISHING TESTING } generate = { @@ -129,4 +129,4 @@ inputs = merge( local.account_vars.locals, local.region_vars.locals, local.vpc_vars.locals, -) \ No newline at end of file +) From 61fdc20f3beb2ce8e4d5411533a0acb8a8a743b5 Mon Sep 17 00:00:00 2001 From: mcgin314 Date: Tue, 17 Sep 2024 11:20:27 -0400 Subject: [PATCH 5/7] Add _mcmCluster --- project-x-infra-live/development/account.hcl | 2 +- .../eks-cert-manager/terragrunt.hcl | 34 +++ .../vpc/_mcmCluster/eks-config/terragrunt.hcl | 42 ++++ .../_mcmCluster/eks-grafana/terragrunt.hcl | 38 ++++ .../vpc/_mcmCluster/eks-istio/terragrunt.hcl | 31 +++ .../_mcmCluster/eks-karpenter/terragrunt.hcl | 38 ++++ .../eks-kiali.disable/terragrunt.hcl.disable | 77 +++++++ .../vpc/_mcmCluster/eks-loki/terragrunt.hcl | 31 +++ .../eks-metrics-server/terragrunt.hcl | 25 +++ .../vpc/_mcmCluster/eks-prometheus/README.md | 198 ++++++++++++++++++ .../_mcmCluster/eks-prometheus/terragrunt.hcl | 30 +++ .../vpc/_mcmCluster/eks-tempo/terragrunt.hcl | 41 ++++ .../vpc/_mcmCluster/eks/terragrunt.hcl | 65 ++++++ .../eks-cert-manager/terragrunt.hcl | 34 +++ .../platform-test-x/eks-config/terragrunt.hcl | 42 ++++ .../eks-grafana/terragrunt.hcl | 38 ++++ .../platform-test-x/eks-istio/terragrunt.hcl | 31 +++ .../eks-karpenter/terragrunt.hcl | 38 ++++ .../eks-kiali.disable/terragrunt.hcl.disable | 77 +++++++ .../platform-test-x/eks-loki/terragrunt.hcl | 31 +++ .../eks-metrics-server/terragrunt.hcl | 25 +++ .../platform-test-x/eks-prometheus/README.md | 198 ++++++++++++++++++ .../eks-prometheus/terragrunt.hcl | 30 +++ .../platform-test-x/eks-tempo/terragrunt.hcl | 41 ++++ .../vpc/platform-test-x/eks/terragrunt.hcl | 65 ++++++ project-x-infra-live/terragrunt.hcl | 2 +- 26 files changed, 1302 insertions(+), 2 deletions(-) create mode 100644 
project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-cert-manager/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-config/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-grafana/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-istio/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-karpenter/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-kiali.disable/terragrunt.hcl.disable create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-loki/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-metrics-server/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/README.md create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-tempo/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-cert-manager/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-config/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-grafana/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-istio/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-karpenter/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable 
create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-loki/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-metrics-server/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/README.md create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-tempo/terragrunt.hcl create mode 100644 project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks/terragrunt.hcl diff --git a/project-x-infra-live/development/account.hcl b/project-x-infra-live/development/account.hcl index 1992080..31ffcb3 100644 --- a/project-x-infra-live/development/account.hcl +++ b/project-x-infra-live/development/account.hcl @@ -5,7 +5,7 @@ locals { account_name = "lab-dev-ew" aws_account_id = "224384469011" - aws_profile = "224384469011-lab-dev-gov" + aws_profile = "224384469011-lab-dev-gov.inf-admin-t2" environment = "development" environment_abbr = "dev" } \ No newline at end of file diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-cert-manager/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-cert-manager/terragrunt.hcl new file mode 100644 index 0000000..1448ac8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-cert-manager/terragrunt.hcl @@ -0,0 +1,34 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = 
"arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = dependency.eks.inputs.creator + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cert_manager_helm_chart = "1.15.1" + cert_manager_cainjector_tag = "v1.15.1" + cert_manager_controller_tag = "v1.15.1" + cert_manager_startupapicheck_tag = "v1.15.1" + cert_manager_webhook_tag = "v1.15.1" + cluster_issuer_name = "cert-manager" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-config/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-config/terragrunt.hcl new file mode 100644 index 0000000..84bb1ff --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-config/terragrunt.hcl @@ -0,0 +1,42 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +# locals { +# tag_costallocation = "census:csvd:platformbaseline" +# } + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + vpc_id = "a-vpc-id" + cluster_name = "a-cluster-name" + subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003", ] + security_group_all_worker_mgmt_id = "sg-00b0000000000000" + eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + 
profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + vpc_id = dependency.eks.outputs.vpc_id + cluster_name = dependency.eks.outputs.cluster_name + subnets = dependency.eks.outputs.subnets + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + # tags = dependency.eks.inputs.tags + # tag_costallocation = local.tag_costallocation + # cluster_autoscaler_role_name = dependency.eks.outputs.cluster_autoscaler_role_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-grafana/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-grafana/terragrunt.hcl new file mode 100644 index 0000000..c2172e8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-grafana/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-loki" { + config_path = "../eks-loki" + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } +} +# dependency "eks-tempo" { +# config_path = "../eks-tempo" +# skip_outputs = true +# } + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = dependency.eks.inputs.vpc_domain_name + rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class + # datasources = 
dependency.eks-loki.outputs.gateway_internal_endpoint +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-istio/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-istio/terragrunt.hcl new file mode 100644 index 0000000..5a30c0e --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-istio/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-karpenter" { + config_path = "../eks-karpenter" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + istio_chart_version = "1.22.1" + istio_version = "1.22.1" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-karpenter/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-karpenter/terragrunt.hcl new file mode 100644 index 0000000..982e1d7 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-karpenter/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" + cluster_name = 
"a-cluster-name" + node_group_name = "node_group_a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + vpc_id = "a-vpc-name" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + karpenter_node_group_name = dependency.eks.outputs.node_group_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + vpc_id = dependency.eks.outputs.vpc_id +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-kiali.disable/terragrunt.hcl.disable b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-kiali.disable/terragrunt.hcl.disable new file mode 100644 index 0000000..c395110 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-kiali.disable/terragrunt.hcl.disable @@ -0,0 +1,77 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" + source = "../../../../../../../tfmod-kiali" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-cert-manager" { + config_path = "../eks-cert-manager" + mock_outputs = { + cluster_issuer_name = "acmpca-clusterissuer" + } +} +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = 
"http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + } +} +dependency "eks-grafana" { + config_path = "../eks-grafana" + mock_outputs = { + internal_endpoint = { + hostname = "grafana.grafana.svc.cluster.local" + port_number = "80" + url = "https://grafana.grafana.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.dev.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.dev.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_domain = dependency.eks.inputs.vpc_domain_name + operators_namespace = dependency.eks.inputs.operators_ns + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name + prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url + grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url + grafana_namespace = dependency.eks-grafana.outputs.namespace + grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url + grafana_secret_name = "grafana" + # grafana_secret_name = dependency.eks-grafana.outputs.secret_name + jaeger_internal_url = "" + + + # client_id = var.sso_client_id + # client_secret = var.sso_client_secret + # keycloak_public_url = var.keycloak_public_url + # gogatekeeper_chart_version = var.gogatekeeper_chart_version + # gogatekeeper_registry = var.gogatekeeper_registry + # gogatekeeper_repository = var.gogatekeeper_repository + # gogatekeeper_tag = var.gogatekeeper_tag +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-loki/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-loki/terragrunt.hcl new file mode 100644 index 0000000..cc94f7f --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-loki/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path 
= find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-metrics-server/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-metrics-server/terragrunt.hcl new file mode 100644 index 0000000..5414a72 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-metrics-server/terragrunt.hcl @@ -0,0 +1,25 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/README.md b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/README.md new file mode 100644 
index 0000000..bbbffb2 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/README.md @@ -0,0 +1,198 @@ +## eks-prometheus +This module deploys EKS Kubernetes prometheus inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. +This module consists of 4 components. It creates prometheus namespace and copies image repositories for the following components from quay.io into local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. + 1. prometheus-alert-manager + 2. prometheus-node-exporter + 3. prometheus-pushgateway + 4. prometheus-server + +### Dependencies +This module is dependent on EKS module (eks). The cluster should exist already for this module to work. + +### Inputs + cluster_name + profile + prometheus_chart_version + prometheus_server_tag + prometheus_config_reloader_tag + alertmanager_tag + kube_state_metrics_tag + node_exporter_tag + pushgateway_tag + rwo_storage_class + +### Outputs + alertmanager_internal_endpoint + alertmanager_headless_internal_endpoint + pushgateway_internal_endpoint + prometheus_server_internal_endpoint + +### Issues observed/fixed +1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" +2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" +3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" +4. The alertmanager_tag value had to be updated from +5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and the other for repository, as in the example below: + + ``` + set { + name = "kube-state-metrics.image.registry" + value = module.images.images[local.ksm_key].dest_registry + } + set { + name = "kube-state-metrics.image.repository" + value = module.images.images[local.ksm_key].dest_repository + } + ``` + +6.
In some other cases the image ECR repository had to be split by the colon separator (:) + + ``` + set { + name = "alertmanager.configmapReload.image.repository" + value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] + } + ``` + +### Chart Notes + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9091 + echo "Visit http://127.0.0.1:9091 to use your application" + ``` + + The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: + prometheus-server.prometheus.svc.cluster.local + + + Get the Prometheus server URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9090 + ``` + + The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: + `prometheus-alertmanager.prometheus.svc.cluster.local` + + + Get the Alertmanager URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9093 + ``` + + ################################################################################# + ###### WARNING: Pod Security Policy has been disabled by default since ##### + ###### it deprecated after k8s 1.25+.
use ##### + ###### (index .Values "prometheus-node-exporter" "rbac" ##### + ###### "pspEnabled") with (index .Values ##### + ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### + ###### in case you still need it. ##### + ################################################################################# + + + The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: + `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` + + + Get the PushGateway URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9091 + ``` + + For more information on running Prometheus, visit: + https://prometheus.io/ + + kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. + The exposed metrics can be found here: + https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics + + The metrics are exported on the HTTP endpoint /metrics on the listening port. + In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` + + They are served either as plaintext or protobuf depending on the Accept header. + They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. + + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9093 to use your application" + kubectl --namespace prometheus port-forward $POD_NAME 9093:80 + ``` + + 1. 
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace prometheus $POD_NAME 9100 + ``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | +| [aws](#requirement\_aws) | >= 5.14.0 | +| [helm](#requirement\_helm) | >= 2.11.0 | +| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | +| [null](#requirement\_null) | >= 3.2.1 | + +## Providers + +| Name | Version | +|------|---------| +| [helm](#provider\_helm) | >= 2.11.0 | +| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | + +## Resources + +| Name | Type | +|------|------| +| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | +| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | +| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | +| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | +| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | +| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | +| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | +| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | +| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | +| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | +| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | +| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | +| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | +| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | +| [module\_name](#output\_module\_name) | The name of this module. | +| [module\_version](#output\_module\_version) | The version of this module. 
| +| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | +| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | +| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | + diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/terragrunt.hcl new file mode 100644 index 0000000..62611b1 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-prometheus/terragrunt.hcl @@ -0,0 +1,30 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-tempo/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-tempo/terragrunt.hcl new file mode 100644 index 0000000..02fad53 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks-tempo/terragrunt.hcl @@ -0,0 +1,41 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + 
mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + prometheus_namespace = "prometheus" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks/terragrunt.hcl new file mode 100644 index 0000000..0f4fb64 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/_mcmCluster/eks/terragrunt.hcl @@ -0,0 +1,65 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +locals { + # Set cluster/platform specific variables, or extract from the hierarchy. 
+ account_id = include.root.inputs.aws_account_id + vpc_name = include.root.inputs.vpc_name + cluster_name = "platform-eng-eks-mcm" + cluster_version = "1.30" + vpc_domain_name = include.root.inputs.vpc_domain_name + eks_instance_disk_size = 100 + eks_vpc_name = include.root.inputs.vpc_name + eks_ng_desired_size = 2 + eks_ng_max_size = 10 + eks_ng_min_size = 2 + operators_ns = "operators" + enable_cluster_creator_admin_permissions = true + cluster_endpoint_public_access = true + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_mailing_list = "matthew.c.morgan@census.gov" + environment_abbr = include.root.inputs.environment_abbr + + # Tags applied to AWS objects created + tags = { + "eks-cluster-name" = local.cluster_name + "Environment" = local.environment_abbr + "slim:schedule" = "8:00-17:00" + "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" + } + +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +inputs = { + aws_account_id = local.account_id + profile = local.profile + vpc_name = local.eks_vpc_name + cluster_name = local.cluster_name + cluster_version = local.cluster_version + eks_instance_disk_size = local.eks_instance_disk_size + eks_vpc_name = local.eks_vpc_name + # eks_instance_types = local.eks_instance_types + eks_ng_desired_size = local.eks_ng_desired_size + eks_ng_max_size = local.eks_ng_max_size + eks_ng_min_size = local.eks_ng_min_size + operators_ns = local.operators_ns + enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions + cluster_endpoint_public_access = local.cluster_endpoint_public_access + tags = local.tags + vpc_domain_name = local.vpc_domain_name + region = local.region + creator = local.cluster_mailing_list +
os_username = local.cluster_mailing_list + shared_vpc_label = local.environment_abbr +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-cert-manager/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-cert-manager/terragrunt.hcl new file mode 100644 index 0000000..1448ac8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-cert-manager/terragrunt.hcl @@ -0,0 +1,34 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-cert-mgr.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + cluster_name = dependency.eks.outputs.cluster_name + cluster_mailing_list = dependency.eks.inputs.creator + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cert_manager_helm_chart = "1.15.1" + cert_manager_cainjector_tag = "v1.15.1" + cert_manager_controller_tag = "v1.15.1" + cert_manager_startupapicheck_tag = "v1.15.1" + cert_manager_webhook_tag = "v1.15.1" + cluster_issuer_name = "cert-manager" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-config/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-config/terragrunt.hcl new file mode 100644 index 0000000..84bb1ff --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-config/terragrunt.hcl @@ -0,0 +1,42 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} 
+ +# locals { +# tag_costallocation = "census:csvd:platformbaseline" +# } + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks-configuration.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + vpc_id = "a-vpc-id" + cluster_name = "a-cluster-name" + subnets = ["subnet-00000000000000001", "subnet-00000000000000002", "subnet-00000000000000003", ] + security_group_all_worker_mgmt_id = "sg-00b0000000000000" + eks_managed_node_groups_autoscaling_group_names = ["eks-eks-a-cluster-name-node_group-0000000000000000000000000-5ac8a5e3-14dd-c043-2cc9-f4b6ffb36d32"] + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + vpc_id = dependency.eks.outputs.vpc_id + cluster_name = dependency.eks.outputs.cluster_name + subnets = dependency.eks.outputs.subnets + security_group_all_worker_mgmt_id = dependency.eks.outputs.security_group_all_worker_mgmt_id + eks_managed_node_groups_autoscaling_group_names = dependency.eks.outputs.eks_managed_node_groups_autoscaling_group_names + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + # tags = dependency.eks.inputs.tags + # tag_costallocation = local.tag_costallocation + # cluster_autoscaler_role_name = dependency.eks.outputs.cluster_autoscaler_role_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-grafana/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-grafana/terragrunt.hcl new file mode 100644 index 0000000..c2172e8 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-grafana/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = 
find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-grafana.git" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-loki" { + config_path = "../eks-loki" + mock_outputs = { + rwo_storage_class = "gp3-encrypted" + } +} +# dependency "eks-tempo" { +# config_path = "../eks-tempo" +# skip_outputs = true +# } + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + cluster_domain = dependency.eks.inputs.vpc_domain_name + rwo_storage_class = dependency.eks-loki.outputs.rwo_storage_class + # datasources = dependency.eks-loki.outputs.gateway_internal_endpoint +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-istio/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-istio/terragrunt.hcl new file mode 100644 index 0000000..5a30c0e --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-istio/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-istio.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-karpenter" { + config_path = "../eks-karpenter" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + istio_chart_version = "1.22.1" + 
istio_version = "1.22.1" +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-karpenter/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-karpenter/terragrunt.hcl new file mode 100644 index 0000000..982e1d7 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-karpenter/terragrunt.hcl @@ -0,0 +1,38 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-karpenter.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_endpoint = "https://0000000000000000AAAAAAAAAAAAAAAA.sk1.us-gov-east-1.eks.amazonaws.com" + cluster_name = "a-cluster-name" + node_group_name = "node_group_a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + vpc_id = "a-vpc-name" + } +} + +dependency "eks-config" { + config_path = "../eks-config" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_endpoint = dependency.eks.outputs.cluster_endpoint + cluster_name = dependency.eks.outputs.cluster_name + karpenter_node_group_name = dependency.eks.outputs.node_group_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + vpc_id = dependency.eks.outputs.vpc_id +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable new file mode 100644 index 0000000..c395110 --- /dev/null +++ 
b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable @@ -0,0 +1,77 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + # source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-kiali.git?ref=mcmCluster" + source = "../../../../../../../tfmod-kiali" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} +dependency "eks-cert-manager" { + config_path = "../eks-cert-manager" + mock_outputs = { + cluster_issuer_name = "acmpca-clusterissuer" + } +} +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + } +} +dependency "eks-grafana" { + config_path = "../eks-grafana" + mock_outputs = { + internal_endpoint = { + hostname = "grafana.grafana.svc.cluster.local" + port_number = "80" + url = "https://grafana.grafana.svc.cluster.local:80/" + } + namespace = "grafana" + public_endpoint = { + hostname = "grafana.dev.lab.csp2.census.gov" + port_number = "80" + url = "https://grafana.dev.lab.csp2.census.gov:80/" + } + secret_name = "grafana" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_domain = dependency.eks.inputs.vpc_domain_name + operators_namespace = dependency.eks.inputs.operators_ns + cluster_name = dependency.eks.outputs.cluster_name + certificate_issuer = dependency.eks-cert-manager.outputs.cluster_issuer_name + prometheus_internal_url = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.url + grafana_internal_url = dependency.eks-grafana.outputs.internal_endpoint.url + grafana_namespace = 
dependency.eks-grafana.outputs.namespace + grafana_public_url = dependency.eks-grafana.outputs.public_endpoint.url + grafana_secret_name = "grafana" + # grafana_secret_name = dependency.eks-grafana.outputs.secret_name + jaeger_internal_url = "" + + + # client_id = var.sso_client_id + # client_secret = var.sso_client_secret + # keycloak_public_url = var.keycloak_public_url + # gogatekeeper_chart_version = var.gogatekeeper_chart_version + # gogatekeeper_registry = var.gogatekeeper_registry + # gogatekeeper_repository = var.gogatekeeper_repository + # gogatekeeper_tag = var.gogatekeeper_tag +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-loki/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-loki/terragrunt.hcl new file mode 100644 index 0000000..cc94f7f --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-loki/terragrunt.hcl @@ -0,0 +1,31 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-loki.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-metrics-server/terragrunt.hcl 
b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-metrics-server/terragrunt.hcl new file mode 100644 index 0000000..5414a72 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-metrics-server/terragrunt.hcl @@ -0,0 +1,25 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-metrics-server.git?ref=main" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + cluster_name = dependency.eks.outputs.cluster_name + region = include.root.inputs.aws_region +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/README.md b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/README.md new file mode 100644 index 0000000..bbbffb2 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/README.md @@ -0,0 +1,198 @@ +## eks-prometheus +This module deploys EKS Kubernetes Prometheus inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. +This module consists of 4 components. It creates the prometheus namespace and copies image repositories for the following components from quay.io into the local account ECR repository. It deploys these components using helm charts using the configured ECR repositories. + 1. prometheus-alert-manager + 2. prometheus-node-exporter + 3. prometheus-pushgateway + 4. prometheus-server + +### Dependencies +This module is dependent on EKS module (eks). The cluster should exist already for this module to work.
+ +### Inputs + cluster_name + profile + prometheus_chart_version + prometheus_server_tag + prometheus_config_reloader_tag + alertmanager_tag + kube_state_metrics_tag + node_exporter_tag + pushgateway_tag + rwo_storage_class + +### Outputs + alertmanager_internal_endpoint + alertmanager_headless_internal_endpoint + pushgateway_internal_endpoint + prometheus_server_internal_endpoint + +### Issues observed/fixed +1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" +2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" +3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" +4. The alertmanager_tag value had to be updated from +5. The helm chart set config for the ecr image had to be split into 2 components, one for registry and other for repository as an example mentioned below: + + ``` + set { + name = "kube-state-metrics.image.registry" + value = module.images.images[local.ksm_key].dest_registry + } + set { + name = "kube-state-metrics.image.repository" + value = module.images.images[local.ksm_key].dest_repository + } + ``` + +6. In some other cases the image ecr repository had to be split by the colon separator (:) + + ``` + set { + name = "alertmanager.configmapReload.image.repository" + value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] + } + ``` + +### Chart Notes + 1.
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-pushgateway,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl port-forward $POD_NAME 9091 + echo "Visit http://127.0.0.1:9091 to use your application" + ``` + + The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: + prometheus-server.prometheus.svc.cluster.local + + + Get the Prometheus server URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9090 + ``` + + The Prometheus alertmanager can be accessed via port 9093 on the following DNS name from within your cluster: + `prometheus-alertmanager.prometheus.svc.cluster.local` + + + Get the Alertmanager URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9093 + ``` + + ################################################################################# + ###### WARNING: Pod Security Policy has been disabled by default since ##### + ###### it deprecated after k8s 1.25+. use ##### + ###### (index .Values "prometheus-node-exporter" "rbac" ##### + ###### "pspEnabled") with (index .Values ##### + ###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### + ###### in case you still need it. 
##### + ################################################################################# + + + The Prometheus PushGateway can be accessed via port 9091 on the following DNS name from within your cluster: + `prometheus-prometheus-pushgateway.prometheus.svc.cluster.local` + + + Get the PushGateway URL by running these commands in the same shell: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app=prometheus-pushgateway,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace prometheus port-forward $POD_NAME 9091 + ``` + + For more information on running Prometheus, visit: + https://prometheus.io/ + + kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. + The exposed metrics can be found here: + https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics + + The metrics are exported on the HTTP endpoint /metrics on the listening port. + In your case, `prometheus-kube-state-metrics.prometheus.svc.cluster.local:8080/metrics` + + They are served either as plaintext or protobuf depending on the Accept header. + They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. + + 1. Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9093 to use your application" + kubectl --namespace prometheus port-forward $POD_NAME 9093:80 + ``` + + 1. 
Get the application URL by running these commands: + + ```bash + export POD_NAME=$(kubectl get pods --namespace prometheus -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace prometheus $POD_NAME 9100 + ``` + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.13 | +| [aws](#requirement\_aws) | >= 5.14.0 | +| [helm](#requirement\_helm) | >= 2.11.0 | +| [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | +| [null](#requirement\_null) | >= 3.2.1 | + +## Providers + +| Name | Version | +|------|---------| +| [helm](#provider\_helm) | >= 2.11.0 | +| [kubernetes](#provider\_kubernetes) | >= 2.23.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | + +## Resources + +| Name | Type | +|------|------| +| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace.ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | +| [kubernetes_namespace.existing-ns](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [alertmanager\_tag](#input\_alertmanager\_tag) | The image tag of the alertmanager image. | `string` | `"v0.27.0"` | no | +| [cluster\_name](#input\_cluster\_name) | The name of the cluster into which prometheus will be installed. 
| `string` | n/a | yes | +| [create\_namespace](#input\_create\_namespace) | Indicates whether the `namespace` needs to be created ('true') or already exists (not `true`) | `bool` | `true` | no | +| [kube\_state\_metrics\_tag](#input\_kube\_state\_metrics\_tag) | The image tag of the kube-state-metrics image. | `string` | `"v2.13.0"` | no | +| [namespace](#input\_namespace) | The namespace to install the prometheus components. Defaults to 'prometheus' | `string` | `"prometheus"` | no | +| [node\_exporter\_tag](#input\_node\_exporter\_tag) | The image tag of the node-exporter image. | `string` | `"v1.8.2"` | no | +| [profile](#input\_profile) | AWS\_PROFILE to use to apply the terraform script. | `string` | `""` | no | +| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | The version of prometheus to install into the cluster. | `string` | `"25.24.1"` | no | +| [prometheus\_config\_reloader\_tag](#input\_prometheus\_config\_reloader\_tag) | The image tag of the prometheus-config-reloader image. | `string` | `"v0.75.1"` | no | +| [prometheus\_server\_tag](#input\_prometheus\_server\_tag) | The image tag of prometheus server to install into the cluster. | `string` | `"v2.53.1"` | no | +| [pushgateway\_tag](#input\_pushgateway\_tag) | The image tag of the pushgateway image. | `string` | `"v1.9.0"` | no | +| [rwo\_storage\_class](#input\_rwo\_storage\_class) | Specify the storage class for read/write/once persistent volumes. | `string` | `"gp3-encrypted"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [alertmanager\_headless\_internal\_endpoint](#output\_alertmanager\_headless\_internal\_endpoint) | n/a | +| [alertmanager\_internal\_endpoint](#output\_alertmanager\_internal\_endpoint) | n/a | +| [module\_name](#output\_module\_name) | The name of this module. | +| [module\_version](#output\_module\_version) | The version of this module. 
| +| [prometheus\_namespace](#output\_prometheus\_namespace) | n/a | +| [prometheus\_server\_internal\_endpoint](#output\_prometheus\_server\_internal\_endpoint) | n/a | +| [pushgateway\_internal\_endpoint](#output\_pushgateway\_internal\_endpoint) | n/a | + diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/terragrunt.hcl new file mode 100644 index 0000000..62611b1 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-prometheus/terragrunt.hcl @@ -0,0 +1,30 @@ +include "root" { + path = find_in_parent_folders() + expose = true # exposes the root config so include.root.inputs.* can be read in the inputs block below +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=main" # NOTE(review): ref=main tracks a moving branch; consider pinning a tag or commit for reproducible applies + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] # every command that takes the state lock is given a 20-minute lock timeout + } +} + +dependency "eks" { + config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" # placeholder standing in for the real ../eks output; NOTE(review): mocks are not limited via mock_outputs_allowed_terraform_commands - confirm that is intended + } +} + +dependency "eks-istio" { + config_path = "../eks-istio" + skip_outputs = true # no eks-istio outputs are read by the inputs below; presumably declared only so this unit is ordered after eks-istio +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-tempo/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-tempo/terragrunt.hcl new file mode 100644 index 0000000..02fad53 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-tempo/terragrunt.hcl @@ -0,0 +1,41 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-tempo.git?ref=main" # NOTE(review): ref=main tracks a moving branch; consider pinning a tag or commit for reproducible applies + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + 
config_path = "../eks" + mock_outputs = { + cluster_name = "a-cluster-name" + oidc_provider_arn = "arn:aws-us-gov:iam::111111111111:oidc-provider/oidc.eks.us-gov-east-1.amazonaws.com/id/0000000000000000AAAAAAAAAAAAAAAA" + } +} + +dependency "eks-prometheus" { + config_path = "../eks-prometheus" + mock_outputs = { + prometheus_server_internal_endpoint = { + hostname = "prometheus-server.prometheus.svc.cluster.local" + port_number = 9090 + url = "http://prometheus-server.prometheus.svc.cluster.local:9090/" + } + prometheus_namespace = "prometheus" + } +} + +inputs = { + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_name = dependency.eks.outputs.cluster_name + oidc_provider_arn = dependency.eks.outputs.oidc_provider_arn + prometheus_port = dependency.eks-prometheus.outputs.prometheus_server_internal_endpoint.port_number + prometheus_namespace = dependency.eks-prometheus.outputs.prometheus_namespace +} diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks/terragrunt.hcl b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks/terragrunt.hcl new file mode 100644 index 0000000..aa98f69 --- /dev/null +++ b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks/terragrunt.hcl @@ -0,0 +1,65 @@ +include "root" { + path = find_in_parent_folders() + expose = true +} + +locals { + # Set cluster/platform specific variables, or extract from the hierarchy. 
+ account_id = include.root.inputs.aws_account_id + vpc_name = include.root.inputs.vpc_name # NOTE(review): local.vpc_name is never referenced in this file; inputs.vpc_name below is fed from local.eks_vpc_name, which reads the same root value + cluster_name = "platform-test-x" + cluster_version = "1.30" + vpc_domain_name = include.root.inputs.vpc_domain_name + eks_instance_disk_size = 100 + eks_vpc_name = include.root.inputs.vpc_name + eks_ng_desired_size = 2 + eks_ng_max_size = 10 + eks_ng_min_size = 2 + operators_ns = "operators" + enable_cluster_creator_admin_permissions = true + cluster_endpoint_public_access = true + profile = include.root.inputs.aws_profile + region = include.root.inputs.aws_region + cluster_mailing_list = "luther.coleman.mcginty@census.gov" + environment_abbr = include.root.inputs.environment_abbr + + # Tags applied to AWS objects created + tags = { + "eks-cluster-name" = local.cluster_name + "Environment" = local.environment_abbr + "slim:schedule" = "8:00-17:00" + "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}" + } + +} + +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git?ref=main" # NOTE(review): ref=main tracks a moving branch; consider pinning a tag or commit for reproducible applies + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +inputs = { + aws_account_id = local.account_id + profile = local.profile + vpc_name = local.eks_vpc_name + cluster_name = local.cluster_name + cluster_version = local.cluster_version + eks_instance_disk_size = local.eks_instance_disk_size + eks_vpc_name = local.eks_vpc_name + # eks_instance_types = local.eks_instance_types + eks_ng_desired_size = local.eks_ng_desired_size + eks_ng_max_size = local.eks_ng_max_size + eks_ng_min_size = local.eks_ng_min_size + operators_ns = local.operators_ns + enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions + cluster_endpoint_public_access = local.cluster_endpoint_public_access + tags = local.tags + vcp_domain_name = local.vpc_domain_name # NOTE(review): key is spelled "vcp_domain_name" while the local is "vpc_domain_name" - looks like a transposition; confirm against the variable name declared by tfmod-eks + region = local.region + creator = local.cluster_mailing_list + 
os_username = local.cluster_mailing_list + shared_vpc_label = local.environment_abbr +} diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index af3abb0..fd1b88f 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -80,7 +80,7 @@ remote_state { backend = "s3" config = { encrypt = true - bucket = "${get_env("TG_BUCKET_PREFIX", "")}cicd-tg-infrastructure-tf-state-${local.account_name}-us-gov-east-1" + bucket = "${get_env("TG_BUCKET_PREFIX", "")}tg-infrastructure-tf-state-${local.account_name}-us-gov-east-1" //bucket = "inf-tfstate-224384469011" key = "${path_relative_to_include()}/terraform.tfstate" //key = "${trimprefix(replace(run_cmd("realpath",get_original_terragrunt_dir()),dirname(get_repo_root()),""),"/")}" From 54a4ff07ba18da058713096a42e2779f0d39e08e Mon Sep 17 00:00:00 2001 From: mcgin314 Date: Tue, 17 Sep 2024 11:36:38 -0400 Subject: [PATCH 6/7] Adopt latest state management approach --- project-x-infra-live/terragrunt.hcl | 44 ++++++++--------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index ebd7375..1c510ae 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -78,44 +78,22 @@ EOF # Configure Terragrunt to automatically store tfstate files in an S3 bucket remote_state { backend = "s3" - config = { - encrypt = true - bucket = "${get_env("TG_BUCKET_PREFIX", "")}tg-infrastructure-tf-state-${local.account_name}-us-gov-east-1" - //bucket = "inf-tfstate-224384469011" - key = "${path_relative_to_include()}/terraform.tfstate" - //key = "${trimprefix(replace(run_cmd("realpath",get_original_terragrunt_dir()),dirname(get_repo_root()),""),"/")}" - region = "us-gov-east-1" - //dynamodb_table = "tf_remote_state" - dynamodb_table = "terraform-locks-cidcd" - // REVERT DON'S CHANGES TEMPORARILY WHILE FINISHING TESTING - } generate = { - path = 
"backend.tf" + path = "remote_state.backend.tf" if_exists = "overwrite_terragrunt" } + config = { + bucket = "${local.state_bucket_prefix}-${local.account_id}" + dynamodb_table = "${local.state_table_name}" + key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}" + profile = "${local.profile}" + region = "${local.region}" + skip_bucket_enforced_tls = true + skip_bucket_root_access = true + skip_bucket_ssencryption = true + } } -# remote_state { -# backend = "s3" -# generate = { -# path = "remote_state.backend.tf" -# if_exists = "overwrite_terragrunt" -# } -# config = { -# #--- -# # "${local.account_name}": "${get_path_from_repo_root()}" -# #--- -# bucket = "${local.state_bucket_prefix}-${local.account_id}" -# profile = "${local.profile}" -# key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}" -# region = "${local.region}" -# dynamodb_table = "${local.state_table_name}" -# skip_bucket_root_access = true -# skip_bucket_ssencryption = true -# skip_bucket_enforced_tls = true -# } -# } - # --------------------------------------------------------------------------------------------------------------------- # GLOBAL PARAMETERS # These variables apply to all configurations in this subfolder. 
These are automatically merged into the child From 8d7cbc170861fea2c3d6b9d6a1cef9e4f9654421 Mon Sep 17 00:00:00 2001 From: mcgin314 Date: Tue, 17 Sep 2024 11:55:02 -0400 Subject: [PATCH 7/7] Modifications to state management --- ...{terragrunt.hcl.disable => terragrunt.hcl} | 0 project-x-infra-live/terragrunt.hcl | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) rename project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/{terragrunt.hcl.disable => terragrunt.hcl} (100%) diff --git a/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable b/project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl similarity index 100% rename from project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl.disable rename to project-x-infra-live/development/us-gov-east-1/vpc/platform-test-x/eks-kiali.disable/terragrunt.hcl diff --git a/project-x-infra-live/terragrunt.hcl b/project-x-infra-live/terragrunt.hcl index 1c510ae..504e9d4 100644 --- a/project-x-infra-live/terragrunt.hcl +++ b/project-x-infra-live/terragrunt.hcl @@ -83,14 +83,17 @@ remote_state { if_exists = "overwrite_terragrunt" } config = { - bucket = "${local.state_bucket_prefix}-${local.account_id}" - dynamodb_table = "${local.state_table_name}" - key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}" - profile = "${local.profile}" - region = "${local.region}" - skip_bucket_enforced_tls = true - skip_bucket_root_access = true - skip_bucket_ssencryption = true + bucket = "${local.state_bucket_prefix}-${local.account_id}" + dynamodb_table = "${local.state_table_name}" + key = "${trimprefix(replace(run_cmd("realpath", get_original_terragrunt_dir()), dirname(get_repo_root()), ""), "/")}/terraform.tfstate" + profile = "${local.profile}" + region = "${local.region}" + 
+ skip_bucket_enforced_tls = true # use only if you need to access the S3 bucket without TLS being enforced + skip_bucket_public_access_blocking = true # NOTE(review): the only flag here without a rationale; presumably skips public-access blocking on the state bucket - confirm that is intended + skip_bucket_root_access = true # use only if the AWS account root user should not have access to the remote state bucket for some reason + skip_bucket_ssencryption = true # use only if non-encrypted OpenTofu/Terraform State is required and/or the object store does not support server-side encryption + skip_bucket_versioning = false # false: versioning is not skipped; set to true only if the object store does not support versioning + enable_lock_table_ssencryption = false # false: server-side encryption is not enabled for the DynamoDB lock table; NOTE(review): the earlier "use only if non-encrypted ... is required" wording described the opposite sense of this enable_* flag - confirm false is intended + } }