diff --git a/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/README.md b/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/README.md new file mode 100644 index 0000000..f4b317e --- /dev/null +++ b/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/README.md @@ -0,0 +1,55 @@ +## eks-prometheus +This module deploys Kubernetes Prometheus inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool. +This module consists of 4 components. It creates the prometheus namespace and copies the image repositories for the following components from quay.io into the local account's ECR repository. It then deploys these components with Helm charts, using the configured ECR repositories. + 1. prometheus-alert-manager + 2. prometheus-node-exporter + 3. prometheus-pushgateway + 4. prometheus-server + +### Dependencies +This module depends on the EKS module (eks). The cluster must already exist for this module to work. + +### Inputs + cluster_name + profile + prometheus_chart_version + prometheus_server_tag + prometheus_config_reloader_tag + alertmanager_tag + kube_state_metrics_tag + node_exporter_tag + pushgateway_tag + rwo_storage_class + +### Outputs + alertmanager_internal_endpoint + alertmanager_headless_internal_endpoint + pushgateway_internal_endpoint + prometheus_server_internal_endpoint + +### Issues observed/fixed +1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted" +2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1" +3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0" +4. The alertmanager_tag value had to be updated (TODO: record the old and new tag values) +5. 
The Helm chart set config for the ECR image had to be split into 2 components, one for the registry and the other for the repository, as shown in the example below: + ``` + set { + name = "kube-state-metrics.image.registry" + value = module.images.images[local.ksm_key].dest_registry + } + set { + name = "kube-state-metrics.image.repository" + value = module.images.images[local.ksm_key].dest_repository + } + ``` +6. In some other cases the image's ECR repository path had to be split on the colon separator (:) + ``` + set { + name = "alertmanager.configmapReload.image.repository" + value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0] + } + ``` + + + diff --git a/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/terragrunt.hcl b/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/terragrunt.hcl new file mode 100644 index 0000000..c6f2572 --- /dev/null +++ b/lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/terragrunt.hcl @@ -0,0 +1,47 @@ +terraform { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=platform-changes" + extra_arguments "retry_lock" { + commands = get_terraform_commands_that_need_locking() + arguments = ["-lock-timeout=20m"] + } +} + +dependency "eks" { + config_path = "../eks" +} + +inputs = { + profile = dependency.eks.inputs.profile + cluster_name = dependency.eks.inputs.cluster_name + region = dependency.eks.inputs.region +} + +generate "provider-eks" { + path = "provider.tf" + if_exists = "overwrite_terragrunt" + contents = <