Skip to content

Commit

Permalink
add promethus
Browse files Browse the repository at this point in the history
  • Loading branch information
morga471 committed Jul 22, 2024
1 parent b536f94 commit 27f6889
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 0 deletions.
55 changes: 55 additions & 0 deletions lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
## eks-prometheus
This module deploys Prometheus for Kubernetes inside an existing EKS cluster. Prometheus is an open-source systems monitoring and alerting tool.
This module consists of 4 components. It creates the prometheus namespace and copies the image repositories for the following components from quay.io into the local account's ECR repositories. It then deploys these components with Helm charts that use the configured ECR repositories.
1. prometheus-alert-manager
2. prometheus-node-exporter
3. prometheus-pushgateway
4. prometheus-server

### Dependencies
This module is dependent on EKS module (eks). The cluster should exist already for this module to work.

### Inputs
cluster_name
profile
prometheus_chart_version
prometheus_server_tag
prometheus_config_reloader_tag
alertmanager_tag
kube_state_metrics_tag
node_exporter_tag
pushgateway_tag
rwo_storage_class

### Outputs
alertmanager_internal_endpoint
alertmanager_headless_internal_endpoint
pushgateway_internal_endpoint
prometheus_server_internal_endpoint

### Issues observed/fixed
1. The rwo_storage_class value had to be updated from "gp3" to "gp3-encrypted"
2. The node_exporter_tag value had to be updated from "1.6.1" to "v1.8.1"
3. The kube_state_metrics_tag value had to be updated from "2.10.0" to "v2.6.0"
4. The alertmanager_tag value had to be updated (TODO: the original and new values were not recorded here)
5. The helm chart `set` config for the ECR image had to be split into 2 settings, one for the registry and the other for the repository, as in the example below:
```
set {
name = "kube-state-metrics.image.registry"
value = module.images.images[local.ksm_key].dest_registry
}
set {
name = "kube-state-metrics.image.repository"
value = module.images.images[local.ksm_key].dest_repository
}
```
6. In some other cases the image's full ECR path had to be split on the colon separator (:) to obtain the repository
```
set {
name = "alertmanager.configmapReload.image.repository"
value = split(":", module.images.images[local.prom_config_reload_key].dest_full_path)[0]
}
```



47 changes: 47 additions & 0 deletions lab/us-gov-east-1/vpc/_mcmCluster/eks-promethus/terragrunt.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Module source and Terraform CLI behaviour for the Prometheus deployment.
terraform {
  # NOTE(review): "platform-changes" looks like a branch name rather than an
  # immutable tag or commit — confirm whether a pinned ref is intended.
  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-prometheus.git?ref=platform-changes"

  # Add -lock-timeout=20m to every command Terragrunt reports as needing a
  # state lock, so a held lock is waited on instead of failing immediately.
  extra_arguments "retry_lock" {
    arguments = ["-lock-timeout=20m"]
    commands  = get_terraform_commands_that_need_locking()
  }
}

# Declares the sibling "../eks" Terragrunt unit as a dependency so that its
# values can be referenced in this file as dependency.eks.*.
dependency "eks" {
  config_path = "../eks"
}

# Variables handed to the Prometheus module. All three are copied from the
# inputs of the "eks" dependency rather than being redefined here.
inputs = {
  cluster_name = dependency.eks.inputs.cluster_name
  region       = dependency.eks.inputs.region
  profile      = dependency.eks.inputs.profile
}

# Generates provider.tf in the Terragrunt working directory. The generated file
# looks up the EKS cluster named by the "eks" dependency and configures the
# kubernetes and helm providers with that cluster's endpoint, CA certificate
# and an auth token.
generate "provider-eks" {
path = "provider.tf"
# "overwrite_terragrunt" replaces an existing provider.tf only when it was
# previously generated by Terragrunt.
if_exists = "overwrite_terragrunt"
# Only the ${dependency.eks.inputs.cluster_name} interpolations are resolved by
# Terragrunt when the file is written; the data.* references are left as-is for
# Terraform to evaluate. The heredoc body is written to provider.tf verbatim,
# so do not add comments or re-indent inside it without intending to change
# the generated file.
contents = <<EOF
data "aws_eks_cluster" "cluster" {
name = "${dependency.eks.inputs.cluster_name}"
}
data "aws_eks_cluster_auth" "cluster" {
name = "${dependency.eks.inputs.cluster_name}"
}
provider "kubernetes" {
host = data.aws_eks_cluster.cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
token = data.aws_eks_cluster_auth.cluster.token
}
provider "helm" {
kubernetes {
host = data.aws_eks_cluster.cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
token = data.aws_eks_cluster_auth.cluster.token
}
}
EOF
}
85 changes: 85 additions & 0 deletions lab/us-gov-east-1/vpc/_mcmCluster/eks-slim/terragrunt.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Pull in the nearest parent Terragrunt configuration found by walking up the
# directory tree. expose = true additionally makes the parsed parent config
# readable from this file as include.root.
include "root" {
  expose = true
  path   = find_in_parent_folders()
}

# Settings for the "eks-slim" lab cluster. Every value defined here is passed
# to the EKS module through the `inputs` map in this file.
locals {
  # Cluster identity and placement
  vpc_name     = "vpc3-lab-dev"
  cluster_name = "eks-slim-cluster"

  # Kubernetes version. Kept as a quoted string on purpose: as a bare number a
  # future minor such as 1.30 would be read as 1.3.
  cluster_version = "1.29"

  domain  = "dev.lab.csp2.census.gov"
  profile = "224384469011-lab-dev-gov"

  # Node group sizing
  eks_instance_disk_size = 40
  eks_vpc_name           = "vpc3-lab-dev"
  eks_ng_desired_size    = 1
  eks_ng_max_size        = 4
  eks_ng_min_size        = 1

  operators_ns = "operators"

  # Access settings
  enable_cluster_creator_admin_permissions = true
  cluster_endpoint_public_access           = true

  # Tags applied to AWS objects created
  tags = {
    "Environment"   = "dev"
    "slim:schedule" = "8:00-17:00"
    # Records the node group sizing above in a single tag value.
    "cluster:size" = "min:${local.eks_ng_min_size}-max:${local.eks_ng_max_size}-desired:${local.eks_ng_desired_size}"
  }

  # IAM roles mapped to Kubernetes identities. Both entries map an SSO role to
  # the "admin" username in the system:masters group.
  aws_auth_roles = [
    {
      rolearn      = "arn:aws-us-gov:iam::224384469011:role/AWSReservedSSO_inf-admin-t3_b200ae7af469cdc8"
      aws_rolename = ""
      username     = "admin"
      groups       = ["system:masters"]
    },
    {
      rolearn      = "arn:aws-us-gov:iam::224384469011:role/AWSReservedSSO_inf-admin-t2_f3912d726991bbfa"
      aws_rolename = ""
      username     = "admin"
      groups       = ["system:masters"]
    }
  ]
}

# Module source and Terraform CLI behaviour for the EKS cluster.
terraform {
  # NOTE(review): no ?ref= is given, so the repository's default branch is
  # used — confirm whether the module should be pinned to a tag or commit.
  source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git"

  # Add -lock-timeout=20m to every command Terragrunt reports as needing a
  # state lock, so a held lock is waited on instead of failing immediately.
  extra_arguments "retry_lock" {
    arguments = ["-lock-timeout=20m"]
    commands  = get_terraform_commands_that_need_locking()
  }
}

# Stores this unit's Terraform state in S3 and has Terragrunt write the
# matching backend block to backend.tf.
remote_state {
  backend = "s3"

  config = {
    region  = "us-gov-east-1"
    bucket  = "tg-infrastructure-tf-state-lab-dev-ew-us-gov-east-1"
    key     = "eks-slim-cluster/terraform.tfstate"
    encrypt = true
    # NOTE(review): the lock table is commented out, so no DynamoDB state
    # locking is configured here even though -lock-timeout is set in the
    # terraform block — confirm this is intended.
    #dynamodb_table = "my-lock-table"
  }

  generate = {
    if_exists = "overwrite_terragrunt"
    path      = "backend.tf"
  }
}

# Variables handed to the EKS module; every value is forwarded from `locals`.
inputs = {
  # Identity and placement
  profile         = local.profile
  cluster_name    = local.cluster_name
  cluster_version = local.cluster_version
  domain          = local.domain

  # Networking. Both names are fed from eks_vpc_name; local.vpc_name is not
  # referenced here.
  vpc_name     = local.eks_vpc_name
  eks_vpc_name = local.eks_vpc_name

  # Node group
  eks_instance_disk_size = local.eks_instance_disk_size
  #eks_instance_types = local.eks_instance_types
  eks_ng_min_size     = local.eks_ng_min_size
  eks_ng_max_size     = local.eks_ng_max_size
  eks_ng_desired_size = local.eks_ng_desired_size

  # Access and add-ons
  operators_ns                             = local.operators_ns
  enable_cluster_creator_admin_permissions = local.enable_cluster_creator_admin_permissions
  cluster_endpoint_public_access           = local.cluster_endpoint_public_access
  aws_auth_roles                           = local.aws_auth_roles

  tags = local.tags
}

0 comments on commit 27f6889

Please sign in to comment.