From 63c38f7a3a379a99134bfa0f02f76e17cb14d14a Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 14:43:22 -0500 Subject: [PATCH 01/42] refactor chart values --- main.tf | 109 ++++++++++++++++++++++++++ values/loki.yml.tpl | 185 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 294 insertions(+) diff --git a/main.tf b/main.tf index 5be6fda..7753fdc 100644 --- a/main.tf +++ b/main.tf @@ -53,6 +53,11 @@ resource "helm_release" "loki" { provisioner_image_tag = module.images.images[local.provisioner_key].tag gateway_image_repository = module.images.images[local.gateway_key].dest_repository gateway_image_tag = module.images.images[local.gateway_key].tag +<<<<<<< HEAD +======= + canary_image_repository = module.images.images[local.canary_key].dest_repository + canary_image_tag = module.images.images[local.canary_key].tag +>>>>>>> 98921ea (refactor chart values) sidecar_image_repository = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] sidecar_image_tag = module.images.images[local.sidecar_key].tag memcached_image_repository = split(":", module.images.images[local.memcached_key].dest_full_path)[0] @@ -69,3 +74,107 @@ resource "helm_release" "loki" { }) ] } + + +# # Storage-related dynamic configurations +# set { +# name = "loki.storage.bucketNames.chunks" +# value = module.loki_s3.s3_requested_bucket_name +# } +# set { +# name = "loki.storage.bucketNames.ruler" +# value = module.loki_s3.s3_requested_bucket_name +# } +# set { +# name = "loki.storage.bucketNames.admin" +# value = module.loki_s3.s3_requested_bucket_name +# } +# set { +# name = "loki.storage.type" +# value = "s3" +# } +# set { +# name = "loki.storage.s3.s3" +# value = format("s3://%v", var.region) +# } +# set { +# name = "loki.storage.s3.region" +# value = var.region +# } +# set { +# name = "loki.storage_config.aws.s3" +# value = format("s3://%v/%v", +# var.region, +# module.loki_s3.s3_requested_bucket_name +# ) +# } + +# # Storage class configurations +# 
set { +# name = "write.persistence.storageClass" +# value = var.rwo_storage_class +# } +# set { +# name = "backend.persistence.storageClass" +# value = var.rwo_storage_class +# } +# set { +# name = "read.persistence.storageClass" +# value = var.rwo_storage_class +# } + +# # Image configurations for additional components +# set { +# name = "loki.provisioner.image.repository" +# value = split(":", module.images.images[local.provisioner_key].dest_full_path)[0] +# } +# set { +# name = "loki.provisioner.image.tag" +# value = module.images.images[local.provisioner_key].tag +# } + +# set { +# name = "gateway.image.repository" +# value = module.images.images[local.gateway_key].dest_repository +# } +# set { +# name = "gateway.image.tag" +# value = module.images.images[local.gateway_key].tag +# } + +# set { +# name = "lokiCanary.image.repository" +# value = module.images.images[local.canary_key].dest_repository +# } +# set { +# name = "lokiCanary.image.tag" +# value = module.images.images[local.canary_key].tag +# } + +# set { +# name = "sidecar.image.repository" +# value = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] +# } +# set { +# name = "sidecar.image.tag" +# value = module.images.images[local.sidecar_key].tag +# } + +# set { +# name = "memcached.image.repository" +# value = split(":", module.images.images[local.memcached_key].dest_full_path)[0] +# } +# set { +# name = "memcached.image.tag" +# value = module.images.images[local.memcached_key].tag +# } + +# set { +# name = "memcachedExporter.image.repository" +# value = split(":", module.images.images[local.exporter_key].dest_full_path)[0] +# } +# set { +# name = "memcachedExporter.image.tag" +# value = module.images.images[local.exporter_key].tag +# } +# } diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 2a89401..442086f 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -8,6 +8,7 @@ global: # Main Loki configuration loki: +<<<<<<< HEAD auth_enabled: false image: 
repository: ${loki_image_repository} @@ -22,11 +23,35 @@ loki: max_streams_per_user: 1000 query_timeout: 300s retention_period: 2160h +======= + image: + repository: ${loki_image_repository} + tag: ${loki_image_tag} + auth_enabled: false + analytics: + reporting_enabled: true + use_thanos_objstore: true + # Storage configuration + storage: + type: s3 + bucketNames: + chunks: ${s3_bucket_name} + ruler: ${s3_bucket_name} + admin: ${s3_bucket_name} + s3: + s3: s3://${region} + region: ${region} + storage_config: + aws: + s3: s3://${region}/${s3_bucket_name} + +>>>>>>> 98921ea (refactor chart values) # Provisioner settings provisioner: image: repository: ${provisioner_image_repository} tag: ${provisioner_image_tag} +<<<<<<< HEAD querier: max_concurrent: 4 replication_factor: 1 @@ -56,10 +81,29 @@ loki: backend: +======= + + schemaConfig: + configs: + - from: 2024-04-01 + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v13 + store: tsdb + +backend: + persistence: + enabled: true + storageClass: ${rwo_storage_class} + replicas: 1 +>>>>>>> 98921ea (refactor chart values) autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 +<<<<<<< HEAD persistence: storageClass: ${rwo_storage_class} replicas: 3 @@ -80,6 +124,31 @@ compactor: replicas: 0 distributer: replicas: 0 +======= + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +compactor: + working_directory: /loki/compactor + shared_store: s3 + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +>>>>>>> 98921ea (refactor chart values) gateway: image: repository: ${gateway_image_repository} @@ -87,6 +156,7 @@ gateway: autoscaling: enabled: true targetCPUUtilizationPercentage: 80 +<<<<<<< HEAD replicas: 1 resources: requests: @@ -116,10 +186,67 @@ queryFrontend: queryScheduler: replicas: 0 read: 
+======= + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 100m + memory: 128Mi + +lokiCanary: + enabled: false + image: + repository: ${canary_image_repository} + tag: ${canary_image_tag} + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 100m + memory: 128Mi + +memcached: + chunk_cache: + enabled: true + results_cache: + enabled: true + image: + repository: ${memcached_image_repository} + tag: ${memcached_image_tag} + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +memcachedExporter: + image: + repository: ${exporter_image_repository} + tag: ${exporter_image_tag} + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +read: + persistence: + enabled: true + storageClass: ${rwo_storage_class} + replicas: 1 +>>>>>>> 98921ea (refactor chart values) autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 +<<<<<<< HEAD replicas: 3 resources: requests: @@ -135,12 +262,32 @@ ruler: serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} +======= + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +ruler: + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + +>>>>>>> 98921ea (refactor chart values) sidecar: image: repository: ${sidecar_image_repository} tag: ${sidecar_image_tag} resources: requests: +<<<<<<< HEAD cpu: 10m memory: 128Mi limits: @@ -154,14 +301,52 @@ write: persistence: storageClass: ${rwo_storage_class} replicas: 3 +======= + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + + +memberlist: + service: + publishNotReadyAddresses: false + +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: ${iam_role_arn} + +test: + enabled: false + +write: + persistence: + enabled: true + storageClass: ${rwo_storage_class} + replicas: 1 +>>>>>>> 98921ea (refactor chart values) autoscaling: enabled: true minReplicas: 1 
targetCPUUtilizationPercentage: 80 resources: requests: +<<<<<<< HEAD cpu: 10m memory: 128Mi limits: cpu: 1000m memory: 512Mi +======= + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + extraVolumesMounts: + - name: data + mountPath: /loki + extraVolumes: + - name: loki +>>>>>>> 98921ea (refactor chart values) From c5d4e86f8c2a5327e7fda5bdfa80506ed96ab39f Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 14:47:44 -0500 Subject: [PATCH 02/42] cleanup --- main.tf | 104 -------------------------------------------------------- 1 file changed, 104 deletions(-) diff --git a/main.tf b/main.tf index 7753fdc..c12ea2b 100644 --- a/main.tf +++ b/main.tf @@ -74,107 +74,3 @@ resource "helm_release" "loki" { }) ] } - - -# # Storage-related dynamic configurations -# set { -# name = "loki.storage.bucketNames.chunks" -# value = module.loki_s3.s3_requested_bucket_name -# } -# set { -# name = "loki.storage.bucketNames.ruler" -# value = module.loki_s3.s3_requested_bucket_name -# } -# set { -# name = "loki.storage.bucketNames.admin" -# value = module.loki_s3.s3_requested_bucket_name -# } -# set { -# name = "loki.storage.type" -# value = "s3" -# } -# set { -# name = "loki.storage.s3.s3" -# value = format("s3://%v", var.region) -# } -# set { -# name = "loki.storage.s3.region" -# value = var.region -# } -# set { -# name = "loki.storage_config.aws.s3" -# value = format("s3://%v/%v", -# var.region, -# module.loki_s3.s3_requested_bucket_name -# ) -# } - -# # Storage class configurations -# set { -# name = "write.persistence.storageClass" -# value = var.rwo_storage_class -# } -# set { -# name = "backend.persistence.storageClass" -# value = var.rwo_storage_class -# } -# set { -# name = "read.persistence.storageClass" -# value = var.rwo_storage_class -# } - -# # Image configurations for additional components -# set { -# name = "loki.provisioner.image.repository" -# value = split(":", module.images.images[local.provisioner_key].dest_full_path)[0] -# } -# set { 
-# name = "loki.provisioner.image.tag" -# value = module.images.images[local.provisioner_key].tag -# } - -# set { -# name = "gateway.image.repository" -# value = module.images.images[local.gateway_key].dest_repository -# } -# set { -# name = "gateway.image.tag" -# value = module.images.images[local.gateway_key].tag -# } - -# set { -# name = "lokiCanary.image.repository" -# value = module.images.images[local.canary_key].dest_repository -# } -# set { -# name = "lokiCanary.image.tag" -# value = module.images.images[local.canary_key].tag -# } - -# set { -# name = "sidecar.image.repository" -# value = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] -# } -# set { -# name = "sidecar.image.tag" -# value = module.images.images[local.sidecar_key].tag -# } - -# set { -# name = "memcached.image.repository" -# value = split(":", module.images.images[local.memcached_key].dest_full_path)[0] -# } -# set { -# name = "memcached.image.tag" -# value = module.images.images[local.memcached_key].tag -# } - -# set { -# name = "memcachedExporter.image.repository" -# value = split(":", module.images.images[local.exporter_key].dest_full_path)[0] -# } -# set { -# name = "memcachedExporter.image.tag" -# value = module.images.images[local.exporter_key].tag -# } -# } From 74b16949826163a9faa6b57832b69e3978e452e5 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 7 Mar 2025 15:52:32 -0500 Subject: [PATCH 03/42] kill the canary --- main.tf | 5 ----- values/loki.yml.tpl | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/main.tf b/main.tf index c12ea2b..5be6fda 100644 --- a/main.tf +++ b/main.tf @@ -53,11 +53,6 @@ resource "helm_release" "loki" { provisioner_image_tag = module.images.images[local.provisioner_key].tag gateway_image_repository = module.images.images[local.gateway_key].dest_repository gateway_image_tag = module.images.images[local.gateway_key].tag -<<<<<<< HEAD -======= - canary_image_repository = module.images.images[local.canary_key].dest_repository - canary_image_tag = module.images.images[local.canary_key].tag ->>>>>>> 98921ea (refactor chart values) sidecar_image_repository = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] sidecar_image_tag = module.images.images[local.sidecar_key].tag memcached_image_repository = split(":", module.images.images[local.memcached_key].dest_full_path)[0] diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 442086f..9cf48dc 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -197,16 +197,6 @@ read: lokiCanary: enabled: false - image: - repository: ${canary_image_repository} - tag: ${canary_image_tag} - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 100m - memory: 128Mi memcached: chunk_cache: @@ -309,10 +299,6 @@ write: memory: 256Mi -memberlist: - service: - publishNotReadyAddresses: false - serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} From 2308742cf39b2a0ccaa2faaa77eb25852f81f564 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 7 Mar 2025 18:04:47 -0500 Subject: [PATCH 04/42] unified config --- README.md | 2 - copy_images.tf | 22 +------ main.tf | 4 -- values/loki.yml.tpl | 137 +++++++++++++++++++++++++++++--------------- variables.tf | 12 ---- 5 files changed, 92 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index efb515c..5b7ff48 100644 --- a/README.md +++ b/README.md @@ -50,11 +50,9 @@ to loki. |------|-------------|------|---------|:--------:| | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | | [enterprise\_logs\_provisioner\_tag](#input\_enterprise\_logs\_provisioner\_tag) | The version of the grafana/enterprise-logs-provisioner image to use. | `string` | `"v1.7.0"` | no | -| [exporter\_tag](#input\_exporter\_tag) | The version of prom/memcached-exporter to use for the gateway. | `string` | `"v0.14.4"` | no | | [gateway\_tag](#input\_gateway\_tag) | The version of nginxinc/nginx-unprivileged to use for the gateway. | `string` | `"1.25.2-alpine"` | no | | [loki\_chart\_version](#input\_loki\_chart\_version) | Which version of the grafana/loki helm chart to use. | `string` | `"6.10.2"` | no | | [loki\_tag](#input\_loki\_tag) | The tag of the loki image to use. | `string` | `"3.1.1"` | no | -| [memcached\_tag](#input\_memcached\_tag) | The version of memcached to use for the gateway. | `string` | `"1.6.23-alpine"` | no | | [namespace](#input\_namespace) | The namespace into which grafana will be deployed | `string` | `"loki"` | no | | [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | The ARN in the EKS cluster for the OpenID Connect identity provider. 
| `string` | n/a | yes | | [profile](#input\_profile) | AWS config profile used to upload images into ECR | `string` | `""` | no | diff --git a/copy_images.tf b/copy_images.tf index b1547be..b3e1517 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -1,9 +1,7 @@ locals { - exporter_key = format("%v#%v", "prom/memcached-exporter", var.exporter_tag) - gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) loki_key = format("%v#%v", "grafana/loki", var.loki_tag) - memcached_key = format("%v#%v", "memcached", var.memcached_tag) provisioner_key = format("%v#%v", "grafana/enterprise-logs-provisioner", var.enterprise_logs_provisioner_tag) + gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) sidecar_key = format("%v#%v", "kiwigrid/k8s-sidecar", var.sidecar_tag) image_config = [ @@ -16,24 +14,6 @@ locals { source_tag = var.loki_tag tag = var.loki_tag }, - { - enabled = true - dest_path = null - name = "memcached" - source_image = "bitnami/memcached" - source_registry = "public.ecr.aws" - source_tag = var.memcached_tag - tag = var.memcached_tag - }, - { - enabled = true - dest_path = null - name = "prom/memcached-exporter" - source_image = "prom/memcached-exporter" - source_registry = "docker.io" - source_tag = var.exporter_tag - tag = var.exporter_tag - }, { enabled = true dest_path = null diff --git a/main.tf b/main.tf index 5be6fda..77704a1 100644 --- a/main.tf +++ b/main.tf @@ -55,10 +55,6 @@ resource "helm_release" "loki" { gateway_image_tag = module.images.images[local.gateway_key].tag sidecar_image_repository = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] sidecar_image_tag = module.images.images[local.sidecar_key].tag - memcached_image_repository = split(":", module.images.images[local.memcached_key].dest_full_path)[0] - memcached_image_tag = module.images.images[local.memcached_key].tag - exporter_image_repository = split(":", module.images.images[local.exporter_key].dest_full_path)[0] - 
exporter_image_tag = module.images.images[local.exporter_key].tag # Storage configuration s3_bucket_name = module.loki_s3.s3_requested_bucket_name region = var.region diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 9cf48dc..f0750c4 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -8,6 +8,7 @@ global: # Main Loki configuration loki: +<<<<<<< HEAD <<<<<<< HEAD auth_enabled: false image: @@ -24,33 +25,49 @@ loki: query_timeout: 300s retention_period: 2160h ======= +======= + analytics: + reporting_enabled: true + auth_enabled: false +>>>>>>> 54f31cf (unified config) image: repository: ${loki_image_repository} tag: ${loki_image_tag} - auth_enabled: false - analytics: - reporting_enabled: true - use_thanos_objstore: true + ingester: + chunk_encoding: snappy + limits_config: + allow_structured_metadata: true + query_timeout: 30s + max_streams_per_user: 1000 + max_query_parallelism: 32 + max_query_length: 2160h + ingestion_rate_strategy: local + split_queries_by_interval: 15m + max_cache_freshness_per_query: 10m + reject_old_samples_max_age: 168h + reject_old_samples: true + retention_period: 2160h # Storage configuration - storage: - type: s3 - bucketNames: - chunks: ${s3_bucket_name} - ruler: ${s3_bucket_name} - admin: ${s3_bucket_name} + use_thanos_objstore: true + object_store: s3: - s3: s3://${region} + bucketName: ${s3_bucket_name} + endpoint: s3.${region}.amazonaws.com region: ${region} +<<<<<<< HEAD storage_config: aws: s3: s3://${region}/${s3_bucket_name} >>>>>>> 98921ea (refactor chart values) +======= +>>>>>>> 54f31cf (unified config) # Provisioner settings provisioner: image: repository: ${provisioner_image_repository} tag: ${provisioner_image_tag} +<<<<<<< HEAD <<<<<<< HEAD querier: max_concurrent: 4 @@ -83,6 +100,9 @@ loki: backend: ======= +======= + replication_factor: 1 +>>>>>>> 54f31cf (unified config) schemaConfig: configs: - from: 2024-04-01 @@ -92,17 +112,23 @@ backend: object_store: s3 schema: v13 store: tsdb + tracing: + 
enabled: true backend: +<<<<<<< HEAD persistence: enabled: true storageClass: ${rwo_storage_class} replicas: 1 >>>>>>> 98921ea (refactor chart values) +======= +>>>>>>> 54f31cf (unified config) autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 +<<<<<<< HEAD <<<<<<< HEAD persistence: storageClass: ${rwo_storage_class} @@ -125,6 +151,12 @@ compactor: distributer: replicas: 0 ======= +======= + persistence: + enabled: true + storageClass: ${rwo_storage_class} + replicas: 1 +>>>>>>> 54f31cf (unified config) resources: requests: cpu: 1m @@ -132,8 +164,14 @@ distributer: limits: cpu: 200m memory: 256Mi - +bloomCompactor: + replicas: 0 +bloomGateway: + replicas: 0 +chunksCache: + enabled: false compactor: + replicas: 0 working_directory: /loki/compactor shared_store: s3 compaction_interval: 10m @@ -147,8 +185,13 @@ compactor: limits: cpu: 200m memory: 256Mi +<<<<<<< HEAD >>>>>>> 98921ea (refactor chart values) +======= +distributer: + replicas: 0 +>>>>>>> 54f31cf (unified config) gateway: image: repository: ${gateway_image_repository} @@ -194,48 +237,41 @@ read: limits: cpu: 100m memory: 128Mi - +indexGateway: + replicas: 0 +ingester: + replicas: 0 lokiCanary: enabled: false - memcached: + enabled: false chunk_cache: - enabled: true + enabled: false results_cache: - enabled: true - image: - repository: ${memcached_image_repository} - tag: ${memcached_image_tag} - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi - + enabled: false memcachedExporter: - image: - repository: ${exporter_image_repository} - tag: ${exporter_image_tag} - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi - + enabled: false +querier: + replicas: 0 + max_concurrent: 4 +queryFrontend: + replicas: 0 +queryScheduler: + replicas: 0 read: +<<<<<<< HEAD persistence: enabled: true storageClass: ${rwo_storage_class} replicas: 1 >>>>>>> 98921ea (refactor chart values) +======= +>>>>>>> 54f31cf (unified config) 
autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 +<<<<<<< HEAD <<<<<<< HEAD replicas: 3 resources: @@ -253,6 +289,12 @@ serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} ======= +======= + persistence: + enabled: true + storageClass: ${rwo_storage_class} + replicas: 1 +>>>>>>> 54f31cf (unified config) resources: requests: cpu: 1m @@ -260,8 +302,10 @@ serviceAccount: limits: cpu: 200m memory: 256Mi - +resultsCache: + enabled: false ruler: +<<<<<<< HEAD resources: requests: cpu: 1m @@ -271,6 +315,12 @@ ruler: memory: 256Mi >>>>>>> 98921ea (refactor chart values) +======= + enabled: false +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: ${iam_role_arn} +>>>>>>> 54f31cf (unified config) sidecar: image: repository: ${sidecar_image_repository} @@ -297,15 +347,10 @@ write: limits: cpu: 200m memory: 256Mi - - -serviceAccount: - annotations: - eks.amazonaws.com/role-arn: ${iam_role_arn} - +singleBinary: + replicas: 0 test: enabled: false - write: persistence: enabled: true diff --git a/variables.tf b/variables.tf index 35009fe..ad048a5 100644 --- a/variables.tf +++ b/variables.tf @@ -65,18 +65,6 @@ variable "gateway_tag" { default = "1.25.2-alpine" } -variable "memcached_tag" { - description = "The version of memcached to use for the gateway." - type = string - default = "1.6.23-alpine" -} - -variable "exporter_tag" { - description = "The version of prom/memcached-exporter to use for the gateway." - type = string - default = "v0.14.4" -} - variable "sidecar_tag" { description = "The version of kiwigrid/k8s-sidecar to use for the gateway." type = string From 20f12efde5958668de70da97657feeb00d967b25 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 7 Mar 2025 18:36:42 -0500 Subject: [PATCH 05/42] fix values --- values/loki.yml.tpl | 233 +++----------------------------------------- 1 file changed, 16 insertions(+), 217 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index f0750c4..f0038ef 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -8,8 +8,6 @@ global: # Main Loki configuration loki: -<<<<<<< HEAD -<<<<<<< HEAD auth_enabled: false image: repository: ${loki_image_repository} @@ -24,63 +22,8 @@ loki: max_streams_per_user: 1000 query_timeout: 300s retention_period: 2160h -======= -======= analytics: reporting_enabled: true - auth_enabled: false ->>>>>>> 54f31cf (unified config) - image: - repository: ${loki_image_repository} - tag: ${loki_image_tag} - ingester: - chunk_encoding: snappy - limits_config: - allow_structured_metadata: true - query_timeout: 30s - max_streams_per_user: 1000 - max_query_parallelism: 32 - max_query_length: 2160h - ingestion_rate_strategy: local - split_queries_by_interval: 15m - max_cache_freshness_per_query: 10m - reject_old_samples_max_age: 168h - reject_old_samples: true - retention_period: 2160h - # Storage configuration - use_thanos_objstore: true - object_store: - s3: - bucketName: ${s3_bucket_name} - endpoint: s3.${region}.amazonaws.com - region: ${region} -<<<<<<< HEAD - storage_config: - aws: - s3: s3://${region}/${s3_bucket_name} - ->>>>>>> 98921ea (refactor chart values) -======= ->>>>>>> 54f31cf (unified config) - # Provisioner settings - provisioner: - image: - repository: ${provisioner_image_repository} - tag: ${provisioner_image_tag} -<<<<<<< HEAD -<<<<<<< HEAD - querier: - max_concurrent: 4 - replication_factor: 1 - schemaConfig: - configs: - - from: "2024-04-01" - store: tsdb - object_store: s3 - schema: v13 - index: - prefix: index_ - period: 24h # Storage configuration storage: type: s3 @@ -96,43 +39,16 @@ loki: tracing: enabled: true - -backend: -======= - -======= - replication_factor: 1 ->>>>>>> 54f31cf 
(unified config) - schemaConfig: - configs: - - from: 2024-04-01 - index: - period: 24h - prefix: index_ - object_store: s3 - schema: v13 - store: tsdb - tracing: - enabled: true - backend: -<<<<<<< HEAD - persistence: - enabled: true - storageClass: ${rwo_storage_class} - replicas: 1 ->>>>>>> 98921ea (refactor chart values) -======= ->>>>>>> 54f31cf (unified config) autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 -<<<<<<< HEAD -<<<<<<< HEAD persistence: + enabled: true storageClass: ${rwo_storage_class} - replicas: 3 + replicas: 1 + replication_factor: 1 resources: requests: cpu: 10m @@ -140,6 +56,15 @@ backend: limits: cpu: 1000m memory: 512Mi + schemaConfig: + configs: + - from: 2024-04-01 + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v13 + store: tsdb bloomCompactor: replicas: 0 bloomGateway: @@ -150,48 +75,6 @@ compactor: replicas: 0 distributer: replicas: 0 -======= -======= - persistence: - enabled: true - storageClass: ${rwo_storage_class} - replicas: 1 ->>>>>>> 54f31cf (unified config) - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi -bloomCompactor: - replicas: 0 -bloomGateway: - replicas: 0 -chunksCache: - enabled: false -compactor: - replicas: 0 - working_directory: /loki/compactor - shared_store: s3 - compaction_interval: 10m - retention_enabled: true - retention_delete_delay: 2h - retention_delete_worker_count: 150 - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi -<<<<<<< HEAD - ->>>>>>> 98921ea (refactor chart values) -======= -distributer: - replicas: 0 ->>>>>>> 54f31cf (unified config) gateway: image: repository: ${gateway_image_repository} @@ -199,7 +82,6 @@ gateway: autoscaling: enabled: true targetCPUUtilizationPercentage: 80 -<<<<<<< HEAD replicas: 1 resources: requests: @@ -229,58 +111,21 @@ queryFrontend: queryScheduler: replicas: 0 read: -======= + replicas: 1 resources: requests: - cpu: 1m - memory: 1Mi + cpu: 10m 
+ memory: 128Mi limits: cpu: 100m memory: 128Mi -indexGateway: - replicas: 0 -ingester: - replicas: 0 -lokiCanary: - enabled: false -memcached: - enabled: false - chunk_cache: - enabled: false - results_cache: - enabled: false -memcachedExporter: - enabled: false -querier: - replicas: 0 - max_concurrent: 4 -queryFrontend: - replicas: 0 -queryScheduler: - replicas: 0 -read: -<<<<<<< HEAD persistence: enabled: true storageClass: ${rwo_storage_class} - replicas: 1 ->>>>>>> 98921ea (refactor chart values) -======= ->>>>>>> 54f31cf (unified config) autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 -<<<<<<< HEAD -<<<<<<< HEAD - replicas: 3 - resources: - requests: - cpu: 10m - memory: 128Mi - limits: - cpu: 500m - memory: 512Mi resultsCache: enabled: false ruler: @@ -288,13 +133,10 @@ ruler: serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} -======= -======= persistence: enabled: true storageClass: ${rwo_storage_class} replicas: 1 ->>>>>>> 54f31cf (unified config) resources: requests: cpu: 1m @@ -302,32 +144,12 @@ serviceAccount: limits: cpu: 200m memory: 256Mi -resultsCache: - enabled: false -ruler: -<<<<<<< HEAD - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi - ->>>>>>> 98921ea (refactor chart values) -======= - enabled: false -serviceAccount: - annotations: - eks.amazonaws.com/role-arn: ${iam_role_arn} ->>>>>>> 54f31cf (unified config) sidecar: image: repository: ${sidecar_image_repository} tag: ${sidecar_image_tag} resources: requests: -<<<<<<< HEAD cpu: 10m memory: 128Mi limits: @@ -337,41 +159,19 @@ singleBinary: replicas: 0 test: enabled: false -write: - persistence: - storageClass: ${rwo_storage_class} - replicas: 3 -======= - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi -singleBinary: - replicas: 0 -test: - enabled: false write: persistence: enabled: true storageClass: ${rwo_storage_class} replicas: 1 ->>>>>>> 98921ea (refactor chart values) autoscaling: 
enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 resources: requests: -<<<<<<< HEAD - cpu: 10m + cpu: 100m memory: 128Mi - limits: - cpu: 1000m - memory: 512Mi -======= - cpu: 1m - memory: 1Mi limits: cpu: 200m memory: 256Mi @@ -380,4 +180,3 @@ write: mountPath: /loki extraVolumes: - name: loki ->>>>>>> 98921ea (refactor chart values) From 24d79cc516b21c55d818dd88f72f90a7b1710802 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 18:46:33 -0500 Subject: [PATCH 06/42] template error --- values/loki.yml.tpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index f0038ef..df77c4c 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -82,7 +82,7 @@ gateway: autoscaling: enabled: true targetCPUUtilizationPercentage: 80 - replicas: 1 + replicas: 0 resources: requests: cpu: 10m @@ -136,7 +136,7 @@ serviceAccount: persistence: enabled: true storageClass: ${rwo_storage_class} - replicas: 1 + replicas: 2 resources: requests: cpu: 1m @@ -163,7 +163,7 @@ write: persistence: enabled: true storageClass: ${rwo_storage_class} - replicas: 1 + replicas: 2 autoscaling: enabled: true minReplicas: 1 From c2972ba5457ce32992a73336a0d75ddff5912d7a Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 7 Mar 2025 18:54:39 -0500 Subject: [PATCH 07/42] remove extras --- values/loki.yml.tpl | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index df77c4c..62bc403 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -69,8 +69,6 @@ bloomCompactor: replicas: 0 bloomGateway: replicas: 0 -chunksCache: - enabled: false compactor: replicas: 0 distributer: @@ -82,7 +80,6 @@ gateway: autoscaling: enabled: true targetCPUUtilizationPercentage: 80 - replicas: 0 resources: requests: cpu: 10m @@ -90,20 +87,10 @@ gateway: limits: cpu: 100m memory: 128Mi -indexGateway: - replicas: 0 ingester: replicas: 0 lokiCanary: enabled: false -memcached: - enabled: false - chunk_cache: - enabled: false - results_cache: - enabled: false -memcachedExporter: - enabled: false querier: replicas: 0 queryFrontend: From 02e363dfbe6d3d1d2d230981e154bcfad59a9d44 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 19:17:01 -0500 Subject: [PATCH 08/42] add some back --- values/loki.yml.tpl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 62bc403..037324d 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -69,8 +69,23 @@ bloomCompactor: replicas: 0 bloomGateway: replicas: 0 +chunksCache: + enabled: false compactor: + compaction_interval: 10m replicas: 0 + resources: + requests: + cpu: 1m + memory: 1Mi + limits: + cpu: 200m + memory: 256Mi + retention_delete_delay: 2h + retention_delete_worker_count: 150 + retention_enabled: true + shared_store: s3 + working_directory: /loki/compactor distributer: replicas: 0 gateway: @@ -80,6 +95,7 @@ gateway: autoscaling: enabled: true targetCPUUtilizationPercentage: 80 + replicas: 1 resources: requests: cpu: 10m @@ -87,10 +103,20 @@ gateway: limits: cpu: 100m memory: 128Mi +indexGateway: + replicas: 0 ingester: replicas: 0 lokiCanary: enabled: false +memcached: + enabled: false + 
chunk_cache: + enabled: false + results_cache: + enabled: false +memcachedExporter: + enabled: false querier: replicas: 0 queryFrontend: From fa8e32b699648142f15fdd3c0849a6cdeb846c8d Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 19:27:01 -0500 Subject: [PATCH 09/42] add back required images --- README.md | 2 ++ copy_images.tf | 22 +++++++++++++++++++++- main.tf | 4 ++++ variables.tf | 12 ++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5b7ff48..efb515c 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,11 @@ to loki. |------|-------------|------|---------|:--------:| | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | | [enterprise\_logs\_provisioner\_tag](#input\_enterprise\_logs\_provisioner\_tag) | The version of the grafana/enterprise-logs-provisioner image to use. | `string` | `"v1.7.0"` | no | +| [exporter\_tag](#input\_exporter\_tag) | The version of prom/memcached-exporter to use for the gateway. | `string` | `"v0.14.4"` | no | | [gateway\_tag](#input\_gateway\_tag) | The version of nginxinc/nginx-unprivileged to use for the gateway. | `string` | `"1.25.2-alpine"` | no | | [loki\_chart\_version](#input\_loki\_chart\_version) | Which version of the grafana/loki helm chart to use. | `string` | `"6.10.2"` | no | | [loki\_tag](#input\_loki\_tag) | The tag of the loki image to use. | `string` | `"3.1.1"` | no | +| [memcached\_tag](#input\_memcached\_tag) | The version of memcached to use for the gateway. | `string` | `"1.6.23-alpine"` | no | | [namespace](#input\_namespace) | The namespace into which grafana will be deployed | `string` | `"loki"` | no | | [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | The ARN in the EKS cluster for the OpenID Connect identity provider. 
| `string` | n/a | yes | | [profile](#input\_profile) | AWS config profile used to upload images into ECR | `string` | `""` | no | diff --git a/copy_images.tf b/copy_images.tf index b3e1517..b1547be 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -1,7 +1,9 @@ locals { + exporter_key = format("%v#%v", "prom/memcached-exporter", var.exporter_tag) + gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) loki_key = format("%v#%v", "grafana/loki", var.loki_tag) + memcached_key = format("%v#%v", "memcached", var.memcached_tag) provisioner_key = format("%v#%v", "grafana/enterprise-logs-provisioner", var.enterprise_logs_provisioner_tag) - gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) sidecar_key = format("%v#%v", "kiwigrid/k8s-sidecar", var.sidecar_tag) image_config = [ @@ -14,6 +16,24 @@ locals { source_tag = var.loki_tag tag = var.loki_tag }, + { + enabled = true + dest_path = null + name = "memcached" + source_image = "bitnami/memcached" + source_registry = "public.ecr.aws" + source_tag = var.memcached_tag + tag = var.memcached_tag + }, + { + enabled = true + dest_path = null + name = "prom/memcached-exporter" + source_image = "prom/memcached-exporter" + source_registry = "docker.io" + source_tag = var.exporter_tag + tag = var.exporter_tag + }, { enabled = true dest_path = null diff --git a/main.tf b/main.tf index 77704a1..5be6fda 100644 --- a/main.tf +++ b/main.tf @@ -55,6 +55,10 @@ resource "helm_release" "loki" { gateway_image_tag = module.images.images[local.gateway_key].tag sidecar_image_repository = split(":", module.images.images[local.sidecar_key].dest_full_path)[0] sidecar_image_tag = module.images.images[local.sidecar_key].tag + memcached_image_repository = split(":", module.images.images[local.memcached_key].dest_full_path)[0] + memcached_image_tag = module.images.images[local.memcached_key].tag + exporter_image_repository = split(":", module.images.images[local.exporter_key].dest_full_path)[0] + 
exporter_image_tag = module.images.images[local.exporter_key].tag # Storage configuration s3_bucket_name = module.loki_s3.s3_requested_bucket_name region = var.region diff --git a/variables.tf b/variables.tf index ad048a5..35009fe 100644 --- a/variables.tf +++ b/variables.tf @@ -65,6 +65,18 @@ variable "gateway_tag" { default = "1.25.2-alpine" } +variable "memcached_tag" { + description = "The version of memcached to use for the gateway." + type = string + default = "1.6.23-alpine" +} + +variable "exporter_tag" { + description = "The version of prom/memcached-exporter to use for the gateway." + type = string + default = "v0.14.4" +} + variable "sidecar_tag" { description = "The version of kiwigrid/k8s-sidecar to use for the gateway." type = string From 50c67d092e4993b4e4cc2fb19db0d7b8e4c318ad Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 20:30:09 -0500 Subject: [PATCH 10/42] add path_prefix --- values/loki.yml.tpl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 037324d..13061bd 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -9,6 +9,8 @@ global: # Main Loki configuration loki: auth_enabled: false + commonConfig: + path_prefix: /loki image: repository: ${loki_image_repository} tag: ${loki_image_tag} @@ -31,11 +33,40 @@ loki: admin: ${s3_bucket_name} chunks: ${s3_bucket_name} ruler: ${s3_bucket_name} +<<<<<<< HEAD s3: s3: s3://${region} bucketName: ${s3_bucket_name} region: ${region} s3ForcePathStyle: false +======= + admin: ${s3_bucket_name} + object_store: + s3: + bucketName: ${s3_bucket_name} + endpoint: s3.${region}.amazonaws.com + region: ${region} + use_thanos_objstore: true + pattern_ingester: + enabled: false + # Provisioner settings + provisioner: + image: + repository: ${provisioner_image_repository} + tag: ${provisioner_image_tag} + querier: + max_concurrent: 2 + replication_factor: 1 + schemaConfig: + configs: + - from: 2024-04-01 + 
index: + period: 24h + prefix: index_ + object_store: s3 + schema: v13 + store: tsdb +>>>>>>> 895316f (add path_prefix) tracing: enabled: true From f2938bedea814aa0dc2c57ffefaaba99e9762a58 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 7 Mar 2025 20:35:37 -0500 Subject: [PATCH 11/42] give more mem to backend and write --- values/loki.yml.tpl | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 13061bd..37594c1 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -33,40 +33,11 @@ loki: admin: ${s3_bucket_name} chunks: ${s3_bucket_name} ruler: ${s3_bucket_name} -<<<<<<< HEAD s3: s3: s3://${region} bucketName: ${s3_bucket_name} region: ${region} s3ForcePathStyle: false -======= - admin: ${s3_bucket_name} - object_store: - s3: - bucketName: ${s3_bucket_name} - endpoint: s3.${region}.amazonaws.com - region: ${region} - use_thanos_objstore: true - pattern_ingester: - enabled: false - # Provisioner settings - provisioner: - image: - repository: ${provisioner_image_repository} - tag: ${provisioner_image_tag} - querier: - max_concurrent: 2 - replication_factor: 1 - schemaConfig: - configs: - - from: 2024-04-01 - index: - period: 24h - prefix: index_ - object_store: s3 - schema: v13 - store: tsdb ->>>>>>> 895316f (add path_prefix) tracing: enabled: true @@ -218,7 +189,7 @@ write: memory: 128Mi limits: cpu: 200m - memory: 256Mi + memory: 512Mi extraVolumesMounts: - name: data mountPath: /loki From 60924414747a5dbefc88aab1daf8ffd0593b0b1c Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 10 Mar 2025 14:49:00 -0400 Subject: [PATCH 12/42] update requests resources --- values/loki.yml.tpl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 37594c1..0f33551 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -76,13 +76,6 @@ chunksCache: compactor: compaction_interval: 10m replicas: 0 - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi retention_delete_delay: 2h retention_delete_worker_count: 150 retention_enabled: true @@ -188,7 +181,7 @@ write: cpu: 100m memory: 128Mi limits: - cpu: 200m + cpu: 500m memory: 512Mi extraVolumesMounts: - name: data From 837b5b83ecce036ca02ac78409c6b14d6a9bc81b Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Mon, 10 Mar 2025 15:16:52 -0400 Subject: [PATCH 13/42] more values --- values/loki.yml.tpl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 0f33551..570e32d 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -9,6 +9,9 @@ global: # Main Loki configuration loki: auth_enabled: false + compactor: + retention_enabled: true + delete_request_store: true commonConfig: path_prefix: /loki image: From 243268c2d0261f231c1d770c2d7afbb979660fd0 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 10 Mar 2025 18:36:21 -0400 Subject: [PATCH 14/42] more testing --- values/loki.yml.tpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 570e32d..3f83c9f 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -14,6 +14,7 @@ loki: delete_request_store: true commonConfig: path_prefix: /loki + replication_factor: 1 image: repository: ${loki_image_repository} tag: ${loki_image_tag} @@ -184,7 +185,7 @@ write: cpu: 100m memory: 128Mi limits: - cpu: 500m + cpu: 1000m memory: 512Mi extraVolumesMounts: - name: data From db86c4a35f3981d25a2a793dc42e2b315fa9ab4e Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Mon, 10 Mar 2025 18:42:05 -0400 Subject: [PATCH 15/42] delete store fix --- values/loki.yml.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 3f83c9f..86fe848 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -11,7 +11,7 @@ loki: auth_enabled: false compactor: retention_enabled: true - delete_request_store: true + delete_request_store: s3 commonConfig: path_prefix: /loki replication_factor: 1 From cb477d4eeead1d9dc19898d1abc9ac8fc44b1d1e Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 10 Mar 2025 19:22:11 -0400 Subject: [PATCH 16/42] fix volumes --- values/loki.yml.tpl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 86fe848..3369e6e 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -12,8 +12,9 @@ loki: compactor: retention_enabled: true delete_request_store: s3 + working_directory: /data/loki/compactor commonConfig: - path_prefix: /loki + path_prefix: /data/loki replication_factor: 1 image: repository: ${loki_image_repository} @@ -45,6 +46,7 @@ loki: tracing: enabled: true + backend: autoscaling: enabled: true @@ -84,7 +86,11 @@ compactor: retention_delete_worker_count: 150 retention_enabled: true shared_store: s3 - working_directory: /loki/compactor + working_directory: /data/loki/compactor + persistence: + enabled: true + size: 10Gi + storageClass: ${rwo_storage_class} distributer: replicas: 0 gateway: @@ -189,6 +195,7 @@ write: memory: 512Mi extraVolumesMounts: - name: data - mountPath: /loki + mountPath: /data extraVolumes: - - name: loki + - name: data + emptyDir: {} From 5f1a044b58f0ae6d804bf661893348ff0f172128 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 10 Mar 2025 20:28:15 -0400 Subject: [PATCH 17/42] guess --- values/loki.yml.tpl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 3369e6e..6a2b1e1 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -53,7 +53,6 @@ backend: minReplicas: 1 targetCPUUtilizationPercentage: 80 persistence: - enabled: true storageClass: ${rwo_storage_class} replicas: 1 replication_factor: 1 @@ -152,7 +151,6 @@ serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} persistence: - enabled: true storageClass: ${rwo_storage_class} replicas: 2 resources: @@ -179,7 +177,6 @@ test: enabled: false write: persistence: - enabled: true storageClass: ${rwo_storage_class} replicas: 2 autoscaling: @@ -197,5 +194,5 @@ write: - name: data mountPath: /data extraVolumes: - - name: data + - name: loki emptyDir: {} From 8a527e2dea4f0c5cc5b88c69c06f935b153756bb Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Mon, 10 Mar 2025 20:52:44 -0400 Subject: [PATCH 18/42] less is more --- values/loki.yml.tpl | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 6a2b1e1..7d9623d 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -9,13 +9,6 @@ global: # Main Loki configuration loki: auth_enabled: false - compactor: - retention_enabled: true - delete_request_store: s3 - working_directory: /data/loki/compactor - commonConfig: - path_prefix: /data/loki - replication_factor: 1 image: repository: ${loki_image_repository} tag: ${loki_image_tag} @@ -79,17 +72,7 @@ bloomGateway: chunksCache: enabled: false compactor: - compaction_interval: 10m replicas: 0 - retention_delete_delay: 2h - retention_delete_worker_count: 150 - retention_enabled: true - shared_store: s3 - working_directory: /data/loki/compactor - persistence: - enabled: true - size: 10Gi - storageClass: ${rwo_storage_class} distributer: replicas: 0 
gateway: @@ -178,7 +161,7 @@ test: write: persistence: storageClass: ${rwo_storage_class} - replicas: 2 + replicas: 3 autoscaling: enabled: true minReplicas: 1 @@ -190,9 +173,3 @@ write: limits: cpu: 1000m memory: 512Mi - extraVolumesMounts: - - name: data - mountPath: /data - extraVolumes: - - name: loki - emptyDir: {} From 2db0632a6b1fffe3e1c3fb17f23f6f6324c5457e Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 21 Mar 2025 20:41:04 -0400 Subject: [PATCH 19/42] update names and resources --- main.tf | 2 +- prefixes.tf | 34 ++++++++++++++++++++++++++++++++++ values/loki.yml.tpl | 19 ++++++++++++++++--- 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 prefixes.tf diff --git a/main.tf b/main.tf index 5be6fda..e1df774 100644 --- a/main.tf +++ b/main.tf @@ -8,7 +8,7 @@ module "loki_irsa_role" { # tflint-ignore: terraform_module_pinned_source source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" - role_name = "r-${var.cluster_name}-loki" + role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "-loki") attach_s3_bucket_owner_policy = true attach_encrypted_object_manager_policy = true diff --git a/prefixes.tf b/prefixes.tf new file mode 100644 index 0000000..4e2709e --- /dev/null +++ b/prefixes.tf @@ -0,0 +1,34 @@ +locals { + prefixes = { + "efs" = "v-efs-" + "s3" = "v-s3-" + "ebs" = "v-ebs-" + "kms" = "k-kms-" + "role" = "r-" + "policy" = "p-" + "group" = "g-" + "security-group" = "" # "sg-" + # VPC + "vpc" = "" + "dhcp-options" = "" + "vpc-peer" = "vpcp-" + "route-table" = "route-" + "subnet" = "" + "vpc-endpoint" = "vpce-" + "elastic-ip" = "eip-" + "nat-gateway" = "nat-" + "internet-gateway" = "igw-" + "network-acl" = "nacl-" + "customer-gateway" = "cgw-" + "vpn-gateway" = "vpcg-" + "vpn-connection" = "vpn_" + "log-group" = "lg-" + "log-stream" = "lgs-" + # EKS + "eks" = "eks-" + "eks-user" = "s-eks-" + "eks-role" = "r-eks-" + "eks-policy" = "p-eks-" + 
"eks-security-group" = "eks-sg-" # "sg-eks-" + } +} diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 7d9623d..62a47ad 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -45,13 +45,26 @@ backend: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 + behavior: + scaleUp: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 60 + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 180 persistence: storageClass: ${rwo_storage_class} replicas: 1 replication_factor: 1 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 1000m @@ -85,7 +98,7 @@ gateway: replicas: 1 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 100m @@ -114,7 +127,7 @@ read: replicas: 1 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 100m From 771c42f1ca517c6f1274186bee643e6235d95669 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 21 Mar 2025 21:00:29 -0400 Subject: [PATCH 20/42] update bucket naming --- prefixes.tf | 1 + s3.tf | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/prefixes.tf b/prefixes.tf index 4e2709e..e9a6127 100644 --- a/prefixes.tf +++ b/prefixes.tf @@ -26,6 +26,7 @@ locals { "log-stream" = "lgs-" # EKS "eks" = "eks-" + "eks-s3" = "v-s3-eks-" "eks-user" = "s-eks-" "eks-role" = "r-eks-" "eks-policy" = "p-eks-" diff --git a/s3.tf b/s3.tf index dd5a704..c3442c9 100644 --- a/s3.tf +++ b/s3.tf @@ -1,8 +1,9 @@ data "aws_caller_identity" "current" {} -## create bucket locals { - account_id = data.aws_caller_identity.current.account_id + account_id = data.aws_caller_identity.current.account_id + bucket_name = format("%v%v-loki-%v-%v", local.prefixes["eks-s3"], var.cluster_name, local.account_id, local.region_short) + region_short = join("", [for c in split("-", var.region) : substr(c, 0, 1)]) } data "aws_s3_bucket" "s3_server_access_logs" { @@ -12,8 +13,15 @@ data 
"aws_s3_bucket" "s3_server_access_logs" { module "loki_s3" { source = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade" - bucket_name = format("%v-loki", var.cluster_name) + bucket_name = local.bucket_name access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id tags = var.tags + + lifecycle { + precondition { + condition = length(local.bucket_name) >= 3 && length(local.bucket_name) <= 63 + error_message = "S3 bucket name must be between 3 and 63 characters. Current name '${local.bucket_name}' is ${length(local.bucket_name)} characters." + } + } } From a63be2ea6be1d520431b1a2432df27e7234f4a55 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 21 Mar 2025 21:06:51 -0400 Subject: [PATCH 21/42] nope --- README.md | 2 ++ s3.tf | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index efb515c..ea3cc8b 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ to loki. |------|---------| | [aws](#provider\_aws) | 5.89.0 | | [helm](#provider\_helm) | 2.17.0 | +| [terraform](#provider\_terraform) | n/a | ## Modules @@ -41,6 +42,7 @@ to loki. 
| Name | Type | |------|------| | [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | diff --git a/s3.tf b/s3.tf index c3442c9..5a83d5a 100644 --- a/s3.tf +++ b/s3.tf @@ -10,13 +10,9 @@ data "aws_s3_bucket" "s3_server_access_logs" { bucket = format("inf-logs-%v-%v", local.account_id, var.region) } -module "loki_s3" { - source = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade" - - bucket_name = local.bucket_name - access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id - - tags = var.tags +# Validate S3 bucket name length +resource "terraform_data" "bucket_name_validator" { + input = local.bucket_name lifecycle { precondition { @@ -25,3 +21,13 @@ module "loki_s3" { } } } + +module "loki_s3" { + depends_on = [terraform_data.bucket_name_validator] + source = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade" + + bucket_name = local.bucket_name + access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id + + tags = var.tags +} From cedb4d70669ac205a92da29b4240de25838ee6b9 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 21 Mar 2025 21:43:22 -0400 Subject: [PATCH 22/42] force_destroy bucket --- README.md | 61 ++++++++++++++++++++++++++++++++++++++++++++++--------- s3.tf | 1 + 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ea3cc8b..7e249a7 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,56 @@ # tfmod-loki -Installs the loki as the log aggregation sink, and promtail to forward the logs -to loki. - -* Requires additional Node HD space - 40GB is not enough. - -# tfmod-loki - - - - +This module installs Grafana Loki as a log aggregation and storage solution in an EKS cluster, with the following components: + +* Deploys Loki using the official Grafana Helm chart +* Creates an S3 bucket for persistent log storage +* Configures IAM roles for service accounts (IRSA) to securely access S3 +* Sets up internal gateway for log queries and ingestion + +## Architecture + +The module sets up: +- A Loki deployment via Helm with configurable image versions +- An S3 bucket with KMS encryption for log persistence +- An IRSA role for Loki to access the S3 bucket securely +- Internal gateway service (`loki-gateway.{namespace}.svc.cluster.local`) for accessing Loki within the cluster + +## Prerequisites + +* An existing EKS cluster with OIDC provider configured +* Sufficient node storage - nodes should have more than 40GB disk space +* AWS S3 access for log storage +* Appropriate Kubernetes storage classes configured + +## Usage + +```hcl +module "loki" { + source = "git@github.e.it.census.gov:path/to/tfmod-loki.git" + + cluster_name = "my-eks-cluster" + oidc_provider_arn = module.eks.oidc_provider_arn + region = "us-east-1" + namespace = "monitoring" + + # Optional - override default image versions + loki_tag = "3.1.1" + gateway_tag = "1.25.2-alpine" + + tags = { + Environment = "production" + Team = "platform" + } +} + +# Access Loki internal endpoint +resource "kubernetes_manifest" "example_grafana_datasource" { + manifest = { + # 
Configure Grafana datasource to point to: + # ${module.loki.gateway_internal_endpoint} + } +} +``` ## Requirements diff --git a/s3.tf b/s3.tf index 5a83d5a..7092b20 100644 --- a/s3.tf +++ b/s3.tf @@ -28,6 +28,7 @@ module "loki_s3" { bucket_name = local.bucket_name access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id + force_destroy = true tags = var.tags } From dc7b00a7a420c732aaa9011d916ca216f94ee829 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 25 Mar 2025 12:34:01 -0400 Subject: [PATCH 23/42] remove extra hypen --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index e1df774..230db1e 100644 --- a/main.tf +++ b/main.tf @@ -8,7 +8,7 @@ module "loki_irsa_role" { # tflint-ignore: terraform_module_pinned_source source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" - role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "-loki") + role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "loki") attach_s3_bucket_owner_policy = true attach_encrypted_object_manager_policy = true From b5b4cc9395eb4945a8d2e82aac9f223ee1f3c7d7 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Tue, 1 Apr 2025 11:51:12 -0400 Subject: [PATCH 24/42] add module release process --- .github/workflows/terraform-release.yaml | 73 ++++++++++++++++ .github/workflows/terraform-validate.yaml | 42 +++++++++ .github/workflows/terragrunt-cicd.yml | 101 ---------------------- 3 files changed, 115 insertions(+), 101 deletions(-) create mode 100644 .github/workflows/terraform-release.yaml create mode 100644 .github/workflows/terraform-validate.yaml delete mode 100644 .github/workflows/terragrunt-cicd.yml diff --git a/.github/workflows/terraform-release.yaml b/.github/workflows/terraform-release.yaml new file mode 100644 index 0000000..90910bc --- /dev/null +++ b/.github/workflows/terraform-release.yaml @@ -0,0 +1,73 @@ +name: Terraform CI/CD +on: + workflow_dispatch: + pull_request: + types: [closed] + branches: + - main +jobs: + terraform-ci-cd: + runs-on: 229685449397 + permissions: + contents: write + + steps: + - name: Checkout code + uses: CSVD/gh-actions-checkout@v4 + + - name: Setup Terraform + uses: CSVD/gh-actions-setup-terraform@v3 + with: + terraform_version: "1.9.1" + + - name: Setup GITHUB Credentials + id: github_credentials + uses: CSVD/gh-auth@main + with: + github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }} + github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }} + github_app_id: ${{ vars.GH_APP_ID }} + + + - name: Debug Authentication + run: | + # Print the GitHub server URL + echo "GitHub Server URL: ${{ github.server_url }}" + + # Extract the host from the URL + HOST="${{ github.server_url }}" + HOST="${HOST#*//}" + HOST="${HOST%%/*}" + echo "GitHub Host: $HOST" + + # Check if token exists + if [[ -n "${{ steps.github_credentials.outputs.github_token }}" ]]; then + echo "Token generated successfully" + # Test the token with a simple GitHub API call (without exposing the token) + STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${{ steps.github_credentials.outputs.github_token }}" "${{ github.server_url 
}}/api/v3/user") + echo "API Test Status Code: $STATUS" + else + echo "No token was generated!" + fi + + - name: Setup GitHub CLI + run: | + # Force manual authentication since setup-git might not work with GitHub Enterprise + echo "${{ steps.github_credentials.outputs.github_token }}" > /tmp/token.txt + gh auth login --with-token --hostname "github.e.it.census.gov" < /tmp/token.txt + rm /tmp/token.txt + + # Test GitHub CLI auth status + gh auth status || echo "GitHub CLI authentication failed" + + - name: AWS Auth + id: aws_auth + uses: CSVD/aws-auth@main + with: + ecs: true + + - name: Run Terraform Module Release Action + uses: CSVD/terraform-module-release@main + with: + github-token: ${{ steps.github_credentials.outputs.github_token }} + working-directory: '.' diff --git a/.github/workflows/terraform-validate.yaml b/.github/workflows/terraform-validate.yaml new file mode 100644 index 0000000..72829d8 --- /dev/null +++ b/.github/workflows/terraform-validate.yaml @@ -0,0 +1,42 @@ +name: Terraform Validate +on: + pull_request: + workflow_dispatch: + +jobs: + + terraform-validate: + runs-on: "229685449397" + permissions: + contents: write + steps: + - name: Checkout code + uses: CSVD/gh-actions-checkout@v4 + + - name: Setup Terraform + uses: CSVD/gh-actions-setup-terraform@v2 + with: + terraform_version: '1.7.3' + + - name: Validate Terraform Configuration + id: validate + uses: CSVD/terraform-validate@main + + - name: Check Validation/Test Results + if: always() + run: | + # Set default values if outputs are empty + IS_VALID="${{ steps.validate.outputs.is_valid }}" + TESTS_PASSED="${{ steps.validate.outputs.tests_passed }}" + + # If outputs are empty, set them to false + [ -z "$IS_VALID" ] && IS_VALID="false" + [ -z "$TESTS_PASSED" ] && TESTS_PASSED="false" + + if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then + echo "Validation or test errors found:" + echo "${{ steps.validate.outputs.stderr }}" + exit 1 + else + echo "All validations and tests 
passed successfully!" + fi diff --git a/.github/workflows/terragrunt-cicd.yml b/.github/workflows/terragrunt-cicd.yml deleted file mode 100644 index a78523e..0000000 --- a/.github/workflows/terragrunt-cicd.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: 'Terraform Module CI' - -on: - push: - branches: - - main - paths: - - '**/*.hcl' - - '**/*.tf' - pull_request: - branches: - - main - paths: - - '**/*.hcl' - - '**/*.tf' - -permissions: - contents: read - pull-requests: write - -jobs: - validate: - name: 'Validate Module' - runs-on: self-hosted - - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v2 - with: - terraform_version: 1.5.0 - - - name: Terraform Init - run: | - terraform init -backend=false - - - name: Terraform Format - run: | - terraform fmt -check - - - name: Terraform Validate - run: | - terraform validate - - - name: Run tflint - uses: terraform-linters/setup-tflint@v3 - if: github.event_name == 'pull_request' - - - name: Lint Terraform - if: github.event_name == 'pull_request' - run: | - tflint --format compact - - release: - name: 'Create Release' - needs: validate - if: github.ref == 'refs/heads/main' && github.event_name == 'push' - runs-on: self-hosted - permissions: - contents: write - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.9' - - - name: Install Commitizen - run: | - pip install commitizen - - - name: Configure Git - run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - - - name: Bump Version and Generate Changelog - id: cz - run: | - cz bump --yes - echo "new_version=$(cz version --project)" >> $GITHUB_OUTPUT - echo "changelog=$(cz changelog --dry-run)" >> $GITHUB_OUTPUT - - - name: Create Release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} - with: - tag_name: v${{ steps.cz.outputs.new_version }} - release_name: Release v${{ steps.cz.outputs.new_version }} - draft: false - prerelease: false - body: ${{ steps.cz.outputs.changelog }} From f6174d516bfab54be98c72559303a3685ce14633 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 1 Apr 2025 18:33:14 -0400 Subject: [PATCH 25/42] update module source --- .pre-commit-config.yaml | 4 ++-- README.md | 4 ++-- copy_images.tf | 2 +- main.tf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2675093..0e4a8bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,7 +49,7 @@ repos: # Terraform Hooks - repo: https://github.com/antonbabenko/pre-commit-terraform - rev: v1.97.3 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases + rev: v1.98.0 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases hooks: - id: terraform_fmt args: @@ -106,6 +106,6 @@ repos: # - --hook-config=--parallelism-ci-cpu-cores=2 - repo: https://github.com/ljnsn/cz-conventional-gitmoji - rev: v0.6.1 + rev: v0.7.0 hooks: - id: conventional-gitmoji diff --git a/README.md b/README.md index 7e249a7..5cf4e4b 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,8 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | Name | Source | Version | |------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | -| [loki\_irsa\_role](#module\_loki\_irsa\_role) | git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git// | main | +| [images](#module\_images) | git::https://github.e.it.census.gov/terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | +| [loki\_irsa\_role](#module\_loki\_irsa\_role) | 
git::https://github.e.it.census.gov/SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git// | main | | [loki\_s3](#module\_loki\_s3) | git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard | tf-upgrade | ## Resources diff --git a/copy_images.tf b/copy_images.tf index b1547be..e8cb9ed 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -65,7 +65,7 @@ locals { } module "images" { - source = "git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/?ref=tf-upgrade" + source = "git::https://github.e.it.census.gov/terraform-modules/aws-ecr-copy-images.git/?ref=tf-upgrade" profile = var.profile application_name = var.cluster_name diff --git a/main.tf b/main.tf index 230db1e..a2a269c 100644 --- a/main.tf +++ b/main.tf @@ -6,7 +6,7 @@ locals { module "loki_irsa_role" { # tflint-ignore: terraform_module_pinned_source - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" + source = "git::https://github.e.it.census.gov/SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "loki") From 20b958226ed4a3a2341cecad780857cf6e94f34d Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Tue, 1 Apr 2025 19:32:45 -0400 Subject: [PATCH 26/42] update module source --- README.md | 2 +- s3.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5cf4e4b..977b61c 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { |------|--------|---------| | [images](#module\_images) | git::https://github.e.it.census.gov/terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | | [loki\_irsa\_role](#module\_loki\_irsa\_role) | git::https://github.e.it.census.gov/SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git// | main | -| [loki\_s3](#module\_loki\_s3) | git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard | tf-upgrade | +| [loki\_s3](#module\_loki\_s3) | git::https://github.e.it.census.gov/terraform-modules/aws-s3.git//standard | tf-upgrade | ## Resources diff --git a/s3.tf b/s3.tf index 7092b20..bacaa77 100644 --- a/s3.tf +++ b/s3.tf @@ -24,7 +24,7 @@ resource "terraform_data" "bucket_name_validator" { module "loki_s3" { depends_on = [terraform_data.bucket_name_validator] - source = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade" + source = "git::https://github.e.it.census.gov/terraform-modules/aws-s3.git//standard?ref=tf-upgrade" bucket_name = local.bucket_name access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id From 6d3abf7b8acc61f6c63bae8b09b5fd82673a9b82 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Tue, 1 Apr 2025 20:14:20 -0400 Subject: [PATCH 27/42] fix bad merge --- values/loki.yml.tpl | 79 +++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 62a47ad..2a89401 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -22,8 +22,23 @@ loki: max_streams_per_user: 1000 query_timeout: 300s retention_period: 2160h - analytics: - reporting_enabled: true + # Provisioner settings + provisioner: + image: + repository: ${provisioner_image_repository} + tag: ${provisioner_image_tag} + querier: + max_concurrent: 4 + replication_factor: 1 + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: s3 + schema: v13 + index: + prefix: index_ + period: 24h # Storage configuration storage: type: s3 @@ -45,39 +60,16 @@ backend: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 - behavior: - scaleUp: - stabilizationWindowSeconds: 300 - policies: - - type: Pods - value: 1 - periodSeconds: 60 - scaleDown: - stabilizationWindowSeconds: 300 - policies: - - type: Pods - value: 1 - periodSeconds: 180 persistence: storageClass: ${rwo_storage_class} - replicas: 1 - replication_factor: 1 + replicas: 3 resources: requests: - cpu: 100m + cpu: 10m memory: 128Mi limits: cpu: 1000m memory: 512Mi - schemaConfig: - configs: - - from: 2024-04-01 - index: - period: 24h - prefix: index_ - object_store: s3 - schema: v13 - store: tsdb bloomCompactor: replicas: 0 bloomGateway: @@ -98,7 +90,7 @@ gateway: replicas: 1 resources: requests: - cpu: 100m + cpu: 10m memory: 128Mi limits: cpu: 100m @@ -124,21 +116,18 @@ queryFrontend: queryScheduler: replicas: 0 read: - replicas: 1 - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 100m - memory: 128Mi - persistence: - enabled: true - storageClass: ${rwo_storage_class} autoscaling: enabled: true minReplicas: 1 targetCPUUtilizationPercentage: 80 + replicas: 3 + resources: + requests: + 
cpu: 10m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi resultsCache: enabled: false ruler: @@ -146,16 +135,6 @@ ruler: serviceAccount: annotations: eks.amazonaws.com/role-arn: ${iam_role_arn} - persistence: - storageClass: ${rwo_storage_class} - replicas: 2 - resources: - requests: - cpu: 1m - memory: 1Mi - limits: - cpu: 200m - memory: 256Mi sidecar: image: repository: ${sidecar_image_repository} @@ -181,7 +160,7 @@ write: targetCPUUtilizationPercentage: 80 resources: requests: - cpu: 100m + cpu: 10m memory: 128Mi limits: cpu: 1000m From a5e0e6ff30fb2efc9ca30bea5ec03d4a711be8cb Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 1 Apr 2025 20:15:00 -0400 Subject: [PATCH 28/42] update request --- values/loki.yml.tpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 2a89401..c1085a8 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -65,7 +65,7 @@ backend: replicas: 3 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 1000m @@ -160,7 +160,7 @@ write: targetCPUUtilizationPercentage: 80 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 1000m From ae460888ffa046f0ccea0de919e99a4929f6a88c Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 14 Apr 2025 17:56:46 -0400 Subject: [PATCH 29/42] pull from ent-ecr --- .github/workflows/terraform-validate.yaml | 54 ++++++++-------- README.md | 3 + copy_images.tf | 42 +++++++------ variables.tf | 75 ++++++++++++----------- 4 files changed, 93 insertions(+), 81 deletions(-) diff --git a/.github/workflows/terraform-validate.yaml b/.github/workflows/terraform-validate.yaml index 72829d8..04b96db 100644 --- a/.github/workflows/terraform-validate.yaml +++ b/.github/workflows/terraform-validate.yaml @@ -1,42 +1,40 @@ -name: Terraform Validate +name: Terraform CI/CD on: - pull_request: workflow_dispatch: - + pull_request: + types: [closed] + branches: + - main jobs: - - terraform-validate: - runs-on: "229685449397" + terraform-ci-cd: + runs-on: 229685449397 permissions: contents: write + steps: - name: Checkout code uses: CSVD/gh-actions-checkout@v4 - - name: Setup Terraform - uses: CSVD/gh-actions-setup-terraform@v2 + - name: Setup GITHUB Credentials + id: github_credentials + uses: CSVD/gh-auth@main with: - terraform_version: '1.7.3' - - - name: Validate Terraform Configuration - id: validate - uses: CSVD/terraform-validate@main + github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }} + github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }} + github_app_id: ${{ vars.GH_APP_ID }} - - name: Check Validation/Test Results - if: always() + - name: Setup GitHub CLI run: | - # Set default values if outputs are empty - IS_VALID="${{ steps.validate.outputs.is_valid }}" - TESTS_PASSED="${{ steps.validate.outputs.tests_passed }}" + # Force manual authentication since setup-git might not work with GitHub Enterprise + echo "${{ steps.github_credentials.outputs.github_token }}" > /tmp/token.txt + gh auth login --with-token --hostname "github.e.it.census.gov" < /tmp/token.txt + rm /tmp/token.txt - # If outputs are empty, set them to false - [ -z "$IS_VALID" ] && IS_VALID="false" - [ -z "$TESTS_PASSED" ] && TESTS_PASSED="false" + # Test GitHub CLI auth 
status + gh auth status || echo "GitHub CLI authentication failed" - if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then - echo "Validation or test errors found:" - echo "${{ steps.validate.outputs.stderr }}" - exit 1 - else - echo "All validations and tests passed successfully!" - fi + - name: Run Release Action + uses: CSVD/releaser@main + with: + github-token: ${{ steps.github_credentials.outputs.github_token }} + working-directory: '.' diff --git a/README.md b/README.md index 977b61c..2647d60 100644 --- a/README.md +++ b/README.md @@ -85,12 +85,15 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_ecr_authorization_token.ecr_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | +| [aws_ecr_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | | [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [account\_id](#input\_account\_id) | aws account number | `string` | `""` | no | | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | | [enterprise\_logs\_provisioner\_tag](#input\_enterprise\_logs\_provisioner\_tag) | The version of the 
grafana/enterprise-logs-provisioner image to use. | `string` | `"v1.7.0"` | no | | [exporter\_tag](#input\_exporter\_tag) | The version of prom/memcached-exporter to use for the gateway. | `string` | `"v0.14.4"` | no | diff --git a/copy_images.tf b/copy_images.tf index e8cb9ed..32cf25f 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -1,4 +1,5 @@ locals { + ent_ecr_source = format("%v.%v.%v.%v", var.account_id, "dkr.ecr", var.region, "amazonaws.com/ent-images") exporter_key = format("%v#%v", "prom/memcached-exporter", var.exporter_tag) gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) loki_key = format("%v#%v", "grafana/loki", var.loki_tag) @@ -12,7 +13,7 @@ locals { dest_path = null name = "grafana/loki" source_image = "bitnami/grafana-loki" - source_registry = "public.ecr.aws" + source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") source_tag = var.loki_tag tag = var.loki_tag }, @@ -21,7 +22,7 @@ locals { dest_path = null name = "memcached" source_image = "bitnami/memcached" - source_registry = "public.ecr.aws" + source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") source_tag = var.memcached_tag tag = var.memcached_tag }, @@ -30,7 +31,7 @@ locals { dest_path = null name = "prom/memcached-exporter" source_image = "prom/memcached-exporter" - source_registry = "docker.io" + source_registry = format("%v/%v", local.ent_ecr_source, "docker") source_tag = var.exporter_tag tag = var.exporter_tag }, @@ -39,7 +40,7 @@ locals { dest_path = null name = "kiwigrid/k8s-sidecar" source_image = "kiwigrid/k8s-sidecar" - source_registry = "quay.io" + source_registry = format("%v/%v", local.ent_ecr_source, "quay") source_tag = var.sidecar_tag tag = var.sidecar_tag }, @@ -48,7 +49,7 @@ locals { dest_path = null name = "grafana/enterprise-logs-provisioner" source_image = "grafana/enterprise-logs-provisioner" - source_registry = "docker.io" + source_registry = format("%v/%v", local.ent_ecr_source, "docker") source_tag = 
var.enterprise_logs_provisioner_tag tag = var.enterprise_logs_provisioner_tag }, @@ -57,7 +58,7 @@ locals { dest_path = null name = "grafana/nginx-unprivileged" source_image = "nginx/nginx-unprivileged" - source_registry = "public.ecr.aws" + source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") source_tag = var.gateway_tag tag = var.gateway_tag }, @@ -72,17 +73,22 @@ module "images" { image_config = local.image_config tags = {} - ### optional - ## account_alias = "" - ## account_id = "" - ## destination_password = "" - ## destination_username = "" - ## override_prefixes = {} - ## region = "" - ## source_password = "" - ## source_username = "" + enable_lifecycle_policy = true + lifecycle_policy_all = true + force_delete = true + lifecycle_policy_keep_count = 5 - enable_lifecycle_policy = true - lifecycle_policy_all = true - force_delete = true + source_username = data.aws_ecr_authorization_token.ecr_token.user_name + source_password = data.aws_ecr_authorization_token.ecr_token.password + + destination_username = data.aws_ecr_authorization_token.token.user_name + destination_password = data.aws_ecr_authorization_token.token.password +} + +data "aws_ecr_authorization_token" "ecr_token" { + registry_id = var.account_id +} + +data "aws_ecr_authorization_token" "token" { + registry_id = var.account_id } diff --git a/variables.tf b/variables.tf index 35009fe..cc7aafd 100644 --- a/variables.tf +++ b/variables.tf @@ -1,13 +1,7 @@ - -variable "tags" { - description = "Additional tags to add to resources created in AWS (s3 bucket, ...)" - type = map(string) - default = {} -} - -variable "region" { - description = "The region holding these resources (for the s3 bucket.)" +variable "account_id" { + description = "aws account number" type = string + default = "" } variable "cluster_name" { @@ -15,27 +9,22 @@ variable "cluster_name" { type = string } -variable "profile" { - description = "AWS config profile used to upload images into ECR" - type = string - default 
= "" -} - -variable "namespace" { - description = "The namespace into which grafana will be deployed" +variable "enterprise_logs_provisioner_tag" { + description = "The version of the grafana/enterprise-logs-provisioner image to use." type = string - default = "loki" + default = "v1.7.0" } -variable "oidc_provider_arn" { - description = "The ARN in the EKS cluster for the OpenID Connect identity provider." +variable "exporter_tag" { + description = "The version of prom/memcached-exporter to use for the gateway." type = string + default = "v0.14.4" } -variable "rwo_storage_class" { - description = "Specify the storage class for read/write/once persistent volumes." +variable "gateway_tag" { + description = "The version of nginxinc/nginx-unprivileged to use for the gateway." type = string - default = "gp3-encrypted" + default = "1.25.2-alpine" } # helm add repo grafana "https://grafana.github.io/helm-charts" @@ -53,28 +42,38 @@ variable "loki_tag" { default = "3.1.1" } -variable "enterprise_logs_provisioner_tag" { - description = "The version of the grafana/enterprise-logs-provisioner image to use." +variable "memcached_tag" { + description = "The version of memcached to use for the gateway." type = string - default = "v1.7.0" + default = "1.6.23-alpine" } -variable "gateway_tag" { - description = "The version of nginxinc/nginx-unprivileged to use for the gateway." +variable "namespace" { + description = "The namespace into which grafana will be deployed" type = string - default = "1.25.2-alpine" + default = "loki" } -variable "memcached_tag" { - description = "The version of memcached to use for the gateway." +variable "oidc_provider_arn" { + description = "The ARN in the EKS cluster for the OpenID Connect identity provider." type = string - default = "1.6.23-alpine" } -variable "exporter_tag" { - description = "The version of prom/memcached-exporter to use for the gateway." 
+variable "profile" { + description = "AWS config profile used to upload images into ECR" type = string - default = "v0.14.4" + default = "" +} + +variable "region" { + description = "The region holding these resources (for the s3 bucket.)" + type = string +} + +variable "rwo_storage_class" { + description = "Specify the storage class for read/write/once persistent volumes." + type = string + default = "gp3-encrypted" } variable "sidecar_tag" { @@ -82,3 +81,9 @@ variable "sidecar_tag" { type = string default = "1.27.4" } + +variable "tags" { + description = "Additional tags to add to resources created in AWS (s3 bucket, ...)" + type = map(string) + default = {} +} From a4fbdd5a4e114dd4c1feda6a0dcf0c1e5e94d2a8 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Mon, 14 Apr 2025 22:32:18 -0400 Subject: [PATCH 30/42] update sources --- copy_images.tf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/copy_images.tf b/copy_images.tf index 32cf25f..c4bcf69 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -12,8 +12,8 @@ locals { enabled = true dest_path = null name = "grafana/loki" - source_image = "bitnami/grafana-loki" - source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") + source_image = "opensource/grafana/loki" + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.loki_tag tag = var.loki_tag }, @@ -21,8 +21,8 @@ locals { enabled = true dest_path = null name = "memcached" - source_image = "bitnami/memcached" - source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") + source_image = "opensource/memcached" + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.memcached_tag tag = var.memcached_tag }, @@ -30,8 +30,8 @@ locals { enabled = true dest_path = null name = "prom/memcached-exporter" - source_image = "prom/memcached-exporter" - source_registry = format("%v/%v", local.ent_ecr_source, "docker") + source_image = 
"opensource/prometheus/memcached-exporter" + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.exporter_tag tag = var.exporter_tag }, @@ -40,7 +40,7 @@ locals { dest_path = null name = "kiwigrid/k8s-sidecar" source_image = "kiwigrid/k8s-sidecar" - source_registry = format("%v/%v", local.ent_ecr_source, "quay") + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.sidecar_tag tag = var.sidecar_tag }, From 3b379a4a88bf5aa0a636b6754205f4239885d6f7 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Mon, 14 Apr 2025 22:41:04 -0400 Subject: [PATCH 31/42] update source path --- copy_images.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_images.tf b/copy_images.tf index c4bcf69..0bf30ec 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -21,7 +21,7 @@ locals { enabled = true dest_path = null name = "memcached" - source_image = "opensource/memcached" + source_image = "opensource/memcached/memcached" source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.memcached_tag tag = var.memcached_tag From 93d9693e16c971a9661d83c3292dc5b918d32cd5 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Thu, 17 Apr 2025 14:17:55 -0400 Subject: [PATCH 32/42] pull across accounts from central ecr --- README.md | 3 ++- copy_images.tf | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2647d60..b621737 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,8 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.89.0 | +| [aws](#provider\_aws) | 5.94.1 | +| [aws.eecr](#provider\_aws.eecr) | 5.94.1 | | [helm](#provider\_helm) | 2.17.0 | | [terraform](#provider\_terraform) | n/a | diff --git a/copy_images.tf b/copy_images.tf index 0bf30ec..ed6acfe 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -86,7 +86,8 @@ module "images" { } data "aws_ecr_authorization_token" "ecr_token" { - registry_id = var.account_id + provider = aws.eecr + registry_id = var.eecr_account_id } data "aws_ecr_authorization_token" "token" { From e0da302c915b9f8dc27c2b01ff23eebc3779de38 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Thu, 17 Apr 2025 17:46:14 -0400 Subject: [PATCH 33/42] add eecr_account_id --- README.md | 1 + variables.tf | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/README.md b/README.md index b621737..f8a148d 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { |------|-------------|------|---------|:--------:| | [account\_id](#input\_account\_id) | aws account number | `string` | `""` | no | | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | +| [eecr\_account\_id](#input\_eecr\_account\_id) | enterpirse ecr source aws account number | `string` | `""` | no | | [enterprise\_logs\_provisioner\_tag](#input\_enterprise\_logs\_provisioner\_tag) | The version of the grafana/enterprise-logs-provisioner image to use. 
| `string` | `"v1.7.0"` | no | | [exporter\_tag](#input\_exporter\_tag) | The version of prom/memcached-exporter to use for the gateway. | `string` | `"v0.14.4"` | no | | [gateway\_tag](#input\_gateway\_tag) | The version of nginxinc/nginx-unprivileged to use for the gateway. | `string` | `"1.25.2-alpine"` | no | diff --git a/variables.tf b/variables.tf index cc7aafd..ca9beef 100644 --- a/variables.tf +++ b/variables.tf @@ -9,6 +9,12 @@ variable "cluster_name" { type = string } +variable "eecr_account_id" { + description = "enterpirse ecr source aws account number" + type = string + default = "" +} + variable "enterprise_logs_provisioner_tag" { description = "The version of the grafana/enterprise-logs-provisioner image to use." type = string From 5e8c8d5e5b9b227c72bf83969c01aba8235ad344 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Thu, 17 Apr 2025 19:36:23 -0400 Subject: [PATCH 34/42] fix ent_ecr_source --- copy_images.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_images.tf b/copy_images.tf index ed6acfe..5d33bd8 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -1,5 +1,5 @@ locals { - ent_ecr_source = format("%v.%v.%v.%v", var.account_id, "dkr.ecr", var.region, "amazonaws.com/ent-images") + ent_ecr_source = format("%v.%v.%v.%v", var.eecr_account_id, "dkr.ecr", var.region, "amazonaws.com/ent-images") exporter_key = format("%v#%v", "prom/memcached-exporter", var.exporter_tag) gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) loki_key = format("%v#%v", "grafana/loki", var.loki_tag) From a3bd46a6fb23f633cc69569c01464ea53aacb97f Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Thu, 17 Apr 2025 19:51:06 -0400 Subject: [PATCH 35/42] update image sources --- copy_images.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/copy_images.tf b/copy_images.tf index 5d33bd8..52efaae 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -48,8 +48,8 @@ locals { enabled = true dest_path = null name = "grafana/enterprise-logs-provisioner" - source_image = "grafana/enterprise-logs-provisioner" - source_registry = format("%v/%v", local.ent_ecr_source, "docker") + source_image = "ironbank/opensource/grafana/enterprise-logs-provisioner" + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.enterprise_logs_provisioner_tag tag = var.enterprise_logs_provisioner_tag }, @@ -57,8 +57,8 @@ locals { enabled = true dest_path = null name = "grafana/nginx-unprivileged" - source_image = "nginx/nginx-unprivileged" - source_registry = format("%v/%v", local.ent_ecr_source, "public-ecr") + source_image = "opensource/nginx/nginx-alpine" + source_registry = format("%v/%v", local.ent_ecr_source, "ironbank") source_tag = var.gateway_tag tag = var.gateway_tag }, From 88f4bb7ddf4e795a99d143211d254b87e914153e Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Thu, 17 Apr 2025 23:33:39 -0400 Subject: [PATCH 36/42] dynamic version --- README.md | 4 ++++ requirements.tf | 4 ++++ version.tf | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f8a148d..79431b7 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | [aws](#requirement\_aws) | >= 5.14.0 | | [helm](#requirement\_helm) | >= 2.11.0 | | [kubernetes](#requirement\_kubernetes) | >= 2.23.0 | +| [null](#requirement\_null) | >= 3.2.1 | ## Providers @@ -69,6 +70,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | [aws](#provider\_aws) | 5.94.1 | | [aws.eecr](#provider\_aws.eecr) | 5.94.1 | | [helm](#provider\_helm) | 2.17.0 | +| [null](#provider\_null) | 3.2.3 | | [terraform](#provider\_terraform) | n/a | ## Modules @@ -84,6 +86,8 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | Name | Type | |------|------| | [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [null_resource.git_version](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [null_resource.module_name](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_ecr_authorization_token.ecr_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | diff --git a/requirements.tf b/requirements.tf index ae62e15..32e5c6f 100644 --- a/requirements.tf +++ b/requirements.tf @@ -14,5 +14,9 @@ terraform { source = 
"hashicorp/kubernetes" version = ">= 2.23.0" } + null = { + source = "hashicorp/null" + version = ">= 3.2.1" + } } } diff --git a/version.tf b/version.tf index 33ac557..ac138e0 100644 --- a/version.tf +++ b/version.tf @@ -1,4 +1,27 @@ +resource "null_resource" "git_version" { + triggers = { + # Force this to run on every apply to get the latest tag value + always_run = timestamp() + } + + provisioner "local-exec" { + command = "(git describe --tags --abbrev=0 2>/dev/null || echo 'unknown') > ${path.module}/.git_tag" + on_failure = continue + } +} + +resource "null_resource" "module_name" { + triggers = { + module_path = path.module + } + + provisioner "local-exec" { + command = "basename $(pwd) > ${path.module}/.module_name" + on_failure = continue + } +} + locals { - module_name = "tfmod-loki" - module_version = "0.1.1" + module_name = fileexists("${path.module}/.module_name") ? trimspace(file("${path.module}/.module_name")) : "tfmod-loki" + module_version = fileexists("${path.module}/.git_tag") ? trimspace(file("${path.module}/.git_tag")) : "latest" } From 213aa523fd4f51d735a757f92b7f58ee084fc61c Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Fri, 18 Apr 2025 15:02:13 -0400 Subject: [PATCH 37/42] remove eecr data item as it is in the provider --- README.md | 2 -- copy_images.tf | 5 ----- 2 files changed, 7 deletions(-) diff --git a/README.md b/README.md index 79431b7..b154fe7 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,6 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | Name | Version | |------|---------| | [aws](#provider\_aws) | 5.94.1 | -| [aws.eecr](#provider\_aws.eecr) | 5.94.1 | | [helm](#provider\_helm) | 2.17.0 | | [null](#provider\_null) | 3.2.3 | | [terraform](#provider\_terraform) | n/a | @@ -90,7 +89,6 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | [null_resource.module_name](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | -| [aws_ecr_authorization_token.ecr_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | | [aws_ecr_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | | [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | diff --git a/copy_images.tf b/copy_images.tf index 52efaae..6178b75 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -85,11 +85,6 @@ module "images" { destination_password = data.aws_ecr_authorization_token.token.password } -data "aws_ecr_authorization_token" "ecr_token" { - provider = aws.eecr - registry_id = var.eecr_account_id -} - data "aws_ecr_authorization_token" "token" { registry_id = var.account_id } From 
c329a1a30a9ff7af92eed71d5534b3ba84695a3c Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 18 Apr 2025 21:15:49 -0400 Subject: [PATCH 38/42] update copy images for eecr pulling --- README.md | 2 ++ copy_images.tf | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/README.md b/README.md index b154fe7..79431b7 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | Name | Version | |------|---------| | [aws](#provider\_aws) | 5.94.1 | +| [aws.eecr](#provider\_aws.eecr) | 5.94.1 | | [helm](#provider\_helm) | 2.17.0 | | [null](#provider\_null) | 3.2.3 | | [terraform](#provider\_terraform) | n/a | @@ -89,6 +90,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { | [null_resource.module_name](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_ecr_authorization_token.ecr_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | | [aws_ecr_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | | [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | diff --git a/copy_images.tf b/copy_images.tf index 6178b75..38fc99d 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -88,3 +88,14 @@ module "images" { data "aws_ecr_authorization_token" "token" { registry_id = var.account_id } + +data "aws_ecr_authorization_token" "ecr_token" { + provider = aws.eecr + registry_id = var.eecr_info.account_id +} 
+ +provider "aws" { + alias = "eecr" + profile = var.eecr_info.profile + region = var.eecr_info.region +} From 44a21639fb09f6013bc621d0f884fd216463ed3d Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Fri, 18 Apr 2025 21:47:52 -0400 Subject: [PATCH 39/42] update var and ent_ecr_source --- README.md | 2 +- copy_images.tf | 2 +- variables.tf | 18 ++++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 79431b7..082d0a5 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ resource "kubernetes_manifest" "example_grafana_datasource" { |------|-------------|------|---------|:--------:| | [account\_id](#input\_account\_id) | aws account number | `string` | `""` | no | | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | -| [eecr\_account\_id](#input\_eecr\_account\_id) | enterpirse ecr source aws account number | `string` | `""` | no | +| [eecr\_info](#input\_eecr\_info) | Enterprise ECR source information |
object({
account_id = string
alias = string
profile = string
region = string
})
|
{
"account_id": "269222635945",
"alias": "lab-gov-shared-nonprod",
"profile": "269222635945-lab-gov-shared-nonprod",
"region": "us-gov-east-1"
}
| no | | [enterprise\_logs\_provisioner\_tag](#input\_enterprise\_logs\_provisioner\_tag) | The version of the grafana/enterprise-logs-provisioner image to use. | `string` | `"v1.7.0"` | no | | [exporter\_tag](#input\_exporter\_tag) | The version of prom/memcached-exporter to use for the gateway. | `string` | `"v0.14.4"` | no | | [gateway\_tag](#input\_gateway\_tag) | The version of nginxinc/nginx-unprivileged to use for the gateway. | `string` | `"1.25.2-alpine"` | no | diff --git a/copy_images.tf b/copy_images.tf index 38fc99d..7237fdf 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -1,5 +1,5 @@ locals { - ent_ecr_source = format("%v.%v.%v.%v", var.eecr_account_id, "dkr.ecr", var.region, "amazonaws.com/ent-images") + ent_ecr_source = format("%v.%v.%v.%v", var.eecr_info.account_id, "dkr.ecr", var.eecr_info.region, "amazonaws.com/ent-images") exporter_key = format("%v#%v", "prom/memcached-exporter", var.exporter_tag) gateway_key = format("%v#%v", "grafana/nginx-unprivileged", var.gateway_tag) loki_key = format("%v#%v", "grafana/loki", var.loki_tag) diff --git a/variables.tf b/variables.tf index ca9beef..05bfade 100644 --- a/variables.tf +++ b/variables.tf @@ -9,10 +9,20 @@ variable "cluster_name" { type = string } -variable "eecr_account_id" { - description = "enterpirse ecr source aws account number" - type = string - default = "" +variable "eecr_info" { + description = "Enterprise ECR source information" + type = object({ + account_id = string + alias = string + profile = string + region = string + }) + default = { + account_id = "269222635945" + alias = "lab-gov-shared-nonprod" + profile = "269222635945-lab-gov-shared-nonprod" + region = "us-gov-east-1" + } } variable "enterprise_logs_provisioner_tag" { From 123759927d717a0ccf0988f583eda21c75ada4c3 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 21 Apr 2025 11:42:27 -0400 Subject: [PATCH 40/42] ensure workflows are current --- .github/workflows/terraform-release.yaml | 2 +- terraform-release.yaml | 40 ++++++++++++++++++++++ terraform-validate.yaml | 42 ++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 terraform-release.yaml create mode 100644 terraform-validate.yaml diff --git a/.github/workflows/terraform-release.yaml b/.github/workflows/terraform-release.yaml index 90910bc..b91ef15 100644 --- a/.github/workflows/terraform-release.yaml +++ b/.github/workflows/terraform-release.yaml @@ -7,7 +7,7 @@ on: - main jobs: terraform-ci-cd: - runs-on: 229685449397 + runs-on: "229685449397" permissions: contents: write diff --git a/terraform-release.yaml b/terraform-release.yaml new file mode 100644 index 0000000..3f67574 --- /dev/null +++ b/terraform-release.yaml @@ -0,0 +1,40 @@ +name: Terraform Module Release +on: + workflow_dispatch: + pull_request: + types: [closed] + branches: + - main +jobs: + terraform-release: + runs-on: "229685449397" + permissions: + contents: write + + steps: + - name: Checkout code + uses: CSVD/gh-actions-checkout@v4 + + - name: Setup GITHUB Credentials + id: github_credentials + uses: CSVD/gh-auth@main + with: + github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }} + github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }} + github_app_id: ${{ vars.GH_APP_ID }} + + - name: Setup GitHub CLI + run: | + # Force manual authentication since setup-git might not work with GitHub Enterprise + echo "${{ steps.github_credentials.outputs.github_token }}" > /tmp/token.txt + gh auth login --with-token --hostname "github.e.it.census.gov" < /tmp/token.txt + rm /tmp/token.txt + + # Test GitHub CLI auth status + gh auth status || echo "GitHub CLI authentication failed" + + - name: Run Release Action + uses: CSVD/releaser@main + with: + github-token: ${{ steps.github_credentials.outputs.github_token }} + working-directory: '.' 
diff --git a/terraform-validate.yaml b/terraform-validate.yaml new file mode 100644 index 0000000..ac349eb --- /dev/null +++ b/terraform-validate.yaml @@ -0,0 +1,42 @@ +name: Terraform Validate +on: + pull_request: + workflow_dispatch: + +jobs: + + terraform-validate: + runs-on: "229685449397" + permissions: + contents: write + steps: + - name: Checkout code + uses: CSVD/gh-actions-checkout@v4 + + - name: Setup Terraform + uses: CSVD/gh-actions-setup-terraform@v2 + with: + terraform_version: '1.10.5' + + - name: Validate Terraform Configuration + id: validate + uses: CSVD/terraform-validate@main + + - name: Check Validation/Test Results + if: always() + run: | + # Set default values if outputs are empty + IS_VALID="${{ steps.validate.outputs.is_valid }}" + TESTS_PASSED="${{ steps.validate.outputs.tests_passed }}" + + # If outputs are empty, set them to false + [ -z "$IS_VALID" ] && IS_VALID="false" + [ -z "$TESTS_PASSED" ] && TESTS_PASSED="false" + + if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then + echo "Validation or test errors found:" + echo "${{ steps.validate.outputs.stderr }}" + exit 1 + else + echo "All validations and tests passed successfully!" + fi From 600da77fd1a0c5da0a46da86ebff65960460ae50 Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Mon, 21 Apr 2025 15:43:05 -0400 Subject: [PATCH 41/42] ensure workflows are current --- .github/workflows/terraform-release.yaml | 41 +++-------------------- terraform-release.yaml | 40 ---------------------- terraform-validate.yaml | 42 ------------------------ 3 files changed, 4 insertions(+), 119 deletions(-) delete mode 100644 terraform-release.yaml delete mode 100644 terraform-validate.yaml diff --git a/.github/workflows/terraform-release.yaml b/.github/workflows/terraform-release.yaml index b91ef15..3f67574 100644 --- a/.github/workflows/terraform-release.yaml +++ b/.github/workflows/terraform-release.yaml @@ -1,4 +1,4 @@ -name: Terraform CI/CD +name: Terraform Module Release on: workflow_dispatch: pull_request: @@ -6,7 +6,7 @@ on: branches: - main jobs: - terraform-ci-cd: + terraform-release: runs-on: "229685449397" permissions: contents: write @@ -15,11 +15,6 @@ jobs: - name: Checkout code uses: CSVD/gh-actions-checkout@v4 - - name: Setup Terraform - uses: CSVD/gh-actions-setup-terraform@v3 - with: - terraform_version: "1.9.1" - - name: Setup GITHUB Credentials id: github_credentials uses: CSVD/gh-auth@main @@ -28,28 +23,6 @@ jobs: github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }} github_app_id: ${{ vars.GH_APP_ID }} - - - name: Debug Authentication - run: | - # Print the GitHub server URL - echo "GitHub Server URL: ${{ github.server_url }}" - - # Extract the host from the URL - HOST="${{ github.server_url }}" - HOST="${HOST#*//}" - HOST="${HOST%%/*}" - echo "GitHub Host: $HOST" - - # Check if token exists - if [[ -n "${{ steps.github_credentials.outputs.github_token }}" ]]; then - echo "Token generated successfully" - # Test the token with a simple GitHub API call (without exposing the token) - STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer ${{ steps.github_credentials.outputs.github_token }}" "${{ github.server_url }}/api/v3/user") - echo "API Test Status Code: $STATUS" - else - echo "No token was 
generated!" - fi - - name: Setup GitHub CLI run: | # Force manual authentication since setup-git might not work with GitHub Enterprise @@ -60,14 +33,8 @@ jobs: # Test GitHub CLI auth status gh auth status || echo "GitHub CLI authentication failed" - - name: AWS Auth - id: aws_auth - uses: CSVD/aws-auth@main - with: - ecs: true - - - name: Run Terraform Module Release Action - uses: CSVD/terraform-module-release@main + - name: Run Release Action + uses: CSVD/releaser@main with: github-token: ${{ steps.github_credentials.outputs.github_token }} working-directory: '.' diff --git a/terraform-release.yaml b/terraform-release.yaml deleted file mode 100644 index 3f67574..0000000 --- a/terraform-release.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Terraform Module Release -on: - workflow_dispatch: - pull_request: - types: [closed] - branches: - - main -jobs: - terraform-release: - runs-on: "229685449397" - permissions: - contents: write - - steps: - - name: Checkout code - uses: CSVD/gh-actions-checkout@v4 - - - name: Setup GITHUB Credentials - id: github_credentials - uses: CSVD/gh-auth@main - with: - github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }} - github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }} - github_app_id: ${{ vars.GH_APP_ID }} - - - name: Setup GitHub CLI - run: | - # Force manual authentication since setup-git might not work with GitHub Enterprise - echo "${{ steps.github_credentials.outputs.github_token }}" > /tmp/token.txt - gh auth login --with-token --hostname "github.e.it.census.gov" < /tmp/token.txt - rm /tmp/token.txt - - # Test GitHub CLI auth status - gh auth status || echo "GitHub CLI authentication failed" - - - name: Run Release Action - uses: CSVD/releaser@main - with: - github-token: ${{ steps.github_credentials.outputs.github_token }} - working-directory: '.' 
diff --git a/terraform-validate.yaml b/terraform-validate.yaml deleted file mode 100644 index ac349eb..0000000 --- a/terraform-validate.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: Terraform Validate -on: - pull_request: - workflow_dispatch: - -jobs: - - terraform-validate: - runs-on: "229685449397" - permissions: - contents: write - steps: - - name: Checkout code - uses: CSVD/gh-actions-checkout@v4 - - - name: Setup Terraform - uses: CSVD/gh-actions-setup-terraform@v2 - with: - terraform_version: '1.10.5' - - - name: Validate Terraform Configuration - id: validate - uses: CSVD/terraform-validate@main - - - name: Check Validation/Test Results - if: always() - run: | - # Set default values if outputs are empty - IS_VALID="${{ steps.validate.outputs.is_valid }}" - TESTS_PASSED="${{ steps.validate.outputs.tests_passed }}" - - # If outputs are empty, set them to false - [ -z "$IS_VALID" ] && IS_VALID="false" - [ -z "$TESTS_PASSED" ] && TESTS_PASSED="false" - - if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then - echo "Validation or test errors found:" - echo "${{ steps.validate.outputs.stderr }}" - exit 1 - else - echo "All validations and tests passed successfully!" - fi From 8d5272d4c8aa9aa6648700bd8b7bfa2d33ca4f7a Mon Sep 17 00:00:00 2001 From: "Matthew C. 
Morgan" Date: Tue, 22 Apr 2025 00:04:44 -0400 Subject: [PATCH 42/42] ensure committed --- README.md | 1 - version.tf | 13 +------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/README.md b/README.md index 082d0a5..abaf214 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,6 @@ resource "kubernetes_manifest" "example_grafana_datasource" { |------|------| | [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [null_resource.git_version](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | -| [null_resource.module_name](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_ecr_authorization_token.ecr_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source | diff --git a/version.tf b/version.tf index ac138e0..94f902b 100644 --- a/version.tf +++ b/version.tf @@ -10,18 +10,7 @@ resource "null_resource" "git_version" { } } -resource "null_resource" "module_name" { - triggers = { - module_path = path.module - } - - provisioner "local-exec" { - command = "basename $(pwd) > ${path.module}/.module_name" - on_failure = continue - } -} - locals { - module_name = fileexists("${path.module}/.module_name") ? trimspace(file("${path.module}/.module_name")) : "tfmod-loki" + module_name = "tfmod-loki" module_version = fileexists("${path.module}/.git_tag") ? trimspace(file("${path.module}/.git_tag")) : "latest" }