From 42ca3b1f2c9386d11a09de704e152a44ca8863af Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 16 Jun 2026 19:51:39 -0400 Subject: [PATCH 1/3] update 1.34 and write docs --- .../full-cluster-tf-upgrade/1.34/charts.yml | 6 +-- .../cluster-autoscaler/cluster-autoscaler.tf | 2 +- .../full-cluster-tf-upgrade/1.34/images.yml | 20 +++---- .../README.upgrade-1.33-1.34.md | 53 +++++++++++++++---- 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/examples/full-cluster-tf-upgrade/1.34/charts.yml b/examples/full-cluster-tf-upgrade/1.34/charts.yml index 44712c4..03f1643 100644 --- a/examples/full-cluster-tf-upgrade/1.34/charts.yml +++ b/examples/full-cluster-tf-upgrade/1.34/charts.yml @@ -2,13 +2,13 @@ cluster-autoscaler: documentation: "https://artifacthub.io/packages/helm/cluster-autoscaler/cluster-autoscaler" name: "cluster-autoscaler" repository: "https://kubernetes.github.io/autoscaler" - version: "9.52.1" + version: "9.57.0" use_remote: true cert-manager: documetation: "https://artifacthub.io/packages/helm/cert-manager/cert-manager" name: "cert-manager" repository: "https://charts.jetstack.io" - version: "1.19.1" + version: "1.20.2" use_remote: true metrics-server: # documentation: "https://artifacthub.io/packages/helm/bitnami/metrics-server" @@ -17,7 +17,7 @@ metrics-server: # repository: "https://charts.bitnami.com/bitnami" repository: "https://kubernetes-sigs.github.io/metrics-server" # version: "7.2.14" - version: "3.13.0" + version: "3.13.1" use_remote: true # new one, does not work yet # repository: "oci://registry-1.docker.io/bitnamicharts" diff --git a/examples/full-cluster-tf-upgrade/1.34/common-services/cluster-autoscaler/cluster-autoscaler.tf b/examples/full-cluster-tf-upgrade/1.34/common-services/cluster-autoscaler/cluster-autoscaler.tf index 4098d3a..88d0d44 100644 --- a/examples/full-cluster-tf-upgrade/1.34/common-services/cluster-autoscaler/cluster-autoscaler.tf +++ 
b/examples/full-cluster-tf-upgrade/1.34/common-services/cluster-autoscaler/cluster-autoscaler.tf @@ -2,7 +2,7 @@ module "role_cluster-autoscaler" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts" description = "EKS IAM Role for ${var.cluster_name} for service account ${var.cluster_autoscaler_namespace}:${var.cluster_autoscaler_name}" - name = format("%v%v-irsa__%v", local._prefixes["eks-role"], var.cluster_name, "autoscaler") + name = format("%v%v-irsa__%v", local._prefixes["eks-role"], var.cluster_name, "as") attach_cluster_autoscaler_policy = true cluster_autoscaler_cluster_names = [var.cluster_name] diff --git a/examples/full-cluster-tf-upgrade/1.34/images.yml b/examples/full-cluster-tf-upgrade/1.34/images.yml index 3251462..dcc4b55 100644 --- a/examples/full-cluster-tf-upgrade/1.34/images.yml +++ b/examples/full-cluster-tf-upgrade/1.34/images.yml @@ -8,7 +8,7 @@ karpenter-controller: source_tag: null enabled: false # tag: "1.0.6" - tag: "1.8.0" + tag: "1.13.0" cluster-autoscaler: documentation: "https://github.com/kubernetes/autoscaler/releases" name: "cluster-autoscaler" @@ -28,7 +28,7 @@ cert-manager-controller: source_image: "jetstack/cert-manager-controller" source_tag: null enabled: true - tag: "v1.19.1" + tag: "v1.20.2" cert-manager-cainjector: documentation: "https://cert-manager.io/docs/releases/" name: "cert-manager-cainjector" @@ -38,7 +38,7 @@ cert-manager-cainjector: source_image: "jetstack/cert-manager-cainjector" source_tag: null enabled: true - tag: "v1.19.1" + tag: "v1.20.2" cert-manager-webhook: documentation: "https://cert-manager.io/docs/releases/" name: "cert-manager-webhook" @@ -48,7 +48,7 @@ cert-manager-webhook: source_image: "jetstack/cert-manager-webhook" source_tag: null enabled: true - tag: "v1.19.1" + tag: "v1.20.2" cert-manager-ctl: documentation: "https://cert-manager.io/docs/releases/" name: "cert-manager-ctl" @@ -83,7 +83,7 @@ metrics-server: source_tag: null enabled: true ## tag: "v0.7.2" - tag: 
"v0.8.0" + tag: "v0.8.1" istio-operator: documentation: "https://istio.io/latest/docs/releases/supported-releases" name: "istio/operator" @@ -104,7 +104,7 @@ istio-pilot: source_tag: null enabled: true # tag: "1.25.3" - tag: "1.28.0" + tag: "1.30.1" istio-proxyv2: documentation: "https://istio.io/latest/docs/releases/supported-releases" name: "istio/proxyv2" @@ -115,7 +115,7 @@ istio-proxyv2: source_tag: null enabled: true # tag: "1.25.3" - tag: "1.28.0" + tag: "1.30.1" prometheus: documentation: "https://hub.docker.com/r/bitnami/prometheus/tags" name: "prometheus" @@ -128,7 +128,7 @@ prometheus: enabled: true # tag: "3.0.1" # tag: "v2.54.0" - tag: "v3.8.0" + tag: "v3.12.0" prometheus-operator: # documentation: "https://hub.docker.com/r/bitnami/prometheus-operator/tags" name: "prometheus-operator" @@ -143,7 +143,7 @@ prometheus-operator: enabled: true # tag: "0.79.2" ## tag: "v0.74.0" - tag: "v0.87.0" + tag: "v0.91.0" alertmanager: # documentation: "https://hub.docker.com/r/bitnami/alertmanager/tags" name: "alertmanager" @@ -155,4 +155,4 @@ alertmanager: enabled: true # tag: "0.27.0" ## tag: "v0.28.0" - tag: "v0.29.0" + tag: "v0.33.0" diff --git a/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md b/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md index c2a9599..e3e8caf 100644 --- a/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md +++ b/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md @@ -1,15 +1,15 @@ # EKS Upgrade 1.33 to 1.34 -This change has ONLY the version of 1.33 -> 1.34 and the addons file processing. It is missing -update to the AMI (to AL2023) and any charts or image changes. +This change has ONLY the version of 1.33 -> 1.34 and the addons file processing. 
## Copy Files Copy files from 1.34/{path} to eks-{clustername}/{path} -* versions.tf -* addons/addons.tf +* charts.yml +* images.yml * addons/addons.yml +* common-services/cluster-autoscaler/cluster-autoscaler.tf ## Update Files @@ -18,11 +18,38 @@ Update `cluster_version` from 1.33 to 1.34 in * settings.auto.tfvars ## Apply changes - -Apply in various directories where changes happened +Ensure `includes.d/parent_rs.tf` is correct. +If cluster is an upgrade, comment out `ebs-encryption.tf` line 50-58 the resource for `delete_default_sc` as it should already have been applied and will throw an error if not commented out. + +NOTE: irsa-roles/ does not navigate automatically after `tf-run`; go back to the cluster folder and run `tf-run apply 37`. +NOTE: common-services/cluster-autoscaler does not navigate automatically after `tf-run`; go back to the cluster folder and run `tf-run apply 41`. + + +BEST PRACTICE - run `tf-run apply` and follow the prompts, starting from the cluster folder. +``` +- tf-run apply +- tf-run apply 4 +- tf-run apply 20 +- cd aws-auth; tf-run apply; cd ..; +- tf-run apply 31 +- cd efs; tf-run apply; cd ..; +- tf-run apply 33 +- cd addons; tf-run apply; cd ..; +- tf-run apply 35 +- cd irsa-roles; tf-run apply; cd ..; +- tf-run apply 37 +- cd common-services; tf-run apply; cd cluster-autoscaler; tf-run apply; cd ../../; +- tf-run apply 41 +- tf-run apply 43 +``` +By running this way we ensure that tag updates are propagated to all resources. + +Otherwise, apply in each directory where changes happened: * (main) * addons/ +* common-services/ +* common-services/cluster-autoscaler There is some approach/process to upgrade the version, find it and put it here. @@ -46,12 +73,18 @@ There is some approach/process to upgrade the version, find it and put it here.
- addons/addon_cloudwatch.tf and - common-services/cluster-autoscaler/cluster-autoscaler.tf -- 1.33.0 -- 2026-04-16 +- 1.33.0 -- 2026-05-05 + - ami updated from `AL2_x86_64` to `AL2023_x86_64_STANDARD` + - user data no longer required - add files to update - - addons/addons.tf + - main.tf + - charts.yml + - images.yml - addons/addons.yml -- 1.34.0 -- 2026-04-16 +- 1.34.0 -- 2026-06-16 - add files to update - - addons/addons.tf + - charts.yml + - images.yml - addons/addons.yml + - common-services/cluster-autoscaler/cluster-autoscaler.tf From 5e51015303467cbadcaa1ab11e999865c0da4785 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 16 Jun 2026 20:14:24 -0400 Subject: [PATCH 2/3] add tags and powerschedule --- examples/full-cluster-tf-upgrade/1.34/main.tf | 22 ++++++++++++++++++- examples/full-cluster-tf-upgrade/1.34/tags.tf | 10 +++++++++ .../full-cluster-tf-upgrade/1.34/tags.yml | 4 ++++ .../1.34/variables.eks.tf | 6 +++++ .../README.upgrade-1.33-1.34.md | 17 ++++++++++++++ 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 examples/full-cluster-tf-upgrade/1.34/tags.tf create mode 100644 examples/full-cluster-tf-upgrade/1.34/tags.yml diff --git a/examples/full-cluster-tf-upgrade/1.34/main.tf b/examples/full-cluster-tf-upgrade/1.34/main.tf index 7ac64ef..1217645 100644 --- a/examples/full-cluster-tf-upgrade/1.34/main.tf +++ b/examples/full-cluster-tf-upgrade/1.34/main.tf @@ -38,6 +38,7 @@ locals { autoscale_tags = { format("k8s.io/cluster-autoscaler/%v", var.cluster_name) = "owned" "k8s.io/cluster-autoscaler/enabled" = "TRUE" + "PowerSchedule" = var.power_schedule } } @@ -78,6 +79,7 @@ resource "aws_eks_cluster" "eks_cluster" { } tags = merge( + module.tags.tags, local.base_tags, local.common_tags, var.tags, @@ -92,6 +94,18 @@ resource "aws_eks_cluster" "eks_cluster" { ] } +resource "aws_autoscaling_group_tag" "power-schedule" { + # Tags only the first Auto Scaling group of the node group (resources[0].autoscaling_groups[0]); use for_each if you have multiple node groups or tags + autoscaling_group_name =
aws_eks_node_group.eks-nodegroup.resources[0].autoscaling_groups[0].name + + tag { + key = "PowerSchedule" + value = var.power_schedule + propagate_at_launch = true + } + depends_on = [aws_eks_node_group.eks-nodegroup] +} + resource "aws_eks_node_group" "eks-nodegroup" { cluster_name = aws_eks_cluster.eks_cluster.name node_group_name = format("%v%v-nodegroup", local._prefixes["eks"], var.cluster_name) @@ -113,6 +127,7 @@ resource "aws_eks_node_group" "eks-nodegroup" { } tags = merge( + module.tags.tags, local.base_tags, local.common_tags, var.tags, @@ -149,6 +164,7 @@ locals { launch_template_tags = { "Name" = format("%v%v-nodegroup-instance-name", local._prefixes["eks"], var.cluster_name) format("kubernetes.io/cluster/%v", var.cluster_name) = "owned" + "PowerSchedule" = var.power_schedule } } @@ -162,6 +178,7 @@ resource "aws_launch_template" "eks-nodegroup" { vpc_security_group_ids = [aws_security_group.extra_cluster_sg.id] tags = merge( + module.tags.tags, local.base_tags, local.common_tags, var.tags, @@ -172,6 +189,7 @@ resource "aws_launch_template" "eks-nodegroup" { resource_type = "instance" tags = merge( + module.tags.tags, local.base_tags, { "boc:created_by" = "eks-launch-template" }, local.common_tags, @@ -185,6 +203,7 @@ resource "aws_launch_template" "eks-nodegroup" { resource_type = "volume" tags = merge( + module.tags.tags, local.base_tags, { "boc:created_by" = "eks-launch-template" }, local.common_tags, @@ -197,6 +216,7 @@ resource "aws_launch_template" "eks-nodegroup" { resource_type = "network-interface" tags = merge( + module.tags.tags, local.base_tags, { "boc:created_by" = "eks-launch-template" }, local.common_tags, @@ -230,7 +250,7 @@ resource "aws_launch_template" "eks-nodegroup" { } } - # user_data = base64encode(local.eks-node-private-userdata) + user_data = base64encode(local.eks-node-private-userdata) } #### User data for worker launch diff --git a/examples/full-cluster-tf-upgrade/1.34/tags.tf b/examples/full-cluster-tf-upgrade/1.34/tags.tf
new file mode 100644 index 0000000..3e8cb3b --- /dev/null +++ b/examples/full-cluster-tf-upgrade/1.34/tags.tf @@ -0,0 +1,10 @@ +module "tags" { + source = "git@github.e.it.census.gov:terraform-modules/boc-nts//tags" + filename = format("%v/%v", path.root, "tags.yml") + + legacy_tags = merge( + var.account_tags, + var.infrastructure_tags, + var.application_tags, + ) +} diff --git a/examples/full-cluster-tf-upgrade/1.34/tags.yml b/examples/full-cluster-tf-upgrade/1.34/tags.yml new file mode 100644 index 0000000..159e71c --- /dev/null +++ b/examples/full-cluster-tf-upgrade/1.34/tags.yml @@ -0,0 +1,4 @@ +finops: + number: + name: + role: +eks diff --git a/examples/full-cluster-tf-upgrade/1.34/variables.eks.tf b/examples/full-cluster-tf-upgrade/1.34/variables.eks.tf index 5e166f4..7b007ac 100644 --- a/examples/full-cluster-tf-upgrade/1.34/variables.eks.tf +++ b/examples/full-cluster-tf-upgrade/1.34/variables.eks.tf @@ -78,3 +78,9 @@ variable "contact_email" { description = "Email address in @census.gov of contact for the certificate. This is strongly recommended to be a group email address." type = string } + +variable "power_schedule" { + description = "The PowerSchedule tag value to apply to the cluster and cluster autoscaler resources. This is used by the cluster autoscaler to determine when to scale down nodes during off hours. The value must match a PowerSchedule defined in the PowerScheduler service." + type = string + default = "weekday-7a-7p" +} diff --git a/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md b/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md index e3e8caf..1938dd7 100644 --- a/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md +++ b/examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md @@ -6,8 +6,11 @@ This change has ONLY the version of 1.33 -> 1.34 and the addons file processing.
Copy files from 1.34/{path} to eks-{clustername}/{path} +* main.tf * charts.yml * images.yml +* tags.tf +* tags.yml * addons/addons.yml * common-services/cluster-autoscaler/cluster-autoscaler.tf @@ -17,6 +20,16 @@ Update `cluster_version` from 1.33 to 1.34 in * settings.auto.tfvars +Add `power_schedule` (declared in `variables.eks.tf`, which must also be copied from 1.34/) in + +* settings.auto.tfvars + +**WARNING**: the default value of `power_schedule` is `weekday-7a-7p`; set it to `always-on` for always-on behavior. + +Update the `finops` `number` and `name` values (`finops_project_number` and `finops_project_name`) in + +* tags.yml + ## Apply changes Ensure `includes.d/parent_rs.tf` is correct. If cluster is an upgrade, comment out `ebs-encryption.tf` line 50-58 the resource for `delete_default_sc` as it should already have been applied and will throw an error if not commented out. @@ -84,7 +97,11 @@ There is some approach/process to upgrade the version, find it and put it here. - 1.34.0 -- 2026-06-16 - add files to update + - main.tf - charts.yml - images.yml + - tags.tf + - tags.yml + - variables.eks.tf - addons/addons.yml - common-services/cluster-autoscaler/cluster-autoscaler.tf From 4562e779d0f073de043678279030b1de6ec48787 Mon Sep 17 00:00:00 2001 From: "Matthew C. Morgan" Date: Tue, 16 Jun 2026 20:15:26 -0400 Subject: [PATCH 3/3] comment out userdata --- examples/full-cluster-tf-upgrade/1.34/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/full-cluster-tf-upgrade/1.34/main.tf b/examples/full-cluster-tf-upgrade/1.34/main.tf index 1217645..e52c4e7 100644 --- a/examples/full-cluster-tf-upgrade/1.34/main.tf +++ b/examples/full-cluster-tf-upgrade/1.34/main.tf @@ -250,7 +250,7 @@ resource "aws_launch_template" "eks-nodegroup" { } } - user_data = base64encode(local.eks-node-private-userdata) + # user_data = base64encode(local.eks-node-private-userdata) } #### User data for worker launch