Skip to content

update 1.34 and write docs #17

Open
wants to merge 3 commits into
base: tf-upgrade
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions examples/full-cluster-tf-upgrade/1.34/charts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ cluster-autoscaler:
documentation: "https://artifacthub.io/packages/helm/cluster-autoscaler/cluster-autoscaler"
name: "cluster-autoscaler"
repository: "https://kubernetes.github.io/autoscaler"
version: "9.52.1"
version: "9.57.0"
use_remote: true
cert-manager:
documentation: "https://artifacthub.io/packages/helm/cert-manager/cert-manager"
name: "cert-manager"
repository: "https://charts.jetstack.io"
version: "1.19.1"
version: "1.20.2"
use_remote: true
metrics-server:
# documentation: "https://artifacthub.io/packages/helm/bitnami/metrics-server"
Expand All @@ -17,7 +17,7 @@ metrics-server:
# repository: "https://charts.bitnami.com/bitnami"
repository: "https://kubernetes-sigs.github.io/metrics-server"
# version: "7.2.14"
version: "3.13.0"
version: "3.13.1"
use_remote: true
# new one, does not work yet
# repository: "oci://registry-1.docker.io/bitnamicharts"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module "role_cluster-autoscaler" {
source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts"

description = "EKS IAM Role for ${var.cluster_name} for service account ${var.cluster_autoscaler_namespace}:${var.cluster_autoscaler_name}"
name = format("%v%v-irsa__%v", local._prefixes["eks-role"], var.cluster_name, "autoscaler")
name = format("%v%v-irsa__%v", local._prefixes["eks-role"], var.cluster_name, "as")

attach_cluster_autoscaler_policy = true
cluster_autoscaler_cluster_names = [var.cluster_name]
Expand Down
20 changes: 10 additions & 10 deletions examples/full-cluster-tf-upgrade/1.34/images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ karpenter-controller:
source_tag: null
enabled: false
# tag: "1.0.6"
tag: "1.8.0"
tag: "1.13.0"
cluster-autoscaler:
documentation: "https://github.com/kubernetes/autoscaler/releases"
name: "cluster-autoscaler"
Expand All @@ -28,7 +28,7 @@ cert-manager-controller:
source_image: "jetstack/cert-manager-controller"
source_tag: null
enabled: true
tag: "v1.19.1"
tag: "v1.20.2"
cert-manager-cainjector:
documentation: "https://cert-manager.io/docs/releases/"
name: "cert-manager-cainjector"
Expand All @@ -38,7 +38,7 @@ cert-manager-cainjector:
source_image: "jetstack/cert-manager-cainjector"
source_tag: null
enabled: true
tag: "v1.19.1"
tag: "v1.20.2"
cert-manager-webhook:
documentation: "https://cert-manager.io/docs/releases/"
name: "cert-manager-webhook"
Expand All @@ -48,7 +48,7 @@ cert-manager-webhook:
source_image: "jetstack/cert-manager-webhook"
source_tag: null
enabled: true
tag: "v1.19.1"
tag: "v1.20.2"
cert-manager-ctl:
documentation: "https://cert-manager.io/docs/releases/"
name: "cert-manager-ctl"
Expand Down Expand Up @@ -83,7 +83,7 @@ metrics-server:
source_tag: null
enabled: true
## tag: "v0.7.2"
tag: "v0.8.0"
tag: "v0.8.1"
istio-operator:
documentation: "https://istio.io/latest/docs/releases/supported-releases"
name: "istio/operator"
Expand All @@ -104,7 +104,7 @@ istio-pilot:
source_tag: null
enabled: true
# tag: "1.25.3"
tag: "1.28.0"
tag: "1.30.1"
istio-proxyv2:
documentation: "https://istio.io/latest/docs/releases/supported-releases"
name: "istio/proxyv2"
Expand All @@ -115,7 +115,7 @@ istio-proxyv2:
source_tag: null
enabled: true
# tag: "1.25.3"
tag: "1.28.0"
tag: "1.30.1"
prometheus:
documentation: "https://hub.docker.com/r/bitnami/prometheus/tags"
name: "prometheus"
Expand All @@ -128,7 +128,7 @@ prometheus:
enabled: true
# tag: "3.0.1"
# tag: "v2.54.0"
tag: "v3.8.0"
tag: "v3.12.0"
prometheus-operator:
# documentation: "https://hub.docker.com/r/bitnami/prometheus-operator/tags"
name: "prometheus-operator"
Expand All @@ -143,7 +143,7 @@ prometheus-operator:
enabled: true
# tag: "0.79.2"
## tag: "v0.74.0"
tag: "v0.87.0"
tag: "v0.91.0"
alertmanager:
# documentation: "https://hub.docker.com/r/bitnami/alertmanager/tags"
name: "alertmanager"
Expand All @@ -155,4 +155,4 @@ alertmanager:
enabled: true
# tag: "0.27.0"
## tag: "v0.28.0"
tag: "v0.29.0"
tag: "v0.33.0"
20 changes: 20 additions & 0 deletions examples/full-cluster-tf-upgrade/1.34/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ locals {
autoscale_tags = {
format("k8s.io/cluster-autoscaler/%v", var.cluster_name) = "owned"
"k8s.io/cluster-autoscaler/enabled" = "TRUE"
"PowerSchedule" = var.power_schedule
}

}
Expand Down Expand Up @@ -78,6 +79,7 @@ resource "aws_eks_cluster" "eks_cluster" {
}

tags = merge(
module.tags.tags,
local.base_tags,
local.common_tags,
var.tags,
Expand All @@ -92,6 +94,18 @@ resource "aws_eks_cluster" "eks_cluster" {
]
}

# Adds a "PowerSchedule" tag (value from var.power_schedule) to the Auto Scaling
# group that backs the EKS managed node group, and propagates it at launch.
resource "aws_autoscaling_group_tag" "power-schedule" {
# Only the first Auto Scaling group of the node group is tagged
# (resources[0].autoscaling_groups[0]).
# Use for_each if you have multiple node groups or tags
autoscaling_group_name = aws_eks_node_group.eks-nodegroup.resources[0].autoscaling_groups[0].name

tag {
key = "PowerSchedule"
value = var.power_schedule
propagate_at_launch = true
}
# Explicit ordering on the node group; the name reference above already
# implies this dependency.
depends_on = [aws_eks_node_group.eks-nodegroup]
}

resource "aws_eks_node_group" "eks-nodegroup" {
cluster_name = aws_eks_cluster.eks_cluster.name
node_group_name = format("%v%v-nodegroup", local._prefixes["eks"], var.cluster_name)
Expand All @@ -113,6 +127,7 @@ resource "aws_eks_node_group" "eks-nodegroup" {
}

tags = merge(
module.tags.tags,
local.base_tags,
local.common_tags,
var.tags,
Expand Down Expand Up @@ -149,6 +164,7 @@ locals {
launch_template_tags = {
"Name" = format("%v%v-nodegroup-instance-name", local._prefixes["eks"], var.cluster_name)
format("kubernetes.io/cluster/%v", var.cluster_name) = "owned"
"PowerSchedule" = var.power_schedule
}
}

Expand All @@ -162,6 +178,7 @@ resource "aws_launch_template" "eks-nodegroup" {
vpc_security_group_ids = [aws_security_group.extra_cluster_sg.id]

tags = merge(
module.tags.tags,
local.base_tags,
local.common_tags,
var.tags,
Expand All @@ -172,6 +189,7 @@ resource "aws_launch_template" "eks-nodegroup" {
resource_type = "instance"

tags = merge(
module.tags.tags,
local.base_tags,
{ "boc:created_by" = "eks-launch-template" },
local.common_tags,
Expand All @@ -185,6 +203,7 @@ resource "aws_launch_template" "eks-nodegroup" {
resource_type = "volume"

tags = merge(
module.tags.tags,
local.base_tags,
{ "boc:created_by" = "eks-launch-template" },
local.common_tags,
Expand All @@ -197,6 +216,7 @@ resource "aws_launch_template" "eks-nodegroup" {
resource_type = "network-interface"

tags = merge(
module.tags.tags,
local.base_tags,
{ "boc:created_by" = "eks-launch-template" },
local.common_tags,
Expand Down
10 changes: 10 additions & 0 deletions examples/full-cluster-tf-upgrade/1.34/tags.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Instantiates the shared "tags" module; its `tags` output is merged into
# resource tags as module.tags.tags.
module "tags" {
source = "git@github.e.it.census.gov:terraform-modules/boc-nts//tags"
# Path to tags.yml in the root module directory.
filename = format("%v/%v", path.root, "tags.yml")

# Existing tag variables merged into one map; with merge(), later arguments
# win on duplicate keys.
# NOTE(review): how the module combines legacy_tags with tags.yml is not
# visible here -- confirm against the module source.
legacy_tags = merge(
var.account_tags,
var.infrastructure_tags,
var.application_tags,
)
}
4 changes: 4 additions & 0 deletions examples/full-cluster-tf-upgrade/1.34/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
finops:
number: <your-finops_project_number>
name: <your-cluster-name>
role: +eks
6 changes: 6 additions & 0 deletions examples/full-cluster-tf-upgrade/1.34/variables.eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,9 @@ variable "contact_email" {
description = "Email address in @census.gov of contact for the certificate. This is strongly recommended to be a group email address."
type = string
}

# Value used for the "PowerSchedule" tag on the cluster's autoscaling-related resources.
# NOTE(review): the default is not always-on; the upgrade README says to set
# `always-on` in settings.auto.tfvars for always-on behavior.
variable "power_schedule" {
description = "The PowerSchedule tag value to apply to the cluster and cluster autoscaler resources. This is used by the cluster autoscaler to determine when to scale down nodes during off hours. The value must match a PowerSchedule defined in the PowerScheduler service."
type = string
default = "weekday-7a-7p"
}
70 changes: 60 additions & 10 deletions examples/full-cluster-tf-upgrade/README.upgrade-1.33-1.34.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,68 @@
# EKS Upgrade 1.33 to 1.34

This change has ONLY the version of 1.33 -> 1.34 and the addons file processing. It is missing
update to the AMI (to AL2023) and any charts or image changes.
This change has ONLY the version of 1.33 -> 1.34 and the addons file processing.

## Copy Files

Copy files from 1.34/{path} to eks-{clustername}/{path}

* versions.tf
* addons/addons.tf
* main.tf
* charts.yml
* images.yml
* tags.tf
* tags.yml
* addons/addons.yml
* common-services/cluster-autoscaler/cluster-autoscaler.tf

## Update Files

Update `cluster_version` from 1.33 to 1.34 in

* settings.auto.tfvars

## Apply changes
Add `power_schedule` in

* settings.auto.tfvars

**WARNING**: the default value of `power_schedule` is `weekday-7a-7p`. Set it to `always-on` if the cluster must stay on at all times.

Apply in various directories where changes happened
Replace the placeholder FinOps project number (`number`) and project name (`name`) under `finops` in

* tags.yml

## Apply changes
Ensure `includes.d/parent_rs.tf` is correct.
If the cluster is being upgraded (rather than newly created), comment out the `delete_default_sc` resource in `ebs-encryption.tf` (lines 50-58). It should already have been applied and will throw an error if it is not commented out.

NOTE: `tf-run` does not navigate back automatically after running in `irsa-roles/`; go back to the cluster folder and run `tf-run apply 37`.
NOTE: `tf-run` does not navigate back automatically after running in `common-services/cluster-autoscaler`; go back to the cluster folder and run `tf-run apply 41`.


BEST PRACTICE: starting from the cluster folder, run `tf-run apply` and follow the prompts:
```
- tf-run apply
- tf-run apply 4
- tf-run apply 20
- cd aws-auth; tf-run apply; cd ..;
- tf-run apply 31
- cd efs; tf-run apply; cd ..;
- tf-run apply 33
- cd addons; tf-run apply; cd ..;
- tf-run apply 35
- cd irsa-roles; tf-run apply; cd ..;
- tf-run apply 37
- cd common-services; tf-run apply; cd cluster-autoscaler; tf-run apply; cd ../../;
- tf-run apply 41
- tf-run apply 43
```
By running this way we ensure that tag updates are propagated to all resources.

Otherwise, apply in each directory where changes happened:

* (main)
* addons/
* common-services/
* common-services/cluster-autoscaler

TODO: There is an established approach/process for upgrading the version; find it and document it here.

Expand All @@ -46,12 +86,22 @@ There is some approach/process to upgrade the version, find it and put it here.
- addons/addon_cloudwatch.tf and
- common-services/cluster-autoscaler/cluster-autoscaler.tf

- 1.33.0 -- 2026-04-16
- 1.33.0 -- 2026-05-05
- ami updated from `AL2_x86_64` to `AL2023_x86_64_STANDARD`
- user data no longer required
- add files to update
- addons/addons.tf
- main.tf
- charts.yml
- images.yml
- addons/addons.yml

- 1.34.0 -- 2026-04-16
- 1.34.0 -- 2026-06-16
- add files to update
- addons/addons.tf
- main.tf
- charts.yml
- images.yml
- tags.tf
- tags.yml
- variables.eks.tf
- addons/addons.yml
- common-services/cluster-autoscaler/cluster-autoscaler.tf