From 53e749d27ba7ad3bede9b4386e2d55d9fd38e73a Mon Sep 17 00:00:00 2001
From: arnol377
Date: Fri, 7 Feb 2025 20:24:25 -0500
Subject: [PATCH] Refactor EKS provider configuration for improved lifecycle
 management

---
Review notes (ignored by git am):
- The helm and kube provider files both generated
  data "aws_eks_cluster" "this" into the same module, which Terraform
  rejects as a duplicate; they are named "helm" and "kube" again, as in the
  code removed from root.hcl.
- Cluster name and region are interpolated by Terragrunt again
  ("${local.cluster_name}", "${local.aws_region}"), as in the removed code,
  so the generated .tf files do not depend on locals of the target module.
- The create_eks variable is generated into create-eks-variable.tf instead of
  overwriting the module's variables.tf, and its default now follows
  local.create_eks (TERRAGRUNT_CREATE_EKS).
- NOTE(review): root.hcl now carries include blocks while being an included
  file itself, and the _envcommon files reference local.* without a locals
  block of their own; confirm both against the Terragrunt include rules.
- NOTE(review): indentation was reconstructed from a whitespace-collapsed
  copy; apply with a whitespace-insensitive option if hunks do not match.

 lab/_envcommon/helm-provider.hcl              | 24 +++++++
 lab/_envcommon/kube-provider.hcl              | 22 ++++++
 .../vpc/platform-eng-eks-mcm/notes.txt        | 68 +++++++++++++++++++
 lab/root.hcl                                  | 47 ++++---------
 platform-tg-infra.code-workspace              | 29 ++++++++
 5 files changed, 155 insertions(+), 35 deletions(-)
 create mode 100644 lab/_envcommon/helm-provider.hcl
 create mode 100644 lab/_envcommon/kube-provider.hcl
 create mode 100644 lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt
 create mode 100644 platform-tg-infra.code-workspace

diff --git a/lab/_envcommon/helm-provider.hcl b/lab/_envcommon/helm-provider.hcl
new file mode 100644
index 0000000..4323624
--- /dev/null
+++ b/lab/_envcommon/helm-provider.hcl
@@ -0,0 +1,24 @@
+generate "helm-provider" {
+  path = "helm-provider.tf"
+  if_exists = "overwrite"
+  contents = <<-EOF
+%{ if startswith(local.module_name, "tfmod-eks-") ~}
+provider "helm" {
+  kubernetes {
+    host = try(data.aws_eks_cluster.helm[0].endpoint, "")
+    cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.helm[0].certificate_authority[0].data), "")
+    exec {
+      api_version = "client.authentication.k8s.io/v1beta1"
+      command = "aws"
+      args = ["eks", "get-token", "--cluster-name", "${local.cluster_name}", "--region", "${local.aws_region}"]
+    }
+  }
+}
+
+data "aws_eks_cluster" "helm" {
+  count = var.create_eks ? 1 : 0
+  name = "${local.cluster_name}"
+}
+%{ endif }
+EOF
+}
\ No newline at end of file
diff --git a/lab/_envcommon/kube-provider.hcl b/lab/_envcommon/kube-provider.hcl
new file mode 100644
index 0000000..1805019
--- /dev/null
+++ b/lab/_envcommon/kube-provider.hcl
@@ -0,0 +1,22 @@
+generate "kube-provider" {
+  path = "kube-provider.tf"
+  if_exists = "overwrite"
+  contents = <<-EOF
+%{ if startswith(local.module_name, "tfmod-eks-") ~}
+provider "kubernetes" {
+  host = try(data.aws_eks_cluster.kube[0].endpoint, "")
+  cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.kube[0].certificate_authority[0].data), "")
+  exec {
+    api_version = "client.authentication.k8s.io/v1beta1"
+    command = "aws"
+    args = ["eks", "get-token", "--cluster-name", "${local.cluster_name}", "--region", "${local.aws_region}"]
+  }
+}
+
+data "aws_eks_cluster" "kube" {
+  count = var.create_eks ? 1 : 0
+  name = "${local.cluster_name}"
+}
+%{ endif }
+EOF
+}
\ No newline at end of file
diff --git a/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt
new file mode 100644
index 0000000..71c3774
--- /dev/null
+++ b/lab/development/us-gov-east-1/vpc/platform-eng-eks-mcm/notes.txt
@@ -0,0 +1,68 @@
+Provider Configuration Changes and Cluster Lifecycle Management
+===============================================================
+
+Problem:
+--------
+The original provider configuration in root.hcl had issues handling different cluster lifecycle states:
+1. When no cluster exists - terragrunt run-all plan would fail
+2. When cluster is being created - terragrunt run-all apply needed to work
+3. When cluster is being destroyed - terragrunt run-all destroy needed to work
+
+The main issue was that the provider configurations were using data sources that required the cluster to exist, causing failures during planning when the cluster didn't exist.
+
+Solution:
+---------
+1. Moved provider configurations to separate files in _envcommon/:
+   - helm-provider.hcl
+   - kube-provider.hcl
+
+2. Added conditional data source lookup using count (named "helm" / "kube" per file):
+   data "aws_eks_cluster" "kube" {
+     count = var.create_eks ? 1 : 0
+     name = "${local.cluster_name}"
+   }
+
+3. Used try() function with empty fallback values:
+   host = try(data.aws_eks_cluster.kube[0].endpoint, "")
+   cluster_ca_certificate = try(base64decode(data.aws_eks_cluster.kube[0].certificate_authority[0].data), "")
+
+4. Added create_eks variable control:
+   - Added to root.hcl locals block
+   - Controlled via TERRAGRUNT_CREATE_EKS environment variable
+   - Defaults to "true"
+   - Generated as a variable in each module (create-eks-variable.tf)
+
+How it works:
+-------------
+1. No cluster exists:
+   - Set TERRAGRUNT_CREATE_EKS=false
+   - Data source won't be created (count = 0)
+   - Provider configurations fall back to empty values
+   - Plan succeeds as providers are configured but not used
+
+2. Creating cluster:
+   - TERRAGRUNT_CREATE_EKS=true (default)
+   - As soon as cluster exists, data source becomes available
+   - Provider configurations get real values
+   - Apply continues with working providers
+
+3. Destroying cluster:
+   - Set TERRAGRUNT_CREATE_EKS=false before destroy
+   - Providers fall back to empty values
+   - Resources can be destroyed without needing cluster access
+
+Usage:
+------
+1. For initial plan with no cluster:
+   export TERRAGRUNT_CREATE_EKS=false
+   terragrunt run-all plan
+
+2. For creating cluster and resources:
+   export TERRAGRUNT_CREATE_EKS=true (or don't set it)
+   terragrunt run-all apply
+
+3. For destroying everything:
+   export TERRAGRUNT_CREATE_EKS=false
+   terragrunt run-all destroy
+
+This solution allows Terragrunt to handle the full lifecycle of the cluster and its dependent resources without failing on provider initialization when the cluster doesn't exist.
\ No newline at end of file
diff --git a/lab/root.hcl b/lab/root.hcl
index 7253386..a746b5f 100644
--- a/lab/root.hcl
+++ b/lab/root.hcl
@@ -40,6 +40,7 @@ locals {
   terraform = local.cluster_vars.locals.terraform
   terragrunt = local.cluster_vars.locals.terragrunt
   module_name = get_terragrunt_dir()
+  create_eks = get_env("TERRAGRUNT_CREATE_EKS", "true")
 }
 
 # Configure Terragrunt to automatically store tfstate files in an S3 bucket
@@ -94,47 +95,23 @@ provider "aws" {
 EOF
 }
 
-generate "kube-provider" {
-  path = "kube-provider.tf"
-  if_exists = "overwrite"
-  contents = <<-EOF
-%{ if startswith(local.module_name, "tfmod-eks-") ~}
-data "aws_eks_cluster" "kube" {
-  name = "${local.cluster_name}"
+include "helm_provider" {
+  path = "${dirname(find_in_parent_folders())}/_envcommon/helm-provider.hcl"
 }
-provider "kubernetes" {
-  host = data.aws_eks_cluster.kube.endpoint
-  cluster_ca_certificate = base64decode(data.aws_eks_cluster.kube.certificate_authority[0].data)
-  exec {
-    api_version = "client.authentication.k8s.io/v1beta1"
-    command = "aws"
-    args = ["eks", "get-token", "--cluster-name", "${local.cluster_name}", "--region", "${local.aws_region}"]
-  }
-}
-%{ endif }
-EOF
+
+include "kube_provider" {
+  path = "${dirname(find_in_parent_folders())}/_envcommon/kube-provider.hcl"
 }
 
-generate "helm-provider" {
-  path = "helm-provider.tf"
+generate "variables" {
+  path = "create-eks-variable.tf"
   if_exists = "overwrite"
   contents = <<-EOF
-%{ if startswith(local.module_name, "tfmod-eks-") ~}
-data "aws_eks_cluster" "helm" {
-  name = "${local.cluster_name}"
-}
-provider "helm" {
-  kubernetes {
-    host = data.aws_eks_cluster.helm.endpoint
-    cluster_ca_certificate = base64decode(data.aws_eks_cluster.helm.certificate_authority[0].data)
-    exec {
-      api_version = "client.authentication.k8s.io/v1beta1"
-      command = "aws"
-      args = ["eks", "get-token", "--cluster-name", "${local.cluster_name}", "--region", "${local.aws_region}"]
-    }
-  }
+variable "create_eks" {
+  description = "Controls if EKS cluster should be created (affects all AWS resources)"
+  type = bool
+  default = ${local.create_eks}
 }
-%{ endif }
 EOF
 }
 
diff --git a/platform-tg-infra.code-workspace b/platform-tg-infra.code-workspace
new file mode 100644
index 0000000..dec7709
--- /dev/null
+++ b/platform-tg-infra.code-workspace
@@ -0,0 +1,29 @@
+{
+    "folders": [
+        {
+            "path": "."
+        },
+        {
+            "path": "../terraform-provider-github/website/docs/d",
+            "name": "provider/github/data-sources"
+        },
+        {
+            "path": "../terraform-provider-github/website/docs/r",
+            "name": "provider/github/resources"
+        },
+        {
+            "path": "../terraform/website/docs/language/tests",
+            "name": "terraform/tests"
+        },
+        {
+            "path": "../terraform/website/docs/language/syntax",
+            "name": "terraform/syntax"
+        },
+        {
+            "path": "../terragrunt"
+        }
+    ],
+    "settings": {
+
+    }
+}