diff --git a/.tflint.hcl b/.tflint.hcl index 684d807..ab8ea66 100644 --- a/.tflint.hcl +++ b/.tflint.hcl @@ -4,18 +4,18 @@ config { disabled_by_default = false } -rule "aws_instance_invalid_type" { - enabled = true -} +# rule "aws_instance_invalid_type" { +# enabled = true +# } -plugin "aws" { - enabled = true - version = "0.32.0" - source = "github.com/terraform-linters/tflint-ruleset-aws" -} +# plugin "aws" { +# enabled = true +# version = "0.32.0" +# source = "github.com/terraform-linters/tflint-ruleset-aws" +# } -plugin "terraform" { - enabled = true - version = "0.9.0" - source = "github.com/terraform-linters/tflint-ruleset-terraform" -} +# plugin "terraform" { +# enabled = true +# version = "0.9.0" +# source = "github.com/terraform-linters/tflint-ruleset-terraform" +# } diff --git a/README.md b/README.md index d0313b0..43fe01a 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,110 @@ # tfmod-eks -Create an EKS cluster given the specification of the cluster. -The module creates an EKS cluster named `cluster_name` in the region using kubernetes version `cluster_version` with `eks_ng_desired_size` nodes initially. The services in the cluster will be accessible using hostnames for the services ending with the `cluster_name.domain` fully qualified domain name. The nodegroup for karpenter will resize based upon capacity from a minimum of `eks_ng_min_size` to a maximum of `eks_ng_max_size`. After initial deployment, karpenter will create a node group for workloads that will autoscale using on-demand or spot instances with compaction based upon running workloads. +Creates and configures an Amazon EKS cluster with comprehensive node management and essential addons. -The cluster is configured with an oidc provider allowing service accounts to be configured with IRSA roles as needed. 
+## Overview + +This module provisions an EKS cluster with the following key features: +- Kubernetes version configurable via `cluster_version` +- Initial node group sized from `eks_ng_min_size` to `eks_ng_max_size` with `eks_ng_desired_size` target +- Bottlerocket-based managed node groups +- Comprehensive IRSA (IAM Roles for Service Accounts) configuration +- Full addon integration +- Automatic node tagging and security group configuration ## Addons -Addons installed: +The following addons are automatically installed and configured: -* amazon-cloudwatch-observability -* aws-ebs-csi-driver -* aws-efs-csi-driver +* amazon-cloudwatch-observability (with IRSA) +* aws-ebs-csi-driver (with IRSA) +* aws-efs-csi-driver (with IRSA) * coredns +* eks-pod-identity-agent * kube-proxy * snapshot-controller +* vpc-cni (with IRSA) + +## Usage Example + +```hcl +module "eks" { + source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-eks.git" + + cluster_name = "my-cluster" + cluster_version = "1.28" + vpc_name = "my-vpc" + + eks_ng_min_size = 3 + eks_ng_desired_size = 4 + eks_ng_max_size = 10 + + tags = { + Environment = "production" + Team = "platform" + } +} +``` + +## Deployment ### Apply -Successful completion should show: -```terraform -Apply complete! Resources: 80 added, 0 changed, 0 destroyed. +Typical deployment time is around 12-15 minutes: +```bash +Apply complete! Resources: 77 added, 0 changed, 0 destroyed. real 12m24.922s -user 0m17.709s -sys 0m2.079s ``` ### Destroy -Successful destroy should show: -```terraform -Destroy complete! Resources: 80 destroyed. +Clean removal takes approximately 10-12 minutes: +```bash +Destroy complete! Resources: 77 destroyed. real 10m48.444s -user 0m17.624s -sys 0m2.040s ``` ### Verification -To verify the nodes were created, use kubectl like -`kubectl get nodes` which should ouput: - +1. 
Check node status: ```bash -[morga471@iebcloud terraform]$ k get nodes +$ kubectl get nodes NAME STATUS ROLES AGE VERSION -ip-10-129-62-142.us-gov-east-1.compute.internal Ready 4m7s v1.30.1-eks-e564799 +ip-10-129-62-142.us-gov-east-1.compute.internal Ready 4m7s v1.28.1-eks-e564799 ``` -To verify the pods for the base cluster were created, use kubectl like -`k get pods -A` - +2. Verify addon deployments: ```bash -[morga471@iebcloud terraform]$ k get pods -A -NAMESPACE NAME READY STATUS RESTARTS AGE -amazon-cloudwatch amazon-cloudwatch-observability-controller-manager-5c9d9677h2xf 1/1 Running 0 2m58s -amazon-cloudwatch cloudwatch-agent-95g5j 1/1 Running 0 2m52s -amazon-cloudwatch fluent-bit-cnp45 1/1 Running 0 2m58s -kube-system aws-node-dsdqg 2/2 Running 0 3m8s -kube-system coredns-5479bb6d65-c6x79 1/1 Running 0 3m11s -kube-system coredns-5479bb6d65-j74cm 1/1 Running 0 3m11s -kube-system ebs-csi-controller-7cd8c597d7-96j7p 6/6 Running 0 3m10s -kube-system ebs-csi-controller-7cd8c597d7-m48j8 6/6 Running 0 3m9s -kube-system ebs-csi-node-x555s 3/3 Running 0 3m10s -kube-system efs-csi-controller-85c5486b89-rdjk8 3/3 Running 0 3m9s -kube-system efs-csi-controller-85c5486b89-v6fx2 3/3 Running 0 3m9s -kube-system efs-csi-node-tz47s 3/3 Running 0 3m9s -kube-system kube-proxy-qbgbk 1/1 Running 0 4m12s -kube-system snapshot-controller-7f8d9b84dd-7w7qz 1/1 Running 0 3m10s -kube-system snapshot-controller-7f8d9b84dd-nqqsb 1/1 Running 0 3m10s +$ kubectl get pods -n kube-system +NAME READY STATUS RESTARTS AGE +aws-node-dsdqg 2/2 Running 0 3m8s +coredns-5479bb6d65-c6x79 1/1 Running 0 3m11s +ebs-csi-controller-7cd8c597d7-96j7p 6/6 Running 0 3m10s +efs-csi-node-tz47s 3/3 Running 0 3m9s +kube-proxy-qbgbk 1/1 Running 0 4m12s ``` -#### Changelog -Change logs are auto-generated with commitizen. - -[CHANGELOG.md](CHANGELOG.md) +3. 
Check IRSA configuration: +```bash +$ kubectl get serviceaccount -n kube-system +NAME SECRETS AGE +aws-node 0 5m +ebs-csi-controller 0 5m +efs-csi-controller 0 5m +``` +## Documentation ## Requirements | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [aws](#requirement\_aws) | >= 5.14.0 | +| [aws](#requirement\_aws) | ~> 5.14.0 | ## Providers | Name | Version | |------|---------| | [aws](#provider\_aws) | 5.84.0 | +| [terraform](#provider\_terraform) | n/a | ## Modules @@ -104,6 +124,7 @@ Change logs are auto-generated with commitizen. | [aws_security_group.additional_eks_cluster_sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | | [aws_security_group.all_worker_mgmt](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | | [aws_security_group_rule.allow_sidecar_injection](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule) | resource | +| [terraform_data.subnet_validation](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_arn.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/arn) | data source | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_ebs_default_kms_key.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ebs_default_kms_key) | data source | @@ -122,18 +143,18 @@ Change logs are auto-generated with commitizen. |------|-------------|------|---------|:--------:| | [access\_entries](#input\_access\_entries) | Map of access entries to add to the cluster | `any` | `{}` | no | | [census\_private\_cidr](#input\_census\_private\_cidr) | Census Private CIR Blocks | `list(string)` |
[
"148.129.0.0/16",
"172.16.0.0/12",
"192.168.0.0/16"
]
| no | -| [cluster\_endpoint\_public\_access](#input\_cluster\_endpoint\_public\_access) | This allows to access the cluster from IEB cloud host | `bool` | `false` | no | +| [cluster\_endpoint\_public\_access](#input\_cluster\_endpoint\_public\_access) | Whether the EKS cluster API server endpoint is publicly accessible | `bool` | `false` | no | | [cluster\_name](#input\_cluster\_name) | EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev) | `string` | n/a | yes | -| [cluster\_version](#input\_cluster\_version) | The Kubernetes version number to use for this EKS cluster. See https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html | `string` | `"1.27"` | no | -| [eks\_instance\_disk\_size](#input\_eks\_instance\_disk\_size) | The size of the disk of the worker nodes in gigabytes. 40 is the approximate minimum. Needs to hold the all of the normal operating system files plus every image that will be used in the cluster. | `number` | `80` | no | -| [eks\_instance\_types](#input\_eks\_instance\_types) | EKS worker node instance types | `list(string)` |
[
"t3a.large"
]
| no | -| [eks\_ng\_desired\_size](#input\_eks\_ng\_desired\_size) | Node Group desired size | `number` | `4` | no | -| [eks\_ng\_max\_size](#input\_eks\_ng\_max\_size) | Node Group maximum size | `number` | `15` | no | -| [eks\_ng\_min\_size](#input\_eks\_ng\_min\_size) | Node Group minimum size | `number` | `4` | no | -| [enable\_cluster\_creator\_admin\_permissions](#input\_enable\_cluster\_creator\_admin\_permissions) | Indicates whether or not to add the cluster creator (the identity used by Terraform) as an administrator via access entry | `bool` | `false` | no | -| [subnets\_name](#input\_subnets\_name) | Define the name of the subnets to be used by this cluster | `string` | `"*-container-*"` | no | -| [tags](#input\_tags) | AWS Tags to apply to appropriate resources | `map(string)` | `{}` | no | -| [vpc\_name](#input\_vpc\_name) | Define the VPC name that will be used by this cluster | `string` | n/a | yes | +| [cluster\_version](#input\_cluster\_version) | Kubernetes version to use for the EKS cluster | `string` | n/a | yes | +| [eks\_instance\_disk\_size](#input\_eks\_instance\_disk\_size) | Size of the EKS node disk in GB | `number` | `80` | no | +| [eks\_instance\_types](#input\_eks\_instance\_types) | List of EC2 instance types for the EKS node group | `list(string)` |
[
"t3a.large"
]
| no | +| [eks\_ng\_desired\_size](#input\_eks\_ng\_desired\_size) | Desired size of the EKS node group | `number` | `4` | no | +| [eks\_ng\_max\_size](#input\_eks\_ng\_max\_size) | Maximum size of the EKS node group | `number` | `15` | no | +| [eks\_ng\_min\_size](#input\_eks\_ng\_min\_size) | Minimum size of the EKS node group | `number` | `4` | no | +| [enable\_cluster\_creator\_admin\_permissions](#input\_enable\_cluster\_creator\_admin\_permissions) | Grant admin permissions to the cluster creator | `bool` | `false` | no | +| [subnets\_name](#input\_subnets\_name) | Name pattern for subnets to be used by EKS cluster | `string` | `"*-container-*"` | no | +| [tags](#input\_tags) | Additional tags to apply to all resources | `map(string)` | `{}` | no | +| [vpc\_name](#input\_vpc\_name) | Name of the VPC where EKS cluster will be created | `string` | n/a | yes | ## Outputs diff --git a/main.tf b/main.tf index 068d433..fdec46c 100644 --- a/main.tf +++ b/main.tf @@ -43,6 +43,17 @@ locals { vpc_id = data.aws_vpc.eks_vpc.id } +resource "terraform_data" "subnet_validation" { + # Validation-only resource, deliberately without count: count = 0 would skip the precondition and a non-numeric count aborts before error_message is shown. + + lifecycle { + precondition { + condition = length(local.subnets) >= 2 + error_message = "At least 2 subnets in different AZs are required for EKS cluster." + } + } +} + module "cluster" { source = "git@github.e.it.census.gov:SCT-Engineering/terraform-aws-eks.git?ref=v20.33.1" @@ -131,16 +142,6 @@ module "cluster" { labels = { intent = "control-apps" } - # This cannot be enabled until karpenter is available.
- # taints = { - # # The pods that do not tolerate this taint should run on nodes - # # created by Karpenter - # karpenter = { - # key = "karpenter.sh/controller" - # value = "true" - # effect = "NO_SCHEDULE" - # } - # } } } tags = local.tags diff --git a/requirements.tf b/requirements.tf index 1c41a9b..7cef9f7 100644 --- a/requirements.tf +++ b/requirements.tf @@ -4,7 +4,7 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = ">= 5.14.0" + version = ">= 5.14.0, < 6.0.0" # keep the original floor and cap only the next major; "~> 5.14.0" would allow 5.14.x only } } } diff --git a/variables.tf b/variables.tf index 2aac95c..925c44f 100644 --- a/variables.tf +++ b/variables.tf @@ -1,67 +1,110 @@ variable "cluster_name" { description = "EKS cluster name name component used through out the EKS cluster describing its purpose (ex: dice-dev)" type = string + validation { + condition = can(regex("^[a-zA-Z][a-zA-Z0-9-]*$", var.cluster_name)) && length(var.cluster_name) <= 100 + error_message = "Cluster name must start with a letter, can only contain letters, numbers, and hyphens, and must be no longer than 100 characters." + } } variable "cluster_version" { - description = "The Kubernetes version number to use for this EKS cluster.
See https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html" + description = "Kubernetes version to use for the EKS cluster" type = string - default = "1.27" + validation { + condition = can(regex("^[0-9]+\\.[0-9]+$", var.cluster_version)) && contains(["1.27", "1.28", "1.29", "1.30", "1.31", "1.32"], var.cluster_version) + error_message = "Cluster version must be in the format 'x.y' (e.g., '1.27') and must be one of: 1.27, 1.28, 1.29, 1.30, 1.31, 1.32" + } } variable "cluster_endpoint_public_access" { - description = "This allows to access the cluster from IEB cloud host" + description = "Whether the EKS cluster API server endpoint is publicly accessible" type = bool default = false } variable "enable_cluster_creator_admin_permissions" { - description = "Indicates whether or not to add the cluster creator (the identity used by Terraform) as an administrator via access entry" + description = "Grant admin permissions to the cluster creator" type = bool default = false } variable "vpc_name" { - description = "Define the VPC name that will be used by this cluster" + description = "Name of the VPC where EKS cluster will be created" type = string + validation { + condition = can(regex("^[a-zA-Z0-9-]+$", var.vpc_name)) && length(var.vpc_name) <= 255 + error_message = "VPC name can only contain alphanumeric characters and hyphens, and must be <= 255 characters." + } } variable "subnets_name" { - description = "Define the name of the subnets to be used by this cluster" + description = "Name pattern for subnets to be used by EKS cluster" type = string default = "*-container-*" + validation { + condition = can(regex("^[a-zA-Z0-9*._-]+$", var.subnets_name)) + error_message = "Subnet name pattern can only contain alphanumeric characters, hyphens, dots, underscores, and asterisks." + } } variable "eks_instance_disk_size" { - description = "The size of the disk of the worker nodes in gigabytes. 40 is the approximate minimum. 
Needs to hold the all of the normal operating system files plus every image that will be used in the cluster." + description = "Size of the EKS node disk in GB" type = number default = 80 + validation { + condition = var.eks_instance_disk_size >= 20 && var.eks_instance_disk_size <= 16384 + error_message = "Instance disk size must be between 20 GB and 16384 GB." + } } variable "eks_instance_types" { - # NOTE: Given the current eks cluster defaults, t3a.large is the smallest node - # that can be used to successfully build the clsuter - description = "EKS worker node instance types" + description = "List of EC2 instance types for the EKS node group" type = list(string) default = [ "t3a.large" ] + validation { + condition = length(var.eks_instance_types) > 0 + error_message = "At least one instance type must be specified." + } + validation { + condition = alltrue([for t in var.eks_instance_types : can(regex("^[a-z][a-z0-9-]*[.][a-z0-9-]+$", t))]) # family part may carry suffix letters or hyphens (t3a, c6gn, u-6tb1), so the default t3a.large passes + error_message = "Instance types must be valid EC2 instance type formats (e.g., t3.large, m5.xlarge)." + } } variable "eks_ng_min_size" { - description = "Node Group minimum size" + description = "Minimum size of the EKS node group" type = number default = 4 + validation { + condition = var.eks_ng_min_size >= 1 + error_message = "Minimum node group size must be at least 1." + } } + variable "eks_ng_desired_size" { - description = "Node Group desired size" + description = "Desired size of the EKS node group" type = number default = 4 + validation { + condition = var.eks_ng_desired_size >= var.eks_ng_min_size && var.eks_ng_desired_size <= var.eks_ng_max_size + error_message = "Desired size must be between minimum and maximum sizes." + } + validation { + condition = var.eks_ng_desired_size >= 1 + error_message = "Desired size must be at least 1."
+ } } + variable "eks_ng_max_size" { - description = "Node Group maximum size" + description = "Maximum size of the EKS node group" type = number default = 15 + validation { + condition = var.eks_ng_max_size >= var.eks_ng_min_size + error_message = "Maximum node group size must be greater than or equal to minimum size." + } } # tflint-ignore: terraform_unused_declarations @@ -69,6 +112,12 @@ variable "access_entries" { description = "Map of access entries to add to the cluster" type = any default = {} + validation { + condition = alltrue([ + for k, v in var.access_entries : can(v.principal_arn) && can(v.policy_associations) + ]) + error_message = "Each access entry must contain 'principal_arn' and 'policy_associations'." + } } ################################################################### @@ -79,10 +128,24 @@ variable "census_private_cidr" { description = "Census Private CIR Blocks" type = list(string) default = ["148.129.0.0/16", "172.16.0.0/12", "192.168.0.0/16"] + validation { + condition = alltrue([ + for cidr in var.census_private_cidr : can(cidrhost(cidr, 0)) + ]) + error_message = "All CIDR blocks must be in valid CIDR notation (e.g., '10.0.0.0/16')." + } } variable "tags" { - description = "AWS Tags to apply to appropriate resources" + description = "Additional tags to apply to all resources" type = map(string) default = {} + validation { + condition = length(var.tags) <= 45 + error_message = "Maximum number of tags allowed is 45." + } + validation { + condition = alltrue([for k, v in var.tags : length(k) <= 128 && length(v) <= 256 && can(regex("^[\\w\\s+=.:/@-]*$", k)) && can(regex("^[\\w\\s+=.:/@-]*$", v))]) # ':' and '/' are allowed by AWS and appear in keys such as kubernetes.io/cluster/<name> + error_message = "Tag keys must be <= 128 chars, values <= 256 chars, and both can only contain alphanumeric characters, spaces, and '.:/+-=@_'." + } }