diff --git a/.github/workflows/terraform-release.yaml b/.github/workflows/terraform-release.yaml
new file mode 100644
index 0000000..90910bc
--- /dev/null
+++ b/.github/workflows/terraform-release.yaml
@@ -0,0 +1,87 @@
+# Release workflow for this Terraform module.
+# Runs when a pull request targeting main is merged, or on manual dispatch.
+name: Terraform CI/CD
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [closed]
+    branches:
+      - main
+jobs:
+  terraform-ci-cd:
+    # "closed" also fires for pull requests closed without merging; release
+    # only on a real merge (manual dispatch is always allowed).
+    if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
+    # The runner label is all digits; quote it so YAML keeps it a string.
+    runs-on: "229685449397"
+    permissions:
+      contents: write
+
+    steps:
+      - name: Checkout code
+        uses: CSVD/gh-actions-checkout@v4
+
+      - name: Setup Terraform
+        uses: CSVD/gh-actions-setup-terraform@v3
+        with:
+          terraform_version: "1.9.1"
+
+      - name: Setup GITHUB Credentials
+        id: github_credentials
+        uses: CSVD/gh-auth@main
+        with:
+          github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }}
+          github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }}
+          github_app_id: ${{ vars.GH_APP_ID }}
+
+
+      - name: Debug Authentication
+        # Expressions are handed to the script through env instead of being
+        # expanded inside it, so their values are never parsed as shell code.
+        env:
+          SERVER_URL: ${{ github.server_url }}
+          APP_TOKEN: ${{ steps.github_credentials.outputs.github_token }}
+        run: |
+          # Print the GitHub server URL
+          echo "GitHub Server URL: $SERVER_URL"
+
+          # Extract the host from the URL
+          HOST="$SERVER_URL"
+          HOST="${HOST#*//}"
+          HOST="${HOST%%/*}"
+          echo "GitHub Host: $HOST"
+
+          # Check if token exists
+          if [[ -n "$APP_TOKEN" ]]; then
+            echo "Token generated successfully"
+            # Test the token with a simple GitHub API call (without exposing the token)
+            STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer $APP_TOKEN" "$SERVER_URL/api/v3/user")
+            echo "API Test Status Code: $STATUS"
+          else
+            echo "No token was generated!"
+          fi
+
+      - name: Setup GitHub CLI
+        # The variable is deliberately not named GH_TOKEN / GH_ENTERPRISE_TOKEN:
+        # gh refuses to store credentials while one of those is set.
+        env:
+          APP_TOKEN: ${{ steps.github_credentials.outputs.github_token }}
+        run: |
+          # Force manual authentication since setup-git might not work with GitHub Enterprise
+          # The token is piped over stdin so it is never written to disk.
+          printf '%s\n' "$APP_TOKEN" | gh auth login --with-token --hostname "github.e.it.census.gov"
+
+          # Test GitHub CLI auth status
+          gh auth status || echo "GitHub CLI authentication failed"
+
+      - name: AWS Auth
+        id: aws_auth
+        uses: CSVD/aws-auth@main
+        with:
+          ecs: true
+
+      - name: Run Terraform Module Release Action
+        uses: CSVD/terraform-module-release@main
+        with:
+          github-token: ${{ steps.github_credentials.outputs.github_token }}
+          working-directory: '.'
diff --git a/.github/workflows/terraform-validate.yaml b/.github/workflows/terraform-validate.yaml
new file mode 100644
index 0000000..72829d8
--- /dev/null
+++ b/.github/workflows/terraform-validate.yaml
@@ -0,0 +1,50 @@
+# Validates the Terraform configuration on every pull request, or on demand.
+name: Terraform Validate
+on:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+
+  terraform-validate:
+    runs-on: "229685449397"
+    permissions:
+      # NOTE(review): validation probably only needs contents: read - confirm
+      # that CSVD/terraform-validate never pushes before tightening this.
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: CSVD/gh-actions-checkout@v4
+
+      - name: Setup Terraform
+        # NOTE(review): the release workflow pins setup action v3 and
+        # Terraform 1.9.1 - confirm the older pins here are intentional.
+        uses: CSVD/gh-actions-setup-terraform@v2
+        with:
+          terraform_version: '1.7.3'
+
+      - name: Validate Terraform Configuration
+        id: validate
+        uses: CSVD/terraform-validate@main
+
+      - name: Check Validation/Test Results
+        if: always()
+        # Step outputs reach the script through env, not inline expansion:
+        # validator stderr may contain quotes, "$" or backticks that would
+        # otherwise be parsed as shell code.
+        env:
+          IS_VALID: ${{ steps.validate.outputs.is_valid }}
+          TESTS_PASSED: ${{ steps.validate.outputs.tests_passed }}
+          VALIDATE_STDERR: ${{ steps.validate.outputs.stderr }}
+        run: |
+          # Treat empty or missing outputs as "false"
+          IS_VALID="${IS_VALID:-false}"
+          TESTS_PASSED="${TESTS_PASSED:-false}"
+
+          if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then
+            echo "Validation or test errors found:"
+            printf '%s\n' "$VALIDATE_STDERR"
+            exit 1
+          else
+            echo "All validations and tests passed successfully!"
+ fi diff --git a/.github/workflows/terragrunt-cicd.yml b/.github/workflows/terragrunt-cicd.yml deleted file mode 100644 index a78523e..0000000 --- a/.github/workflows/terragrunt-cicd.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: 'Terraform Module CI' - -on: - push: - branches: - - main - paths: - - '**/*.hcl' - - '**/*.tf' - pull_request: - branches: - - main - paths: - - '**/*.hcl' - - '**/*.tf' - -permissions: - contents: read - pull-requests: write - -jobs: - validate: - name: 'Validate Module' - runs-on: self-hosted - - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v2 - with: - terraform_version: 1.5.0 - - - name: Terraform Init - run: | - terraform init -backend=false - - - name: Terraform Format - run: | - terraform fmt -check - - - name: Terraform Validate - run: | - terraform validate - - - name: Run tflint - uses: terraform-linters/setup-tflint@v3 - if: github.event_name == 'pull_request' - - - name: Lint Terraform - if: github.event_name == 'pull_request' - run: | - tflint --format compact - - release: - name: 'Create Release' - needs: validate - if: github.ref == 'refs/heads/main' && github.event_name == 'push' - runs-on: self-hosted - permissions: - contents: write - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.9' - - - name: Install Commitizen - run: | - pip install commitizen - - - name: Configure Git - run: | - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - - - name: Bump Version and Generate Changelog - id: cz - run: | - cz bump --yes - echo "new_version=$(cz version --project)" >> $GITHUB_OUTPUT - echo "changelog=$(cz changelog --dry-run)" >> $GITHUB_OUTPUT - - - name: Create Release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: 
- tag_name: v${{ steps.cz.outputs.new_version }} - release_name: Release v${{ steps.cz.outputs.new_version }} - draft: false - prerelease: false - body: ${{ steps.cz.outputs.changelog }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2675093..0e4a8bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,7 +49,7 @@ repos: # Terraform Hooks - repo: https://github.com/antonbabenko/pre-commit-terraform - rev: v1.97.3 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases + rev: v1.98.0 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases hooks: - id: terraform_fmt args: @@ -106,6 +106,6 @@ repos: # - --hook-config=--parallelism-ci-cpu-cores=2 - repo: https://github.com/ljnsn/cz-conventional-gitmoji - rev: v0.6.1 + rev: v0.7.0 hooks: - id: conventional-gitmoji diff --git a/README.md b/README.md index efb515c..977b61c 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,56 @@ # tfmod-loki -Installs the loki as the log aggregation sink, and promtail to forward the logs -to loki. - -* Requires additional Node HD space - 40GB is not enough. 
- -# tfmod-loki - - - - +This module installs Grafana Loki as a log aggregation and storage solution in an EKS cluster, with the following components: + +* Deploys Loki using the official Grafana Helm chart +* Creates an S3 bucket for persistent log storage +* Configures IAM roles for service accounts (IRSA) to securely access S3 +* Sets up an internal gateway for log queries and ingestion + +## Architecture + +The module sets up: +- A Loki deployment via Helm with configurable image versions +- An S3 bucket with KMS encryption for log persistence +- An IRSA role for Loki to access the S3 bucket securely +- An internal gateway service (`loki-gateway.{namespace}.svc.cluster.local`) for accessing Loki within the cluster + +## Prerequisites + +* An existing EKS cluster with an OIDC provider configured +* Sufficient node storage - nodes should have more than 40GB disk space +* AWS S3 access for log storage +* Appropriate Kubernetes storage classes configured + +## Usage + +```hcl +module "loki" { + source = "git@github.e.it.census.gov:path/to/tfmod-loki.git" + + cluster_name = "my-eks-cluster" + oidc_provider_arn = module.eks.oidc_provider_arn + region = "us-east-1" + namespace = "monitoring" + + # Optional - override default image versions + loki_tag = "3.1.1" + gateway_tag = "1.25.2-alpine" + + tags = { + Environment = "production" + Team = "platform" + } +} + +# Access the internal Loki endpoint +resource "kubernetes_manifest" "example_grafana_datasource" { + manifest = { + # Configure the Grafana datasource to point to: + # ${module.loki.gateway_internal_endpoint} + } +} +``` ## Requirements
|------|---------| | [aws](#provider\_aws) | 5.89.0 | | [helm](#provider\_helm) | 2.17.0 | +| [terraform](#provider\_terraform) | n/a | ## Modules | Name | Source | Version | |------|--------|---------| -| [images](#module\_images) | git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | -| [loki\_irsa\_role](#module\_loki\_irsa\_role) | git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git// | main | -| [loki\_s3](#module\_loki\_s3) | git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard | tf-upgrade | +| [images](#module\_images) | git::https://github.e.it.census.gov/terraform-modules/aws-ecr-copy-images.git/ | tf-upgrade | +| [loki\_irsa\_role](#module\_loki\_irsa\_role) | git::https://github.e.it.census.gov/SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git// | main | +| [loki\_s3](#module\_loki\_s3) | git::https://github.e.it.census.gov/terraform-modules/aws-s3.git//standard | tf-upgrade | ## Resources | Name | Type | |------|------| | [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | diff --git a/copy_images.tf b/copy_images.tf index b1547be..e8cb9ed 100644 --- a/copy_images.tf +++ b/copy_images.tf @@ -65,7 +65,7 @@ locals { } module "images" { - source = "git@github.e.it.census.gov:terraform-modules/aws-ecr-copy-images.git/?ref=tf-upgrade" + source = "git::https://github.e.it.census.gov/terraform-modules/aws-ecr-copy-images.git/?ref=tf-upgrade" profile = var.profile 
application_name = var.cluster_name diff --git a/main.tf b/main.tf index 5be6fda..a2a269c 100644 --- a/main.tf +++ b/main.tf @@ -6,9 +6,9 @@ locals { module "loki_irsa_role" { # tflint-ignore: terraform_module_pinned_source - source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" + source = "git::https://github.e.it.census.gov/SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main" - role_name = "r-${var.cluster_name}-loki" + role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "loki") attach_s3_bucket_owner_policy = true attach_encrypted_object_manager_policy = true diff --git a/prefixes.tf b/prefixes.tf new file mode 100644 index 0000000..e9a6127 --- /dev/null +++ b/prefixes.tf @@ -0,0 +1,37 @@ +# Naming prefixes keyed by resource kind. Resource names in this module are built by +# looking up a kind here, e.g. local.prefixes["eks-role"] or local.prefixes["eks-s3"]. +locals { + prefixes = { + "efs" = "v-efs-" + "s3" = "v-s3-" + "ebs" = "v-ebs-" + "kms" = "k-kms-" + "role" = "r-" + "policy" = "p-" + "group" = "g-" + "security-group" = "" # "sg-" + # VPC + "vpc" = "" + "dhcp-options" = "" + "vpc-peer" = "vpcp-" + "route-table" = "route-" + "subnet" = "" + "vpc-endpoint" = "vpce-" + "elastic-ip" = "eip-" + "nat-gateway" = "nat-" + "internet-gateway" = "igw-" + "network-acl" = "nacl-" + "customer-gateway" = "cgw-" + "vpn-gateway" = "vpcg-" + "vpn-connection" = "vpn_" + "log-group" = "lg-" + "log-stream" = "lgs-" + # EKS + "eks" = "eks-" + "eks-s3" = "v-s3-eks-" + "eks-user" = "s-eks-" + "eks-role" = "r-eks-" + "eks-policy" = "p-eks-" + "eks-security-group" = "eks-sg-" # "sg-eks-" + } +} diff --git a/s3.tf b/s3.tf index dd5a704..bacaa77 100644 --- a/s3.tf +++ b/s3.tf @@ -1,19 +1,35 @@ data "aws_caller_identity" "current" {} -## create bucket locals { - account_id = data.aws_caller_identity.current.account_id + account_id = data.aws_caller_identity.current.account_id + bucket_name = format("%v%v-loki-%v-%v", local.prefixes["eks-s3"], var.cluster_name, local.account_id, local.region_short) + region_short = join("", [for c in split("-",
var.region) : substr(c, 0, 1)]) } data "aws_s3_bucket" "s3_server_access_logs" { bucket = format("inf-logs-%v-%v", local.account_id, var.region) } +# Validate the S3 bucket name (length and allowed characters) before loki_s3 creates the bucket +resource "terraform_data" "bucket_name_validator" { + input = local.bucket_name + + lifecycle { + precondition { + condition = can(regex("^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$", local.bucket_name)) + error_message = "S3 bucket name must be 3-63 characters of lowercase letters, digits, dots, or hyphens, and must start and end with a letter or digit. Current name '${local.bucket_name}' is ${length(local.bucket_name)} characters." + } + } +} + module "loki_s3" { - source = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade" + depends_on = [terraform_data.bucket_name_validator] + source = "git::https://github.e.it.census.gov/terraform-modules/aws-s3.git//standard?ref=tf-upgrade" - bucket_name = format("%v-loki", var.cluster_name) + bucket_name = local.bucket_name access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id + # NOTE(review): force_destroy = true presumably lets a destroy delete a bucket that still holds logs - confirm this is intended + force_destroy = true tags = var.tags } diff --git a/values/loki.yml.tpl b/values/loki.yml.tpl index 2a89401..c1085a8 100644 --- a/values/loki.yml.tpl +++ b/values/loki.yml.tpl @@ -65,7 +65,7 @@ backend: replicas: 3 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 1000m @@ -160,7 +160,7 @@ write: targetCPUUtilizationPercentage: 80 resources: requests: - cpu: 10m + cpu: 100m memory: 128Mi limits: cpu: 1000m