Skip to content

Mcm cluster #12

Merged
merged 28 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions .github/workflows/terraform-release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Terraform module release workflow.
#
# Authenticates as the GitHub App, logs the GitHub CLI in against the
# enterprise host, obtains AWS credentials and then runs the module release
# action from the repository root.
name: Terraform CI/CD
on:
  workflow_dispatch:
  pull_request:
    types: [closed]
    branches:
      - main
jobs:
  terraform-ci-cd:
    # `pull_request: closed` also fires when a PR is closed WITHOUT being
    # merged. Only release for merged PRs or for manual runs.
    if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
    # Quoted so YAML reads the runner label as a string, not an integer
    # (matches the quoting used in terraform-validate.yaml).
    runs-on: "229685449397"
    permissions:
      contents: write

    steps:
      - name: Checkout code
        uses: CSVD/gh-actions-checkout@v4

      - name: Setup Terraform
        uses: CSVD/gh-actions-setup-terraform@v3
        with:
          terraform_version: "1.9.1"

      # NOTE(review): this and the other CSVD/*@main actions track a mutable
      # branch; consider pinning to a tag or commit SHA.
      - name: Setup GITHUB Credentials
        id: github_credentials
        uses: CSVD/gh-auth@main
        with:
          github_app_pem_file: ${{ secrets.GH_APP_PEM_FILE }}
          github_app_installation_id: ${{ vars.GH_APP_INSTALLATION_ID }}
          github_app_id: ${{ vars.GH_APP_ID }}

      - name: Debug Authentication
        env:
          # Values are handed to the script through the environment instead of
          # being expanded into the script text, so the token never appears in
          # the generated script file and cannot alter the shell syntax.
          APP_TOKEN: ${{ steps.github_credentials.outputs.github_token }}
          SERVER_URL: ${{ github.server_url }}
        run: |
          # Print the GitHub server URL
          echo "GitHub Server URL: $SERVER_URL"
          # Extract the host from the URL (strip the scheme, then any path)
          HOST="$SERVER_URL"
          HOST="${HOST#*//}"
          HOST="${HOST%%/*}"
          echo "GitHub Host: $HOST"
          # Check if token exists
          if [[ -n "$APP_TOKEN" ]]; then
            echo "Token generated successfully"
            # Test the token with a simple GitHub API call (without exposing the token)
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: Bearer $APP_TOKEN" "$SERVER_URL/api/v3/user")
            echo "API Test Status Code: $STATUS"
          else
            echo "No token was generated!"
          fi

      - name: Setup GitHub CLI
        env:
          # Deliberately NOT named GH_TOKEN / GITHUB_TOKEN / GH_ENTERPRISE_TOKEN:
          # `gh auth login` refuses to store credentials while one of those is set.
          APP_TOKEN: ${{ steps.github_credentials.outputs.github_token }}
        run: |
          # Force manual authentication since setup-git might not work with GitHub Enterprise.
          # The token is piped on stdin so it is never written to disk (the
          # previous temp file was left behind whenever the login failed).
          printf '%s\n' "$APP_TOKEN" | gh auth login --with-token --hostname "github.e.it.census.gov"
          # Test GitHub CLI auth status (best effort: report, do not fail the job)
          gh auth status || echo "GitHub CLI authentication failed"

      - name: AWS Auth
        id: aws_auth
        uses: CSVD/aws-auth@main
        with:
          ecs: true

      - name: Run Terraform Module Release Action
        uses: CSVD/terraform-module-release@main
        with:
          github-token: ${{ steps.github_credentials.outputs.github_token }}
          working-directory: '.'
42 changes: 42 additions & 0 deletions .github/workflows/terraform-validate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Terraform validation workflow.
#
# Runs the shared terraform-validate action on every pull request (and on
# manual dispatch) and fails the job unless both the `is_valid` and
# `tests_passed` outputs of that action are exactly "true".
name: Terraform Validate
on:
  pull_request:
  workflow_dispatch:

jobs:

  terraform-validate:
    runs-on: "229685449397"
    # NOTE(review): `contents: write` is broader than a read-only validation
    # needs; confirm whether CSVD/terraform-validate pushes anything back.
    permissions:
      contents: write
    steps:
      - name: Checkout code
        uses: CSVD/gh-actions-checkout@v4

      # NOTE(review): terraform-release.yaml uses setup-terraform@v3 with
      # Terraform 1.9.1; confirm validating with v2 / 1.7.3 is intentional.
      - name: Setup Terraform
        uses: CSVD/gh-actions-setup-terraform@v2
        with:
          terraform_version: '1.7.3'

      - name: Validate Terraform Configuration
        id: validate
        uses: CSVD/terraform-validate@main

      - name: Check Validation/Test Results
        if: always()
        env:
          # Step outputs are passed through the environment rather than being
          # expanded into the script text: stderr is free-form and may contain
          # quotes, backticks or $(...) that would otherwise break or inject
          # into the shell script.
          IS_VALID: ${{ steps.validate.outputs.is_valid }}
          TESTS_PASSED: ${{ steps.validate.outputs.tests_passed }}
          VALIDATE_STDERR: ${{ steps.validate.outputs.stderr }}
        run: |
          # Treat missing/empty outputs as failures
          IS_VALID="${IS_VALID:-false}"
          TESTS_PASSED="${TESTS_PASSED:-false}"
          if [[ "$IS_VALID" != "true" || "$TESTS_PASSED" != "true" ]]; then
            echo "Validation or test errors found:"
            printf '%s\n' "$VALIDATE_STDERR"
            exit 1
          else
            echo "All validations and tests passed successfully!"
          fi
101 changes: 0 additions & 101 deletions .github/workflows/terragrunt-cicd.yml

This file was deleted.

63 changes: 53 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,56 @@
# tfmod-loki

Installs Loki as the log aggregation sink, and Promtail to forward logs
to Loki.

* Requires additional node disk space - 40GB is not enough.

# tfmod-loki




This module installs Grafana Loki as a log aggregation and storage solution in an EKS cluster, with the following components:

* Deploys Loki using the official Grafana Helm chart
* Creates an S3 bucket for persistent log storage
* Configures IAM roles for service accounts (IRSA) to securely access S3
* Sets up an internal gateway for log queries and ingestion

## Architecture

The module sets up:
- A Loki deployment via Helm with configurable image versions
- An S3 bucket with KMS encryption for log persistence
- An IRSA role for Loki to access the S3 bucket securely
- Internal gateway service (`loki-gateway.{namespace}.svc.cluster.local`) for accessing Loki within the cluster

## Prerequisites

* An existing EKS cluster with OIDC provider configured
* Sufficient node storage - nodes should have more than 40GB disk space
* AWS S3 access for log storage
* Appropriate Kubernetes storage classes configured

## Usage

```hcl
module "loki" {
source = "git@github.e.it.census.gov:path/to/tfmod-loki.git"

cluster_name = "my-eks-cluster"
oidc_provider_arn = module.eks.oidc_provider_arn
region = "us-east-1"
namespace = "monitoring"

# Optional - override default image versions
loki_tag = "3.1.1"
gateway_tag = "1.25.2-alpine"

tags = {
Environment = "production"
Team = "platform"
}
}

# Access Loki internal endpoint
resource "kubernetes_manifest" "example_grafana_datasource" {
manifest = {
# Configure Grafana datasource to point to:
# ${module.loki.gateway_internal_endpoint}
}
}
```

<!-- BEGIN_TF_DOCS -->
## Requirements
Expand All @@ -27,6 +68,7 @@ to loki.
|------|---------|
| <a name="provider_aws"></a> [aws](#provider\_aws) | 5.89.0 |
| <a name="provider_helm"></a> [helm](#provider\_helm) | 2.17.0 |
| <a name="provider_terraform"></a> [terraform](#provider\_terraform) | n/a |

## Modules

Expand All @@ -41,6 +83,7 @@ to loki.
| Name | Type |
|------|------|
| [helm_release.loki](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [terraform_data.bucket_name_validator](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
| [aws_s3_bucket.s3_server_access_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source |

Expand Down
2 changes: 1 addition & 1 deletion main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module "loki_irsa_role" {
# tflint-ignore: terraform_module_pinned_source
source = "git@github.e.it.census.gov:SCT-Engineering/tfmod-custom-iam-role-for-service-account-eks.git//?ref=main"

role_name = "r-${var.cluster_name}-loki"
role_name = format("%v%v-%v", local.prefixes["eks-role"], var.cluster_name, "loki")

attach_s3_bucket_owner_policy = true
attach_encrypted_object_manager_policy = true
Expand Down
35 changes: 35 additions & 0 deletions prefixes.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Lookup table of resource-name prefixes, keyed by resource kind.
# Names are built as "<prefix><name>", e.g. local.prefixes["eks-role"] for
# IAM roles and local.prefixes["eks-s3"] for S3 buckets. An empty string
# means that kind of resource gets no prefix.
locals {
  prefixes = {
    # Storage, encryption and IAM
    "efs"            = "v-efs-"
    "s3"             = "v-s3-"
    "ebs"            = "v-ebs-"
    "kms"            = "k-kms-"
    "role"           = "r-"
    "policy"         = "p-"
    "group"          = "g-"
    "security-group" = "" # "sg-"

    # VPC
    "vpc"              = ""
    "dhcp-options"     = ""
    "vpc-peer"         = "vpcp-"
    "route-table"      = "route-"
    "subnet"           = ""
    "vpc-endpoint"     = "vpce-"
    "elastic-ip"       = "eip-"
    "nat-gateway"      = "nat-"
    "internet-gateway" = "igw-"
    "network-acl"      = "nacl-"
    "customer-gateway" = "cgw-"
    # NOTE(review): "vpcg-" and the underscore in "vpn_" differ from the
    # pattern of the neighbouring entries - confirm both are intentional.
    "vpn-gateway"    = "vpcg-"
    "vpn-connection" = "vpn_"
    "log-group"      = "lg-"
    "log-stream"     = "lgs-"

    # EKS
    "eks"                = "eks-"
    "eks-s3"             = "v-s3-eks-"
    "eks-user"           = "s-eks-"
    "eks-role"           = "r-eks-"
    "eks-policy"         = "p-eks-"
    "eks-security-group" = "eks-sg-" # "sg-eks-"
  }
}
23 changes: 19 additions & 4 deletions s3.tf
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
# Account the module is being applied in; its ID is part of the bucket names.
data "aws_caller_identity" "current" {}

## create bucket
locals {
  account_id = data.aws_caller_identity.current.account_id

  # "<eks-s3 prefix><cluster>-loki-<account id>-<short region>"
  bucket_name = format("%v%v-loki-%v-%v", local.prefixes["eks-s3"], var.cluster_name, local.account_id, local.region_short)

  # First character of every dash-separated part of the region,
  # e.g. "us-east-1" -> "ue1".
  # NOTE(review): distinct regions can shorten to the same value
  # ("ap-south-1" and "ap-southeast-1" both give "as1").
  region_short = join("", [for c in split("-", var.region) : substr(c, 0, 1)])
}

# Existing bucket that receives S3 server access logs for this account/region.
data "aws_s3_bucket" "s3_server_access_logs" {
  bucket = format("inf-logs-%v-%v", local.account_id, var.region)
}

# Validate S3 bucket name length
# Fails with a readable message when the generated name is outside the
# 3-63 character range.
resource "terraform_data" "bucket_name_validator" {
  input = local.bucket_name

  lifecycle {
    precondition {
      condition     = length(local.bucket_name) >= 3 && length(local.bucket_name) <= 63
      error_message = "S3 bucket name must be between 3 and 63 characters. Current name '${local.bucket_name}' is ${length(local.bucket_name)} characters."
    }
  }
}

# Bucket used by Loki for log storage.
module "loki_s3" {
  # Make sure the name check has run before anything in the module is created.
  depends_on = [terraform_data.bucket_name_validator]
  source     = "git@github.e.it.census.gov:terraform-modules/aws-s3.git//standard?ref=tf-upgrade"

  bucket_name       = local.bucket_name
  access_log_bucket = data.aws_s3_bucket.s3_server_access_logs.id
  # NOTE(review): force_destroy lets `terraform destroy` delete a bucket that
  # still contains log objects - confirm this is wanted outside test clusters.
  force_destroy = true

  tags = var.tags
}
Loading
Loading