From 27c21f8411ebac20bb1e5dcecbc6b74157248a21 Mon Sep 17 00:00:00 2001 From: badra001 Date: Fri, 9 Jan 2026 14:56:51 -0500 Subject: [PATCH] add size checks --- .../cross-organization/assess_check_ecr.py | 86 ++++++++++--------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/local-app/python-tools/cross-organization/assess_check_ecr.py b/local-app/python-tools/cross-organization/assess_check_ecr.py index d0877a4c..e16670f4 100755 --- a/local-app/python-tools/cross-organization/assess_check_ecr.py +++ b/local-app/python-tools/cross-organization/assess_check_ecr.py @@ -2,31 +2,25 @@ import json, argparse, sys, os, glob from datetime import datetime, timezone -from collections import Counter +from collections import Counter, defaultdict # --- VERSIONING --- -__version__ = "1.1.0" +__version__ = "1.1.1" def find_latest_file(pattern): files = glob.glob(pattern) return max(files, key=os.path.getctime) if files else None def get_days_ago(iso_str): - """Calculates days between now and an ISO date string.""" - if not iso_str or iso_str == "N/A": - return None + if not iso_str or iso_str == "N/A": return None try: dt = datetime.fromisoformat(iso_str) - # Handle timezone awareness - if dt.tzinfo is None: - dt = dt.replace(tzinfo=timezone.utc) + if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) now = datetime.now(timezone.utc) return (now - dt).days - except: - return None + except: return None def bucket_age(days, counters): - """Increments the appropriate age bucket counter.""" if days is None: return if days >= 365: counters['365+'] += 1 elif days >= 180: counters['180-364'] += 1 @@ -36,30 +30,29 @@ def bucket_age(days, counters): else: counters['<30'] += 1 def main(): - parser = argparse.ArgumentParser(description="AWS ECR Audit Assessor - Data Aging Edition") + parser = argparse.ArgumentParser(description="AWS ECR Audit Assessor - Image Statistics Edition") parser.add_argument("--input", help="JSON audit file") args = parser.parse_args() input_file = args.input or find_latest_file("audit_results.check_ecr.*.json") - if not input_file: - print("Error: No file found."); sys.exit(1) + if not input_file: print("Error: No file found."); sys.exit(1) - with open(input_file, 'r') as f: - data = json.load(f) + with open(input_file, 'r') as f: data = json.load(f) - report_width = 220 + report_width = 230 print("-" * report_width) - print(f"ECR COMPREHENSIVE ASSESSMENT | Total Accounts: {len(data)} | Input: {os.path.basename(input_file)}") + print(f"ECR IMAGE STATISTICS ASSESSMENT | Total Accounts: {len(data)}") print("-" * report_width) - print(f"{'Idx':<5} | {'Account ID':<15} | {'Region':<12} | {'Repo Name':<45} | {'Mutability':<12} | {'Lifecycle':<10} | {'Img Count'}") + print(f"{'Idx':<5} | {'Account ID':<15} | {'Region':<12} | {'Repo Name':<45} | {'Img Count':<10} | {'Mutability':<12} | {'Lifecycle'}") print("-" * report_width) - # Summary Statistics + # Core Stats stats = { - "repos": 0, "images": 0, "no_lifecycle": 0, - "mutable": 0, "immutable": 0, + "repos": 0, "total_images": 0, "total_bytes": 0, + "region_bytes": defaultdict(int), "push_ages": Counter(), "pull_ages": Counter(), - "total_push_days": 0, "push_day_count": 0 + "total_push_days": 0, "push_day_count": 0, + "mutable": 0, "immutable": 0, "no_lifecycle": 0 } for idx, account in enumerate(data, 1): @@ -70,45 +63,54 @@ def main(): region = key.split(":")[0] stats["repos"] += 1 - # Mutability Tracking + # Mutability & Lifecycle mut = val.get("mutability", "MUTABLE") if mut == "IMMUTABLE": stats["immutable"] += 1 else: stats["mutable"] += 1 + if val.get("has_lifecycle") == "False": stats["no_lifecycle"] += 1 - # Lifecycle Tracking - has_lc = val.get("has_lifecycle") == "True" - if not has_lc: stats["no_lifecycle"] += 1 - - # Image Age Tracking + # Process Images images = val.get("images", []) - stats["images"] += len(images) + stats["total_images"] += len(images) for img in images: - # Push Age + # Size calculation + img_size = img.get("size_bytes", 0) + stats["total_bytes"] += img_size + stats["region_bytes"][region] += img_size + + # Age calculation p_days = get_days_ago(img.get("pushed_at")) if p_days is not None: bucket_age(p_days, stats["push_ages"]) stats["total_push_days"] += p_days stats["push_day_count"] += 1 - # Pull Age l_days = get_days_ago(img.get("last_pulled_at")) bucket_age(l_days, stats["pull_ages"]) - print(f"{idx:<5} | {account['account_id']:<15} | {region:<12} | {val['repo_name']:<45} | {mut:<12} | {'YES' if has_lc else 'NO':<10} | {len(images)}") + print(f"{idx:<5} | {account['account_id']:<15} | {region:<12} | {val['repo_name']:<45} | {len(images):<10} | {mut:<12} | {'YES' if val.get('has_lifecycle')=='True' else 'NO'}") - # Footer Logic + # Aggregated Results + total_gb = stats["total_bytes"] / (1024**3) + avg_img_mb = (stats["total_bytes"] / stats["total_images"]) / (1024**2) if stats["total_images"] > 0 else 0 avg_push_age = stats["total_push_days"] / stats["push_day_count"] if stats["push_day_count"] > 0 else 0 - + print("-" * report_width) - print(f"ORGANIZATION ECR FOOTPRINT SUMMARY") - print(f" --- Repository Config ---") - print(f" Total Repositories: {stats['repos']} | Immutable: {stats['immutable']} | Mutable: {stats['mutable']}") - print(f" Repos Missing Lifecycle Policies: {stats['no_lifecycle']} (Action Required)") + print(f"ORGANIZATION IMAGE FOOTPRINT SUMMARY") + print(f" --- Image Storage Statistics ---") + print(f" Total Images Found: {stats['total_images']:,}") + print(f" Total Image Storage: {total_gb:.2f} GB") + print(f" Average Image Size: {avg_img_mb:.2f} MB") + + print(f"\n --- Aging & Lifecycle ---") + print(f" Average Image Age: {avg_push_age:.1f} days") + print(f" Images Older > 1yr: {stats['push_ages']['365+']} pushed | {stats['pull_ages']['365+']} pulled") + print(f" Repos w/o Lifecycle: {stats['no_lifecycle']} (Critical Gap)") - print(f"\n --- Image Aging & Usage ---") - print(f" Total Images: {stats['images']} | Average Age (Since Push): {avg_push_age:.1f} days") - print(f" Images Older than 1 Year: {stats['push_ages']['365+']}") + print(f"\n --- Regional Image Storage Breakdown ---") + for reg, r_bytes in sorted(stats["region_bytes"].items(), key=lambda x: x[1], reverse=True): + print(f" - {reg:<15}: {r_bytes/(1024**3):>8.2f} GB") print(f"\n Age Distribution (Days Since Action):") print(f" Bucket | Pushed Count | Last Pull Count")