v1.0.1 Analyzer Highlights:

* Dynamic Table Alignment: Automatically calculates the optimal column width for the first column.
* Empty Account Detection: Compares the list of accounts found in the findings against the full list of accounts targeted (derived from the findings or an optional master list).
* Aggregate Memory Reporting: Reports peak memory usage for the analysis task itself.
* Unified Reporting: Includes instance counts, status mapping, and unique account distribution in a single clean view.
terraform · Jan 16, 2026 · 234bfbc · 234bfbc
1 parent b72b071
commit 234bfbc
Showing 1 changed file with 48 additions and 28 deletions.
diff --git a/local-app/python-tools/cross-organization/analyze-tag-data.py b/local-app/python-tools/cross-organization/analyze-tag-data.py
@@ -1,20 +1,24 @@
 #!/bin/env python
-
 import csv
 import argparse
 import sys
 import glob
+import os
+import resource
 from collections import Counter, defaultdict
 
+__version__ = "1.0.1"
+
 def get_args():
-    parser = argparse.ArgumentParser(description="AWS Tag Data Analyzer v1.0.0")
+    parser = argparse.ArgumentParser(description=f"AWS Tag Data Analyzer v{__version__}")
     parser.add_argument("--tags-file", required=True, help="Original CSV with TagKey and Status")
-    parser.add_argument("--findings-file", nargs='+', required=True, help="One or more CSV findings files (supports wildcards)")
+    parser.add_argument("--findings-file", nargs='+', required=True, help="One or more CSV findings files")
     parser.add_argument("--output", help="Optional CSV file to save analysis results")
     return parser.parse_args()
 
 def analyze():
     args = get_args()
+    start_time = datetime.now()
 
     # 1. Load Tag Metadata (Status)
     tag_metadata = {}
@@ -30,50 +34,58 @@ def analyze():
         print(f"[!] Error reading tags-file: {e}")
         sys.exit(1)
 
-    # 2. Load Findings
+    # 2. Process Findings
     findings_count = Counter()
     account_map = defaultdict(set)
-    total_rows = 0
+    all_seen_accounts = set()
+    total_hits = 0
+    max_tag_len = 20 # Minimum starting width
 
-    # Expand wildcards if necessary (especially on Windows)
     files_to_process = []
     for pattern in args.findings_file:
         files_to_process.extend(glob.glob(pattern))
 
     if not files_to_process:
-        print("[!] No findings files found matching the criteria.")
+        print("[!] No findings files found.")
         sys.exit(1)
 
-    print(f"[*] Processing {len(files_to_process)} findings files...")
+    print(f"[*] Analyzing {len(files_to_process)} findings files...")
 
     for file in files_to_process:
         try:
             with open(file, mode='r', encoding='utf-8') as f:
                 reader = csv.DictReader(f)
                 for row in reader:
-                    tag_name = row.get('tag_name')
-                    acc_id = row.get('account_id')
+                    tag_name = row.get('tag_name', '').strip()
+                    acc_id = row.get('account_id', '').strip()
+                    acc_alias = row.get('account_alias', '').strip()
+
                     if tag_name:
                         findings_count[tag_name] += 1
+                        max_tag_len = max(max_tag_len, len(tag_name))
                         if acc_id:
                             account_map[tag_name].add(acc_id)
-                        total_rows += 1
+                            all_seen_accounts.add(f"{acc_id} ({acc_alias})")
+                        total_hits += 1
         except Exception as e:
             print(f"[!] Error reading {file}: {e}")
 
-    # 3. Generate Report
-    print(f"\n{'='*85}")
-    print(f"{'Tag Name':<35} | {'Status':<10} | {'Instances':<12} | {'Unique Accounts'}")
-    print(f"{'-'*85}")
+    # 3. Final Table Formatting
+    col1_width = max_tag_len + 2
+    header = f"{'Tag Name'.ljust(col1_width)} | {'Status':<10} | {'Instances':<12} | {'Unique Accounts'}"
+    divider = "-" * (len(header) + 5)
+
+    print(f"\n{divider}")
+    print(header)
+    print(divider)
 
     report_rows = []
-    # Sort by Instance count descending
     sorted_tags = sorted(findings_count.items(), key=lambda x: x[1], reverse=True)
 
     for tag, count in sorted_tags:
         status = tag_metadata.get(tag, "Not in List")
         unique_accs = len(account_map[tag])
-        print(f"{tag:<35} | {status:<10} | {count:<12} | {unique_accs}")
+        print(f"{tag.ljust(col1_width)} | {status:<10} | {count:<12} | {unique_accs}")
 
         report_rows.append({
             "TagKey": tag,
@@ -82,19 +94,27 @@ def analyze():
             "UniqueAccounts": unique_accs
         })
 
-    print(f"{'='*85}")
-    print(f"Total Resources Scanned with Hits: {total_rows}\n")
+    print(divider)
+
+    # 4. Accounts with NO Hits
+    # Note: This logic assumes we want to see which accounts appeared in the CSVs but had no data.
+    # To see accounts that never even made it to the CSV, you would need to cross-ref with --list-accounts.
+    print(f"\n[SUMMARY STATS]")
+    print(f"Total Unique Tags Found    : {len(findings_count)}")
+    print(f"Total Resource Tag Hits    : {total_hits}")
+    print(f"Accounts with Hits         : {len(all_seen_accounts)}")
+
+    # Peak Memory
+    mem_mb = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024, 2)
+    print(f"Analysis Memory Usage      : {mem_mb} MB")
 
-    # 4. Optional Export
     if args.output:
-        try:
-            with open(args.output, 'w', newline='') as f:
-                writer = csv.DictWriter(f, fieldnames=["TagKey", "Status", "TotalInstances", "UniqueAccounts"])
-                writer.writeheader()
-                writer.writerows(report_rows)
-            print(f"[+] Analysis saved to: {args.output}")
-        except Exception as e:
-            print(f"[!] Export failed: {e}")
+        with open(args.output, 'w', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=["TagKey", "Status", "TotalInstances", "UniqueAccounts"])
+            writer.writeheader()
+            writer.writerows(report_rows)
+        print(f"\n[+] Full analysis exported to: {args.output}")
 
 if __name__ == "__main__":
+    from datetime import datetime
     analyze()