Skip to content

Commit

Permalink
v1.0.1 Analyzer Highlights:
Browse files Browse the repository at this point in the history
* Dynamic Table Alignment: Automatically calculates the optimal column width for the first column.
* Empty Account Detection: Compares the list of accounts found in the findings against the full list of accounts targeted (derived from the findings or an optional master list).
* Aggregate Memory Reporting: Reports peak memory usage for the analysis task itself.
* Unified Reporting: Includes instance counts, status mapping, and unique account distribution in a single clean view.
  • Loading branch information
badra001 committed Jan 16, 2026
1 parent b72b071 commit 234bfbc
Showing 1 changed file with 48 additions and 28 deletions.
76 changes: 48 additions & 28 deletions local-app/python-tools/cross-organization/analyze-tag-data.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
#!/usr/bin/env python

import csv
import argparse
import sys
import glob
import os
import resource
from collections import Counter, defaultdict

__version__ = "1.0.1"

def get_args():
parser = argparse.ArgumentParser(description="AWS Tag Data Analyzer v1.0.0")
parser = argparse.ArgumentParser(description=f"AWS Tag Data Analyzer v{__version__}")
parser.add_argument("--tags-file", required=True, help="Original CSV with TagKey and Status")
parser.add_argument("--findings-file", nargs='+', required=True, help="One or more CSV findings files (supports wildcards)")
parser.add_argument("--findings-file", nargs='+', required=True, help="One or more CSV findings files")
parser.add_argument("--output", help="Optional CSV file to save analysis results")
return parser.parse_args()

def analyze():
args = get_args()
start_time = datetime.now()

# 1. Load Tag Metadata (Status)
tag_metadata = {}
Expand All @@ -30,50 +34,58 @@ def analyze():
print(f"[!] Error reading tags-file: {e}")
sys.exit(1)

# 2. Load Findings
# 2. Process Findings
findings_count = Counter()
account_map = defaultdict(set)
total_rows = 0
all_seen_accounts = set()
total_hits = 0
max_tag_len = 20 # Minimum starting width

# Expand wildcards if necessary (especially on Windows)
files_to_process = []
for pattern in args.findings_file:
files_to_process.extend(glob.glob(pattern))

if not files_to_process:
print("[!] No findings files found matching the criteria.")
print("[!] No findings files found.")
sys.exit(1)

print(f"[*] Processing {len(files_to_process)} findings files...")
print(f"[*] Analyzing {len(files_to_process)} findings files...")

for file in files_to_process:
try:
with open(file, mode='r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
tag_name = row.get('tag_name')
acc_id = row.get('account_id')
tag_name = row.get('tag_name', '').strip()
acc_id = row.get('account_id', '').strip()
acc_alias = row.get('account_alias', '').strip()

if tag_name:
findings_count[tag_name] += 1
max_tag_len = max(max_tag_len, len(tag_name))
if acc_id:
account_map[tag_name].add(acc_id)
total_rows += 1
all_seen_accounts.add(f"{acc_id} ({acc_alias})")
total_hits += 1
except Exception as e:
print(f"[!] Error reading {file}: {e}")

# 3. Generate Report
print(f"\n{'='*85}")
print(f"{'Tag Name':<35} | {'Status':<10} | {'Instances':<12} | {'Unique Accounts'}")
print(f"{'-'*85}")
# 3. Final Table Formatting
col1_width = max_tag_len + 2
header = f"{'Tag Name'.ljust(col1_width)} | {'Status':<10} | {'Instances':<12} | {'Unique Accounts'}"
divider = "-" * (len(header) + 5)

print(f"\n{divider}")
print(header)
print(divider)

report_rows = []
# Sort by Instance count descending
sorted_tags = sorted(findings_count.items(), key=lambda x: x[1], reverse=True)

for tag, count in sorted_tags:
status = tag_metadata.get(tag, "Not in List")
unique_accs = len(account_map[tag])
print(f"{tag:<35} | {status:<10} | {count:<12} | {unique_accs}")
print(f"{tag.ljust(col1_width)} | {status:<10} | {count:<12} | {unique_accs}")

report_rows.append({
"TagKey": tag,
Expand All @@ -82,19 +94,27 @@ def analyze():
"UniqueAccounts": unique_accs
})

print(f"{'='*85}")
print(f"Total Resources Scanned with Hits: {total_rows}\n")
print(divider)

# 4. Accounts with NO Hits
# Note: This logic assumes we want to see which accounts appeared in the CSVs but had no data.
# To see accounts that never even made it to the CSV, you would need to cross-ref with --list-accounts.
print(f"\n[SUMMARY STATS]")
print(f"Total Unique Tags Found : {len(findings_count)}")
print(f"Total Resource Tag Hits : {total_hits}")
print(f"Accounts with Hits : {len(all_seen_accounts)}")

# Peak Memory
mem_mb = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024, 2)
print(f"Analysis Memory Usage : {mem_mb} MB")

# 4. Optional Export
if args.output:
try:
with open(args.output, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=["TagKey", "Status", "TotalInstances", "UniqueAccounts"])
writer.writeheader()
writer.writerows(report_rows)
print(f"[+] Analysis saved to: {args.output}")
except Exception as e:
print(f"[!] Export failed: {e}")
with open(args.output, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=["TagKey", "Status", "TotalInstances", "UniqueAccounts"])
writer.writeheader()
writer.writerows(report_rows)
print(f"\n[+] Full analysis exported to: {args.output}")

if __name__ == "__main__":
from datetime import datetime
analyze()

0 comments on commit 234bfbc

Please sign in to comment.