From 89aad59f57474273ecb0a1eaacc257b31224d950 Mon Sep 17 00:00:00 2001
From: badra001 <donald.e.badrak.ii@census.gov>
Date: Thu, 15 Jan 2026 12:29:29 -0500
Subject: [PATCH] fix tags-file header syntax

---
 .../cross-organization/tag-checker.py         | 63 ++++++++-----------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/local-app/python-tools/cross-organization/tag-checker.py b/local-app/python-tools/cross-organization/tag-checker.py
index 5ab83c03..939afc2b 100755
--- a/local-app/python-tools/cross-organization/tag-checker.py
+++ b/local-app/python-tools/cross-organization/tag-checker.py
@@ -14,14 +14,14 @@
 from botocore.exceptions import ClientError
 from tqdm import tqdm
 
-__version__ = "1.1.12"
+__version__ = "1.1.13"
 
 def get_args():
     parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}")
     parser.add_argument("--role-name", required=True, help="Role to assume in member accounts")
     parser.add_argument("--region", required=True, help="Management account region (e.g., us-gov-east-1)")
     parser.add_argument("--profile", required=True, help="AWS CLI profile for Management Account")
-    parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key, Status, Type, etc.")
+    parser.add_argument("--tags-file", required=True, help="CSV file with TagKey, Type, Status, etc.")
     parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans")
     parser.add_argument("--account-regex", help="Regex to filter accounts by alias")
     parser.add_argument("--accounts-from", help="File of Account IDs to process")
@@ -82,7 +82,6 @@ def scan_account(account, management_session, role_name, partition, tag_keys, ac
         for r in active_regions:
             r_start = time.perf_counter()
             r_hits, r_res_found, r_tags_found = 0, set(), set()
-            
             client = m_session.client('resourcegroupstaggingapi', region_name=r)
             try:
                 paginator = client.get_paginator('get_resources')
@@ -104,8 +103,6 @@ def scan_account(account, management_session, role_name, partition, tag_keys, ac
             
             r_elapsed = round(time.perf_counter() - r_start, 4)
             r_entry = next((m for m in regional_metrics if m['region'] == r), None)
-            
-            # Intersection with active tags
             r_active_found = sorted(list(r_tags_found.intersection(active_tag_keys)))
 
             if not r_entry:
@@ -148,31 +145,29 @@ def main():
     args = get_args()
     cmd_line = " ".join(sys.argv)
     ts = datetime.now().strftime("%Y%m%d_%H%M%S")
-    start_iso = datetime.now().isoformat()
-    start_ts = time.time()
+    start_iso, start_ts = datetime.now().isoformat(), time.time()
     
     try:
         session = boto3.Session(profile_name=args.profile, region_name=args.region)
         org = session.client('organizations', region_name=args.region)
         partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]
         
-        # Parse Tag CSV for Status
-        tag_keys = []
-        active_tag_keys = set()
+        tag_keys, active_tag_keys = [], set()
         with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
-            reader = csv.DictReader(f) # TagKey, Type, Status, ...
+            # Expected header row: TagKey, Type, Status, ...; skipinitialspace tolerates ", " after delimiters
+            reader = csv.DictReader(f, skipinitialspace=True) 
             for row in reader:
-                key = row['Tag Key'].strip()
-                tag_keys.append(key)
-                if row.get('Status', '').strip().lower() == 'active':
-                    active_tag_keys.add(key)
+                key = row.get('TagKey', '').strip().replace('"', '')
+                if key:
+                    tag_keys.append(key)
+                    if row.get('Status', '').strip().lower() == 'active':
+                        active_tag_keys.add(key)
         
         target_ids = []
         if args.accounts_from:
             with open(args.accounts_from, 'r') as f:
                 target_ids = [l.strip() for l in f if l.strip()]
 
-        # Unique Account Discovery
         unique_accounts = {}
         paginator = org.get_paginator('list_accounts')
         for page in paginator.paginate():
@@ -180,12 +175,10 @@ def main():
                 if a['Status'] == 'ACTIVE':
                     unique_accounts[a['Id']] = a
 
-        to_process = []
-        for aid, acc in unique_accounts.items():
-            if not target_ids or aid in target_ids:
-                to_process.append(acc)
-
+        to_process = [v for k, v in unique_accounts.items() if not target_ids or k in target_ids]
         if args.limit > 0: to_process = to_process[:args.limit]
+        
+        # UI: Fixed width for Label = acc_id(12) + space(1) + alias(max) + buffer(1)
         max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40
         
         print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
@@ -193,6 +186,7 @@ def main():
         print(f"Tags Read: {len(tag_keys)} ({len(active_tag_keys)} active)")
         print(f"Accounts Found (Unique): {len(unique_accounts)}")
         print(f"Accounts Targeted: {len(to_process)}")
+        print(f"Arguments: {vars(args)}")
         print(f"{'='*85}\n")
         
         all_findings, account_results = [], []
@@ -203,7 +197,6 @@ def main():
                 futures = {executor.submit(scan_account, acc, session, args.role_name, partition, 
                                            tag_keys, active_tag_keys, args.region, (i % args.max_workers) + 1, 
                                            args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)}
-                
                 for future in as_completed(futures):
                     res, acc_id, alias, m, status = future.result()
                     if status == "Success":
@@ -219,13 +212,10 @@ def main():
         overall_pbar.close()
         print("\n" * (args.max_workers + 1)) 
 
-        # Summary Generation
-        total_hits = sum(a['global_metrics']['hits'] for a in account_results)
-        total_res = len(set(f['arn'] for f in all_findings))
+        # Peak RSS in MB: ru_maxrss is in kilobytes on Linux (bytes on macOS, where this value is overstated)
+        mem_mb = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024, 2)
+        total_unique_res = len(set(f['arn'] for f in all_findings))
         all_found_keys = set(f['tag_name'] for f in all_findings)
-        max_mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-        # Convert to MB (resource reports in KB on Linux, bytes on macOS - adjusting for Linux standard)
-        mem_mb = round(max_mem / 1024, 2)
 
         output_summary = {
             "summary": {
@@ -237,23 +227,22 @@ def main():
                 "execution_end": datetime.now().isoformat(),
                 "elapsed_sec_total": round(time.time() - start_ts, 2),
                 "max_memory_mb": mem_mb,
-                "total_hits": total_hits,
-                "total_unique_resources": total_res,
-                "total_tags_found_count": len(all_found_keys)
+                "total_hits": sum(a['global_metrics']['hits'] for a in account_results),
+                "total_unique_resources": total_unique_res,
+                "total_tags_found_count": len(all_found_keys),
+                "total_tags_not_found_count": len(tag_keys) - len(all_found_keys)
             },
             "accounts": account_results
         }
 
-        sum_file = f"{args.output}_summary_{ts}.json"
-        fin_file = f"{args.output}_findings_{ts}.csv"
-        
-        with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4)
+        sum_f, fin_f = f"{args.output}_summary_{ts}.json", f"{args.output}_findings_{ts}.csv"
+        with open(sum_f, 'w') as f: json.dump(output_summary, f, indent=4)
         if all_findings:
-            with open(fin_file, 'w', newline='') as f:
+            with open(fin_f, 'w', newline='') as f:
                 writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
                 writer.writeheader(); writer.writerows(all_findings)
 
-        print(f"[+] Summary: {sum_file}\n[+] Findings: {fin_file}")
+        print(f"[+] Summary: {sum_f}\n[+] Findings: {fin_f}")
 
     except KeyboardInterrupt:
         sys.exit(130)