memory stats

terraform · Jan 15, 2026 · d28776e · d28776e
1 parent 4810bec
commit d28776e
Showing 1 changed file with 118 additions and 106 deletions.
diff --git a/local-app/python-tools/cross-organization/tag-checker.py b/local-app/python-tools/cross-organization/tag-checker.py
@@ -8,19 +8,20 @@
 import time
 import re
 import os
+import resource
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from botocore.exceptions import ClientError
 from tqdm import tqdm
 
-__version__ = "1.1.10"
+__version__ = "1.1.12"
 
 def get_args():
     parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}")
     parser.add_argument("--role-name", required=True, help="Role to assume in member accounts")
     parser.add_argument("--region", required=True, help="Management account region (e.g., us-gov-east-1)")
     parser.add_argument("--profile", required=True, help="AWS CLI profile for Management Account")
-    parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key in the first column")
+    parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key, Status, Type, etc.")
     parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans")
     parser.add_argument("--account-regex", help="Regex to filter accounts by alias")
     parser.add_argument("--accounts-from", help="File of Account IDs to process")
@@ -45,7 +46,7 @@ def get_session(management_session, account_id, role_name, partition, region_nam
         if verbose: tqdm.write(f"[!] Auth Error for {account_id}: {str(e)}")
         return None
 
-def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose, bar_width):
+def scan_account(account, management_session, role_name, partition, tag_keys, active_tag_keys, region_name, lane_id, account_regex, verbose, bar_width):
     acc_id = account['Id']
     m_session = get_session(management_session, acc_id, role_name, partition, region_name, verbose)
 
@@ -73,17 +74,14 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re
     global_tags_found = set()
     regional_metrics = []
 
-    # UI Alignment: Zero-padded lane + fixed width label
     label = f"{acc_id} {alias}".ljust(bar_width)
     pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id:02d} | {label}", 
                 position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
 
     for key in tag_keys:
         for r in active_regions:
             r_start = time.perf_counter()
-            r_hits = 0
-            r_res_found = set()
-            r_tags_found = set()
+            r_hits, r_res_found, r_tags_found = 0, set(), set()
 
             client = m_session.client('resourcegroupstaggingapi', region_name=r)
             try:
@@ -106,16 +104,24 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re
 
             r_elapsed = round(time.perf_counter() - r_start, 4)
             r_entry = next((m for m in regional_metrics if m['region'] == r), None)
+
+            # Intersection with active tags
+            r_active_found = sorted(list(r_tags_found.intersection(active_tag_keys)))
+
             if not r_entry:
                 regional_metrics.append({
                     "region": r, "hits": r_hits, "unique_resources": len(r_res_found),
-                    "tags_found_count": len(r_tags_found), "tags_found_list": sorted(list(r_tags_found)),
-                    "tags_not_found_count": len(tag_keys) - len(r_tags_found), "elapsed_sec": r_elapsed
+                    "tags_found_count": len(r_tags_found), 
+                    "tags_found_list": sorted(list(r_tags_found)),
+                    "tags_found_list_active": r_active_found,
+                    "tags_not_found_count": len(tag_keys) - len(r_tags_found), 
+                    "elapsed_sec": r_elapsed
                 })
             else:
                 r_entry['hits'] += r_hits
                 current_tags = set(r_entry['tags_found_list']) | r_tags_found
                 r_entry['tags_found_list'] = sorted(list(current_tags))
+                r_entry['tags_found_list_active'] = sorted(list(current_tags.intersection(active_tag_keys)))
                 r_entry['tags_found_count'] = len(current_tags)
                 r_entry['tags_not_found_count'] = len(tag_keys) - len(current_tags)
                 r_entry['elapsed_sec'] = round(r_entry['elapsed_sec'] + r_elapsed, 4)
@@ -130,6 +136,7 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re
             "unique_resources": len(global_resources),
             "tags_found_count": len(global_tags_found),
             "tags_found_list": sorted(list(global_tags_found)),
+            "tags_found_list_active": sorted(list(global_tags_found.intersection(active_tag_keys))),
             "tags_not_found_count": len(tag_keys) - len(global_tags_found),
             "elapsed_sec": round(time.time() - acc_start, 2)
         },
@@ -144,107 +151,112 @@ def main():
     start_iso = datetime.now().isoformat()
     start_ts = time.time()
 
-    session = boto3.Session(profile_name=args.profile, region_name=args.region)
-    org = session.client('organizations', region_name=args.region)
-    partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]
-
-    with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
-        tag_keys = [row[0].strip() for row in list(csv.reader(f))[1:] if row]
-
-    target_ids = []
-    if args.accounts_from:
-        with open(args.accounts_from, 'r') as f:
-            target_ids = [l.strip() for l in f if l.strip()]
-
-    # Initial Discovery for Header info
-    all_raw_accounts = []
-    paginator = org.get_paginator('list_accounts')
-    for page in paginator.paginate():
-        all_raw_accounts.extend(page['Accounts'])
-
-    to_process = []
-    for a in all_raw_accounts:
-        if a['Status'] == 'ACTIVE':
-            if not target_ids or a['Id'] in target_ids:
-                to_process.append(a)
-
-    if args.limit > 0: to_process = to_process[:args.limit]
-
-    # Calculate bar width
-    max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40
-
-    # STARTUP HEADER
-    print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
-    print(f"Execution Profile : {args.profile}")
-    print(f"Execution Region  : {args.region}")
-    print(f"Assume Role Name  : {args.role_name}")
-    print(f"Partition         : {partition}")
-    print(f"Thread Count      : {args.max_workers}")
-    print(f"Tags Read         : {len(tag_keys)}")
-    print(f"Accounts Found    : {len(all_raw_accounts)}")
-    print(f"Accounts Targeted : {len(to_process)}")
-    print(f"{'-'*85}")
-    print(f"Arguments: {vars(args)}")
-    print(f"{'='*85}\n")
-
-    all_findings = []
-    account_results = []
-    overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0)
-
-    with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
-        futures = {executor.submit(scan_account, acc, session, args.role_name, partition, 
-                                   tag_keys, args.region, (i % args.max_workers) + 1, 
-                                   args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)}
+    try:
+        session = boto3.Session(profile_name=args.profile, region_name=args.region)
+        org = session.client('organizations', region_name=args.region)
+        partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]
 
-        for future in as_completed(futures):
-            res, acc_id, alias, m, status = future.result()
-            if status == "Success":
-                all_findings.extend(res)
-                account_results.append({
-                    "account_id": acc_id, "alias": alias, 
-                    "global_metrics": m["global"],
-                    "regional_metrics": m["regions"]
-                })
-            else:
-                overall_pbar.write(f"[-] {acc_id}: {status}")
-            overall_pbar.update(1)
+        # Parse Tag CSV for Status
+        tag_keys = []
+        active_tag_keys = set()
+        with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
+            reader = csv.DictReader(f) # TagKey, Type, Status, ...
+            for row in reader:
+                key = row['Tag Key'].strip()
+                tag_keys.append(key)
+                if row.get('Status', '').strip().lower() == 'active':
+                    active_tag_keys.add(key)
+
+        target_ids = []
+        if args.accounts_from:
+            with open(args.accounts_from, 'r') as f:
+                target_ids = [l.strip() for l in f if l.strip()]
 
-    overall_pbar.close()
-    print("\n" * (args.max_workers + 1)) 
+        # Unique Account Discovery
+        unique_accounts = {}
+        paginator = org.get_paginator('list_accounts')
+        for page in paginator.paginate():
+            for a in page['Accounts']:
+                if a['Status'] == 'ACTIVE':
+                    unique_accounts[a['Id']] = a
 
-    # Summary Aggregation
-    total_hits = sum(a['global_metrics']['hits'] for a in account_results)
-    total_res = len(set(f['arn'] for f in all_findings))
-    all_found_keys = set(f['tag_name'] for f in all_findings)
-
-    output_summary = {
-        "summary": {
-            "version": __version__,
-            "command_line": cmd_line,
-            "aws_accounts_scanned": len(account_results),
-            "execution_start": start_iso,
-            "execution_end": datetime.now().isoformat(),
-            "elapsed_sec_total": round(time.time() - start_ts, 2),
-            "threads": args.max_workers,
-            "total_hits": total_hits,
-            "total_unique_resources": total_res,
-            "total_tags_found_count": len(all_found_keys),
-            "total_tags_not_found_count": len(tag_keys) - len(all_found_keys)
-        },
-        "accounts": account_results
-    }
+        to_process = []
+        for aid, acc in unique_accounts.items():
+            if not target_ids or aid in target_ids:
+                to_process.append(acc)
 
-    sum_file = f"{args.output}_summary_{ts}.json"
-    fin_file = f"{args.output}_findings_{ts}.csv"
-
-    with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4)
-    if all_findings:
-        with open(fin_file, 'w', newline='') as f:
-            writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
-            writer.writeheader(); writer.writerows(all_findings)
-
-    print(f"[+] Summary: {sum_file}")
-    print(f"[+] Findings: {fin_file}")
+        if args.limit > 0: to_process = to_process[:args.limit]
+        max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40
+
+        print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
+        print(f"Profile: {args.profile} | Region: {args.region} | Role: {args.role_name}")
+        print(f"Tags Read: {len(tag_keys)} ({len(active_tag_keys)} active)")
+        print(f"Accounts Found (Unique): {len(unique_accounts)}")
+        print(f"Accounts Targeted: {len(to_process)}")
+        print(f"{'='*85}\n")
+
+        all_findings, account_results = [], []
+        overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0)
+
+        with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
+            try:
+                futures = {executor.submit(scan_account, acc, session, args.role_name, partition, 
+                                           tag_keys, active_tag_keys, args.region, (i % args.max_workers) + 1, 
+                                           args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)}
+
+                for future in as_completed(futures):
+                    res, acc_id, alias, m, status = future.result()
+                    if status == "Success":
+                        all_findings.extend(res)
+                        account_results.append({"account_id": acc_id, "alias": alias, "global_metrics": m["global"], "regional_metrics": m["regions"]})
+                    else:
+                        overall_pbar.write(f"[-] {acc_id}: {status}")
+                    overall_pbar.update(1)
+            except KeyboardInterrupt:
+                executor.shutdown(wait=False, cancel_futures=True)
+                sys.exit(130)
+
+        overall_pbar.close()
+        print("\n" * (args.max_workers + 1)) 
+
+        # Summary Generation
+        total_hits = sum(a['global_metrics']['hits'] for a in account_results)
+        total_res = len(set(f['arn'] for f in all_findings))
+        all_found_keys = set(f['tag_name'] for f in all_findings)
+        max_mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+        # Convert to MB. ru_maxrss is reported in kilobytes on Linux but in bytes on macOS; dividing by 1024 assumes Linux, so the value is overstated on macOS.
+        mem_mb = round(max_mem / 1024, 2)
+
+        output_summary = {
+            "summary": {
+                "version": __version__,
+                "command_line": cmd_line,
+                "aws_accounts_scanned": len(account_results),
+                "tags_read_count": len(tag_keys),
+                "execution_start": start_iso,
+                "execution_end": datetime.now().isoformat(),
+                "elapsed_sec_total": round(time.time() - start_ts, 2),
+                "max_memory_mb": mem_mb,
+                "total_hits": total_hits,
+                "total_unique_resources": total_res,
+                "total_tags_found_count": len(all_found_keys)
+            },
+            "accounts": account_results
+        }
+
+        sum_file = f"{args.output}_summary_{ts}.json"
+        fin_file = f"{args.output}_findings_{ts}.csv"
+
+        with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4)
+        if all_findings:
+            with open(fin_file, 'w', newline='') as f:
+                writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
+                writer.writeheader(); writer.writerows(all_findings)
+
+        print(f"[+] Summary: {sum_file}\n[+] Findings: {fin_file}")
+
+    except KeyboardInterrupt:
+        sys.exit(130)
 
 if __name__ == "__main__":
     main()