diff --git a/local-app/python-tools/cross-organization/tag-checker.py b/local-app/python-tools/cross-organization/tag-checker.py index 543988e8..5ab83c03 100755 --- a/local-app/python-tools/cross-organization/tag-checker.py +++ b/local-app/python-tools/cross-organization/tag-checker.py @@ -8,19 +8,20 @@ import time import re import os +import resource from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed from botocore.exceptions import ClientError from tqdm import tqdm -__version__ = "1.1.10" +__version__ = "1.1.12" def get_args(): parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}") parser.add_argument("--role-name", required=True, help="Role to assume in member accounts") parser.add_argument("--region", required=True, help="Management account region (e.g., us-gov-east-1)") parser.add_argument("--profile", required=True, help="AWS CLI profile for Management Account") - parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key in the first column") + parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key, Status, Type, etc.") parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans") parser.add_argument("--account-regex", help="Regex to filter accounts by alias") parser.add_argument("--accounts-from", help="File of Account IDs to process") @@ -45,7 +46,7 @@ def get_session(management_session, account_id, role_name, partition, region_nam if verbose: tqdm.write(f"[!] Auth Error for {account_id}: {str(e)}") return None -def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose, bar_width): +def scan_account(account, management_session, role_name, partition, tag_keys, active_tag_keys, region_name, lane_id, account_regex, verbose, bar_width): acc_id = account['Id'] m_session = get_session(management_session, acc_id, role_name, partition, region_name, verbose) @@ -73,7 +74,6 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re global_tags_found = set() regional_metrics = [] - # UI Alignment: Zero-padded lane + fixed width label label = f"{acc_id} {alias}".ljust(bar_width) pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id:02d} | {label}", position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') @@ -81,9 +81,7 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re for key in tag_keys: for r in active_regions: r_start = time.perf_counter() - r_hits = 0 - r_res_found = set() - r_tags_found = set() + r_hits, r_res_found, r_tags_found = 0, set(), set() client = m_session.client('resourcegroupstaggingapi', region_name=r) try: @@ -106,16 +104,24 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re r_elapsed = round(time.perf_counter() - r_start, 4) r_entry = next((m for m in regional_metrics if m['region'] == r), None) + + # Intersection with active tags + r_active_found = sorted(list(r_tags_found.intersection(active_tag_keys))) + if not r_entry: regional_metrics.append({ "region": r, "hits": r_hits, "unique_resources": len(r_res_found), - "tags_found_count": len(r_tags_found), "tags_found_list": sorted(list(r_tags_found)), - "tags_not_found_count": len(tag_keys) - len(r_tags_found), "elapsed_sec": r_elapsed + "tags_found_count": len(r_tags_found), + "tags_found_list": sorted(list(r_tags_found)), + "tags_found_list_active": r_active_found, + "tags_not_found_count": len(tag_keys) - len(r_tags_found), + "elapsed_sec": r_elapsed }) else: r_entry['hits'] += r_hits current_tags = set(r_entry['tags_found_list']) | r_tags_found r_entry['tags_found_list'] = sorted(list(current_tags)) + r_entry['tags_found_list_active'] = sorted(list(current_tags.intersection(active_tag_keys))) r_entry['tags_found_count'] = len(current_tags) r_entry['tags_not_found_count'] = len(tag_keys) - len(current_tags) r_entry['elapsed_sec'] = round(r_entry['elapsed_sec'] + r_elapsed, 4) @@ -130,6 +136,7 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re "unique_resources": len(global_resources), "tags_found_count": len(global_tags_found), "tags_found_list": sorted(list(global_tags_found)), + "tags_found_list_active": sorted(list(global_tags_found.intersection(active_tag_keys))), "tags_not_found_count": len(tag_keys) - len(global_tags_found), "elapsed_sec": round(time.time() - acc_start, 2) }, @@ -144,107 +151,112 @@ def main(): start_iso = datetime.now().isoformat() start_ts = time.time() - session = boto3.Session(profile_name=args.profile, region_name=args.region) - org = session.client('organizations', region_name=args.region) - partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1] - - with open(args.tags_file, mode='r', encoding='utf-8-sig') as f: - tag_keys = [row[0].strip() for row in list(csv.reader(f))[1:] if row] - - target_ids = [] - if args.accounts_from: - with open(args.accounts_from, 'r') as f: - target_ids = [l.strip() for l in f if l.strip()] - - # Initial Discovery for Header info - all_raw_accounts = [] - paginator = org.get_paginator('list_accounts') - for page in paginator.paginate(): - all_raw_accounts.extend(page['Accounts']) - - to_process = [] - for a in all_raw_accounts: - if a['Status'] == 'ACTIVE': - if not target_ids or a['Id'] in target_ids: - to_process.append(a) - - if args.limit > 0: to_process = to_process[:args.limit] - - # Calculate bar width - max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40 - - # STARTUP HEADER - print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}") - print(f"Execution Profile : {args.profile}") - print(f"Execution Region : {args.region}") - print(f"Assume Role Name : {args.role_name}") - print(f"Partition : {partition}") - print(f"Thread Count : {args.max_workers}") - print(f"Tags Read : {len(tag_keys)}") - print(f"Accounts Found : {len(all_raw_accounts)}") - print(f"Accounts Targeted : {len(to_process)}") - print(f"{'-'*85}") - print(f"Arguments: {vars(args)}") - print(f"{'='*85}\n") - - all_findings = [] - account_results = [] - overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0) - - with ThreadPoolExecutor(max_workers=args.max_workers) as executor: - futures = {executor.submit(scan_account, acc, session, args.role_name, partition, - tag_keys, args.region, (i % args.max_workers) + 1, - args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)} + try: + session = boto3.Session(profile_name=args.profile, region_name=args.region) + org = session.client('organizations', region_name=args.region) + partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1] - for future in as_completed(futures): - res, acc_id, alias, m, status = future.result() - if status == "Success": - all_findings.extend(res) - account_results.append({ - "account_id": acc_id, "alias": alias, - "global_metrics": m["global"], - "regional_metrics": m["regions"] - }) - else: - overall_pbar.write(f"[-] {acc_id}: {status}") - overall_pbar.update(1) + # Parse Tag CSV for Status + tag_keys = [] + active_tag_keys = set() + with open(args.tags_file, mode='r', encoding='utf-8-sig') as f: + reader = csv.DictReader(f) # TagKey, Type, Status, ... + for row in reader: + key = row['Tag Key'].strip() + tag_keys.append(key) + if row.get('Status', '').strip().lower() == 'active': + active_tag_keys.add(key) + + target_ids = [] + if args.accounts_from: + with open(args.accounts_from, 'r') as f: + target_ids = [l.strip() for l in f if l.strip()] - overall_pbar.close() - print("\n" * (args.max_workers + 1)) + # Unique Account Discovery + unique_accounts = {} + paginator = org.get_paginator('list_accounts') + for page in paginator.paginate(): + for a in page['Accounts']: + if a['Status'] == 'ACTIVE': + unique_accounts[a['Id']] = a - # Summary Aggregation - total_hits = sum(a['global_metrics']['hits'] for a in account_results) - total_res = len(set(f['arn'] for f in all_findings)) - all_found_keys = set(f['tag_name'] for f in all_findings) - - output_summary = { - "summary": { - "version": __version__, - "command_line": cmd_line, - "aws_accounts_scanned": len(account_results), - "execution_start": start_iso, - "execution_end": datetime.now().isoformat(), - "elapsed_sec_total": round(time.time() - start_ts, 2), - "threads": args.max_workers, - "total_hits": total_hits, - "total_unique_resources": total_res, - "total_tags_found_count": len(all_found_keys), - "total_tags_not_found_count": len(tag_keys) - len(all_found_keys) - }, - "accounts": account_results - } + to_process = [] + for aid, acc in unique_accounts.items(): + if not target_ids or aid in target_ids: + to_process.append(acc) - sum_file = f"{args.output}_summary_{ts}.json" - fin_file = f"{args.output}_findings_{ts}.csv" - - with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4) - if all_findings: - with open(fin_file, 'w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=all_findings[0].keys()) - writer.writeheader(); writer.writerows(all_findings) - - print(f"[+] Summary: {sum_file}") - print(f"[+] Findings: {fin_file}") + if args.limit > 0: to_process = to_process[:args.limit] + max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40 + + print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}") + print(f"Profile: {args.profile} | Region: {args.region} | Role: {args.role_name}") + print(f"Tags Read: {len(tag_keys)} ({len(active_tag_keys)} active)") + print(f"Accounts Found (Unique): {len(unique_accounts)}") + print(f"Accounts Targeted: {len(to_process)}") + print(f"{'='*85}\n") + + all_findings, account_results = [], [] + overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0) + + with ThreadPoolExecutor(max_workers=args.max_workers) as executor: + try: + futures = {executor.submit(scan_account, acc, session, args.role_name, partition, + tag_keys, active_tag_keys, args.region, (i % args.max_workers) + 1, + args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)} + + for future in as_completed(futures): + res, acc_id, alias, m, status = future.result() + if status == "Success": + all_findings.extend(res) + account_results.append({"account_id": acc_id, "alias": alias, "global_metrics": m["global"], "regional_metrics": m["regions"]}) + else: + overall_pbar.write(f"[-] {acc_id}: {status}") + overall_pbar.update(1) + except KeyboardInterrupt: + executor.shutdown(wait=False, cancel_futures=True) + sys.exit(130) + + overall_pbar.close() + print("\n" * (args.max_workers + 1)) + + # Summary Generation + total_hits = sum(a['global_metrics']['hits'] for a in account_results) + total_res = len(set(f['arn'] for f in all_findings)) + all_found_keys = set(f['tag_name'] for f in all_findings) + max_mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + # Convert to MB (resource reports in KB on Linux, bytes on macOS - adjusting for Linux standard) + mem_mb = round(max_mem / 1024, 2) + + output_summary = { + "summary": { + "version": __version__, + "command_line": cmd_line, + "aws_accounts_scanned": len(account_results), + "tags_read_count": len(tag_keys), + "execution_start": start_iso, + "execution_end": datetime.now().isoformat(), + "elapsed_sec_total": round(time.time() - start_ts, 2), + "max_memory_mb": mem_mb, + "total_hits": total_hits, + "total_unique_resources": total_res, + "total_tags_found_count": len(all_found_keys) + }, + "accounts": account_results + } + + sum_file = f"{args.output}_summary_{ts}.json" + fin_file = f"{args.output}_findings_{ts}.csv" + + with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4) + if all_findings: + with open(fin_file, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=all_findings[0].keys()) + writer.writeheader(); writer.writerows(all_findings) + + print(f"[+] Summary: {sum_file}\n[+] Findings: {fin_file}") + + except KeyboardInterrupt: + sys.exit(130) if __name__ == "__main__": main()