diff --git a/local-app/python-tools/cross-organization/tag-checker.py b/local-app/python-tools/cross-organization/tag-checker.py index 3e39efec..3f2c97c5 100755 --- a/local-app/python-tools/cross-organization/tag-checker.py +++ b/local-app/python-tools/cross-organization/tag-checker.py @@ -9,7 +9,7 @@ from datetime import datetime from botocore.exceptions import ClientError -__version__ = "1.0.3" +__version__ = "1.0.4" def get_args(): parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}") @@ -17,6 +17,7 @@ def get_args(): parser.add_argument("--region", required=True, help="Primary region for API initialization") parser.add_argument("--profile", required=True, help="AWS CLI profile for Management Account") parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key in the first column") + parser.add_argument("--limit", type=int, default=0, help="Limit scan to X number of accounts (0 for no limit)") return parser.parse_args() def get_session_for_account(management_session, account_id, role_name, partition): @@ -47,7 +48,6 @@ def main(): args = get_args() start_time_overall = time.time() - # Initialize Management Session session = boto3.Session(profile_name=args.profile) org_client = session.client('organizations') partition = session.client('sts').get_caller_identity()['Arn'].split(':')[1] @@ -56,33 +56,37 @@ def main(): tag_keys = [] with open(args.tags_file, mode='r', encoding='utf-8-sig') as f: reader = csv.reader(f) - next(reader) # Skip header + next(reader) tag_keys = [row[0].strip() for row in reader if row] - print(f"[*] Starting Scan v{__version__} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print(f"[*] Starting Scan v{__version__} (Testing {len(tag_keys)} tags)") all_results = [] account_metrics = [] total_resources_found = 0 + accounts_processed = 0 paginator = org_client.get_paginator('list_accounts') for page in paginator.paginate(): for account in page['Accounts']: if account['Status'] != 'ACTIVE': continue + # Apply Account Limit for Testing + if args.limit > 0 and accounts_processed >= args.limit: + break + acc_start_time = time.time() acc_id, acc_name = account['Id'], account['Name'] - print(f" --> Processing {acc_id} ({acc_name})...", end="\r") + print(f" --> Account {accounts_processed + 1}: {acc_id} ({acc_name})...", end="\r") m_session = get_session_for_account(session, acc_id, args.role_name, partition) if not m_session: - print(f" [!] Skipped {acc_id}: Access Denied") + print(f"\n [!] Skipped {acc_id}: Access Denied (Check Role)") continue alias = get_account_alias(m_session) resources_in_account = 0 - # Regional Scan try: ec2 = m_session.client('ec2', region_name=args.region) regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']] @@ -93,55 +97,65 @@ def main(): tag_client = m_session.client('resourcegroupstaggingapi', region_name=region) tag_paginator = tag_client.get_paginator('get_resources') - # We filter by 50 tags per API call (AWS limit) - for i in range(0, len(tag_keys), 50): - chunk = [{'Key': k} for k in tag_keys[i:i+50]] - for tag_page in tag_paginator.paginate(TagFilters=chunk): - for r_mapping in tag_page.get('ResourceTagMappingList', []): - resources_in_account += 1 - arn = r_mapping['ResourceARN'] - found_tags = [t for t in r_mapping['Tags'] if t['Key'] in tag_keys] - for tag in found_tags: + # BUG FIX: Process tags one by one or in small batches. + # AWS TagFilters is an "AND" operation if you provide multiple keys in one filter dict. + # To do an "OR" (Find ANY of these tags), we iterate through each tag key individually. + for target_key in tag_keys: + filter_param = [{'Key': target_key}] + try: + for tag_page in tag_paginator.paginate(TagFilters=filter_param): + for r_mapping in tag_page.get('ResourceTagMappingList', []): + resources_in_account += 1 + arn = r_mapping['ResourceARN'] + # Get the value for the specific key we searched for + val = next((t['Value'] for t in r_mapping['Tags'] if t['Key'] == target_key), "N/A") + all_results.append({ - "tag_name": tag['Key'], "tag_value": tag['Value'], - "account_id": acc_id, "account_alias": alias, - "region": region, "arn": arn + "tag_name": target_key, + "tag_value": val, + "account_id": acc_id, + "account_alias": alias, + "region": region, + "arn": arn }) + except ClientError as e: + if "Throttling" in str(e): + time.sleep(1) # Simple backoff + continue acc_elapsed = time.time() - acc_start_time total_resources_found += resources_in_account account_metrics.append({ "account_id": acc_id, "account_name": acc_name, - "account_alias": alias, - "resources_with_target_tags": resources_in_account, - "elapsed_seconds": round(acc_elapsed, 2) + "elapsed_seconds": round(acc_elapsed, 2), + "hits_found": resources_in_account }) + accounts_processed += 1 + + if args.limit > 0 and accounts_processed >= args.limit: + break - # Summary Generation + # Final Output total_elapsed = time.time() - start_time_overall summary = { - "version": __version__, - "timestamp": datetime.now().isoformat(), - "total_accounts_scanned": len(account_metrics), - "total_target_resources_found": total_resources_found, - "total_elapsed_seconds": round(total_elapsed, 2), - "average_seconds_per_account": round(total_elapsed / len(account_metrics), 2) if account_metrics else 0, - "account_details": account_metrics + "scan_version": __version__, + "total_accounts": accounts_processed, + "total_hits": len(all_results), + "total_time": f"{round(total_elapsed, 2)}s", + "account_breakdown": account_metrics } - # Save Outputs - with open('findings.json', 'w') as f: json.dump(all_results, f, indent=4) with open('summary_metrics.json', 'w') as f: json.dump(summary, f, indent=4) if all_results: + keys = all_results[0].keys() with open('findings.csv', 'w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=all_results[0].keys()) - writer.writeheader(); writer.writerows(all_results) + writer = csv.DictWriter(f, fieldnames=keys) + writer.writeheader() + writer.writerows(all_results) + + print(f"\n\n[+] Scan Complete. Found {len(all_results)} tag instances across {accounts_processed} accounts.") - print("\n" + "="*40) - print(f"SCAN SUMMARY (v{__version__})") - print(f"Total Accounts: {summary['total_accounts_scanned']}") - print(f"Total Resources: {summary['total_target_resources_found']}") - print(f"Total Time: {summary['total_elapsed_seconds']}s") - print("="*40) +if __name__ == "__main__": + main()