diff --git a/local-app/python-tools/cross-organization/tag-checker.py b/local-app/python-tools/cross-organization/tag-checker.py index 4468df81..84cbf4eb 100755 --- a/local-app/python-tools/cross-organization/tag-checker.py +++ b/local-app/python-tools/cross-organization/tag-checker.py @@ -12,7 +12,7 @@ from botocore.exceptions import ClientError from tqdm import tqdm -__version__ = "1.1.3" +__version__ = "1.1.4" def get_args(): parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}") @@ -23,19 +23,17 @@ def get_args(): parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans") parser.add_argument("--account-regex", help="Regex to filter accounts by alias") parser.add_argument("--accounts-from", help="File of Account IDs to process") - parser.add_argument("--output", default="tag_checker_findings", help="Prefix for output files") + parser.add_argument("--output", default="tag_checker", help="Prefix for output files") parser.add_argument("--limit", type=int, default=0, help="Limit total accounts processed") parser.add_argument("--verbose", action="store_true", help="Enable detailed logging") return parser.parse_args() def get_session(management_session, account_id, role_name, partition, region_name, verbose): - """Creates a session in the member account with explicit region/partition context.""" sts = management_session.client('sts', region_name=region_name) role_arn = f"arn:{partition}:iam::{account_id}:role/{role_name}" try: response = sts.assume_role(RoleArn=role_arn, RoleSessionName="TagDiscoveryScanner") c = response['Credentials'] - # Explicitly passing region_name here is critical for GovCloud/China partitions return boto3.Session( aws_access_key_id=c['AccessKeyId'], aws_secret_access_key=c['SecretAccessKey'], @@ -43,77 +41,74 @@ def get_session(management_session, account_id, role_name, partition, region_nam region_name=region_name ) except Exception as e: - if verbose: - tqdm.write(f"\n[!] Auth Error for {account_id} in {region_name}: {str(e)}") + if verbose: tqdm.write(f"[!] Auth Error for {account_id}: {str(e)}") return None -def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose): +def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose, bar_width): acc_id = account['Id'] m_session = get_session(management_session, acc_id, role_name, partition, region_name, verbose) if not m_session: - return [], acc_id, "N/A", "Skipped: Auth/Session Failure" + return [], acc_id, "N/A", 0, 0, len(tag_keys), 0, "Auth Failure" - alias = "N/A" + # Precise Alias Retrieval try: - # Explicitly setting region_name on the IAM client for partition consistency - iam_client = m_session.client('iam', region_name=region_name) - alias_resp = iam_client.list_account_aliases() - alias_list = alias_resp.get('AccountAliases', []) - - if verbose: - tqdm.write(f"[DEBUG] {acc_id} | Raw Aliases: {alias_list} | Region: {region_name}") - - alias = alias_list[0] if alias_list else "N/A" - except Exception as e: - alias = f"ERROR: {type(e).__name__}" - if verbose: - tqdm.write(f"[ERROR] {acc_id} | Alias Fetch Error: {str(e)}") + alias_resp = m_session.client('iam', region_name=region_name).list_account_aliases() + alias = alias_resp.get('AccountAliases', ["N/A"])[0] + except Exception: + alias = "N/A" if account_regex and not re.search(account_regex, alias, re.IGNORECASE): - return [], acc_id, alias, f"Skipped: Regex mismatch ({alias})" + return [], acc_id, alias, 0, 0, len(tag_keys), 0, f"Regex Mismatch ({alias})" - findings = [] - pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id} | {acc_id} ({alias})", - position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') - + # Multi-Region Discovery try: ec2 = m_session.client('ec2', region_name=region_name) - regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']] - except Exception as e: - regions = [region_name] + active_regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']] + except Exception: + active_regions = [region_name] + acc_start = time.time() + findings = [] + unique_resources = set() + tags_found_keys = set() + + # Aligned Progress Bar + label = f"{acc_id} ({alias})".ljust(bar_width) + pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id} | {label}", + position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}') + for key in tag_keys: - for r in regions: + for r in active_regions: client = m_session.client('resourcegroupstaggingapi', region_name=r) try: paginator = client.get_paginator('get_resources') for page in paginator.paginate(TagFilters=[{'Key': key}]): for mapping in page.get('ResourceTagMappingList', []): + arn = mapping['ResourceARN'] val = next((t['Value'] for t in mapping['Tags'] if t['Key'] == key), "N/A") findings.append({ "tag_name": key, "tag_value": val, "account_id": acc_id, - "account_alias": alias, "region": r, "arn": mapping['ResourceARN'] + "account_alias": alias, "region": r, "arn": arn }) + unique_resources.add(arn) + tags_found_keys.add(key) except ClientError as e: if "Throttling" in str(e): time.sleep(1) pbar.update(1) pbar.close() - return findings, acc_id, alias, f"Completed: {len(findings)} hits" + elapsed = round(time.time() - acc_start, 2) + return findings, acc_id, alias, len(findings), len(unique_resources), (len(tag_keys) - len(tags_found_keys)), elapsed, "Success" def main(): args = get_args() ts = datetime.now().strftime("%Y%m%d_%H%M%S") start_overall = time.time() - # Init Management Session with the targeted region session = boto3.Session(profile_name=args.profile, region_name=args.region) org = session.client('organizations', region_name=args.region) - - # Detect Partition (aws, aws-us-gov, aws-cn) - sts_client = session.client('sts', region_name=args.region) - partition = sts_client.get_caller_identity()['Arn'].split(':')[1] + partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1] with open(args.tags_file, mode='r', encoding='utf-8-sig') as f: tag_keys = [row[0].strip() for row in list(csv.reader(f))[1:] if row] @@ -123,48 +118,59 @@ def main(): with open(args.accounts_from, 'r') as f: target_ids = [l.strip() for l in f if l.strip()] - print(f"\n{'='*70}\nAWS TAG CHECKER v{__version__}\n{'='*70}") - print(f"Profile: {args.profile} | Region: {args.region} | Partition: {partition}") + print(f"\n{'='*75}\nAWS TAG CHECKER v{__version__}\n{'='*75}") all_accs = [] paginator = org.get_paginator('list_accounts') for page in paginator.paginate(): for a in page['Accounts']: - if a['Status'] == 'ACTIVE': - if not target_ids or a['Id'] in target_ids: - all_accs.append(a) + if a['Status'] == 'ACTIVE' and (not target_ids or a['Id'] in target_ids): + all_accs.append(a) if args.limit > 0: all_accs = all_accs[:args.limit] - print(f"Tags: {len(tag_keys)} | Targeted Accounts: {len(all_accs)}") - print(f"{'='*70}\n") + + # Calculate bar width for alignment (12 chars for ID + max alias length + padding) + max_label_len = max([len(a['Name']) for a in all_accs]) + 15 if all_accs else 30 + + print(f"Workers: {args.max_workers} | Tags: {len(tag_keys)} | Targeted Accounts: {len(all_accs)}") + print(f"{'='*75}\n") all_findings = [] + summary_list = [] overall_pbar = tqdm(total=len(all_accs), desc="Total Org Progress", position=0) with ThreadPoolExecutor(max_workers=args.max_workers) as executor: futures = {executor.submit(scan_account, acc, session, args.role_name, partition, tag_keys, args.region, (i % args.max_workers) + 1, - args.account_regex, args.verbose): acc for i, acc in enumerate(all_accs)} + args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(all_accs)} for future in as_completed(futures): - res, acc_id, alias, status = future.result() - if "Skipped" in status or "Skipping" in status: - overall_pbar.write(f"[-] {acc_id} ({alias}): {status}") - else: + res, acc_id, alias, hits, res_count, miss_count, elapsed, status = future.result() + if "Success" in status: all_findings.extend(res) + summary_list.append({ + "account_id": acc_id, "alias": alias, "hits": hits, + "unique_resources": res_count, "tags_not_found": miss_count, "elapsed": elapsed + }) + else: + overall_pbar.write(f"[-] {acc_id}: {status}") overall_pbar.update(1) overall_pbar.close() print("\n" * (args.max_workers + 1)) - csv_out = f"{args.output}_{ts}.csv" + # Final Summary and Findings + sum_file = f"{args.output}_summary_{ts}.json" + fin_file = f"{args.output}_findings_{ts}.csv" + + with open(sum_file, 'w') as f: json.dump(summary_list, f, indent=4) if all_findings: - with open(csv_out, 'w', newline='') as f: + with open(fin_file, 'w', newline='') as f: writer = csv.DictWriter(f, fieldnames=all_findings[0].keys()) writer.writeheader(); writer.writerows(all_findings) print(f"[+] Scan Complete in {round(time.time()-start_overall, 2)}s") - print(f"[+] Findings saved to: {csv_out}") + print(f"[+] Summary: {sum_file} | Findings: {fin_file}") if __name__ == "__main__": main()