Skip to content

Commit

Permalink
add summary output
Browse files Browse the repository at this point in the history
  • Loading branch information
badra001 committed Jan 15, 2026
1 parent cbeefe0 commit 5ef1fa7
Showing 1 changed file with 59 additions and 53 deletions.
112 changes: 59 additions & 53 deletions local-app/python-tools/cross-organization/tag-checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from botocore.exceptions import ClientError
from tqdm import tqdm

__version__ = "1.1.3"
__version__ = "1.1.4"

def get_args():
parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}")
Expand All @@ -23,97 +23,92 @@ def get_args():
parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans")
parser.add_argument("--account-regex", help="Regex to filter accounts by alias")
parser.add_argument("--accounts-from", help="File of Account IDs to process")
parser.add_argument("--output", default="tag_checker_findings", help="Prefix for output files")
parser.add_argument("--output", default="tag_checker", help="Prefix for output files")
parser.add_argument("--limit", type=int, default=0, help="Limit total accounts processed")
parser.add_argument("--verbose", action="store_true", help="Enable detailed logging")
return parser.parse_args()

def get_session(management_session, account_id, role_name, partition, region_name, verbose):
    """Assume a role in a member account and return a session bound to it.

    Args:
        management_session: Session used to create the STS client that
            performs the ``assume_role`` call.
        account_id: ID of the member account to enter.
        role_name: Name of the IAM role to assume in that account.
        partition: ARN partition segment used to build the role ARN.
        region_name: Region passed to both the STS client and the returned
            session.
        verbose: When true, authentication failures are reported through
            ``tqdm.write``.

    Returns:
        A ``boto3.Session`` built from the temporary credentials, or ``None``
        if assuming the role (or building the session) raised.
    """
    # NOTE(review): this client is created from a session shared by every
    # worker thread -- confirm that is safe for the boto3 version in use.
    sts = management_session.client('sts', region_name=region_name)
    role_arn = f"arn:{partition}:iam::{account_id}:role/{role_name}"
    try:
        response = sts.assume_role(RoleArn=role_arn, RoleSessionName="TagDiscoveryScanner")
        creds = response['Credentials']
        # Passing region_name explicitly keeps the member session in the same
        # region/partition context as the management session.
        return boto3.Session(
            aws_access_key_id=creds['AccessKeyId'],
            aws_secret_access_key=creds['SecretAccessKey'],
            aws_session_token=creds['SessionToken'],
            region_name=region_name,
        )
    except Exception as e:
        # Deliberate best-effort boundary: one unreachable account must not
        # abort the whole scan, so the caller gets None instead.
        if verbose:
            tqdm.write(f"[!] Auth Error for {account_id}: {e}")
        return None

def _fetch_tag_hits(client, key, max_attempts=3):
    """Collect every (ARN, tag value) pair carrying ``key`` from one region.

    A throttled request is retried from the first page, so a retry cannot
    duplicate rows. Any other ``ClientError``, or throttling on the last
    attempt, stops the lookup.

    Returns:
        ``(hits, error)`` where ``hits`` is the list of ``(arn, value)``
        tuples gathered by the final attempt (possibly partial) and ``error``
        is the ``ClientError`` that ended it, or ``None`` on success.
    """
    hits = []
    error = None
    for attempt in range(1, max_attempts + 1):
        hits = []
        error = None
        try:
            paginator = client.get_paginator('get_resources')
            for page in paginator.paginate(TagFilters=[{'Key': key}]):
                for mapping in page.get('ResourceTagMappingList', []):
                    val = next((t['Value'] for t in mapping['Tags'] if t['Key'] == key), "N/A")
                    hits.append((mapping['ResourceARN'], val))
            return hits, None
        except ClientError as e:
            error = e
            if "Throttling" not in str(e) or attempt == max_attempts:
                break
            # Simple linear backoff before re-running the whole query.
            time.sleep(attempt)
    return hits, error


def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose, bar_width):
    """Scan one member account for resources carrying any of ``tag_keys``.

    Args:
        account: Account record; only ``account['Id']`` is read.
        management_session: Session handed to ``get_session`` to assume the
            member role.
        role_name: Role to assume in the member account.
        partition: ARN partition segment for the role ARN.
        tag_keys: Tag keys to search for.
        region_name: Region used for auth, IAM and region discovery; also the
            only region scanned if region discovery fails.
        lane_id: Progress-bar row (``position``) for this worker.
        account_regex: Optional case-insensitive regex the account alias must
            match; non-matching accounts are skipped.
        verbose: When true, swallowed errors are reported via ``tqdm.write``.
        bar_width: Width the progress-bar label is left-justified to.

    Returns:
        An 8-tuple ``(findings, account_id, alias, hits, unique_resources,
        tags_not_found, elapsed_seconds, status)``. ``status`` is
        ``"Success"``, ``"Auth Failure"`` or ``"Regex Mismatch (<alias>)"``;
        for the two skip statuses ``findings`` is empty and
        ``tags_not_found`` equals ``len(tag_keys)``.
    """
    acc_id = account['Id']
    m_session = get_session(management_session, acc_id, role_name, partition, region_name, verbose)
    if not m_session:
        return [], acc_id, "N/A", 0, 0, len(tag_keys), 0, "Auth Failure"

    # Alias retrieval is best-effort. An account with no alias returns an
    # empty list, which is handled explicitly rather than via IndexError.
    alias = "N/A"
    try:
        alias_resp = m_session.client('iam', region_name=region_name).list_account_aliases()
        aliases = alias_resp.get('AccountAliases') or []
        if aliases:
            alias = aliases[0]
    except Exception as e:
        if verbose:
            tqdm.write(f"[!] Alias lookup failed for {acc_id}: {e}")

    if account_regex and not re.search(account_regex, alias, re.IGNORECASE):
        return [], acc_id, alias, 0, 0, len(tag_keys), 0, f"Regex Mismatch ({alias})"

    # Multi-region discovery; fall back to the configured region on failure.
    try:
        ec2 = m_session.client('ec2', region_name=region_name)
        active_regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']]
    except Exception:
        active_regions = [region_name]

    acc_start = time.time()
    findings = []
    unique_resources = set()
    tags_found_keys = set()

    # One tagging client per region, reused for every tag key.
    tagging_clients = [
        (region, m_session.client('resourcegroupstaggingapi', region_name=region))
        for region in active_regions
    ]

    # Left-justify the label so bars in different lanes line up.
    label = f"{acc_id} ({alias})".ljust(bar_width)
    pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id} | {label}",
                position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')
    try:
        for key in tag_keys:
            for region, client in tagging_clients:
                hits, error = _fetch_tag_hits(client, key)
                if error is not None and verbose:
                    tqdm.write(f"[!] {acc_id} | {region} | tag '{key}': {error}")
                for arn, val in hits:
                    findings.append({
                        "tag_name": key, "tag_value": val, "account_id": acc_id,
                        "account_alias": alias, "region": region, "arn": arn
                    })
                    unique_resources.add(arn)
                    tags_found_keys.add(key)
            pbar.update(1)
    finally:
        # Always release the progress-bar row, even on an unexpected error.
        pbar.close()

    elapsed = round(time.time() - acc_start, 2)
    return (findings, acc_id, alias, len(findings), len(unique_resources),
            len(tag_keys) - len(tags_found_keys), elapsed, "Success")

def main():
args = get_args()
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
start_overall = time.time()

# Init Management Session with the targeted region
session = boto3.Session(profile_name=args.profile, region_name=args.region)
org = session.client('organizations', region_name=args.region)

# Detect Partition (aws, aws-us-gov, aws-cn)
sts_client = session.client('sts', region_name=args.region)
partition = sts_client.get_caller_identity()['Arn'].split(':')[1]
partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]

with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
tag_keys = [row[0].strip() for row in list(csv.reader(f))[1:] if row]
Expand All @@ -123,48 +118,59 @@ def main():
with open(args.accounts_from, 'r') as f:
target_ids = [l.strip() for l in f if l.strip()]

print(f"\n{'='*70}\nAWS TAG CHECKER v{__version__}\n{'='*70}")
print(f"Profile: {args.profile} | Region: {args.region} | Partition: {partition}")
print(f"\n{'='*75}\nAWS TAG CHECKER v{__version__}\n{'='*75}")

all_accs = []
paginator = org.get_paginator('list_accounts')
for page in paginator.paginate():
for a in page['Accounts']:
if a['Status'] == 'ACTIVE':
if not target_ids or a['Id'] in target_ids:
all_accs.append(a)
if a['Status'] == 'ACTIVE' and (not target_ids or a['Id'] in target_ids):
all_accs.append(a)

if args.limit > 0: all_accs = all_accs[:args.limit]
print(f"Tags: {len(tag_keys)} | Targeted Accounts: {len(all_accs)}")
print(f"{'='*70}\n")

# Calculate bar width for alignment (12 chars for ID + max alias length + padding)
max_label_len = max([len(a['Name']) for a in all_accs]) + 15 if all_accs else 30

print(f"Workers: {args.max_workers} | Tags: {len(tag_keys)} | Targeted Accounts: {len(all_accs)}")
print(f"{'='*75}\n")

all_findings = []
summary_list = []
overall_pbar = tqdm(total=len(all_accs), desc="Total Org Progress", position=0)

with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
futures = {executor.submit(scan_account, acc, session, args.role_name, partition,
tag_keys, args.region, (i % args.max_workers) + 1,
args.account_regex, args.verbose): acc for i, acc in enumerate(all_accs)}
args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(all_accs)}

for future in as_completed(futures):
res, acc_id, alias, status = future.result()
if "Skipped" in status or "Skipping" in status:
overall_pbar.write(f"[-] {acc_id} ({alias}): {status}")
else:
res, acc_id, alias, hits, res_count, miss_count, elapsed, status = future.result()
if "Success" in status:
all_findings.extend(res)
summary_list.append({
"account_id": acc_id, "alias": alias, "hits": hits,
"unique_resources": res_count, "tags_not_found": miss_count, "elapsed": elapsed
})
else:
overall_pbar.write(f"[-] {acc_id}: {status}")
overall_pbar.update(1)

overall_pbar.close()
print("\n" * (args.max_workers + 1))

csv_out = f"{args.output}_{ts}.csv"
# Final Summary and Findings
sum_file = f"{args.output}_summary_{ts}.json"
fin_file = f"{args.output}_findings_{ts}.csv"

with open(sum_file, 'w') as f: json.dump(summary_list, f, indent=4)
if all_findings:
with open(csv_out, 'w', newline='') as f:
with open(fin_file, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
writer.writeheader(); writer.writerows(all_findings)

print(f"[+] Scan Complete in {round(time.time()-start_overall, 2)}s")
print(f"[+] Findings saved to: {csv_out}")
print(f"[+] Summary: {sum_file} | Findings: {fin_file}")

if __name__ == "__main__":
main()

0 comments on commit 5ef1fa7

Please sign in to comment.