Skip to content

Commit

Permalink
memory stats
Browse files Browse the repository at this point in the history
  • Loading branch information
badra001 committed Jan 15, 2026
1 parent 4810bec commit d28776e
Showing 1 changed file with 118 additions and 106 deletions.
224 changes: 118 additions & 106 deletions local-app/python-tools/cross-organization/tag-checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import time
import re
import os
import resource
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from botocore.exceptions import ClientError
from tqdm import tqdm

__version__ = "1.1.10"
__version__ = "1.1.12"

def get_args():
parser = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}")
parser.add_argument("--role-name", required=True, help="Role to assume in member accounts")
parser.add_argument("--region", required=True, help="Management account region (e.g., us-gov-east-1)")
parser.add_argument("--profile", required=True, help="AWS CLI profile for Management Account")
parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key in the first column")
parser.add_argument("--tags-file", required=True, help="CSV file with Tag Key, Status, Type, etc.")
parser.add_argument("--max-workers", type=int, default=8, help="Max concurrent account scans")
parser.add_argument("--account-regex", help="Regex to filter accounts by alias")
parser.add_argument("--accounts-from", help="File of Account IDs to process")
Expand All @@ -45,7 +46,7 @@ def get_session(management_session, account_id, role_name, partition, region_nam
if verbose: tqdm.write(f"[!] Auth Error for {account_id}: {str(e)}")
return None

def scan_account(account, management_session, role_name, partition, tag_keys, region_name, lane_id, account_regex, verbose, bar_width):
def scan_account(account, management_session, role_name, partition, tag_keys, active_tag_keys, region_name, lane_id, account_regex, verbose, bar_width):
acc_id = account['Id']
m_session = get_session(management_session, acc_id, role_name, partition, region_name, verbose)

Expand Down Expand Up @@ -73,17 +74,14 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re
global_tags_found = set()
regional_metrics = []

# UI Alignment: Zero-padded lane + fixed width label
label = f"{acc_id} {alias}".ljust(bar_width)
pbar = tqdm(total=len(tag_keys), desc=f"Lane {lane_id:02d} | {label}",
position=lane_id, leave=False, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')

for key in tag_keys:
for r in active_regions:
r_start = time.perf_counter()
r_hits = 0
r_res_found = set()
r_tags_found = set()
r_hits, r_res_found, r_tags_found = 0, set(), set()

client = m_session.client('resourcegroupstaggingapi', region_name=r)
try:
Expand All @@ -106,16 +104,24 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re

r_elapsed = round(time.perf_counter() - r_start, 4)
r_entry = next((m for m in regional_metrics if m['region'] == r), None)

# Intersection with active tags
r_active_found = sorted(list(r_tags_found.intersection(active_tag_keys)))

if not r_entry:
regional_metrics.append({
"region": r, "hits": r_hits, "unique_resources": len(r_res_found),
"tags_found_count": len(r_tags_found), "tags_found_list": sorted(list(r_tags_found)),
"tags_not_found_count": len(tag_keys) - len(r_tags_found), "elapsed_sec": r_elapsed
"tags_found_count": len(r_tags_found),
"tags_found_list": sorted(list(r_tags_found)),
"tags_found_list_active": r_active_found,
"tags_not_found_count": len(tag_keys) - len(r_tags_found),
"elapsed_sec": r_elapsed
})
else:
r_entry['hits'] += r_hits
current_tags = set(r_entry['tags_found_list']) | r_tags_found
r_entry['tags_found_list'] = sorted(list(current_tags))
r_entry['tags_found_list_active'] = sorted(list(current_tags.intersection(active_tag_keys)))
r_entry['tags_found_count'] = len(current_tags)
r_entry['tags_not_found_count'] = len(tag_keys) - len(current_tags)
r_entry['elapsed_sec'] = round(r_entry['elapsed_sec'] + r_elapsed, 4)
Expand All @@ -130,6 +136,7 @@ def scan_account(account, management_session, role_name, partition, tag_keys, re
"unique_resources": len(global_resources),
"tags_found_count": len(global_tags_found),
"tags_found_list": sorted(list(global_tags_found)),
"tags_found_list_active": sorted(list(global_tags_found.intersection(active_tag_keys))),
"tags_not_found_count": len(tag_keys) - len(global_tags_found),
"elapsed_sec": round(time.time() - acc_start, 2)
},
Expand All @@ -144,107 +151,112 @@ def main():
start_iso = datetime.now().isoformat()
start_ts = time.time()

session = boto3.Session(profile_name=args.profile, region_name=args.region)
org = session.client('organizations', region_name=args.region)
partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]

with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
tag_keys = [row[0].strip() for row in list(csv.reader(f))[1:] if row]

target_ids = []
if args.accounts_from:
with open(args.accounts_from, 'r') as f:
target_ids = [l.strip() for l in f if l.strip()]

# Initial Discovery for Header info
all_raw_accounts = []
paginator = org.get_paginator('list_accounts')
for page in paginator.paginate():
all_raw_accounts.extend(page['Accounts'])

to_process = []
for a in all_raw_accounts:
if a['Status'] == 'ACTIVE':
if not target_ids or a['Id'] in target_ids:
to_process.append(a)

if args.limit > 0: to_process = to_process[:args.limit]

# Calculate bar width
max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40

# STARTUP HEADER
print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
print(f"Execution Profile : {args.profile}")
print(f"Execution Region : {args.region}")
print(f"Assume Role Name : {args.role_name}")
print(f"Partition : {partition}")
print(f"Thread Count : {args.max_workers}")
print(f"Tags Read : {len(tag_keys)}")
print(f"Accounts Found : {len(all_raw_accounts)}")
print(f"Accounts Targeted : {len(to_process)}")
print(f"{'-'*85}")
print(f"Arguments: {vars(args)}")
print(f"{'='*85}\n")

all_findings = []
account_results = []
overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0)

with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
futures = {executor.submit(scan_account, acc, session, args.role_name, partition,
tag_keys, args.region, (i % args.max_workers) + 1,
args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)}
try:
session = boto3.Session(profile_name=args.profile, region_name=args.region)
org = session.client('organizations', region_name=args.region)
partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]

for future in as_completed(futures):
res, acc_id, alias, m, status = future.result()
if status == "Success":
all_findings.extend(res)
account_results.append({
"account_id": acc_id, "alias": alias,
"global_metrics": m["global"],
"regional_metrics": m["regions"]
})
else:
overall_pbar.write(f"[-] {acc_id}: {status}")
overall_pbar.update(1)
# Parse Tag CSV for Status
tag_keys = []
active_tag_keys = set()
with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
reader = csv.DictReader(f) # header row must contain a 'Tag Key' column; 'Status' is optional; other columns are not read here
for row in reader:
key = row['Tag Key'].strip()
tag_keys.append(key)
if row.get('Status', '').strip().lower() == 'active':
active_tag_keys.add(key)

target_ids = []
if args.accounts_from:
with open(args.accounts_from, 'r') as f:
target_ids = [l.strip() for l in f if l.strip()]

overall_pbar.close()
print("\n" * (args.max_workers + 1))
# Unique Account Discovery
unique_accounts = {}
paginator = org.get_paginator('list_accounts')
for page in paginator.paginate():
for a in page['Accounts']:
if a['Status'] == 'ACTIVE':
unique_accounts[a['Id']] = a

# Summary Aggregation
total_hits = sum(a['global_metrics']['hits'] for a in account_results)
total_res = len(set(f['arn'] for f in all_findings))
all_found_keys = set(f['tag_name'] for f in all_findings)

output_summary = {
"summary": {
"version": __version__,
"command_line": cmd_line,
"aws_accounts_scanned": len(account_results),
"execution_start": start_iso,
"execution_end": datetime.now().isoformat(),
"elapsed_sec_total": round(time.time() - start_ts, 2),
"threads": args.max_workers,
"total_hits": total_hits,
"total_unique_resources": total_res,
"total_tags_found_count": len(all_found_keys),
"total_tags_not_found_count": len(tag_keys) - len(all_found_keys)
},
"accounts": account_results
}
to_process = []
for aid, acc in unique_accounts.items():
if not target_ids or aid in target_ids:
to_process.append(acc)

sum_file = f"{args.output}_summary_{ts}.json"
fin_file = f"{args.output}_findings_{ts}.csv"

with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4)
if all_findings:
with open(fin_file, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
writer.writeheader(); writer.writerows(all_findings)

print(f"[+] Summary: {sum_file}")
print(f"[+] Findings: {fin_file}")
if args.limit > 0: to_process = to_process[:args.limit]
max_label_len = max([12 + 1 + len(a['Name']) for a in to_process]) + 1 if to_process else 40

print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
print(f"Profile: {args.profile} | Region: {args.region} | Role: {args.role_name}")
print(f"Tags Read: {len(tag_keys)} ({len(active_tag_keys)} active)")
print(f"Accounts Found (Unique): {len(unique_accounts)}")
print(f"Accounts Targeted: {len(to_process)}")
print(f"{'='*85}\n")

all_findings, account_results = [], []
overall_pbar = tqdm(total=len(to_process), desc="Total Org Progress", position=0)

with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
try:
futures = {executor.submit(scan_account, acc, session, args.role_name, partition,
tag_keys, active_tag_keys, args.region, (i % args.max_workers) + 1,
args.account_regex, args.verbose, max_label_len): acc for i, acc in enumerate(to_process)}

for future in as_completed(futures):
res, acc_id, alias, m, status = future.result()
if status == "Success":
all_findings.extend(res)
account_results.append({"account_id": acc_id, "alias": alias, "global_metrics": m["global"], "regional_metrics": m["regions"]})
else:
overall_pbar.write(f"[-] {acc_id}: {status}")
overall_pbar.update(1)
except KeyboardInterrupt:
executor.shutdown(wait=False, cancel_futures=True)
sys.exit(130)

overall_pbar.close()
print("\n" * (args.max_workers + 1))

# Summary Generation
total_hits = sum(a['global_metrics']['hits'] for a in account_results)
total_res = len(set(f['arn'] for f in all_findings))
all_found_keys = set(f['tag_name'] for f in all_findings)
max_mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
# ru_maxrss is reported in kilobytes on Linux but in bytes on macOS; the division below assumes Linux, so on macOS the result is in KB rather than MB
mem_mb = round(max_mem / 1024, 2)

output_summary = {
"summary": {
"version": __version__,
"command_line": cmd_line,
"aws_accounts_scanned": len(account_results),
"tags_read_count": len(tag_keys),
"execution_start": start_iso,
"execution_end": datetime.now().isoformat(),
"elapsed_sec_total": round(time.time() - start_ts, 2),
"max_memory_mb": mem_mb,
"total_hits": total_hits,
"total_unique_resources": total_res,
"total_tags_found_count": len(all_found_keys)
},
"accounts": account_results
}

sum_file = f"{args.output}_summary_{ts}.json"
fin_file = f"{args.output}_findings_{ts}.csv"

with open(sum_file, 'w') as f: json.dump(output_summary, f, indent=4)
if all_findings:
with open(fin_file, 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
writer.writeheader(); writer.writerows(all_findings)

print(f"[+] Summary: {sum_file}\n[+] Findings: {fin_file}")

except KeyboardInterrupt:
sys.exit(130)

if __name__ == "__main__":
main()

0 comments on commit d28776e

Please sign in to comment.