# Organization Crawler Module Specification

This specification defines the contract between the **Master Crawler** (the orchestration engine) and individual **Check Modules**.

## **Overview**

The Crawler is a multi-threaded engine that assumes a role in each target account and dynamically imports "Check Modules" to gather data. Each module must be a standalone Python file.

## **I. Function Signature Requirements**

Each module MUST implement an `account_task` function with the following signature:

```python
def account_task(account_session, account_id, account_name, region):
```

* **`account_session`**: A `boto3.Session` object pre-authenticated into the target account.
* **`account_id`**: The 12-digit AWS Account ID string.
* **`account_name`**: The name of the account as defined in AWS Organizations.
* **`region`**: The default region for the session (though modules may iterate through other regions internally).

## **II. Return Data Structure**

The function must return a dictionary with the following keys (`alias`, `data` and `error` are top-level keys; `account_summary` lives inside `data`):

1. **`alias`**: The IAM Account Alias (string).
2. **`data`**: A dictionary containing the actual findings.
   * **Resource Keys**: Every entry in `data` should be keyed by a unique identifier (e.g., ARN or Region:ResourceName).
   * **The `resource` Key**: Every inner dictionary MUST contain a `resource` key (usually the ARN). The Crawler uses this for CSV mapping.
3. **`account_summary`**: A reserved entry inside `data` (i.e. `data["account_summary"]`), not a top-level key. Its value is a dictionary with a `_summary` key used for high-level reporting.
4. **`error`**: (Optional) If the task fails, include the exception string here.

## **III. Data Standards**

* **Date Formats**: All datetime objects MUST be converted to strings using `.isoformat()` to ensure JSON serializability.
* **Booleans**: Convert booleans to strings (`"True"`/`"False"`) if they are going to be displayed in CLI tables or CSVs.
* **Global vs. Regional**:
  * **Regional Modules**: Should iterate through `ec2.describe_regions()` and gather data for every enabled region.
  * **Global Modules**: (like IAM or Route53) should run exactly once per account to avoid duplicate data.

## **IV. File Output Handling**

Modules do not handle their own file I/O.

1. The **Crawler** collects the returned dictionaries from all threads.
2. The **Crawler** saves the aggregated list as a JSON file named `audit_results...json`.
3. The **Crawler** automatically flattens the `data` block into a CSV file for any keys that exist in the first returned object.

## **V. Development Checklist**

* [ ] Includes `__version__ = "x.x.x"` at the top.
* [ ] Uses `get_paginator` for any AWS calls that may return more than 100 items.
* [ ] Implements a `try/except` block inside `account_task` to prevent one failing account from crashing the entire thread.
* [ ] Returns `"NEVER"` or `"N/A"` for null fields rather than omitting them.
def get_args():
    """Parse the tag-data analyzer's command-line options.

    Returns:
        argparse.Namespace with ``tags_file`` (required), ``findings_file``
        (required list of one or more values) and ``output`` (optional,
        ``None`` when not supplied).
    """
    # Declared as data so the option set can be read at a glance; the order
    # here is the order the options appear in ``--help``.
    option_specs = (
        ("--tags-file", {"required": True, "help": "Original CSV with TagKey and Status"}),
        ("--findings-file", {"nargs": '+', "required": True, "help": "One or more CSV findings files"}),
        ("--output", {"help": "Optional CSV file to save analysis results"}),
    )

    cli = argparse.ArgumentParser(description=f"AWS Tag Data Analyzer v{__version__}")
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
No findings files found.") - sys.exit(1) - - print(f"[*] Analyzing {len(files_to_process)} findings files...") - - for file in files_to_process: - try: - with open(file, mode='r', encoding='utf-8') as f: - reader = csv.DictReader(f) - for row in reader: - tag_name = row.get('tag_name', '').strip() - acc_id = row.get('account_id', '').strip() - acc_alias = row.get('account_alias', '').strip() - - if tag_name: - findings_count[tag_name] += 1 - max_tag_len = max(max_tag_len, len(tag_name)) - if acc_id: - account_map[tag_name].add(acc_id) - all_seen_accounts.add(f"{acc_id} ({acc_alias})") - total_hits += 1 - except Exception as e: - print(f"[!] Error reading {file}: {e}") - - # 3. Final Table Formatting - col1_width = max_tag_len + 2 - header = f"{'Tag Name'.ljust(col1_width)} | {'Status':<10} | {'Instances':<12} | {'Unique Accounts'}" - divider = "-" * (len(header) + 5) - - print(f"\n{divider}") - print(header) - print(divider) - - report_rows = [] - sorted_tags = sorted(findings_count.items(), key=lambda x: x[1], reverse=True) - - for tag, count in sorted_tags: - status = tag_metadata.get(tag, "Not in List") - unique_accs = len(account_map[tag]) - print(f"{tag.ljust(col1_width)} | {status:<10} | {count:<12} | {unique_accs}") - - report_rows.append({ - "TagKey": tag, - "Status": status, - "TotalInstances": count, - "UniqueAccounts": unique_accs - }) - - print(divider) - - # 4. Accounts with NO Hits - # Note: This logic assumes we want to see which accounts appeared in the CSVs but had no data. - # To see accounts that never even made it to the CSV, you would need to cross-ref with --list-accounts. 
def get_days_ago(dt):
    """Return the number of whole days between ``dt`` and now (UTC).

    Args:
        dt: A ``datetime`` or a falsy value (``None``) when the event never
            happened.

    Returns:
        The string ``"NEVER"`` when ``dt`` is falsy, otherwise an ``int``
        day count.
    """
    if not dt:
        return "NEVER"
    if dt.tzinfo is None:
        # Subtracting a naive datetime from an aware one raises TypeError;
        # treat a naive value as UTC instead of failing the whole account.
        dt = dt.replace(tzinfo=timezone.utc)
    return (datetime.now(timezone.utc) - dt).days


def _access_key_details(iam, key_meta):
    """Build the report entry for one access key from its metadata record."""
    key_id = key_meta['AccessKeyId']
    last_used_resp = iam.get_access_key_last_used(AccessKeyId=key_id)
    last_used_date = last_used_resp.get('AccessKeyLastUsed', {}).get('LastUsedDate')
    return {
        "access_key_id": key_id,
        "status": key_meta['Status'],
        "created_date": key_meta['CreateDate'].isoformat(),
        "days_since_rotated": get_days_ago(key_meta['CreateDate']),
        "last_used_date": last_used_date.isoformat() if last_used_date else "NEVER",
        "days_since_used": get_days_ago(last_used_date),
    }


def account_task(account_session, account_id, account_name, region):
    """Collect IAM user credential metrics for one account.

    For every IAM user this records the last console login, whether any MFA
    device is attached, and per-access-key creation / last-use ages.

    Args:
        account_session: Session object for the target account; only its
            ``client('iam')`` method is used.
        account_id: Accepted for the crawler contract; not used here.
        account_name: Accepted for the crawler contract; not used here.
        region: Accepted for the crawler contract; not used here.

    Returns:
        dict with keys:
          * ``alias`` - first account alias, or ``"N/A"`` when none exists.
          * ``data`` - one entry per user keyed by username, plus an
            ``account_summary`` entry of the form ``{"_summary": "USERS:<n>"}``
            once every user has been processed.
          * ``error`` - only present when an exception was raised; holds
            ``str(exception)``. Entries gathered before the failure are kept.
    """
    results = {"alias": "N/A", "data": {}}
    try:
        iam = account_session.client('iam')

        # A default of ["N/A"] only helps when the key is missing. When the
        # key is present but the list is empty, indexing [0] raised IndexError
        # and the whole account was reported as failed.
        aliases = iam.list_account_aliases().get('AccountAliases') or []
        if aliases:
            results["alias"] = aliases[0]

        for page in iam.get_paginator('list_users').paginate():
            for user in page['Users']:
                username = user['UserName']

                # Console login activity
                last_login = user.get('PasswordLastUsed')

                # MFA status: any attached device counts as enabled
                mfa_devices = iam.list_mfa_devices(UserName=username).get('MFADevices', [])

                # Access key ages and last-use information
                key_records = iam.list_access_keys(UserName=username).get('AccessKeyMetadata', [])
                key_details = [_access_key_details(iam, meta) for meta in key_records]

                # Keyed by username for the JSON output
                results["data"][username] = {
                    "resource": user['Arn'],
                    "username": username,
                    "user_id": user['UserId'],
                    "create_date": user['CreateDate'].isoformat(),
                    # Stored as a string so it renders in tables and CSVs
                    "mfa_enabled": str(bool(mfa_devices)),
                    "last_login_date": last_login.isoformat() if last_login else "NEVER",
                    "days_since_login": get_days_ago(last_login),
                    "access_keys": key_details,
                }

        # Count is taken before the summary entry itself is inserted.
        user_total = len(results["data"])
        results["data"]["account_summary"] = {"_summary": f"USERS:{user_total}"}

    except Exception as e:
        # One failing account must not take down the caller; report the error
        # alongside whatever was collected so far.
        results["error"] = str(e)

    return results
def get_args():
    """Parse the tag scanner's command-line options.

    Only ``--region`` and ``--profile`` are enforced by the parser; every
    other option is optional here.

    Returns:
        argparse.Namespace holding one attribute per option below.
    """
    # (flag, add_argument keyword arguments), in --help display order.
    option_table = [
        ("--role-name", {"required": False, "help": "Role to assume in member accounts"}),
        ("--region", {"required": True, "help": "Management account region (e.g., us-gov-east-1)"}),
        ("--profile", {"required": True, "help": "AWS CLI profile for Management Account"}),
        ("--tags-file", {"required": False, "help": "CSV file with TagKey, Type, Status, etc."}),
        ("--max-workers", {"type": int, "default": 8, "help": "Max concurrent account scans"}),
        ("--account-regex", {"help": "Regex to filter accounts by alias"}),
        ("--region-regex", {"help": "Regex to filter regions (e.g., '^us-')"}),
        ("--accounts-from", {"help": "File of Account IDs to process"}),
        ("--output", {"default": "tag_checker", "help": "Prefix for output files"}),
        ("--limit", {"type": int, "default": 0, "help": "Limit total accounts processed"}),
        ("--verbose", {"action": "store_true", "help": "Enable detailed logging"}),
        ("--list-accounts", {"action": "store_true", "help": "List Account IDs and exit"}),
    ]

    cli = argparse.ArgumentParser(description=f"AWS Org Tag Scanner v{__version__}")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def get_member_session(management_session, account_id, role_name, partition, region_name, verbose):
    """Assume ``role_name`` in a member account and return a session for it.

    Args:
        management_session: Session whose ``client('sts', ...)`` is used to
            call ``assume_role``.
        account_id: Account ID placed in the role ARN.
        role_name: Role name placed in the role ARN.
        partition: ARN partition segment (second field of the ARN).
        region_name: Region for both the STS client and the returned session.
        verbose: When true, an assume-role failure is reported via
            ``tqdm.write``.

    Returns:
        A ``boto3.Session`` built from the temporary credentials, or ``None``
        when anything inside the assume-role step raises.
    """
    sts_client = management_session.client('sts', region_name=region_name)
    target_role = f"arn:{partition}:iam::{account_id}:role/{role_name}"

    try:
        temp_creds = sts_client.assume_role(
            RoleArn=target_role,
            RoleSessionName="TagDiscoveryScanner",
            DurationSeconds=3600,
        )['Credentials']
        session_kwargs = {
            "aws_access_key_id": temp_creds['AccessKeyId'],
            "aws_secret_access_key": temp_creds['SecretAccessKey'],
            "aws_session_token": temp_creds['SessionToken'],
            "region_name": region_name,
        }
        return boto3.Session(**session_kwargs)
    except Exception as err:
        # Best effort: a failed account is signalled with None, not raised.
        if not verbose:
            return None
        tqdm.write(f"[!] Auth Error for {account_id}: {str(err)}")
        return None
def main():
    """Scan every targeted organization account for tags and write the reports.

    Steps: list ACTIVE accounts from AWS Organizations, optionally print them
    and exit (``--list-accounts``), load tag keys from ``--tags-file``, filter
    the accounts (``--accounts-from``, ``--limit``), run ``scan_account`` for
    each one in a thread pool, then write ``<output>_summary_<ts>.json`` and,
    when there are findings, ``<output>_findings_<ts>.csv``.

    Exits with status 130 on Ctrl-C, 0 after ``--list-accounts``, and with an
    error message when a scan is requested without ``--tags-file`` or
    ``--role-name``.
    """
    args = get_args()

    # Both options are optional in the parser (presumably so --list-accounts
    # can run without them), but a scan cannot work without them: opening a
    # None tags file raises TypeError and a None role yields the ARN
    # ".../role/None". Fail early, before any AWS call is made.
    if not args.list_accounts:
        required_for_scan = (("--tags-file", args.tags_file), ("--role-name", args.role_name))
        missing = [flag for flag, value in required_for_scan if not value]
        if missing:
            sys.exit(f"[!] Missing required option(s) for a scan: {', '.join(missing)}")

    cmd_line = " ".join(sys.argv)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    start_iso, start_ts = datetime.now().isoformat(), time.time()
    try:
        session = create_refreshable_session(args.profile, args.region)
        org = session.client('organizations', region_name=args.region)
        # The partition is the second field of the caller's ARN.
        partition = session.client('sts', region_name=args.region).get_caller_identity()['Arn'].split(':')[1]

        # Keyed by account ID so an account listed twice is only kept once.
        unique_accounts = {}
        for page in org.get_paginator('list_accounts').paginate():
            for account in page['Accounts']:
                if account['Status'] == 'ACTIVE':
                    unique_accounts[account['Id']] = account

        if args.list_accounts:
            for aid in sorted(unique_accounts):
                print(aid)
            sys.exit(0)

        # Tag keys to search for; those whose Status column is "active"
        # (case-insensitive) are tracked separately.
        tag_keys, active_tag_keys = [], set()
        with open(args.tags_file, mode='r', encoding='utf-8-sig') as f:
            for row in csv.DictReader(f, skipinitialspace=True):
                key = row.get('TagKey', '').strip()
                if key:
                    tag_keys.append(key)
                    if row.get('Status', '').strip().lower() == 'active':
                        active_tag_keys.add(key)

        # Optional allow-list of account IDs; a set keeps the filter O(1).
        target_ids = set()
        if args.accounts_from:
            with open(args.accounts_from, 'r') as f:
                target_ids = {line.strip() for line in f if line.strip()}

        to_process = [acct for acct_id, acct in unique_accounts.items()
                      if not target_ids or acct_id in target_ids]
        if args.limit > 0:
            to_process = to_process[:args.limit]

        # UI Width Calculation: "01 | 001 | 123456789012 MyAlias"
        if to_process:
            max_label_len = max(3 + 3 + 3 + 12 + 1 + len(a['Name']) for a in to_process) + 2
        else:
            max_label_len = 50

        print(f"\n{'='*85}\nAWS TAG CHECKER v{__version__}\n{'='*85}")
        print(f"Profile: {args.profile} | Region: {args.region} | Role: {args.role_name}")
        print(f"Tags Read: {len(tag_keys)} ({len(active_tag_keys)} active)")
        print(f"Accounts Targeted: {len(to_process)} (Unique Total: {len(unique_accounts)})")
        print(f"Thread Count: {args.max_workers}\n{'='*85}\n")

        all_findings, account_results = [], []
        overall_pbar = tqdm(total=len(to_process), desc="Overall Progress", position=0)

        with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            try:
                # Lane IDs cycle through 1..max_workers; scan_account uses the
                # lane as the position of its per-account progress bar.
                futures = {
                    executor.submit(
                        scan_account, acc, session, args.role_name, partition, tag_keys,
                        active_tag_keys, args.region, (i % args.max_workers) + 1,
                        args.account_regex, args.region_regex, args.verbose, max_label_len,
                    ): acc
                    for i, acc in enumerate(to_process)
                }
                for future in as_completed(futures):
                    try:
                        res, acc_id, alias, m, status = future.result()
                    except Exception as exc:
                        # One crashing worker must not discard the findings
                        # already collected from every other account.
                        tqdm.write(f"[!] Scan failed for {futures[future]['Id']}: {exc}")
                    else:
                        if status == "Success":
                            all_findings.extend(res)
                            account_results.append({
                                "account_id": acc_id,
                                "alias": alias,
                                "global_metrics": m["global"],
                                "regional_metrics": m["regions"],
                            })
                    overall_pbar.update(1)
            except KeyboardInterrupt:
                executor.shutdown(wait=False, cancel_futures=True)
                sys.exit(130)

        overall_pbar.close()
        # Push the cursor below the area the per-lane progress bars occupied.
        print("\n" * (args.max_workers + 1))

        # NOTE(review): ru_maxrss units are platform-dependent (KiB on Linux);
        # confirm before trusting this figure on other operating systems.
        mem_mb = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024, 2)
        total_unique_res = len({f['arn'] for f in all_findings})
        all_found_keys = {f['tag_name'] for f in all_findings}

        output_summary = {
            "summary": {
                "version": __version__,
                "command_line": cmd_line,
                "aws_accounts_scanned": len(account_results),
                "tags_read_count": len(tag_keys),
                "execution_start": start_iso,
                "execution_end": datetime.now().isoformat(),
                "elapsed_sec_total": round(time.time() - start_ts, 2),
                "max_memory_mb": mem_mb,
                "total_hits": sum(a['global_metrics']['hits'] for a in account_results),
                "total_unique_resources": total_unique_res,
                "total_tags_found_count": len(all_found_keys),
            },
            "accounts": account_results,
        }

        sum_f, fin_f = f"{args.output}_summary_{ts}.json", f"{args.output}_findings_{ts}.csv"
        with open(sum_f, 'w') as f:
            json.dump(output_summary, f, indent=4)
        print(f"[+] Summary: {sum_f}")

        # The CSV is only created when there is at least one finding, so only
        # announce its path in that case.
        if all_findings:
            with open(fin_f, 'w', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=all_findings[0].keys())
                writer.writeheader()
                writer.writerows(all_findings)
            print(f"[+] Findings: {fin_f}")
    except KeyboardInterrupt:
        sys.exit(130)