diff --git a/local-app/python-tools/cross-organization/tag-checker/analyze-tag-data.py b/local-app/python-tools/cross-organization/tag-checker/analyze-tag-data.py index caa7081f..b257e250 100755 --- a/local-app/python-tools/cross-organization/tag-checker/analyze-tag-data.py +++ b/local-app/python-tools/cross-organization/tag-checker/analyze-tag-data.py @@ -12,10 +12,9 @@ from datetime import datetime from difflib import SequenceMatcher -__version__ = "1.1.10" +__version__ = "1.1.11" -# Services that do NOT have a resource type in the 6th ARN field -# Format: arn:aws:service:region:account:resource-name +# Services that do NOT have a resource type in the 6th ARN field (Flat ARNs) SERVICES_WITHOUT_TYPES = [ "s3", "sns", "sqs", "codepipeline", "codebuild", "cloudwatch", "events", "logs", "states", "athena", "glue" @@ -44,14 +43,11 @@ def parse_arn_details(arn): try: parts = arn.split(':') if len(parts) < 6: return "unknown", "" - service = parts[2] resource_part = parts[5] - if service in SERVICES_WITHOUT_TYPES: return service, "" - - # Break at first / or : to get type (e.g., instance/i-123 -> instance) + # Split at first / or : to isolate the type res_type = re.split(r'[:/]', resource_part)[0] return service, res_type except: @@ -67,16 +63,14 @@ def analyze(): # --- LOAD METADATA --- input_tags = {} - stats = {"total": 0, "active": 0, "inactive": 0} + stats = {"total": 0, "active": 0} with open(args.tags_file, mode='r', encoding='utf-8-sig') as f: for row in csv.DictReader(f): k = row.get('TagKey', '').strip() if not k: continue - s = row.get('Status', 'Unknown').strip().lower() - input_tags[k] = s + input_tags[k] = row.get('Status', 'Unknown').strip().lower() stats["total"] += 1 - if s == 'active': stats["active"] += 1 - elif s == 'inactive': stats["inactive"] += 1 + if input_tags[k] == 'active': stats["active"] += 1 required_keys = set() if args.required_tags_file: @@ -94,8 +88,6 @@ def analyze(): tag_values = defaultdict(Counter) account_map = defaultdict(set) resource_tags = defaultdict(dict) - - # Track unique resources per service: { "service name": set(arns) } service_resource_tracking = defaultdict(set) files = [] @@ -105,20 +97,21 @@ def analyze(): with open(file, mode='r', encoding='utf-8') as f: for row in csv.DictReader(f): arn, tag, val = row['arn'], row['tag_name'], row['tag_value'] - acc = row['account_id'] - findings_count[tag] += 1 tag_values[tag][val] += 1 - account_map[tag].add(acc) + account_map[tag].add(row['account_id']) resource_tags[arn][tag] = val - svc, r_type = parse_arn_details(arn) display_name = f"{svc} {r_type}".strip() service_resource_tracking[display_name].add(arn) - max_tag_len = max([len(t) for t in findings_count.keys()] + [20]) - col1 = max_tag_len + 2 - div = "=" * (col1 + 65) + # Dynamic column widths + max_tag_len = max([len(t) for t in findings_count.keys()] + [10], default=10) + max_svc_len = max([len(s) for s in service_resource_tracking.keys()] + [15], default=15) + + col1_tag = max_tag_len + 2 + col1_svc = max_svc_len + 2 + div = "=" * (max(col1_tag, col1_svc) + 65) # --- SECTION 1: GLOBAL SUMMARY --- print(f"\n{div}\nSECTION 1: GLOBAL SUMMARY\n{div}") @@ -130,23 +123,25 @@ def analyze(): print(f"\n{div}\nSECTION 2: TAG VALUE DISTRIBUTION (TOP 5)\n{div}") for tag in sorted(findings_count.keys()): vals = ", ".join([f"{v}({c})" for v, c in tag_values[tag].most_common(5)]) - print(f"{tag.ljust(col1)} | {vals}") + print(f"{tag.ljust(col1_tag)} | {vals}") - # --- SECTION 6: SERVICE DISTRIBUTION (UNIQUE RESOURCE COUNT) --- + # --- SECTION 6: SERVICE DISTRIBUTION --- print(f"\n{div}\nSECTION 6: SERVICE DISTRIBUTION (UNIQUE RESOURCES)\n{div}") if service_map: - print(f"{'Service & Type':<30} | {'Friendly Name':<40} | {'Count'}") - print("-" * (30 + 40 + 15)) + header = f"{'Service & Type'.ljust(col1_svc)} | {'Friendly Name':<40} | {'Count'}" + print(header) + print("-" * len(header)) for key in sorted(service_resource_tracking.keys()): prefix = key.split(' ')[0] friendly = service_map.get(prefix, 'Unknown') - print(f"{key:<30} | {friendly:<40} | {len(service_resource_tracking[key])}") + print(f"{key.ljust(col1_svc)} | {friendly:<40} | {len(service_resource_tracking[key])}") else: - print(f"{'Service & Type':<30} | {'Count'}") - print("-" * (30 + 15)) + header = f"{'Service & Type'.ljust(col1_svc)} | {'Count'}" + print(header) + print("-" * len(header)) for key in sorted(service_resource_tracking.keys()): - print(f"{key:<30} | {len(service_resource_tracking[key])}") + print(f"{key.ljust(col1_svc)} | {len(service_resource_tracking[key])}") if __name__ == "__main__": analyze()