Skip to content

Commit

Permalink
unique resources only
Browse files: browse the repository at this point in the history
  • Loading branch information
badra001 committed Jan 29, 2026
1 parent 41c9ef8 commit dbd0fa6
Showing 1 changed file with 18 additions and 34 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,13 +12,13 @@
from datetime import datetime
from difflib import SequenceMatcher

__version__ = "1.0.9"
__version__ = "1.1.10"

# Services that do NOT have a resource type in the 6th ARN field
# They use: arn:aws:service:region:account:resource-name
# Format: arn:aws:service:region:account:resource-name
SERVICES_WITHOUT_TYPES = [
"s3", "sns", "sqs", "codepipeline", "codebuild",
"cloudwatch", "events", "logs", "sns", "states"
"cloudwatch", "events", "logs", "states", "athena", "glue"
]

def get_args():
Expand Down Expand Up @@ -48,12 +48,10 @@ def parse_arn_details(arn):
service = parts[2]
resource_part = parts[5]

# If the service is known to skip the type field, just return the service
if service in SERVICES_WITHOUT_TYPES:
return service, ""

# Otherwise, find the type (break at first / or :)
# Example: 'ec2' 'instance/i-123' -> 'ec2' 'instance'
# Break at first / or : to get type (e.g., instance/i-123 -> instance)
res_type = re.split(r'[:/]', resource_part)[0]
return service, res_type
except:
Expand Down Expand Up @@ -96,7 +94,9 @@ def analyze():
tag_values = defaultdict(Counter)
account_map = defaultdict(set)
resource_tags = defaultdict(dict)
service_distribution = Counter()

# Track unique resources per service: { "service name": set(arns) }
service_resource_tracking = defaultdict(set)

files = []
for p in args.findings_file: files.extend(glob.glob(p))
Expand All @@ -114,55 +114,39 @@ def analyze():

svc, r_type = parse_arn_details(arn)
display_name = f"{svc} {r_type}".strip()
service_distribution[display_name] += 1
service_resource_tracking[display_name].add(arn)

max_tag_len = max([len(t) for t in findings_count.keys()] + [20])
col1 = max_tag_len + 2
div = "=" * (col1 + 65)

# --- SECTIONS 1-5 (Summary, Values, Similarity, Compliance, Legacy) ---
# --- SECTION 1: GLOBAL SUMMARY ---
print(f"\n{div}\nSECTION 1: GLOBAL SUMMARY\n{div}")
print(f"Input Tags: {stats['total']} | Active: {stats['active']} | Found: {len(findings_count)}")
print(f"Total Hits: {sum(findings_count.values())} | Accounts: {len(set().union(*account_map.values()))}")
print(f"Total Unique Resources Found: {len(resource_tags)}")
print(f"Total Resource Hits: {sum(findings_count.values())} | Accounts: {len(set().union(*account_map.values()))}")

# --- SECTION 2: VALUES ---
print(f"\n{div}\nSECTION 2: TAG VALUE DISTRIBUTION (TOP 5)\n{div}")
for tag in sorted(findings_count.keys()):
vals = ", ".join([f"{v}({c})" for v, c in tag_values[tag].most_common(5)])
print(f"{tag.ljust(col1)} | {vals}")

print(f"\n{div}\nSECTION 3: SUSPECTED DUPLICATES (TYPOS/CASE)\n{div}")
tags_list = sorted(findings_count.keys())
for i in range(len(tags_list)):
for j in range(i + 1, len(tags_list)):
if get_similarity(tags_list[i], tags_list[j]) >= args.similarity_threshold:
print(f"[!] {tags_list[i]} <-> {tags_list[j]}")

if required_keys:
print(f"\n{div}\nSECTION 4: COVERAGE & COMPLIANCE\n{div}")
non_compliant = sum(1 for tags in resource_tags.values() if not (required_keys <= set(tags.keys())))
print(f"Total Resources: {len(resource_tags)} | Non-Compliant: {non_compliant}")
print(f"Compliance Rate: {((len(resource_tags)-non_compliant)/len(resource_tags)*100):.2f}%")

if legacy_map:
print(f"\n{div}\nSECTION 5: LEGACY MIGRATION STATUS\n{div}")
pending = sum(1 for tags in resource_tags.values() for leg, targ in legacy_map.items() if leg in tags and targ not in tags)
print(f"Pending Migration Tasks: {pending}")

# --- SECTION 6: SERVICE DISTRIBUTION ---
print(f"\n{div}\nSECTION 6: SERVICE DISTRIBUTION\n{div}")
# --- SECTION 6: SERVICE DISTRIBUTION (UNIQUE RESOURCE COUNT) ---
print(f"\n{div}\nSECTION 6: SERVICE DISTRIBUTION (UNIQUE RESOURCES)\n{div}")

if service_map:
print(f"{'Service & Type':<30} | {'Friendly Name':<40} | {'Count'}")
print("-" * (30 + 40 + 15))
for key in sorted(service_distribution.keys()):
for key in sorted(service_resource_tracking.keys()):
prefix = key.split(' ')[0]
friendly = service_map.get(prefix, 'Unknown')
print(f"{key:<30} | {friendly:<40} | {service_distribution[key]}")
print(f"{key:<30} | {friendly:<40} | {len(service_resource_tracking[key])}")
else:
print(f"{'Service & Type':<30} | {'Count'}")
print("-" * (30 + 15))
for key in sorted(service_distribution.keys()):
print(f"{key:<30} | {service_distribution[key]}")
for key in sorted(service_resource_tracking.keys()):
print(f"{key:<30} | {len(service_resource_tracking[key])}")

if __name__ == "__main__":
analyze()

0 comments on commit dbd0fa6

Please sign in to comment.