#!/usr/bin/env python
"""Assess PowerSchedule / FinOps tagging compliance across audit results.

Reads every ``audit_results/*.json`` file (structure assumed per the audit
tool: top-level ``account_id`` plus a ``resources`` list -- TODO confirm
against the producer), prints tagging-health and scheduling-compliance
reports, and writes a master resource inventory CSV.
"""

import json
import os
import glob
import csv
from datetime import datetime
from collections import defaultdict

# --- VERSIONING ---
__version__ = "1.7.1"


def get_tag(tags, key, default=""):
    """Case-insensitive tag lookup.

    Returns the stripped value of the first key matching *key* (ignoring
    case), or *default* when the key is absent or its value is falsy.
    NOTE: a whitespace-only value strips down to "" rather than *default*.
    """
    for k, v in tags.items():
        if k.lower() == key.lower():
            return v.strip() if v else default
    return default


def is_schedule_enabled(power_schedule):
    """Return True if the PowerSchedule tag denotes an actual schedule.

    'Always_on', 'No Schedule' (any case/whitespace) and blank values all
    count as NOT scheduled.
    """
    val = str(power_schedule).lower().strip()
    return val not in ("always_on", "no schedule", "")


def generate_csv(data, fields, filename):
    """Write *data* (list of dicts) to *filename* as CSV.

    Returns True on success, False on failure (best-effort: the error is
    reported, not raised, so one bad report doesn't kill the run).
    """
    try:
        with open(filename, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            writer.writerows(data)
        return True
    except Exception as e:
        # BUGFIX: previously printed a literal "(unknown)" placeholder
        # instead of the file that failed.
        print(f" Error writing {filename}: {e}")
        return False


def generate_master_resource_csv(all_resources, ts):
    """Generate the comprehensive resource/FinOps inventory CSV.

    Returns the filename written (whether or not the write succeeded --
    generate_csv reports its own failures).
    """
    filename = f"scheduling_summary.resources.{ts}.csv"
    target_tags = ["PowerSchedule", "Environment", "Name",
                   "finops_project_number", "finops_project_name"]
    fields = ["account_id", "region", "type", "arn"] + target_tags

    rows = []
    for res in all_resources:
        # .get() with defaults so one malformed resource record cannot
        # abort the entire inventory export (was res['account_id'] etc.,
        # which raised KeyError -- resources don't all carry these keys).
        row = {
            "account_id": res.get('account_id', ''),
            "region": res.get('region', ''),
            "type": res.get('type', ''),
            "arn": res.get('arn', ''),
        }
        for tag in target_tags:
            row[tag] = get_tag(res.get('tags', {}), tag)
        rows.append(row)

    generate_csv(rows, fields, filename)
    return filename


def main():
    ts = datetime.now().strftime("%Y%m%dT%H%M%S")
    all_res = []
    # Search for all JSON audit results
    for file_path in glob.glob("audit_results/*.json"):
        with open(file_path, 'r') as f:
            try:
                data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # BUGFIX: was a bare `except: continue`, which silently
                # swallowed every error (including typos in this loop).
                print(f" Warning: skipping unreadable audit file {file_path}: {e}")
                continue
        # Audit structure: data['account_id'], data['resources'][].
        # Propagate the file-level account_id onto each resource so the
        # master CSV can report it (resources don't carry it themselves).
        account_id = data.get('account_id', '')
        for res in data.get('resources', []):
            res.setdefault('account_id', account_id)
            all_res.append(res)

    if not all_res:
        print("No audit data found in audit_results/")
        return

    # --- REPORT 1: FINOPS HEALTH ---
    missing_num = sum(1 for r in all_res
                      if not get_tag(r.get('tags', {}), 'finops_project_number'))
    missing_name = sum(1 for r in all_res
                       if not get_tag(r.get('tags', {}), 'finops_project_name'))

    print("\n" + "=" * 80)
    print(f"FINOPS TAGGING HEALTH (Total Resources: {len(all_res)})")
    print("-" * 80)
    print(f" Missing 'finops_project_number': {missing_num:>5}")
    print(f" Missing 'finops_project_name': {missing_name:>5}")
    # len(all_res) > 0 is guaranteed by the early return above.
    print(f" Current Tagging Compliance: {((len(all_res) - missing_num) / len(all_res)) * 100:.1f}%")

    # --- REPORT 2: COMPLIANCE MATRIX ---
    matrix = defaultdict(lambda: {"total": 0, "scheduled": 0})
    for res in all_res:
        env = get_tag(res.get('tags', {}), 'Environment', 'Unknown')
        sched = get_tag(res.get('tags', {}), 'PowerSchedule')

        matrix[env]["total"] += 1
        if is_schedule_enabled(sched):
            matrix[env]["scheduled"] += 1

    print("\nSCHEDULING COMPLIANCE MATRIX")
    print(f"{'Environment':<20} | {'Total':<8} | {'Scheduled':<12} | {'Compliance %'}")
    print("-" * 80)
    for env, counts in sorted(matrix.items()):
        # counts['total'] >= 1 by construction (entry only exists after +=1).
        pct = (counts['scheduled'] / counts['total']) * 100
        print(f"{env:<20} | {counts['total']:<8} | {counts['scheduled']:<12} | {pct:.1f}%")

    # --- CSV GENERATION ---
    print("\nGENERATING REPORTS...")
    # 1. Master Resources
    master = generate_master_resource_csv(all_res, ts)
    print(f" [DONE] Created Master Inventory: {master}")

    # 2. Categorized CSVs (plain_ec2 / asg_ec2 / eks_ec2 / rds)
    # TODO: filter all_res into per-type buckets and call generate_csv()
    # per bucket, e.g.:
    #   generate_csv(plain_list, fields, f"scheduling_summary.plain_ec2.{ts}.csv")
    # BUGFIX: removed the misleading "[DONE] Created Categorized Reports
    # (4 files)" message -- those files were never actually written.
    print("=" * 80 + "\n")


if __name__ == "__main__":
    main()