#!/usr/bin/env python
"""Assess PowerSchedule / FinOps tagging compliance across audit results.

Reads every ``audit_results/*.json`` file (structure assumed per the audit
tool: top-level ``account_id`` plus a ``resources`` list -- TODO confirm
against the producer), prints tagging-health and scheduling-compliance
reports, and writes a master resource inventory CSV.
"""

import json
import os
import glob
import csv
from datetime import datetime
from collections import defaultdict

# --- VERSIONING ---
__version__ = "1.7.1"


def get_tag(tags, key, default=""):
    """Case-insensitive tag lookup.

    Returns the stripped value of the first key matching *key* (ignoring
    case), or *default* when the key is absent or its value is falsy.
    NOTE: a whitespace-only value strips down to "" rather than *default*.
    """
    for k, v in tags.items():
        if k.lower() == key.lower():
            return v.strip() if v else default
    return default


def is_schedule_enabled(power_schedule):
    """Return True if the PowerSchedule tag denotes an actual schedule.

    'Always_on', 'No Schedule' (any case/whitespace) and blank values all
    count as NOT scheduled.
    """
    val = str(power_schedule).lower().strip()
    return val not in ("always_on", "no schedule", "")


def generate_csv(data, fields, filename):
    """Write *data* (list of dicts) to *filename* as CSV.

    Returns True on success, False on failure (best-effort: the error is
    reported, not raised, so one bad report doesn't kill the run).
    """
    try:
        with open(filename, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            writer.writerows(data)
        return True
    except Exception as e:
        # BUGFIX: previously printed a literal "(unknown)" placeholder
        # instead of the file that failed.
        print(f" Error writing {filename}: {e}")
        return False


def generate_master_resource_csv(all_resources, ts):
    """Generate the comprehensive resource/FinOps inventory CSV.

    Returns the filename written (whether or not the write succeeded --
    generate_csv reports its own failures).
    """
    filename = f"scheduling_summary.resources.{ts}.csv"
    target_tags = ["PowerSchedule", "Environment", "Name",
                   "finops_project_number", "finops_project_name"]
    fields = ["account_id", "region", "type", "arn"] + target_tags

    rows = []
    for res in all_resources:
        # .get() with defaults so one malformed resource record cannot
        # abort the entire inventory export (was res['account_id'] etc.,
        # which raised KeyError -- resources don't all carry these keys).
        row = {
            "account_id": res.get('account_id', ''),
            "region": res.get('region', ''),
            "type": res.get('type', ''),
            "arn": res.get('arn', ''),
        }
        for tag in target_tags:
            row[tag] = get_tag(res.get('tags', {}), tag)
        rows.append(row)

    generate_csv(rows, fields, filename)
    return filename


def main():
    ts = datetime.now().strftime("%Y%m%dT%H%M%S")
    all_res = []
    # Search for all JSON audit results
    for file_path in glob.glob("audit_results/*.json"):
        with open(file_path, 'r') as f:
            try:
                data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # BUGFIX: was a bare `except: continue`, which silently
                # swallowed every error (including typos in this loop).
                print(f" Warning: skipping unreadable audit file {file_path}: {e}")
                continue
        # Audit structure: data['account_id'], data['resources'][].
        # Propagate the file-level account_id onto each resource so the
        # master CSV can report it (resources don't carry it themselves).
        account_id = data.get('account_id', '')
        for res in data.get('resources', []):
            res.setdefault('account_id', account_id)
            all_res.append(res)

    if not all_res:
        print("No audit data found in audit_results/")
        return

    # --- REPORT 1: FINOPS HEALTH ---
    missing_num = sum(1 for r in all_res
                      if not get_tag(r.get('tags', {}), 'finops_project_number'))
    missing_name = sum(1 for r in all_res
                       if not get_tag(r.get('tags', {}), 'finops_project_name'))

    print("\n" + "=" * 80)
    print(f"FINOPS TAGGING HEALTH (Total Resources: {len(all_res)})")
    print("-" * 80)
    print(f" Missing 'finops_project_number': {missing_num:>5}")
    print(f" Missing 'finops_project_name': {missing_name:>5}")
    # len(all_res) > 0 is guaranteed by the early return above.
    print(f" Current Tagging Compliance: {((len(all_res) - missing_num) / len(all_res)) * 100:.1f}%")

    # --- REPORT 2: COMPLIANCE MATRIX ---
    matrix = defaultdict(lambda: {"total": 0, "scheduled": 0})
    for res in all_res:
        env = get_tag(res.get('tags', {}), 'Environment', 'Unknown')
        sched = get_tag(res.get('tags', {}), 'PowerSchedule')

        matrix[env]["total"] += 1
        if is_schedule_enabled(sched):
            matrix[env]["scheduled"] += 1

    print("\nSCHEDULING COMPLIANCE MATRIX")
    print(f"{'Environment':<20} | {'Total':<8} | {'Scheduled':<12} | {'Compliance %'}")
    print("-" * 80)
    for env, counts in sorted(matrix.items()):
        # counts['total'] >= 1 by construction (entry only exists after +=1).
        pct = (counts['scheduled'] / counts['total']) * 100
        print(f"{env:<20} | {counts['total']:<8} | {counts['scheduled']:<12} | {pct:.1f}%")

    # --- CSV GENERATION ---
    print("\nGENERATING REPORTS...")
    # 1. Master Resources
    master = generate_master_resource_csv(all_res, ts)
    print(f" [DONE] Created Master Inventory: {master}")

    # 2. Categorized CSVs (plain_ec2 / asg_ec2 / eks_ec2 / rds)
    # TODO: filter all_res into per-type buckets and call generate_csv()
    # per bucket, e.g.:
    #   generate_csv(plain_list, fields, f"scheduling_summary.plain_ec2.{ts}.csv")
    # BUGFIX: removed the misleading "[DONE] Created Categorized Reports
    # (4 files)" message -- those files were never actually written.
    print("=" * 80 + "\n")


if __name__ == "__main__":
    main()