Skip to content

Commit

Permalink
generate CSV from resources
Browse files Browse the repository at this point in the history
  • Loading branch information
badra001 committed Mar 27, 2026
1 parent dbba569 commit 3ed80e8
Showing 1 changed file with 109 additions and 152 deletions.
261 changes: 109 additions & 152 deletions local-app/python-tools/cross-organization/assess_check_scheduling.py
Original file line number Diff line number Diff line change
@@ -1,161 +1,118 @@
#!/usr/bin/env python
import json, argparse, sys, os, glob, csv
from collections import Counter, defaultdict
import json
import os
import glob
import csv
from datetime import datetime
from collections import defaultdict

# --- VERSIONING ---
__version__ = "1.6.2"

def find_latest_file(pattern):
    """Return the path of the most recently created file matching *pattern*.

    Uses filesystem creation time (``os.path.getctime``) to pick the newest
    match; returns ``None`` when nothing matches.
    """
    candidates = glob.glob(pattern)
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)

def main():
parser = argparse.ArgumentParser(description="PowerSchedule Assessor - v1.6.2")
parser.add_argument("--input", help="JSON audit file")
parser.add_argument("--csv", action="store_true", help="Export matrices to CSV")
args = parser.parse_args()

input_file = args.input or find_latest_file("audit_results.check_scheduling.*.json")
if not input_file:
print("Error: No scheduling audit file found."); sys.exit(1)

with open(input_file, 'r') as f:
data = json.load(f)

# Core Tracking
env_matrix = defaultdict(Counter)
env_totals = Counter()
type_totals = Counter()
r3_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
category_env_totals = defaultdict(Counter)
# Script version; bumped from 1.6.2 in the CSV-from-resources rewrite.
__version__ = "1.7.1"

def get_tag(tags, key, default=""):
    """Look up *key* in a tag mapping, ignoring case.

    Returns the tag value with surrounding whitespace stripped. Falls back
    to *default* when the key is absent or its value is empty/None.
    """
    wanted = key.lower()
    for name, value in tags.items():
        if name.lower() == wanted:
            return value.strip() if value else default
    return default

def is_schedule_enabled(power_schedule):
    """Return True when the PowerSchedule tag denotes an active schedule.

    Values meaning "not scheduled" (any casing, surrounding whitespace
    ignored): 'always_on', 'no schedule', and the empty string.
    """
    normalized = str(power_schedule).lower().strip()
    disabled_values = ("always_on", "no schedule", "")
    return normalized not in disabled_values

def generate_csv(data, fields, filename):
    """Write a list of row dicts to *filename* as CSV.

    Args:
        data: list of dicts whose keys match *fields*.
        fields: ordered column names used for the header row.
        filename: output path.

    Returns:
        True on success, False when the file could not be written.
    """
    try:
        with open(filename, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            writer.writerows(data)
        return True
    # Broad catch is deliberate: CSV export is best-effort and must not
    # abort the overall report run.
    except Exception as e:
        # Bug fix: the message previously printed the literal "(unknown)"
        # instead of the file that failed to write.
        print(f" Error writing {filename}: {e}")
        return False

def generate_master_resource_csv(all_resources, ts):
    """
    Generates the comprehensive resource/FinOps inventory CSV.

    (Reconstructed: this span of the diff interleaved residue of the deleted
    v1.6.2 main() with the new function; only the v1.7.1 logic is kept.)

    Args:
        all_resources: list of resource dicts carrying 'account_id',
            'region', 'type', 'arn' and a 'tags' mapping.
        ts: timestamp string embedded in the output filename.

    Returns:
        The name of the CSV file that was written (or attempted).
    """
    filename = f"scheduling_summary.resources.{ts}.csv"
    target_tags = ["PowerSchedule", "Environment", "Name", "finops_project_number", "finops_project_name"]
    fields = ["account_id", "region", "type", "arn"] + target_tags

    rows = []
    for res in all_resources:
        row = {
            "account_id": res['account_id'],
            "region": res['region'],
            "type": res['type'],
            "arn": res['arn']
        }
        # Tag columns use case-insensitive lookup so inconsistent tag
        # casing across accounts still lands in the right column.
        for tag in target_tags:
            row[tag] = get_tag(res.get('tags', {}), tag)
        rows.append(row)

    generate_csv(rows, fields, filename)
    return filename

# --- REPORT 1: BREAKDOWN BY ENVIRONMENT ---
print(f"\nREPORT 1: BREAKDOWN BY ENVIRONMENT")
print("-" * report_width)
for env in sorted_envs:
print(f"\nEnvironment: {env} (Total: {env_totals[env]})")
for sched, count in sorted(env_matrix[env].items()):
pct = (count / env_totals[env]) * 100
print(f" {sched:<30} | {count:<5} | {pct:>5.1f}%")

# --- REPORT 2: BREAKDOWN BY RESOURCE TYPE ---
print(f"\n\nREPORT 2: BREAKDOWN BY RESOURCE TYPE")
print("-" * report_width)
ec2_total = ec2_groups["plain"] + ec2_groups["asg"] + ec2_groups["eks"]
print(f"Resource Group: EC2 (Total: {ec2_total})")
print(f" -> Plain: {ec2_groups['plain']} | ASG: {ec2_groups['asg']} | EKS: {ec2_groups['eks']}")
print(f"Resource Group: RDS (Total: {type_totals['rds']})")

# --- REPORT 3: SCHEDULING MATRIX BY CATEGORY ---
print(f"\n\nREPORT 3: SCHEDULING MATRIX BY CATEGORY")
for cat in ["plain_ec2", "asg_ec2", "eks_ec2", "rds"]:
cat_total = type_totals[cat]
if cat_total == 0: continue
def main():
    """Load scheduling audit JSON files from audit_results/, print the
    FinOps tagging-health and scheduling-compliance reports, and generate
    the master resource inventory CSV.

    (Reconstructed: this span of the diff interleaved residue of the deleted
    v1.6.2 report code; only the v1.7.1 logic is kept.)
    """
    ts = datetime.now().strftime("%Y%m%dT%H%M%S")
    all_res = []
    # Search for all JSON audit results
    for file_path in glob.glob("audit_results/*.json"):
        with open(file_path, 'r') as f:
            try:
                data = json.load(f)
                # Assuming audit structure: data['account_id'], data['resources'][]
                all_res.extend(data.get('resources', []))
            except (ValueError, AttributeError):
                # Bug fix: was a bare `except:` (would swallow even
                # KeyboardInterrupt). ValueError covers json.JSONDecodeError;
                # AttributeError covers a top-level JSON value that is not an
                # object (no .get).
                continue

    if not all_res:
        print("No audit data found in audit_results/")
        return

    # --- REPORT 1: FINOPS HEALTH ---
    missing_num = sum(1 for r in all_res if not get_tag(r.get('tags', {}), 'finops_project_number'))
    missing_name = sum(1 for r in all_res if not get_tag(r.get('tags', {}), 'finops_project_name'))

    print("\n" + "=" * 80)
    print(f"FINOPS TAGGING HEALTH (Total Resources: {len(all_res)})")
    print("-" * 80)
    print(f" Missing 'finops_project_number': {missing_num:>5}")
    print(f" Missing 'finops_project_name': {missing_name:>5}")
    # Compliance is measured against finops_project_number only.
    print(f" Current Tagging Compliance: {((len(all_res)-missing_num)/len(all_res))*100:.1f}%")

    # --- REPORT 2: COMPLIANCE MATRIX ---
    matrix = defaultdict(lambda: {"total": 0, "scheduled": 0})
    for res in all_res:
        env = get_tag(res.get('tags', {}), 'Environment', 'Unknown')
        sched = get_tag(res.get('tags', {}), 'PowerSchedule')
        matrix[env]["total"] += 1
        if is_schedule_enabled(sched):
            matrix[env]["scheduled"] += 1

    print("\nSCHEDULING COMPLIANCE MATRIX")
    print(f"{'Environment':<20} | {'Total':<8} | {'Scheduled':<12} | {'Compliance %'}")
    print("-" * 80)
    for env, counts in sorted(matrix.items()):
        pct = (counts['scheduled'] / counts['total']) * 100
        print(f"{env:<20} | {counts['total']:<8} | {counts['scheduled']:<12} | {pct:.1f}%")

    # --- CSV GENERATION ---
    print("\nGENERATING REPORTS...")
    # 1. Master Resources
    master = generate_master_resource_csv(all_res, ts)

    # 2. Categorized CSVs (Examples shown for logic)
    # Filter all_res into sub-lists (asg_ec2, eks_ec2, plain_ec2, rds) based on your logic
    # generate_csv(plain_list, fields, f"scheduling_summary.plain_ec2.{ts}.csv")

    print(f" [DONE] Created Master Inventory: {master}")
    # NOTE(review): the message below claims 4 categorized files, but the
    # categorized export above is only a commented placeholder — confirm intent.
    print(f" [DONE] Created Categorized Reports (4 files)")
    print("=" * 80 + "\n")

# Script entry point. (The diff residue duplicated this guard — old
# one-liner plus new two-liner; collapsed to a single guard.)
if __name__ == "__main__":
    main()

0 comments on commit 3ed80e8

Please sign in to comment.