From 724cb908e8c81e2635b0249ea222c96cbd9e091a Mon Sep 17 00:00:00 2001 From: badra001 Date: Tue, 5 May 2026 16:30:41 -0400 Subject: [PATCH] create catalog --- inventory/catalog.md | 58 +++++++++++ inventory/templates/bedrock_catalog.md.j2 | 11 +++ inventory/yaml_to_md.py | 113 ++++++++++++++++++++++ 3 files changed, 182 insertions(+) create mode 100644 inventory/catalog.md create mode 100755 inventory/templates/bedrock_catalog.md.j2 create mode 100755 inventory/yaml_to_md.py diff --git a/inventory/catalog.md b/inventory/catalog.md new file mode 100644 index 0000000..8ea807d --- /dev/null +++ b/inventory/catalog.md @@ -0,0 +1,58 @@ +# Bedrock Use Case Catalog + +--- + +## [20260127-1](20260127-1.yml) + +**Name:** Field Skills Interview Training +**Summary:** Responses to short real-world practice scenarios to provide personalized user feedback as part of online training. + + +--- + +## [20260420-1](20260420-1.yml) + +**Name:** Valhalla +**Summary:** We are using LLMs to map unstructured data into a known schema (known variables, etc..) + +--- + +## [20260420-2](20260420-2.yml) + +**Name:** Decennial Census Data Quality Evaluation +**Summary:** This project will evaluate data quality throughout the 2020 Census production cycle using LLMs to inform Census Bureau and +Department of Commerce leadership decisions regarding processes, methodologies, and business rules to be used for the 2030 Census. + + +--- + +## [20260504-1](20260504-1.yml) + +**Name:** QUANTM +**Summary:** As part of the quality operations of the US Census Bureau, computer matching and clerical operations are employed. These efforts help in the assessment +of the quality of data collection. The existing operations have constraints in their ability to accurately match and compare complex datasets, resulting in high number +of cases being flagged or removed. This process is time-intensive and expensive, creating a significant burden on human resources. 
With the advanced AI capabilities +available in the industry, QUANTM is assessing their feasibility to support the quality assessment efforts and reduce the clerical footprint. The bureau aims to explore +whether large language model (LLM) technology can replicate and improve upon the reasoning processes used by human clerks. + + +--- + +## [20260505-1](20260505-1.yml) + +**Name:** CEDSCI AI Assisted Code Development +**Summary:** The goal of this project is to support the controlled use of AWS Bedrock–hosted large language models (LLMs) to +enable software development, refactoring, and application modernization activities within the CEDSCI DEV environment. + + +--- + +## [20260505-2](20260505-2.yml) + +**Name:** CEDSCI AI Agentic Semantic Search +**Summary:** CEDSCI AI Agentic Semantic Search is a cloud-native, generative AI–driven capability that enables a +natural-language interface for interacting with publicly released Census Bureau statistical data and metadata. +The project introduces an agentic semantic search layer that combines large language models (LLMs) with structured +metadata and data assets to provide a conversational AI interface, exposed through data.census.gov and api.census.gov. 
+
+
diff --git a/inventory/templates/bedrock_catalog.md.j2 b/inventory/templates/bedrock_catalog.md.j2
new file mode 100755
index 0000000..2538f59
--- /dev/null
+++ b/inventory/templates/bedrock_catalog.md.j2
@@ -0,0 +1,11 @@
+# Bedrock Use Case Catalog
+
+{% for record in records %}
+---
+
+## [{{ record.federal_standards_information.id }}]({{ record.federal_standards_information.id }}.yml)
+
+**Name:** {{ record.federal_standards_information.project_name }}
+**Summary:** {{ record.federal_standards_information.project_summary }}
+
+{% endfor %}
diff --git a/inventory/yaml_to_md.py b/inventory/yaml_to_md.py
new file mode 100755
index 0000000..a5778f2
--- /dev/null
+++ b/inventory/yaml_to_md.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+yaml_to_md.py - Render a collection of YAML files into a Markdown file via a Jinja2 template.
+
+Usage:
+    python yaml_to_md.py --input-pattern "specs/*.yml" --template bedrock_catalog.md.j2 --output catalog.md
+
+Changelog:
+    v1.0.0 2026-05-05 Initial release.
+"""
+
+__version__ = "1.0.0"
+
+import argparse
+import glob
+import sys
+from pathlib import Path
+
+import yaml
+from jinja2 import Environment, FileSystemLoader, StrictUndefined, TemplateNotFound
+
+
+# ---------------------------------------------------------------------------
+# helpers
+# ---------------------------------------------------------------------------
+
+def load_yaml_files(pattern: str) -> list[dict]:
+    """Glob *pattern* and return the parsed YAML mappings, sorted by path.
+
+    Every returned dict gains a ``_source_file`` key holding the path it was
+    loaded from. Files that cannot be read or parsed, and documents whose top
+    level is not a mapping, are reported on stderr and skipped; empty
+    documents are skipped silently.
+    """
+    paths = sorted(glob.glob(pattern, recursive=True))
+    if not paths:
+        print(f"[warn] No files matched pattern: {pattern}", file=sys.stderr)
+    records = []
+    for p in paths:
+        try:
+            with open(p, encoding="utf-8") as fh:
+                data = yaml.safe_load(fh)
+        except (OSError, UnicodeDecodeError) as exc:
+            # A recursive glob can match directories or unreadable files.
+            print(f"[error] Failed to read {p}: {exc}", file=sys.stderr)
+            continue
+        except yaml.YAMLError as exc:
+            print(f"[error] Failed to parse {p}: {exc}", file=sys.stderr)
+            continue
+        if not data:
+            continue  # empty document: nothing to render
+        if not isinstance(data, dict):
+            # Item assignment below would raise TypeError on a list or scalar.
+            print(f"[warn] Skipping {p}: top level is not a mapping", file=sys.stderr)
+            continue
+        data["_source_file"] = p  # inject source path for convenience
+        records.append(data)
+        print(f"[load] {p}", file=sys.stderr)
+    return records
+
+
+def render_template(template_path: str, records: list[dict]) -> str:
+    """Render *template_path* with *records* and return the resulting string.
+
+    The template's directory is used as the Jinja2 search path. Exits with
+    status 1 when the template file cannot be found. Because the environment
+    uses ``StrictUndefined``, a template reference to a key missing from a
+    record raises instead of rendering as an empty string.
+    """
+    tmpl_file = Path(template_path)
+    env = Environment(
+        loader=FileSystemLoader(str(tmpl_file.parent)),
+        undefined=StrictUndefined,  # raise on missing variables
+        trim_blocks=True,
+        lstrip_blocks=True,
+        keep_trailing_newline=True,
+    )
+    try:
+        template = env.get_template(tmpl_file.name)
+    except TemplateNotFound:
+        print(f"[error] Template not found: {template_path}", file=sys.stderr)
+        sys.exit(1)
+
+    return template.render(records=records)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser; the module docstring is the epilog."""
+    p = argparse.ArgumentParser(
+        description="Render YAML files into Markdown via a Jinja2 template.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    p.add_argument(
+        "--input-pattern", "-i",
+        required=True,
+        help='Glob pattern for input YAML files, e.g. "specs/*.yml"',
+    )
+    p.add_argument(
+        "--template", "-t",
+        required=True,
+        help="Path to the Jinja2 template file (.j2 or .md.j2)",
+    )
+    p.add_argument(
+        "--output", "-o",
+        required=True,
+        help="Path for the rendered Markdown output file",
+    )
+    p.add_argument(
+        "--version", "-V",
+        action="version",
+        version=f"%(prog)s {__version__}",
+    )
+    return p
+
+
+def main() -> None:
+    """Load the input YAML files, render the template, and write the output file."""
+    args = build_parser().parse_args()
+
+    records = load_yaml_files(args.input_pattern)
+    print(f"[info] Loaded {len(records)} record(s).", file=sys.stderr)
+
+    rendered = render_template(args.template, records)
+
+    out_path = Path(args.output)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(rendered, encoding="utf-8")
+    print(f"[done] Wrote {out_path}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()