mirror of
https://github.com/vacp2p/rfc-index.git
synced 2026-01-08 23:28:15 -05:00
118 lines
3.1 KiB
Python
118 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate a JSON index of RFC metadata for the landing page filters.
|
|
|
|
Scans the docs/ tree for Markdown files and writes
|
|
`docs/rfc-index.json`.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
import html
|
|
import re
|
|
|
|
# Repository root: this script lives one directory below it.
ROOT = Path(__file__).resolve().parent.parent
# Markdown sources that are scanned for RFC metadata.
DOCS = ROOT.joinpath("docs")
# Generated JSON index, written next to the Markdown sources.
OUTPUT = DOCS.joinpath("rfc-index.json")

# File names that are never indexed, wherever they appear under DOCS.
EXCLUDE_FILES = {
    "README.md",
    "SUMMARY.md",
}
# A file is skipped when any component of its DOCS-relative path is listed here.
EXCLUDE_PARTS = {
    "previous-versions",
}
|
|
|
|
|
|
def parse_meta_from_html(text: str) -> Optional[Dict[str, str]]:
    """Extract RFC metadata from an embedded ``rfc-meta`` HTML table.

    The document must contain the ``<div class="rfc-meta">`` marker; rows are
    then searched for across the whole of *text* in the exact form
    ``<tr><th>key</th><td>value</td></tr>``.

    Returns:
        A dict mapping each lower-cased, trimmed header cell to its value
        (``<br>`` turned into a newline, surrounding whitespace removed, HTML
        entities unescaped), or ``None`` when the marker is missing or no row
        matched. A repeated key keeps the value of its last row.
    """
    if '<div class="rfc-meta">' not in text:
        return None

    row_pattern = re.compile(r"<tr><th>([^<]+)</th><td>(.*?)</td></tr>", re.DOTALL)
    fields: Dict[str, str] = {}
    for row in row_pattern.finditer(text):
        header, cell = row.group(1), row.group(2)
        # Convert line breaks before trimming so a leading/trailing <br>
        # disappears together with the surrounding whitespace.
        cell = cell.replace("<br>", "\n").strip()
        fields[header.strip().lower()] = html.unescape(cell)

    if not fields:
        return None
    return fields
|
|
|
|
|
|
def parse_front_matter(text: str) -> Optional[Dict[str, str]]:
    """Read simple ``key: value`` pairs from a leading ``---`` block.

    *text* must start with ``---`` and contain a later line starting with
    ``---`` that closes the block. Each line in between is split at its first
    colon; lines without a colon are skipped.

    Returns:
        A dict of lower-cased keys to values (whitespace trimmed, then
        surrounding double and single quotes removed), leaving out pairs whose
        key or value ends up empty. ``None`` when there is no front matter
        block or it yields no pairs.
    """
    if not text.startswith("---"):
        return None

    closing = text.find("\n---", 3)
    if closing < 0:
        return None

    fields: Dict[str, str] = {}
    for raw_line in text[3:closing].strip().splitlines():
        name, colon, rest = raw_line.partition(":")
        if not colon:
            continue
        name = name.strip().lower()
        rest = rest.strip().strip('"').strip("'")
        if name and rest:
            fields[name] = rest

    return fields if fields else None
|
|
|
|
|
|
def parse_title_from_h1(text: str) -> Optional[str]:
    """Return the text of the first Markdown level-1 heading in *text*.

    A heading is a line that starts with a single ``#`` followed by at least
    one space or tab and some text on the same line. Deeper headings
    (``##`` and beyond) are not matched.

    Returns:
        The heading text with surrounding whitespace removed, or ``None``
        when no such line exists.
    """
    # In a raw string the whitespace class is written with a single
    # backslash; a doubled backslash would demand a literal "\" after the "#".
    # Only spaces/tabs are accepted so an empty "#" line cannot swallow the
    # following line as its title.
    match = re.search(r"^#[ \t]+(.+)$", text, flags=re.MULTILINE)
    if match is None:
        return None
    return match.group(1).strip()
|
|
|
|
|
|
def collect() -> List[Dict[str, str]]:
    """Build the RFC index entries from the Markdown files under ``DOCS``.

    Every ``*.md`` file below ``DOCS`` is considered, except files whose name
    is in ``EXCLUDE_FILES``, files with a path component in
    ``EXCLUDE_PARTS``, and the template placeholder (slug ``XX``). Metadata is
    taken from the front matter when it yields any fields, otherwise from the
    ``rfc-meta`` HTML table, otherwise defaults are used.

    Returns:
        One dict per RFC with the keys ``project``, ``slug``, ``title``,
        ``status``, ``category`` and ``path``, sorted by project, then slug,
        then path.
    """
    entries: List[Dict[str, str]] = []
    for path in DOCS.rglob("*.md"):
        rel = path.relative_to(DOCS)

        if rel.name in EXCLUDE_FILES or EXCLUDE_PARTS.intersection(rel.parts):
            continue

        text = path.read_text(encoding="utf-8", errors="ignore")

        meta = parse_front_matter(text)
        if meta is None:
            meta = parse_meta_from_html(text) or {}

        slug = meta.get("slug")
        # Skip the template placeholder
        if slug == "XX":
            continue

        title = (
            meta.get("title")
            or meta.get("name")
            or parse_title_from_h1(text)
            or rel.stem
        )

        entries.append(
            {
                # First component of the DOCS-relative path.
                "project": rel.parts[0],
                # Fall back to the title when the document declares no slug.
                "slug": str(slug) if slug is not None else title,
                "title": title,
                "status": meta.get("status") or "unknown",
                "category": meta.get("category") or "unspecified",
                # mdBook renders Markdown to .html, keep links consistent
                "path": rel.with_suffix(".html").as_posix(),
            }
        )

    # rglob yields files in filesystem-dependent order and the sort is stable,
    # so the path is used as a final tie-breaker to keep the generated index
    # identical across machines when project and slug coincide.
    entries.sort(key=lambda r: (r["project"], r["slug"], r["path"]))
    return entries
|
|
|
|
|
|
def main() -> None:
    """Write the collected entries to ``OUTPUT`` as indented JSON.

    Prints a one-line summary with the number of entries and the output path.
    """
    records = collect()
    payload = json.dumps(records, indent=2)
    OUTPUT.write_text(payload, encoding="utf-8")
    print(f"Wrote {len(records)} entries to {OUTPUT}")


# Run the generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|