mirror of
https://github.com/vacp2p/roadmap.git
synced 2026-01-08 15:23:55 -05:00
## Summary - Introduce a standalone Python roadmap validator with a CLI entry point, modular validation pipeline, and GitHub Actions wiring so roadmap content can be linted locally and in CI. - Provide reusable validation primitives for path resolution, front-matter parsing, identity checks, task parsing, catalog enforcement, and template adherence. - Document usage, configuration, and workflow behaviour to make the validator approachable for contributors. ## Validator Details - **Core tooling** - Added the `tools/roadmap_validator/` package with `validate.py` (CLI), `validator.py` (orchestration), and helper modules (`tasks.py`, `identity.py`, `paths.py`, `constants.py`, `issues.py`). - CLI supports directory/file targets, skips default filenames, emits GitHub annotations, and integrates optional substring filtering - README explains features, environment variables, and development guidance. - **Catalog and template enforcement** - `catalog.py` verifies each allowed content unit has `index.md` and `preview.md`, confirms roadmap entries appear under the proper quarter/area, and flags stale or missing links. - `templates.py` enforces template basics: front matter completeness, `## Description` ordering/content, template placeholder cleanup, and task section detection. - **Task validation** - `tasks.py` checks required metadata (`owner`, `status`, `start-date`, `end-date`), date formats, populated descriptions/deliverables, TODO markers, tangible deliverable heuristics, and `fully-qualified-name` prefixes. - **Workflow integration** - `.github/workflows/roadmap-validator.yml` runs the validator on pushes and manual dispatch, installs dependencies, scopes validation to changed Markdown, and surfaces findings via GitHub annotations. ## Existing Roadmap Updates - Normalised 2025q4 commitments across Web, DST, QA, SC, and other units by filling in missing descriptions, deliverables, schedule notes, recurring task statuses, and maintenance tasks. - Added tasks where absent, removed remaining template placeholders, aligned fully qualified names, and ensured roadmap files conform to the new validator checks. ## Testing ```bash python tools/roadmap_validator/validate.py *2025q4* ``` CI: `Roadmap Validator` workflow runs automatically on pushes/dispatch. --------- Co-authored-by: kaiserd <1684595+kaiserd@users.noreply.github.com>
195 lines
6.9 KiB
Python
195 lines
6.9 KiB
Python
"""Task parsing and validation routines."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional, Tuple
|
|
from constants import (
|
|
DATE_RE,
|
|
META_LINE_RE,
|
|
METADATA_ALIAS_MAP,
|
|
REQUIRED_TASK_FIELDS,
|
|
TANGIBLE_KEYWORDS,
|
|
TASK_HEADING_RE,
|
|
TODO_RE,
|
|
VAGUE_KEYWORDS,
|
|
)
|
|
|
|
|
|
@dataclass
|
|
class TaskIssue:
|
|
message: str
|
|
line: Optional[int]
|
|
|
|
|
|
@dataclass
|
|
class TaskReport:
|
|
name: str
|
|
line: int
|
|
metadata: Dict[str, Tuple[str, int]] = field(default_factory=dict)
|
|
had_description: bool = False
|
|
description_lines: int = 0
|
|
description_line: Optional[int] = None
|
|
description_texts: List[Tuple[str, int]] = field(default_factory=list)
|
|
deliverables_sections: int = 0
|
|
deliverables_items: int = 0
|
|
deliverables_line: Optional[int] = None
|
|
deliverables_texts: List[Tuple[str, int]] = field(default_factory=list)
|
|
todo_hits: List[Tuple[str, int]] = field(default_factory=list)
|
|
issues: List[TaskIssue] = field(default_factory=list)
|
|
expected_base: Optional[str] = None
|
|
|
|
def add_issue(self, message: str, line: Optional[int] = None) -> None:
|
|
self.issues.append(TaskIssue(message, line or self.line))
|
|
|
|
def record_metadata(self, key: str, value: str, line: int) -> None:
|
|
canonical = METADATA_ALIAS_MAP.get(key, key)
|
|
self.metadata[canonical] = (value.strip(), line)
|
|
|
|
def finalize(self) -> None:
|
|
for field_name in REQUIRED_TASK_FIELDS:
|
|
if field_name not in self.metadata:
|
|
self.add_issue(f"missing `{field_name}` metadata", self.line)
|
|
continue
|
|
value, line = self.metadata[field_name]
|
|
if not value:
|
|
self.add_issue(f"`{field_name}` value is empty", line)
|
|
elif field_name in ("start-date", "end-date") and not DATE_RE.match(value):
|
|
self.add_issue(
|
|
f"`{field_name}` should use YYYY/MM/DD format (found `{value}`)", line
|
|
)
|
|
|
|
if not self.had_description or self.description_lines == 0:
|
|
line = self.description_line or self.line
|
|
self.add_issue("missing populated `#### Description` section", line)
|
|
|
|
if self.deliverables_sections == 0:
|
|
line = self.deliverables_line or self.line
|
|
self.add_issue("missing `#### Deliverables` section", line)
|
|
elif self.deliverables_sections > 1:
|
|
line = self.deliverables_line or self.line
|
|
self.add_issue("multiple `#### Deliverables` sections found", line)
|
|
elif self.deliverables_items == 0:
|
|
line = self.deliverables_line or self.line
|
|
self.add_issue("`#### Deliverables` section is empty", line)
|
|
|
|
for text, line in self.todo_hits:
|
|
self.add_issue(f"contains TODO marker: `{text.strip()}`", line)
|
|
|
|
description_vague_hits = [
|
|
(text, line)
|
|
for text, line in self.description_texts
|
|
if any(keyword in text.lower() for keyword in VAGUE_KEYWORDS)
|
|
]
|
|
has_tangible_deliverable = any(
|
|
any(keyword in text.lower() for keyword in TANGIBLE_KEYWORDS)
|
|
for text, _ in self.deliverables_texts
|
|
)
|
|
if description_vague_hits and not has_tangible_deliverable:
|
|
text, line = description_vague_hits[0]
|
|
self.add_issue(
|
|
"uses vague wording without tangible deliverables; clarify scope "
|
|
f"(`{text.strip()}`)",
|
|
line,
|
|
)
|
|
|
|
fq_entry = self.metadata.get("fully-qualified-name")
|
|
if fq_entry and self.expected_base:
|
|
raw_value, line = fq_entry
|
|
normalized = raw_value.strip().strip("`")
|
|
expected = self.expected_base
|
|
if not (
|
|
normalized == expected or normalized.startswith(f"{expected}:")
|
|
):
|
|
self.add_issue(
|
|
f"`fully qualified name` should start with `{expected}` (found `{normalized}`)",
|
|
line,
|
|
)
|
|
|
|
|
|
def parse_tasks(
|
|
lines: List[str],
|
|
start: int,
|
|
end: int,
|
|
expected_base: Optional[str],
|
|
) -> List[TaskReport]:
|
|
tasks: List[TaskReport] = []
|
|
current: Optional[TaskReport] = None
|
|
in_description = False
|
|
in_deliverables = False
|
|
|
|
def flush_current() -> None:
|
|
nonlocal current, in_description, in_deliverables
|
|
if current is not None:
|
|
current.finalize()
|
|
tasks.append(current)
|
|
current = None
|
|
in_description = False
|
|
in_deliverables = False
|
|
|
|
for idx in range(start, end):
|
|
line = lines[idx]
|
|
stripped = line.strip()
|
|
heading_match = TASK_HEADING_RE.match(stripped)
|
|
if stripped.startswith("## "):
|
|
flush_current()
|
|
break
|
|
if heading_match:
|
|
flush_current()
|
|
task_name = heading_match.group(1).strip()
|
|
current = TaskReport(name=task_name, line=idx + 1, expected_base=expected_base)
|
|
continue
|
|
if current is None:
|
|
continue
|
|
|
|
if stripped.lower().startswith("#### "):
|
|
in_description = False
|
|
in_deliverables = False
|
|
section_title = stripped[4:].strip().lower().rstrip(":")
|
|
if section_title == "description":
|
|
current.had_description = True
|
|
current.description_line = idx + 1
|
|
current.description_lines = 0
|
|
current.description_texts = []
|
|
in_description = True
|
|
elif section_title == "deliverables":
|
|
current.deliverables_sections += 1
|
|
current.deliverables_line = idx + 1
|
|
current.deliverables_items = 0
|
|
current.deliverables_texts = []
|
|
in_deliverables = True
|
|
continue
|
|
|
|
if stripped == "---":
|
|
in_description = False
|
|
in_deliverables = False
|
|
continue
|
|
|
|
if in_description:
|
|
if stripped:
|
|
current.description_lines += 1
|
|
current.description_texts.append((stripped, idx + 1))
|
|
if TODO_RE.search(stripped):
|
|
current.todo_hits.append((stripped, idx + 1))
|
|
continue
|
|
|
|
if in_deliverables:
|
|
if stripped:
|
|
if TODO_RE.search(stripped):
|
|
current.todo_hits.append((stripped, idx + 1))
|
|
if stripped != "---":
|
|
current.deliverables_items += 1
|
|
current.deliverables_texts.append((stripped, idx + 1))
|
|
continue
|
|
|
|
meta_match = META_LINE_RE.match(stripped)
|
|
if meta_match:
|
|
field_key = meta_match.group(1).strip().lower().replace("_", " ")
|
|
value = meta_match.group(2)
|
|
current.record_metadata(field_key, value, idx + 1)
|
|
continue
|
|
|
|
if stripped and TODO_RE.search(stripped):
|
|
current.todo_hits.append((stripped, idx + 1))
|
|
|
|
flush_current()
|
|
return tasks
|