# roadmap/tools/roadmap_validator/tasks.py

"""Task parsing and validation routines."""

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
from constants import (
    DATE_RE,
    META_LINE_RE,
    METADATA_ALIAS_MAP,
    REQUIRED_TASK_FIELDS,
    STATUS_ALLOWED,
    STATUS_IN_PROGRESS_RE,
    TANGIBLE_KEYWORDS,
    TASK_HEADING_RE,
    TODO_RE,
    VAGUE_KEYWORDS,
)


@dataclass
class TaskIssue:
    """A single validation problem reported against a task."""

    # Human-readable description of the problem.
    message: str
    # Line number the issue points at; the annotation permits None.
    line: Optional[int]


@dataclass
class TaskReport:
    """State gathered for one task heading, plus the issues found in it.

    The counters and text lists are filled in while the task's lines are
    scanned; ``finalize`` then converts that state into ``issues``.
    """

    # Task title and the line number of its heading.
    name: str
    line: int
    # Canonical metadata key -> (stripped value, line number).
    metadata: Dict[str, Tuple[str, int]] = field(default_factory=dict)
    # Description section bookkeeping.
    had_description: bool = False
    description_lines: int = 0
    description_line: Optional[int] = None
    description_texts: List[Tuple[str, int]] = field(default_factory=list)
    # Deliverables section bookkeeping.
    deliverables_sections: int = 0
    deliverables_items: int = 0
    deliverables_line: Optional[int] = None
    deliverables_texts: List[Tuple[str, int]] = field(default_factory=list)
    # (text, line number) pairs for lines that matched the TODO pattern.
    todo_hits: List[Tuple[str, int]] = field(default_factory=list)
    # Problems collected so far.
    issues: List[TaskIssue] = field(default_factory=list)
    # Prefix the `fully-qualified-name` metadata is expected to start with.
    expected_base: Optional[str] = None

    def add_issue(self, message: str, line: Optional[int] = None) -> None:
        """Append an issue; a falsy ``line`` falls back to the task's own line."""
        reported_line = line if line else self.line
        self.issues.append(TaskIssue(message, reported_line))

    def record_metadata(self, key: str, value: str, line: int) -> None:
        """Store a metadata value under its canonical (alias-resolved) key."""
        canonical_key = METADATA_ALIAS_MAP.get(key, key)
        cleaned_value = value.strip()
        self.metadata[canonical_key] = (cleaned_value, line)

    def finalize(self) -> None:
        """Run every check against the collected state, appending to ``issues``.

        The checks run in a fixed order so the resulting issue list is stable:
        required metadata, description, deliverables, TODO markers, vague
        wording, and finally the fully qualified name prefix.
        """
        self._check_required_fields()
        self._check_description()
        self._check_deliverables()
        self._check_todo_markers()
        self._check_vague_wording()
        self._check_fully_qualified_name()

    def _check_required_fields(self) -> None:
        """Report required metadata fields that are missing or malformed."""
        for field_name in REQUIRED_TASK_FIELDS:
            try:
                value, line = self.metadata[field_name]
            except KeyError:
                self.add_issue(f"missing `{field_name}` metadata", self.line)
                continue
            self._check_field_value(field_name, value, line)

    def _check_field_value(self, field_name: str, value: str, line: int) -> None:
        """Validate one present metadata value (emptiness, date format, status)."""
        if not value:
            self.add_issue(f"`{field_name}` value is empty", line)
        elif field_name in ("start-date", "end-date"):
            if not DATE_RE.match(value):
                self.add_issue(
                    f"`{field_name}` should use YYYY/MM/DD format (found `{value}`)", line
                )
        elif field_name == "status":
            self._check_status(value, line)

    def _check_status(self, value: str, line: int) -> None:
        """Accept an allowed status or an in-progress percentage below 100."""
        normalized = value.strip().lower()
        if normalized in STATUS_ALLOWED:
            return

        match = STATUS_IN_PROGRESS_RE.match(normalized)
        if not match:
            allowed_text = ", ".join(sorted(STATUS_ALLOWED)) + ", or `in progress (NN%)`"
            self.add_issue(
                f"`status` should be {allowed_text} (found `{value.strip()}`)",
                line,
            )
            return

        progress = int(match.group(1))
        if progress >= 100:
            self.add_issue(
                "`status` percentage must be between 0% and 99% for `in progress`",
                line,
            )

    def _check_description(self) -> None:
        """Report a description section that is absent or has no content."""
        if self.had_description and self.description_lines != 0:
            return
        anchor = self.description_line or self.line
        self.add_issue("missing populated `#### Description` section", anchor)

    def _check_deliverables(self) -> None:
        """Report a missing, duplicated, or empty deliverables section."""
        if self.deliverables_sections == 0:
            message = "missing `#### Deliverables` section"
        elif self.deliverables_sections > 1:
            message = "multiple `#### Deliverables` sections found"
        elif self.deliverables_items == 0:
            message = "`#### Deliverables` section is empty"
        else:
            return
        anchor = self.deliverables_line or self.line
        self.add_issue(message, anchor)

    def _check_todo_markers(self) -> None:
        """Turn every recorded TODO hit into an issue."""
        for text, line in self.todo_hits:
            self.add_issue(f"contains TODO marker: `{text.strip()}`", line)

    def _check_vague_wording(self) -> None:
        """Flag vague description text when no deliverable looks tangible."""

        def mentions_any(text: str, keywords) -> bool:
            lowered = text.lower()
            return any(keyword in lowered for keyword in keywords)

        vague_hits = [
            (text, line)
            for text, line in self.description_texts
            if mentions_any(text, VAGUE_KEYWORDS)
        ]
        tangible_found = any(
            mentions_any(text, TANGIBLE_KEYWORDS)
            for text, _ in self.deliverables_texts
        )
        if not vague_hits or tangible_found:
            return

        # Only the first vague line is reported.
        text, line = vague_hits[0]
        self.add_issue(
            "uses vague wording without tangible deliverables; clarify scope "
            f"(`{text.strip()}`)",
            line,
        )

    def _check_fully_qualified_name(self) -> None:
        """Check the fully qualified name equals or extends ``expected_base``."""
        fq_entry = self.metadata.get("fully-qualified-name")
        if not fq_entry or not self.expected_base:
            return

        raw_value, line = fq_entry
        normalized = raw_value.strip().strip("`")
        expected = self.expected_base
        if normalized == expected or normalized.startswith(f"{expected}:"):
            return
        self.add_issue(
            f"`fully qualified name` should start with `{expected}` (found `{normalized}`)",
            line,
        )


def parse_tasks(
    lines: List[str],
    start: int,
    end: int,
    expected_base: Optional[str],
) -> List["TaskReport"]:
    """Parse and validate the tasks found in ``lines[start:end]``.

    Scanning stops at the first ``## `` heading. Each line matching
    ``TASK_HEADING_RE`` opens a new task; the lines that follow are treated as
    metadata, ``#### Description`` content, or ``#### Deliverables`` content
    until the next task heading. A ``---`` line closes the current section.

    Args:
        lines: All lines of the document.
        start: Zero-based index of the first line to scan (inclusive).
        end: Zero-based index at which to stop (exclusive).
        expected_base: Prefix each task's fully qualified name should carry,
            or ``None`` to skip that check.

    Returns:
        One finalized ``TaskReport`` per task heading, in document order.
        Reported line numbers are one-based.
    """
    tasks: List["TaskReport"] = []
    current: Optional["TaskReport"] = None
    in_description = False
    in_deliverables = False

    def flush_current() -> None:
        """Finalize and store the open task, then reset the section state."""
        nonlocal current, in_description, in_deliverables
        if current is not None:
            current.finalize()
            tasks.append(current)
        current = None
        in_description = False
        in_deliverables = False

    # Clamp the window to the list so an out-of-range bound cannot raise
    # IndexError, and a negative start cannot read from the tail of the list.
    first = max(start, 0)
    last = min(end, len(lines))

    for idx in range(first, last):
        line_no = idx + 1
        stripped = lines[idx].strip()

        # A second-level heading ends the task area; the flush after the loop
        # stores whatever task is still open.
        if stripped.startswith("## "):
            break

        heading_match = TASK_HEADING_RE.match(stripped)
        if heading_match:
            flush_current()
            current = TaskReport(
                name=heading_match.group(1).strip(),
                line=line_no,
                expected_base=expected_base,
            )
            continue
        if current is None:
            # Anything before the first task heading is ignored.
            continue

        if stripped.startswith("#### "):
            in_description = False
            in_deliverables = False
            section_title = stripped[4:].strip().lower().rstrip(":")
            if section_title == "description":
                # A repeated Description section replaces the earlier one.
                current.had_description = True
                current.description_line = line_no
                current.description_lines = 0
                current.description_texts = []
                in_description = True
            elif section_title == "deliverables":
                # Sections are counted so finalize() can flag duplicates.
                current.deliverables_sections += 1
                current.deliverables_line = line_no
                current.deliverables_items = 0
                current.deliverables_texts = []
                in_deliverables = True
            continue

        if stripped == "---":
            in_description = False
            in_deliverables = False
            continue

        if in_description:
            if stripped:
                current.description_lines += 1
                current.description_texts.append((stripped, line_no))
                if TODO_RE.search(stripped):
                    current.todo_hits.append((stripped, line_no))
            continue

        if in_deliverables:
            # "---" was already handled above, so every non-empty line that
            # reaches this point counts as a deliverable item.
            if stripped:
                if TODO_RE.search(stripped):
                    current.todo_hits.append((stripped, line_no))
                current.deliverables_items += 1
                current.deliverables_texts.append((stripped, line_no))
            continue

        meta_match = META_LINE_RE.match(stripped)
        if meta_match:
            field_key = meta_match.group(1).strip().lower().replace("_", " ")
            current.record_metadata(field_key, meta_match.group(2), line_no)
            continue

        if stripped and TODO_RE.search(stripped):
            current.todo_hits.append((stripped, line_no))

    flush_current()
    return tasks