#!/usr/bin/env python3
from __future__ import annotations

import argparse
import csv
import hashlib
import html
import json
import re
import shutil
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path


ASK_ETC_ROOT = Path(__file__).resolve().parent
CORPUS_ROOT = ASK_ETC_ROOT / "corpus"
CORE_ROOT = ASK_ETC_ROOT / "core-canon"
CONFIG_PATH = ASK_ETC_ROOT / "config" / "core_canon.tsv"
MANIFESTS_DIR = ASK_ETC_ROOT / "manifests"
FULL_MANIFEST_PATH = MANIFESTS_DIR / "manifest.tsv"
CORE_MANIFEST_PATH = MANIFESTS_DIR / "core_canon_manifest.tsv"
CORE_SUMMARY_PATH = MANIFESTS_DIR / "core_canon_summary.json"
INDEXES_DIR = ASK_ETC_ROOT / "indexes"
FULL_INDEX_PATH = INDEXES_DIR / "full_index.json"
CORE_INDEX_PATH = INDEXES_DIR / "core_index.json"
PACKED_DIR = ASK_ETC_ROOT / "packed"
FULL_PACK_TXT_PATH = PACKED_DIR / "full-context-packed.txt"
FULL_PACK_HTML_PATH = PACKED_DIR / "full-context-packed.html"
CORE_PACK_TXT_PATH = PACKED_DIR / "core-context-packed.txt"
CORE_PACK_HTML_PATH = PACKED_DIR / "core-context-packed.html"
PACKED_SUMMARY_PATH = PACKED_DIR / "summary.json"


def die(message: str) -> int:
    """Report *message* on stderr with an ``error:`` prefix; return exit code 1."""
    sys.stderr.write(f"error: {message}\n")
    return 1


def read_tsv(path: Path) -> list[dict[str, str]]:
    """Read a tab-separated file into a list of header-keyed row dicts.

    The file is decoded as UTF-8 explicitly so the result does not depend
    on the platform's locale encoding (the previous implementation used
    the locale default, which is e.g. cp1252 on Windows).
    """
    with path.open(newline="", encoding="utf-8") as handle:
        return list(csv.DictReader(handle, delimiter="\t"))


def write_tsv(path: Path, fieldnames: list[str], rows: list[dict[str, object]]) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        for row in rows:
            writer.writerow(row)


def load_full_manifest() -> dict[str, dict[str, str]]:
    """Map corpus-relative paths to their rows from the full manifest TSV.

    Rows whose ``copied_path`` starts with ``corpus/`` are keyed by the
    remainder; anything else is keyed by the value as-is.
    """
    prefix = "corpus/"
    manifest: dict[str, dict[str, str]] = {}
    for row in read_tsv(FULL_MANIFEST_PATH):
        key = row["copied_path"]
        if key.startswith(prefix):
            key = key[len(prefix):]
        manifest[key] = row
    return manifest


def load_core_config() -> list[dict[str, str]]:
    """Load and normalize the core-canon config rows from CONFIG_PATH.

    Leading ``./`` relative-path prefixes are removed from each ``path``
    value by exact prefix matching. The previous ``lstrip("./")`` stripped
    *characters*, which also mangled paths that legitimately begin with a
    dot (e.g. ``".hidden/doc.md"`` became ``"hidden/doc.md"``).
    """
    rows: list[dict[str, str]] = []
    for row in read_tsv(CONFIG_PATH):
        path_value = row["path"].strip()
        # Remove any number of leading "./" segments, and nothing else.
        while path_value.startswith("./"):
            path_value = path_value[2:]
        rows.append(
            {
                "section": row["section"].strip(),
                "path": path_value,
                "reason": row["reason"].strip(),
            }
        )
    return rows


def parse_simple_frontmatter(text: str) -> tuple[dict[str, object], str]:
    """Split a leading ``---`` frontmatter block from *text*.

    Returns ``(metadata, body)``. When no well-formed frontmatter block is
    present, metadata is empty and the body is the whole text (with CRLF
    normalized to LF). Only flat ``key: value`` lines are parsed; a value
    shaped like ``[a, b]`` becomes a list of quote-stripped strings, and
    empty values are skipped entirely.
    """
    content = text.replace("\r\n", "\n")
    if not content.startswith("---\n"):
        return {}, content
    closing = content.find("\n---\n", 4)
    if closing == -1:
        return {}, content
    header = content[4:closing]
    remainder = content[closing + 5:]
    meta: dict[str, object] = {}
    for raw_line in header.splitlines():
        if ":" not in raw_line:
            continue
        name, _, rest = raw_line.partition(":")
        name = name.strip()
        rest = rest.strip()
        if not rest:
            continue
        if rest.startswith("[") and rest.endswith("]"):
            pieces = [
                piece.strip().strip('"').strip("'")
                for piece in rest[1:-1].strip().split(",")
            ]
            meta[name] = [piece for piece in pieces if piece]
            continue
        meta[name] = rest.strip('"').strip("'")
    return meta, remainder


def normalize_newlines(text: str) -> str:
    """Convert CRLF and bare CR line endings to LF."""
    for ending in ("\r\n", "\r"):
        text = text.replace(ending, "\n")
    return text


def collapse_blank_lines(text: str) -> str:
    """Trim trailing line whitespace, squeeze 3+ newlines to 2, strip ends."""
    no_trailing = re.sub(r"[ \t]+\n", "\n", text)
    squeezed = re.sub(r"\n{3,}", "\n\n", no_trailing)
    return squeezed.strip()


def strip_html_markup(text: str) -> str:
    """Reduce HTML to plain text.

    Drops script/style/iframe bodies, maps structural tags to newlines,
    renders list items as "- " bullets, blanks out every remaining tag,
    then unescapes HTML entities.
    """
    rules = (
        (r"(?is)<(script|style|iframe)[^>]*>.*?</\1>", "\n"),
        (r"(?is)<br\s*/?>", "\n"),
        (r"(?is)</(p|div|section|article|aside|header|footer|main|table|tr|blockquote)>", "\n"),
        (r"(?is)<li[^>]*>", "- "),
        (r"(?is)</li>", "\n"),
        (r"(?is)<h[1-6][^>]*>", "\n"),
        (r"(?is)</h[1-6]>", "\n"),
        (r"(?is)<[^>]+>", " "),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return html.unescape(text)


def render_inline_markup(text: str) -> str:
    """Flatten Markdown inline markup to plain text.

    Images are dropped; links and autolinks keep their text/URL; code
    spans, bold, italic, and underscore emphasis keep their contents.
    """
    rules = (
        (r"!\[[^\]]*\]\([^)]+\)", " "),
        (r"\[([^\]]+)\]\([^)]+\)", r"\1"),
        (r"<(https?://[^>]+)>", r"\1"),
        (r"`([^`]+)`", r"\1"),
        (r"\*\*([^*]+)\*\*", r"\1"),
        (r"\*([^*]+)\*", r"\1"),
        (r"_([^_]+)_", r"\1"),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text


def clean_body_text(path: Path, text: str) -> str:
    """Return plain, whitespace-collapsed text for the content of *path*.

    HTML files are de-tagged directly. Everything else is treated as
    Markdown (or RST for ``.rst``): code fences, indented code, blockquote
    markers, list markers, horizontal rules — and, for RST, targets,
    directives, field lists, and underline rules — are stripped before
    inline markup and residual HTML are removed. Frontmatter is discarded.
    """
    frontmatter, body = parse_simple_frontmatter(normalize_newlines(text))
    del frontmatter  # metadata is indexed elsewhere; only the body matters here
    extension = path.suffix.lower()
    if extension in {".html", ".htm"}:
        body = strip_html_markup(body)
    else:
        body = re.sub(r"```.*?```", "\n", body, flags=re.S)
        markdown_rules = (
            (r"(?m)^ {4,}.*$", ""),
            (r"(?m)^\s*>\s?", ""),
            (r"(?m)^\s*[-*+]\s+", "- "),
            (r"(?m)^\s*(\d+)\.\s+", r"\1. "),
            (r"(?m)^\s*[-*_]{3,}\s*$", ""),
        )
        for pattern, replacement in markdown_rules:
            body = re.sub(pattern, replacement, body)
        if extension == ".rst":
            rst_rules = (
                r"(?m)^\s*\.\.\s*_[^:]+:\s*$",
                r"(?m)^\s*\.\.\s+[a-zA-Z0-9_-]+::.*$",
                r"(?m)^\s+:[a-zA-Z0-9_-]+:.*$",
                r"(?m)^[=\-~^*`#]{3,}\s*$",
            )
            for pattern in rst_rules:
                body = re.sub(pattern, "", body)
        body = strip_html_markup(render_inline_markup(body))
    body = re.sub(r"[ \t]+", " ", body)
    return collapse_blank_lines(body)


def strip_markup(text: str) -> str:
    """Drop code fences/spans, images, and tags; keep link text; squash whitespace."""
    text = re.sub(r"```.*?```", " ", text, flags=re.S)
    rules = (
        (r"`[^`]+`", " "),
        (r"!\[[^\]]*\]\([^)]+\)", " "),
        (r"\[([^\]]+)\]\([^)]+\)", r"\1"),
        (r"<[^>]+>", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text.strip()


def detect_title(text: str, path: Path) -> str:
    """Best-effort document title.

    Tries, in order: a Markdown ``#`` heading, an RST underlined title,
    HTML ``<title>`` then ``<h1>``, the first non-blank line (truncated to
    120 chars), and finally the file stem.
    """
    md_heading = re.search(r"(?im)^#{1,6}\s+(.+?)\s*$", text)
    if md_heading:
        return md_heading.group(1).strip()
    underline_chars = {"=", "-", "~", "^", "*"}
    lines = text.splitlines()
    for current, following in zip(lines, lines[1:]):
        title = current.strip()
        rule = following.strip()
        if title and rule and len(rule) >= len(title) and set(rule) <= underline_chars:
            return title
    for pattern in (r"(?is)<title>(.*?)</title>", r"(?is)<h1[^>]*>(.*?)</h1>"):
        found = re.search(pattern, text)
        if found:
            return strip_markup(found.group(1))
    for raw in lines:
        candidate = raw.strip()
        if candidate:
            return candidate[:120]
    return path.stem


def detect_headings(text: str) -> list[str]:
    """Collect up to six headings from *text*.

    Markdown ``#`` headings are gathered first; if fewer than six are
    found, RST-style underlined titles are appended until the cap.
    """
    found: list[str] = []
    for match in re.finditer(r"(?im)^#{1,6}\s+(.+?)\s*$", text):
        found.append(match.group(1).strip())
        if len(found) >= 6:
            return found
    underline_chars = {"=", "-", "~", "^", "*"}
    lines = text.splitlines()
    for current, following in zip(lines, lines[1:]):
        title = current.strip()
        rule = following.strip()
        if title and rule and len(rule) >= len(title) and set(rule) <= underline_chars:
            found.append(title)
            if len(found) >= 6:
                break
    return found


def extract_metadata(path: Path) -> dict[str, object]:
    """Extract index metadata for the document at *path*.

    Returns a dict with title, date, tags, headings, a 320-char plain-text
    preview, a word count, and the sha1 of the raw file bytes. Frontmatter
    values win over detected title/date; a scalar ``tags`` value is wrapped
    in a single-element list.
    """
    frontmatter, body = parse_simple_frontmatter(path.read_text(errors="ignore"))
    tags = frontmatter.get("tags", [])
    if isinstance(tags, str):
        tags = [tags]
    plain = strip_markup(body)
    return {
        "title": str(frontmatter.get("title") or detect_title(body, path)),
        "date": str(frontmatter.get("date") or ""),
        "tags": tags,
        "headings": detect_headings(body),
        "preview": plain[:320],
        "word_count": len(re.findall(r"\b[\w'-]+\b", plain)),
        "sha1": hashlib.sha1(path.read_bytes()).hexdigest(),
    }


def rel_to_asketc(path: Path) -> str:
    """Return *path* relative to the askETC root, as a string."""
    relative = path.relative_to(ASK_ETC_ROOT)
    return str(relative)


def absolute_entry_path(entry: dict[str, object]) -> Path:
    """Resolve an index entry's stored relative ``path`` under the askETC root."""
    relative = Path(str(entry["path"]))
    return ASK_ETC_ROOT / relative


def category_sort_key(category: str) -> tuple[int, str]:
    """Sort key placing known categories in canonical order, unknowns last (99)."""
    known = (
        "primary_doctrine",
        "reference",
        "history_reference",
        "governance_specs",
        "blog_history",
        "blog_philosophy",
        "blog_course",
    )
    rank = known.index(category) if category in known else 99
    return (rank, category)


def section_sort_key(section: str) -> tuple[int, str]:
    """Sort key placing known canon sections in canonical order, unknowns last (99)."""
    known = ("foundation", "governance", "history", "philosophy", "course")
    rank = known.index(section) if section in known else 99
    return (rank, section)


def build_full_entries(full_manifest: dict[str, dict[str, str]]) -> list[dict[str, object]]:
    """Build index entries for every queryable corpus file in the manifest.

    Rows marked non-queryable or pointing at files missing from disk are
    skipped. Entries carry manifest fields plus extracted metadata; the
    canon fields are blank because the full index is not canon-scoped.
    """
    results: list[dict[str, object]] = []
    for rel_path, row in sorted(full_manifest.items()):
        if row["queryable"] != "yes":
            continue
        file_path = CORPUS_ROOT / rel_path
        if not file_path.is_file():
            continue  # manifest may reference files removed from disk
        entry: dict[str, object] = {
            "path": rel_to_asketc(file_path),
            "rel_path": rel_path,
            "source_path": row["source_path"],
            "category": row["category"],
            "queryable": True,
            "canon_section": "",
            "canon_reason": "",
        }
        entry.update(extract_metadata(file_path))
        results.append(entry)
    return results


def build_core_canon(full_manifest: dict[str, dict[str, str]], config_rows: list[dict[str, str]]) -> list[dict[str, object]]:
    """Rebuild the core-canon tree from the corpus per the config rows.

    Wipes and recreates CORE_ROOT, copies each configured corpus file into
    it, writes the core manifest TSV and a JSON summary, and returns index
    entries for the copied files.

    Raises:
        FileNotFoundError: if any configured path is missing from the
            corpus; raised only after all rows are checked so the message
            lists every missing path at once.
    """
    # Start from a clean slate so rows removed from the config don't leave
    # stale copies behind in the core tree.
    if CORE_ROOT.exists():
        shutil.rmtree(CORE_ROOT)
    CORE_ROOT.mkdir(parents=True, exist_ok=True)

    missing = []
    manifest_rows: list[dict[str, object]] = []
    entries: list[dict[str, object]] = []
    for row in config_rows:
        rel_path = row["path"]
        src = CORPUS_ROOT / rel_path
        if not src.is_file():
            # Collect all missing paths so the error reports them together.
            missing.append(rel_path)
            continue
        dest = CORE_ROOT / rel_path
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
        # The manifest row may be absent for a configured path; fall back to
        # the config path itself as the source path.
        source_meta = full_manifest.get(rel_path, {})
        meta = extract_metadata(dest)
        manifest_rows.append(
            {
                "section": row["section"],
                "path": rel_path,
                "reason": row["reason"],
                "source_category": source_meta.get("category", ""),
                "source_path": source_meta.get("source_path", rel_path),
                "title": meta["title"],
                "date": meta["date"],
            }
        )
        entries.append(
            {
                "path": rel_to_asketc(dest),
                "rel_path": rel_path,
                "source_path": source_meta.get("source_path", rel_path),
                "category": source_meta.get("category", ""),
                "queryable": True,
                "canon_section": row["section"],
                "canon_reason": row["reason"],
                **meta,
            }
        )
    if missing:
        raise FileNotFoundError("Missing core canon paths:\n" + "\n".join(missing))

    write_tsv(
        CORE_MANIFEST_PATH,
        ["section", "path", "reason", "source_category", "source_path", "title", "date"],
        manifest_rows,
    )
    summary = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "file_count": len(entries),
        "by_section": dict(sorted(Counter(row["section"] for row in manifest_rows).items())),
        "by_source_category": dict(
            sorted(Counter(str(row["source_category"]) for row in manifest_rows).items())
        ),
    }
    CORE_SUMMARY_PATH.write_text(json.dumps(summary, indent=2) + "\n")
    return entries


def write_index(path: Path, scope: str, root_dir: Path, entries: list[dict[str, object]]) -> None:
    """Serialize *entries* plus summary counts as a JSON index at *path*."""
    category_counts = Counter(str(entry["category"]) for entry in entries)
    section_counts = Counter(
        str(entry["canon_section"]) for entry in entries if entry["canon_section"]
    )
    payload = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "scope": scope,
        "root": rel_to_asketc(root_dir),
        "file_count": len(entries),
        "by_category": dict(sorted(category_counts.items())),
        "by_section": dict(sorted(section_counts.items())),
        "files": entries,
    }
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, indent=2) + "\n")


def rebuild(_: argparse.Namespace | None = None) -> int:
    """Regenerate the full and core indexes from the manifests; return 0.

    The unused parameter keeps the signature compatible with an argparse
    subcommand handler.
    """
    full_manifest = load_full_manifest()
    core_config = load_core_config()
    full_entries = build_full_entries(full_manifest)
    core_entries = build_core_canon(full_manifest, core_config)
    for scope, index_path, root, entries in (
        ("full", FULL_INDEX_PATH, CORPUS_ROOT, full_entries),
        ("core", CORE_INDEX_PATH, CORE_ROOT, core_entries),
    ):
        write_index(index_path, scope, root, entries)
        print(f"rebuilt {scope} index: {len(entries)} files")
    return 0


def ensure_indexes() -> None:
    """Rebuild the indexes unless both index files already exist on disk."""
    if not (FULL_INDEX_PATH.is_file() and CORE_INDEX_PATH.is_file()):
        rebuild()


def load_index(path: Path) -> dict[str, object]:
    """Read and parse a JSON index file."""
    with path.open() as handle:
        return json.load(handle)


def merge_entries(scope: str) -> list[dict[str, object]]:
    """Return index entries for *scope*.

    "full" and "core" return that index's files verbatim. Any other scope
    value merges both indexes by ``source_path``: each merged entry gains
    an ``available_in`` list naming the indexes that contain it (ordered
    core before full), and core entries take precedence for field values.
    """
    ensure_indexes()
    if scope == "full":
        return list(load_index(FULL_INDEX_PATH)["files"])
    if scope == "core":
        return list(load_index(CORE_INDEX_PATH)["files"])
    merged: dict[str, dict[str, object]] = {}
    # Lower rank wins when deciding whose field values to keep.
    scope_order = {"core": 0, "full": 1}
    for current_scope, index_path in (("core", CORE_INDEX_PATH), ("full", FULL_INDEX_PATH)):
        for entry in load_index(index_path)["files"]:
            key = str(entry["source_path"])
            if key not in merged:
                merged[key] = dict(entry)
                merged[key]["available_in"] = [current_scope]
            else:
                merged[key]["available_in"].append(current_scope)
                # available_in[0] is the scope that created the record;
                # overwrite fields only if the current scope outranks it.
                # Since core is iterated first, core fields always win.
                if scope_order[current_scope] < scope_order[merged[key]["available_in"][0]]:
                    merged[key].update(entry)
    result = list(merged.values())
    for entry in result:
        entry["available_in"] = sorted(set(entry["available_in"]), key=lambda item: scope_order[item])
    return result


def sort_entries_for_pack(scope: str, entries: list[dict[str, object]]) -> list[dict[str, object]]:
    """Order entries for packing.

    Core scope sorts by canon section, then date, lowercase title, path.
    Any other scope sorts by category, then canon section, date, lowercase
    title, path.
    """
    def core_key(entry: dict[str, object]) -> tuple:
        return (
            section_sort_key(str(entry.get("canon_section", ""))),
            str(entry.get("date", "")),
            str(entry.get("title", "")).lower(),
            str(entry.get("path", "")),
        )

    def full_key(entry: dict[str, object]) -> tuple:
        return (
            category_sort_key(str(entry.get("category", ""))),
            str(entry.get("canon_section", "")),
            str(entry.get("date", "")),
            str(entry.get("title", "")).lower(),
            str(entry.get("path", "")),
        )

    return sorted(entries, key=core_key if scope == "core" else full_key)


def scope_pack_paths(scope: str) -> tuple[Path, Path]:
    """Return the (txt, html) output paths for the given pack scope."""
    special = {"core": (CORE_PACK_TXT_PATH, CORE_PACK_HTML_PATH)}
    return special.get(scope, (FULL_PACK_TXT_PATH, FULL_PACK_HTML_PATH))


def humanize_label(value: str, fallback: str = "Ungrouped") -> str:
    """Turn a snake/kebab-case label into title-cased words, keeping acronyms.

    Returns *fallback* only when the stripped value is empty; all-caps
    words are left uppercase, others are capitalized.
    """
    cleaned = value.strip().replace("_", " ").replace("-", " ")
    if not cleaned:
        return fallback
    words = [word.upper() if word.isupper() else word.capitalize() for word in cleaned.split()]
    return " ".join(words)


def scope_display_name(scope: str) -> str:
    """Human-readable name for a pack scope."""
    return "Core Canon" if scope == "core" else "Full Corpus"


def scope_description(scope: str) -> str:
    """One-line blurb describing a pack scope."""
    if scope == "core":
        return "Compact ETC doctrine pack for direct reading, prompt loading, and source tracing."
    return "Broader ETC archive across history, philosophy, governance, and course material."


def build_pack_payload(scope: str) -> dict[str, object]:
    """Assemble cleaned documents plus size statistics for a pack scope.

    Each document carries its 1-based index, the index entry, the cleaned
    body text, and a word count. Totals include two rough token estimates
    (chars/4 and words*1.3).

    Raises:
        ValueError: for scopes other than "core" and "full".
    """
    if scope not in {"core", "full"}:
        raise ValueError(f"unsupported pack scope: {scope}")
    ordered = sort_entries_for_pack(scope, merge_entries(scope))
    documents: list[dict[str, object]] = []
    total_words = 0
    total_chars = 0
    for position, entry in enumerate(ordered, start=1):
        source = absolute_entry_path(entry)
        cleaned = clean_body_text(source, source.read_text(errors="ignore"))
        words = len(re.findall(r"\b[\w'-]+\b", cleaned))
        total_words += words
        total_chars += len(cleaned)
        documents.append(
            {
                "index": position,
                "entry": entry,
                "clean_text": cleaned,
                "word_count": words,
            }
        )
    return {
        "scope": scope,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "file_count": len(documents),
        "total_words": total_words,
        "total_clean_chars": total_chars,
        "rough_tokens_chars_div4": round(total_chars / 4),
        "rough_tokens_words_x1_3": round(total_words * 1.3),
        "documents": documents,
    }


def render_pack_txt(payload: dict[str, object]) -> str:
    """Render a pack payload as the plain-text context-pack format.

    A fixed header of summary lines is followed by one metadata block plus
    cleaned body per document, separated by 80-char rules.
    """
    out: list[str] = [
        "askETC LLM Context Pack",
        f"scope: {payload['scope']}",
        f"generated_at: {payload['generated_at']}",
        f"files: {payload['file_count']}",
        f"total_words: {payload['total_words']}",
        f"total_clean_chars: {payload['total_clean_chars']}",
        f"rough_tokens_chars_div4: {payload['rough_tokens_chars_div4']}",
        f"rough_tokens_words_x1_3: {payload['rough_tokens_words_x1_3']}",
        "format: cleaned text with stable per-document metadata blocks",
        "=" * 80,
    ]
    for doc in payload["documents"]:
        entry = doc["entry"]
        tag_text = ", ".join(str(tag) for tag in entry.get("tags", []))
        heading_text = " | ".join(str(item) for item in entry.get("headings", []))
        out += [
            f"DOCUMENT {doc['index']:04d}",
            f"title: {entry.get('title', '')}",
            f"date: {entry.get('date', '')}",
            f"category: {entry.get('category', '')}",
            f"canon_section: {entry.get('canon_section', '')}",
            f"source_path: {entry.get('source_path', '')}",
            f"packed_path: {entry.get('path', '')}",
            f"tags: {tag_text}",
            f"headings: {heading_text}",
            "-" * 80,
            str(doc["clean_text"]),
            "=" * 80,
        ]
    return "\n".join(out) + "\n"


def render_pack_html(payload: dict[str, object]) -> str:
    documents = payload["documents"]
    title = f"askETC {payload['scope']} context pack"
    group_key = "canon_section" if payload["scope"] == "core" else "category"
    grouped_documents: dict[str, list[dict[str, object]]] = defaultdict(list)
    for doc in documents:
        entry = doc["entry"]
        grouped_documents[str(entry.get(group_key, "")).strip()].append(doc)

    ordered_groups = sorted(
        grouped_documents.items(),
        key=lambda item: (
            section_sort_key(item[0]) if payload["scope"] == "core" else category_sort_key(item[0]),
            humanize_label(item[0]).lower(),
        ),
    )
    total_documents = len(documents)
    summary_lines = [
        f"scope: {payload['scope']}",
        f"generated_at: {payload['generated_at']}",
        f"files: {payload['file_count']}",
        f"total_words: {payload['total_words']}",
        f"total_clean_chars: {payload['total_clean_chars']}",
        f"rough_tokens_chars_div4: {payload['rough_tokens_chars_div4']}",
        f"rough_tokens_words_x1_3: {payload['rough_tokens_words_x1_3']}",
        "format: cleaned text with stable per-document metadata blocks",
    ]
    style_block = """
  <style>
    :root {
      color-scheme: light;
      --font-sans: "Avenir Next", "Segoe UI", "Helvetica Neue", sans-serif;
      --font-serif: Charter, "Iowan Old Style", "Palatino Linotype", "Book Antiqua", Georgia, serif;
      --font-mono: "SFMono-Regular", Menlo, Monaco, Consolas, "Liberation Mono", monospace;
      --radius-xl: 28px;
      --radius-lg: 22px;
      --radius-md: 16px;
      --radius-sm: 12px;
      --space-1: 0.4rem;
      --space-2: 0.7rem;
      --space-3: 1rem;
      --space-4: 1.4rem;
      --space-5: 1.9rem;
      --space-6: 2.5rem;
      --page-width: min(1380px, calc(100vw - 28px));
      --measure: 76ch;
      --bg: #eef2eb;
      --bg-accent: #f7f4eb;
      --panel: rgba(255, 252, 246, 0.8);
      --panel-strong: rgba(255, 255, 255, 0.92);
      --panel-soft: rgba(244, 247, 241, 0.88);
      --text: #13231c;
      --muted: #567164;
      --subtle: #7b9185;
      --border: rgba(34, 96, 68, 0.14);
      --accent: #167c5d;
      --accent-2: #1580a7;
      --accent-3: #d39a24;
      --chip: rgba(20, 124, 93, 0.1);
      --shadow: 0 24px 80px rgba(27, 55, 43, 0.13);
      --shadow-soft: 0 14px 40px rgba(27, 55, 43, 0.08);
      --rainbow: linear-gradient(96deg, #1f9d76 0%, #21a9b8 32%, #64a9ff 58%, #e1a632 100%);
      --hero-grid: linear-gradient(125deg, rgba(16, 124, 93, 0.08), transparent 40%, rgba(21, 128, 167, 0.08));
      --bg-glow-a: rgba(35, 146, 108, 0.18);
      --bg-glow-b: rgba(21, 128, 167, 0.16);
      --bg-glow-c: rgba(211, 154, 36, 0.14);
    }

    @media (prefers-color-scheme: dark) {
      :root {
        color-scheme: dark;
        --bg: #07110d;
        --bg-accent: #0d1915;
        --panel: rgba(10, 19, 16, 0.78);
        --panel-strong: rgba(10, 19, 16, 0.92);
        --panel-soft: rgba(12, 25, 21, 0.84);
        --text: #ecf7f1;
        --muted: #b2ccbe;
        --subtle: #7d9d8f;
        --border: rgba(120, 212, 171, 0.16);
        --accent: #7ce2a7;
        --accent-2: #72d8ff;
        --accent-3: #f2c35e;
        --chip: rgba(124, 226, 167, 0.1);
        --shadow: 0 28px 90px rgba(0, 0, 0, 0.42);
        --shadow-soft: 0 18px 48px rgba(0, 0, 0, 0.25);
        --rainbow: linear-gradient(96deg, #7ce2a7 0%, #72d8ff 34%, #8eb7ff 62%, #f2c35e 100%);
        --hero-grid: linear-gradient(125deg, rgba(124, 226, 167, 0.09), transparent 42%, rgba(114, 216, 255, 0.09));
        --bg-glow-a: rgba(124, 226, 167, 0.18);
        --bg-glow-b: rgba(114, 216, 255, 0.18);
        --bg-glow-c: rgba(242, 195, 94, 0.1);
      }
    }

    html[data-theme="light"] {
      color-scheme: light;
      --bg: #eef2eb;
      --bg-accent: #f7f4eb;
      --panel: rgba(255, 252, 246, 0.8);
      --panel-strong: rgba(255, 255, 255, 0.92);
      --panel-soft: rgba(244, 247, 241, 0.88);
      --text: #13231c;
      --muted: #567164;
      --subtle: #7b9185;
      --border: rgba(34, 96, 68, 0.14);
      --accent: #167c5d;
      --accent-2: #1580a7;
      --accent-3: #d39a24;
      --chip: rgba(20, 124, 93, 0.1);
      --shadow: 0 24px 80px rgba(27, 55, 43, 0.13);
      --shadow-soft: 0 14px 40px rgba(27, 55, 43, 0.08);
      --rainbow: linear-gradient(96deg, #1f9d76 0%, #21a9b8 32%, #64a9ff 58%, #e1a632 100%);
      --hero-grid: linear-gradient(125deg, rgba(16, 124, 93, 0.08), transparent 40%, rgba(21, 128, 167, 0.08));
      --bg-glow-a: rgba(35, 146, 108, 0.18);
      --bg-glow-b: rgba(21, 128, 167, 0.16);
      --bg-glow-c: rgba(211, 154, 36, 0.14);
    }

    html[data-theme="dark"] {
      color-scheme: dark;
      --bg: #07110d;
      --bg-accent: #0d1915;
      --panel: rgba(10, 19, 16, 0.78);
      --panel-strong: rgba(10, 19, 16, 0.92);
      --panel-soft: rgba(12, 25, 21, 0.84);
      --text: #ecf7f1;
      --muted: #b2ccbe;
      --subtle: #7d9d8f;
      --border: rgba(120, 212, 171, 0.16);
      --accent: #7ce2a7;
      --accent-2: #72d8ff;
      --accent-3: #f2c35e;
      --chip: rgba(124, 226, 167, 0.1);
      --shadow: 0 28px 90px rgba(0, 0, 0, 0.42);
      --shadow-soft: 0 18px 48px rgba(0, 0, 0, 0.25);
      --rainbow: linear-gradient(96deg, #7ce2a7 0%, #72d8ff 34%, #8eb7ff 62%, #f2c35e 100%);
      --hero-grid: linear-gradient(125deg, rgba(124, 226, 167, 0.09), transparent 42%, rgba(114, 216, 255, 0.09));
      --bg-glow-a: rgba(124, 226, 167, 0.18);
      --bg-glow-b: rgba(114, 216, 255, 0.18);
      --bg-glow-c: rgba(242, 195, 94, 0.1);
    }

    * {
      box-sizing: border-box;
    }

    html {
      scroll-behavior: smooth;
    }

    body {
      margin: 0;
      min-height: 100vh;
      font-family: var(--font-serif);
      color: var(--text);
      background:
        radial-gradient(circle at 10% 10%, var(--bg-glow-a), transparent 28%),
        radial-gradient(circle at 86% 12%, var(--bg-glow-b), transparent 30%),
        radial-gradient(circle at 82% 82%, var(--bg-glow-c), transparent 26%),
        linear-gradient(180deg, var(--bg-accent), var(--bg));
      background-attachment: fixed;
    }

    body::before {
      content: "";
      position: fixed;
      inset: 0;
      pointer-events: none;
      background:
        linear-gradient(120deg, rgba(255, 255, 255, 0.08), transparent 36%),
        repeating-linear-gradient(90deg, transparent 0 22px, rgba(255, 255, 255, 0.035) 22px 23px);
      mask-image: linear-gradient(180deg, rgba(0, 0, 0, 0.6), transparent 78%);
      opacity: 0.6;
      z-index: 0;
    }

    ::selection {
      background: rgba(21, 128, 167, 0.22);
      color: var(--text);
    }

    a {
      color: var(--accent-2);
      text-decoration: none;
    }

    a:hover {
      text-decoration: underline;
      text-decoration-color: color-mix(in srgb, var(--accent-3) 65%, transparent);
      text-underline-offset: 0.16em;
    }

    code,
    kbd,
    samp {
      font-family: var(--font-mono);
    }

    .page {
      position: relative;
      z-index: 1;
      width: var(--page-width);
      margin: 0 auto;
      padding: clamp(16px, 2vw, 28px) 0 48px;
    }

    .hero {
      position: relative;
      overflow: hidden;
      padding: clamp(20px, 4vw, 34px);
      border: 1px solid var(--border);
      border-radius: var(--radius-xl);
      background:
        linear-gradient(180deg, color-mix(in srgb, var(--panel-strong) 88%, transparent), color-mix(in srgb, var(--panel) 92%, transparent)),
        var(--hero-grid);
      box-shadow: var(--shadow);
      backdrop-filter: blur(18px) saturate(1.2);
    }

    .hero::after {
      content: "";
      position: absolute;
      inset: auto 0 0;
      height: 2px;
      background: var(--rainbow);
      opacity: 0.9;
    }

    .hero-topbar {
      display: flex;
      align-items: flex-start;
      justify-content: space-between;
      gap: var(--space-3);
      margin-bottom: var(--space-4);
    }

    .brand {
      display: grid;
      gap: 0.55rem;
      max-width: 42rem;
    }

    .eyebrow {
      display: inline-flex;
      align-items: center;
      gap: 0.55rem;
      flex-wrap: wrap;
      font-family: var(--font-sans);
      font-size: 0.76rem;
      font-weight: 700;
      letter-spacing: 0.18em;
      text-transform: uppercase;
      color: var(--muted);
    }

    .eyebrow::before {
      content: "";
      width: 2.6rem;
      height: 1px;
      background: var(--rainbow);
      border-radius: 999px;
    }

    .brand h1 {
      margin: 0;
      font-size: clamp(2.2rem, 4.8vw, 4.6rem);
      line-height: 0.98;
      letter-spacing: -0.035em;
      text-wrap: balance;
    }

    .brand h1 span {
      display: block;
      background: var(--rainbow);
      -webkit-background-clip: text;
      background-clip: text;
      color: transparent;
    }

    .lede {
      margin: 0;
      max-width: 66ch;
      font-size: clamp(1.03rem, 1.5vw, 1.2rem);
      line-height: 1.7;
      color: var(--muted);
      text-wrap: pretty;
    }

    .hero-actions {
      display: flex;
      flex-direction: column;
      align-items: flex-end;
      gap: 0.8rem;
      min-width: 11rem;
    }

    .theme-toggle {
      display: inline-flex;
      align-items: center;
      gap: 0.7rem;
      padding: 0.78rem 1rem;
      border: 1px solid var(--border);
      border-radius: 999px;
      background: color-mix(in srgb, var(--panel-soft) 84%, transparent);
      color: var(--text);
      font: 600 0.92rem/1 var(--font-sans);
      box-shadow: var(--shadow-soft);
      cursor: pointer;
      transition: transform 180ms ease, border-color 180ms ease, background 180ms ease;
    }

    .theme-toggle:hover {
      transform: translateY(-1px);
      border-color: color-mix(in srgb, var(--accent-2) 36%, var(--border));
    }

    .theme-toggle__swatch {
      width: 0.85rem;
      height: 0.85rem;
      border-radius: 999px;
      background: var(--rainbow);
      box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.16);
      flex: 0 0 auto;
    }

    .summary-grid,
    .stats-grid {
      display: grid;
      gap: 0.95rem;
    }

    .stats-grid {
      grid-template-columns: repeat(4, minmax(0, 1fr));
      margin-top: var(--space-5);
    }

    .stat-card,
    .summary-panel,
    .nav-panel,
    .document {
      border: 1px solid var(--border);
      border-radius: var(--radius-lg);
      background: color-mix(in srgb, var(--panel) 92%, transparent);
      box-shadow: var(--shadow-soft);
      backdrop-filter: blur(16px) saturate(1.15);
    }

    .stat-card {
      padding: 1rem 1.05rem;
    }

    .stat-label {
      margin: 0 0 0.45rem;
      font: 700 0.72rem/1.3 var(--font-sans);
      letter-spacing: 0.14em;
      text-transform: uppercase;
      color: var(--subtle);
    }

    .stat-value {
      margin: 0;
      font-size: clamp(1.1rem, 2vw, 1.55rem);
      font-weight: 700;
      letter-spacing: -0.02em;
      line-height: 1.15;
    }

    .stat-note {
      margin: 0.35rem 0 0;
      color: var(--muted);
      font-size: 0.9rem;
      line-height: 1.45;
    }

    .layout {
      display: grid;
      gap: var(--space-5);
      grid-template-columns: minmax(0, 1fr);
      margin-top: var(--space-5);
    }

    .rail {
      display: grid;
      gap: var(--space-4);
      align-self: start;
    }

    .rail-inner {
      display: grid;
      gap: var(--space-4);
    }

    .summary-panel,
    .nav-panel {
      padding: 1.1rem;
    }

    .panel-title {
      margin: 0 0 0.75rem;
      font: 700 0.88rem/1.3 var(--font-sans);
      letter-spacing: 0.14em;
      text-transform: uppercase;
      color: var(--subtle);
    }

    .summary-block {
      margin: 0;
      white-space: pre-wrap;
      word-break: break-word;
      font-family: var(--font-mono);
      font-size: 0.82rem;
      line-height: 1.6;
      color: var(--muted);
    }

    .filter-stack {
      display: grid;
      gap: 0.8rem;
    }

    .filter-label {
      display: grid;
      gap: 0.45rem;
      font: 600 0.94rem/1.4 var(--font-sans);
      color: var(--text);
    }

    .filter-input {
      width: 100%;
      padding: 0.85rem 0.95rem;
      border: 1px solid var(--border);
      border-radius: var(--radius-sm);
      background: color-mix(in srgb, var(--panel-strong) 82%, transparent);
      color: var(--text);
      font: 500 0.98rem/1.3 var(--font-sans);
      outline: none;
    }

    .filter-input:focus {
      border-color: color-mix(in srgb, var(--accent-2) 45%, var(--border));
      box-shadow: 0 0 0 3px color-mix(in srgb, var(--accent-2) 16%, transparent);
    }

    .filter-status {
      margin: 0;
      color: var(--muted);
      font-size: 0.93rem;
      line-height: 1.45;
    }

    .index-groups {
      display: grid;
      gap: 0.9rem;
    }

    .index-group {
      border: 1px solid color-mix(in srgb, var(--border) 88%, transparent);
      border-radius: var(--radius-md);
      background: color-mix(in srgb, var(--panel-soft) 80%, transparent);
      overflow: hidden;
    }

    .index-group summary {
      display: flex;
      align-items: center;
      justify-content: space-between;
      gap: 1rem;
      padding: 0.9rem 1rem;
      cursor: pointer;
      list-style: none;
      font: 700 0.93rem/1.35 var(--font-sans);
      color: var(--text);
    }

    .index-group summary::-webkit-details-marker {
      display: none;
    }

    .index-count {
      padding: 0.25rem 0.55rem;
      border-radius: 999px;
      background: var(--chip);
      color: var(--accent);
      font-size: 0.8rem;
    }

    .doc-index-list {
      margin: 0;
      padding: 0 0.7rem 0.8rem;
      list-style: none;
      display: grid;
      gap: 0.3rem;
    }

    .doc-index-item[hidden] {
      display: none;
    }

    .doc-index-link {
      display: grid;
      gap: 0.18rem;
      padding: 0.75rem 0.7rem;
      border-radius: 12px;
      transition: background 160ms ease, transform 160ms ease;
    }

    .doc-index-link:hover {
      background: color-mix(in srgb, var(--chip) 88%, transparent);
      transform: translateX(2px);
      text-decoration: none;
    }

    .doc-index-kicker {
      color: var(--subtle);
      font: 700 0.72rem/1.3 var(--font-sans);
      letter-spacing: 0.08em;
      text-transform: uppercase;
    }

    .doc-index-title {
      color: var(--text);
      font: 600 0.96rem/1.4 var(--font-sans);
    }

    .doc-index-meta {
      color: var(--muted);
      font: 500 0.8rem/1.4 var(--font-sans);
      word-break: break-word;
    }

    .documents {
      display: grid;
      gap: var(--space-4);
      align-content: start;
    }

    .document {
      padding: clamp(1rem, 2vw, 1.5rem);
      scroll-margin-top: 1.25rem;
    }

    .document[hidden] {
      display: none;
    }

    .document-top {
      display: flex;
      align-items: flex-start;
      justify-content: space-between;
      gap: 1rem;
    }

    .document-kicker {
      margin: 0 0 0.75rem;
      font: 700 0.75rem/1.3 var(--font-sans);
      letter-spacing: 0.16em;
      text-transform: uppercase;
      color: var(--subtle);
    }

    .document-anchor {
      display: inline-flex;
      align-items: center;
      justify-content: center;
      min-width: 2.3rem;
      height: 2.3rem;
      padding: 0 0.7rem;
      border: 1px solid var(--border);
      border-radius: 999px;
      background: color-mix(in srgb, var(--panel-soft) 84%, transparent);
      color: var(--muted);
      font: 700 0.88rem/1 var(--font-sans);
      flex: 0 0 auto;
    }

    .document-anchor:hover {
      color: var(--accent-2);
      text-decoration: none;
    }

    .document h2 {
      margin: 0;
      font-size: clamp(1.45rem, 2.3vw, 2.2rem);
      line-height: 1.08;
      letter-spacing: -0.02em;
      text-wrap: balance;
    }

    .document-source {
      margin: 0.8rem 0 0;
      font: 500 0.88rem/1.55 var(--font-mono);
      color: var(--muted);
      word-break: break-word;
    }

    .chip-row {
      display: flex;
      flex-wrap: wrap;
      gap: 0.55rem;
      margin: 1rem 0 0;
    }

    .chip {
      display: inline-flex;
      align-items: center;
      gap: 0.35rem;
      padding: 0.35rem 0.65rem;
      border-radius: 999px;
      background: var(--chip);
      color: var(--accent);
      font: 700 0.78rem/1.2 var(--font-sans);
      letter-spacing: 0.03em;
    }

    .meta-details {
      margin-top: 1rem;
      border: 1px solid color-mix(in srgb, var(--border) 90%, transparent);
      border-radius: var(--radius-md);
      background: color-mix(in srgb, var(--panel-soft) 84%, transparent);
      overflow: hidden;
    }

    .meta-details summary {
      cursor: pointer;
      padding: 0.9rem 1rem;
      font: 700 0.92rem/1.4 var(--font-sans);
      color: var(--text);
      list-style: none;
    }

    .meta-details summary::-webkit-details-marker {
      display: none;
    }

    .meta-grid {
      margin: 0;
      padding: 0 1rem 1rem;
      display: grid;
      grid-template-columns: minmax(0, 12rem) minmax(0, 1fr);
      gap: 0.5rem 1rem;
      font-size: 0.88rem;
      line-height: 1.55;
    }

    .meta-grid dt {
      margin: 0;
      color: var(--subtle);
      font: 700 0.78rem/1.5 var(--font-sans);
      letter-spacing: 0.08em;
      text-transform: uppercase;
    }

    .meta-grid dd {
      margin: 0;
      color: var(--muted);
      font-family: var(--font-mono);
      word-break: break-word;
    }

    .document-body {
      margin: 1.2rem 0 0;
      max-width: var(--measure);
      white-space: pre-wrap;
      word-break: break-word;
      overflow-wrap: anywhere;
      font-family: var(--font-serif);
      font-size: clamp(1rem, 1.2vw, 1.12rem);
      line-height: 1.78;
      color: var(--text);
      tab-size: 2;
    }

    .empty-state {
      margin: 0;
      padding: 1.2rem 1.25rem;
      border: 1px dashed color-mix(in srgb, var(--border) 95%, transparent);
      border-radius: var(--radius-md);
      background: color-mix(in srgb, var(--panel-soft) 74%, transparent);
      color: var(--muted);
      font: 600 0.95rem/1.5 var(--font-sans);
    }

    .footer-note {
      margin-top: var(--space-5);
      padding: 0 0.2rem;
      color: var(--subtle);
      font: 600 0.86rem/1.5 var(--font-sans);
      text-align: center;
    }

    @media (min-width: 1080px) {
      .layout {
        grid-template-columns: minmax(300px, 360px) minmax(0, 1fr);
      }

      .rail-inner {
        position: sticky;
        top: 18px;
      }
    }

    @media (max-width: 960px) {
      .stats-grid {
        grid-template-columns: repeat(2, minmax(0, 1fr));
      }
    }

    @media (max-width: 720px) {
      .page {
        width: min(100vw - 18px, 100%);
        padding-top: 10px;
      }

      .hero-topbar,
      .document-top {
        flex-direction: column;
      }

      .hero-actions {
        align-items: stretch;
        width: 100%;
      }

      .theme-toggle {
        justify-content: center;
      }

      .stats-grid,
      .meta-grid {
        grid-template-columns: minmax(0, 1fr);
      }

      .document-anchor {
        min-width: 0;
      }
    }
  </style>
"""
    script_block = """
  <script>
    (() => {
      const storageKey = "asketc-theme";
      const root = document.documentElement;
      const toggle = document.querySelector("[data-theme-toggle]");
      const filterInput = document.querySelector("[data-doc-filter]");
      const filterStatus = document.querySelector("[data-filter-status]");
      const emptyState = document.querySelector("[data-empty-state]");
      const articles = Array.from(document.querySelectorAll("[data-document]"));
      const indexItems = Array.from(document.querySelectorAll("[data-index-item]"));
      const groups = Array.from(document.querySelectorAll("[data-index-group]"));
      const media = window.matchMedia("(prefers-color-scheme: dark)");

      const readStoredTheme = () => {
        try {
          const value = window.localStorage.getItem(storageKey);
          return value === "light" || value === "dark" ? value : "";
        } catch (error) {
          return "";
        }
      };

      const writeStoredTheme = (value) => {
        try {
          window.localStorage.setItem(storageKey, value);
        } catch (error) {
          // Ignore storage failures in restrictive contexts.
        }
      };

      const activeTheme = () => root.dataset.theme || (media.matches ? "dark" : "light");

      const syncToggle = () => {
        if (!toggle) {
          return;
        }
        const current = activeTheme();
        const next = current === "dark" ? "light" : "dark";
        toggle.setAttribute("aria-label", `Switch to ${next} mode`);
        toggle.querySelector("[data-theme-label]").textContent = `${current} mode`;
      };

      const applyStoredTheme = () => {
        const stored = readStoredTheme();
        if (stored) {
          root.dataset.theme = stored;
        }
        syncToggle();
      };

      const updateFilter = () => {
        if (!filterInput || !filterStatus) {
          return;
        }
        const query = filterInput.value.trim().toLowerCase();
        const terms = query ? query.split(/\\s+/).filter(Boolean) : [];
        const visibleIds = new Set();
        let visibleCount = 0;

        articles.forEach((article) => {
          const haystack = article.dataset.search || "";
          const matches = !terms.length || terms.every((term) => haystack.includes(term));
          article.hidden = !matches;
          if (matches) {
            visibleCount += 1;
            visibleIds.add(article.id);
          }
        });

        indexItems.forEach((item) => {
          const matches = visibleIds.has(item.dataset.target || "");
          item.hidden = !matches;
        });

        groups.forEach((group) => {
          const anyVisible = Array.from(group.querySelectorAll("[data-index-item]")).some((item) => !item.hidden);
          group.hidden = !anyVisible;
          if (terms.length) {
            group.open = anyVisible;
          }
        });

        filterStatus.textContent = terms.length
          ? `Showing ${visibleCount} of ${articles.length} documents for "${query}".`
          : `Showing all ${articles.length} documents.`;
        if (emptyState) {
          emptyState.hidden = visibleCount !== 0;
        }
      };

      if (toggle) {
        toggle.addEventListener("click", () => {
          const next = activeTheme() === "dark" ? "light" : "dark";
          root.dataset.theme = next;
          writeStoredTheme(next);
          syncToggle();
        });
      }

      if (typeof media.addEventListener === "function") {
        media.addEventListener("change", () => {
          if (!readStoredTheme()) {
            syncToggle();
          }
        });
      } else if (typeof media.addListener === "function") {
        media.addListener(() => {
          if (!readStoredTheme()) {
            syncToggle();
          }
        });
      }

      if (filterInput) {
        filterInput.addEventListener("input", updateFilter);
      }

      applyStoredTheme();
      updateFilter();
    })();
  </script>
"""
    parts = [
        "<!DOCTYPE html>",
        "<html lang=\"en\">",
        "<head>",
        "  <meta charset=\"utf-8\">",
        f"  <title>{html.escape(title)}</title>",
        "  <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">",
        f"  <meta name=\"description\" content=\"{html.escape(scope_description(str(payload['scope'])))}\">",
        style_block,
        "</head>",
        "<body>",
        "<main id=\"top\" class=\"page\">",
        "  <header class=\"hero\">",
        "    <div class=\"hero-topbar\">",
        "      <div class=\"brand\">",
        "        <div class=\"eyebrow\">askETC</div>",
        f"        <h1>{html.escape(scope_display_name(str(payload['scope'])))}<span>{html.escape(title)}</span></h1>",
        f"        <p class=\"lede\">{html.escape(scope_description(str(payload['scope'])))}</p>",
        "      </div>",
        "      <div class=\"hero-actions\">",
        "        <button type=\"button\" class=\"theme-toggle\" data-theme-toggle>",
        "          <span class=\"theme-toggle__swatch\" aria-hidden=\"true\"></span>",
        "          <span data-theme-label>theme</span>",
        "        </button>",
        "      </div>",
        "    </div>",
        "    <section class=\"stats-grid\" aria-label=\"Pack statistics\">",
        "      <article class=\"stat-card\">",
        "        <p class=\"stat-label\">Scope</p>",
        f"        <p class=\"stat-value\">{html.escape(scope_display_name(str(payload['scope'])))}</p>",
        "        <p class=\"stat-note\">Canonical doctrine pack or full ETC archive, rendered for long-form reading.</p>",
        "      </article>",
        "      <article class=\"stat-card\">",
        "        <p class=\"stat-label\">Documents</p>",
        f"        <p class=\"stat-value\">{payload['file_count']:,}</p>",
        "        <p class=\"stat-note\">Every document is anchor-linked, searchable by metadata, and wrapped for mobile reading.</p>",
        "      </article>",
        "      <article class=\"stat-card\">",
        "        <p class=\"stat-label\">Words</p>",
        f"        <p class=\"stat-value\">{payload['total_words']:,}</p>",
        f"        <p class=\"stat-note\">Approx. {payload['rough_tokens_words_x1_3']:,} tokens by word estimate.</p>",
        "      </article>",
        "      <article class=\"stat-card\">",
        "        <p class=\"stat-label\">Generated</p>",
        f"        <p class=\"stat-value\">{html.escape(str(payload['generated_at']))}</p>",
        f"        <p class=\"stat-note\">{len(ordered_groups)} grouped reading lanes for faster ETC navigation.</p>",
        "      </article>",
        "    </section>",
        "  </header>",
        "  <section class=\"layout\">",
        "    <aside class=\"rail\" aria-label=\"Document navigation\">",
        "      <div class=\"rail-inner\">",
        "        <section class=\"summary-panel\">",
        "          <h2 class=\"panel-title\">Pack manifest</h2>",
        "          <pre class=\"summary-block\">",
        html.escape("\n".join(summary_lines)),
        "          </pre>",
        "        </section>",
        "        <nav id=\"document-index\" class=\"nav-panel\" aria-label=\"Document index\">",
        "          <h2 class=\"panel-title\">Document index</h2>",
        "          <div class=\"filter-stack\">",
        "            <label class=\"filter-label\" for=\"doc-filter\">Filter titles, paths, headings, and tags</label>",
        "            <input id=\"doc-filter\" class=\"filter-input\" type=\"search\" placeholder=\"Search ETC topics\" autocomplete=\"off\" data-doc-filter>",
        "            <p class=\"filter-status\" data-filter-status></p>",
        "          </div>",
        "          <div class=\"index-groups\">",
    ]
    for group_index, (group_name, docs_in_group) in enumerate(ordered_groups):
        group_label = humanize_label(group_name)
        parts.extend(
            [
                f"            <details class=\"index-group\" data-index-group {'open' if payload['scope'] == 'core' or group_index == 0 else ''}>",
                "              <summary>",
                f"                <span>{html.escape(group_label)}</span>",
                f"                <span class=\"index-count\">{len(docs_in_group)}</span>",
                "              </summary>",
                "              <ol class=\"doc-index-list\">",
            ]
        )
        for doc in docs_in_group:
            entry = doc["entry"]
            meta_bits = []
            category = str(entry.get("category", "")).strip()
            section = str(entry.get("canon_section", "")).strip()
            date = str(entry.get("date", "")).strip()
            if category and category != group_name:
                meta_bits.append(humanize_label(category))
            if section and section != group_name:
                meta_bits.append(humanize_label(section))
            if date:
                meta_bits.append(date)
            meta_bits.append(str(entry.get("path", "")))
            parts.extend(
                [
                    f"                <li class=\"doc-index-item\" data-index-item data-target=\"document-{doc['index']:04d}\">",
                    f"                  <a class=\"doc-index-link\" href=\"#document-{doc['index']:04d}\">",
                    f"                    <span class=\"doc-index-kicker\">Document {doc['index']:04d}</span>",
                    f"                    <span class=\"doc-index-title\">{html.escape(str(entry.get('title', '')))}</span>",
                    f"                    <span class=\"doc-index-meta\">{html.escape(' | '.join(bit for bit in meta_bits if bit))}</span>",
                    "                  </a>",
                    "                </li>",
                ]
            )
        parts.extend(["              </ol>", "            </details>"])
    parts.extend(
        [
            "          </div>",
            "        </nav>",
            "      </div>",
            "    </aside>",
            "    <section id=\"documents\" class=\"documents\" aria-label=\"Packed documents\">",
            "      <p class=\"empty-state\" data-empty-state hidden>No documents match the current filter.</p>",
        ]
    )
    for doc in documents:
        entry = doc["entry"]
        tags = ", ".join(str(tag) for tag in entry.get("tags", []))
        headings = " | ".join(str(item) for item in entry.get("headings", []))
        search_terms = " ".join(
            item
            for item in [
                str(entry.get("title", "")),
                str(entry.get("date", "")),
                str(entry.get("category", "")),
                str(entry.get("canon_section", "")),
                str(entry.get("source_path", "")),
                str(entry.get("path", "")),
                tags,
                headings,
            ]
            if item
        ).lower()
        metadata_rows = [
            ("date", str(entry.get("date", "")).strip()),
            ("category", str(entry.get("category", "")).strip()),
            ("canon_section", str(entry.get("canon_section", "")).strip()),
            ("source_path", str(entry.get("source_path", "")).strip()),
            ("packed_path", str(entry.get("path", "")).strip()),
            ("tags", tags.strip()),
            ("headings", headings.strip()),
            ("word_count", f"{doc['word_count']:,}"),
        ]
        chips = []
        if entry.get("canon_section"):
            chips.append(humanize_label(str(entry["canon_section"])))
        if entry.get("category"):
            chips.append(humanize_label(str(entry["category"])))
        if entry.get("date"):
            chips.append(str(entry["date"]))
        chips.append(f"{doc['word_count']:,} words")
        parts.extend(
            [
                f"      <article id=\"document-{doc['index']:04d}\" class=\"document\" data-document data-search=\"{html.escape(search_terms, quote=True)}\">",
                "        <div class=\"document-top\">",
                "          <div>",
                f"            <p class=\"document-kicker\">Document {doc['index']:04d}</p>",
                f"            <h2>{html.escape(str(entry.get('title', '')))}</h2>",
                f"            <p class=\"document-source\">{html.escape(str(entry.get('source_path', '')))}</p>",
                "          </div>",
                f"          <a class=\"document-anchor\" href=\"#document-{doc['index']:04d}\" aria-label=\"Link to document {doc['index']:04d}\">#{doc['index']:04d}</a>",
                "        </div>",
                "        <div class=\"chip-row\">",
            ]
        )
        for chip in chips:
            parts.append(f"          <span class=\"chip\">{html.escape(chip)}</span>")
        parts.extend(
            [
                "        </div>",
                "        <details class=\"meta-details\">",
                "          <summary>Open metadata</summary>",
                "          <dl class=\"meta-grid\">",
            ]
        )
        for key, value in metadata_rows:
            if not value:
                continue
            parts.extend(
                [
                    f"            <dt>{html.escape(key)}</dt>",
                    f"            <dd>{html.escape(value)}</dd>",
                ]
            )
        parts.extend(
            [
                "          </dl>",
                "        </details>",
                f"        <pre class=\"document-body\">{html.escape(str(doc['clean_text']))}</pre>",
                "      </article>",
            ]
        )
    parts.extend(
        [
            "    </section>",
            "  </section>",
            "  <p class=\"footer-note\">Generated from the local askETC corpus.</p>",
            script_block,
            "</main>",
            "</body>",
            "</html>",
        ]
    )
    return "\n".join(parts) + "\n"


def pack(args: argparse.Namespace) -> int:
    """Generate LLM-ready TXT and HTML context packs for the requested scope(s).

    Writes one TXT and one HTML file per scope under ``PACKED_DIR`` and records
    a per-scope summary in ``PACKED_SUMMARY_PATH``. Returns 0 on success.
    """
    scopes = ["core", "full"] if args.scope == "all" else [args.scope]
    PACKED_DIR.mkdir(parents=True, exist_ok=True)
    summary: dict[str, object] = {}
    for scope in scopes:
        payload = build_pack_payload(scope)
        txt_path, html_path = scope_pack_paths(scope)
        txt_output = render_pack_txt(payload)
        html_output = render_pack_html(payload)
        # Write UTF-8 explicitly: the generated HTML declares charset=utf-8,
        # and the locale default encoding is not UTF-8 on every platform
        # (e.g. cp1252 on Windows), which would corrupt non-ASCII content.
        txt_path.write_text(txt_output, encoding="utf-8")
        html_path.write_text(html_output, encoding="utf-8")
        summary[scope] = {
            "generated_at": payload["generated_at"],
            "file_count": payload["file_count"],
            "total_words": payload["total_words"],
            "total_clean_chars": payload["total_clean_chars"],
            "rough_tokens_chars_div4": payload["rough_tokens_chars_div4"],
            "rough_tokens_words_x1_3": payload["rough_tokens_words_x1_3"],
            "txt_path": rel_to_asketc(txt_path),
            "html_path": rel_to_asketc(html_path),
        }
        print(f"packed {scope}: {payload['file_count']} files -> {txt_path.name}, {html_path.name}")
    if summary:
        PACKED_SUMMARY_PATH.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
    return 0


def compile_pattern(query: str, regex: bool, case_sensitive: bool) -> re.Pattern[str]:
    """Compile the user's query into a search pattern.

    The query is taken verbatim when ``regex`` is true, otherwise it is escaped
    and matched literally. Matching is case-insensitive unless
    ``case_sensitive`` is set.
    """
    if regex:
        source = query
    else:
        source = re.escape(query)
    return re.compile(source, 0 if case_sensitive else re.IGNORECASE)


def filter_entries(
    entries: list[dict[str, object]],
    category: str | None,
    section: str | None,
    path_filter: str | None,
) -> list[dict[str, object]]:
    filtered = []
    for entry in entries:
        if category and str(entry["category"]) != category:
            continue
        if section and str(entry.get("canon_section", "")) != section:
            continue
        if path_filter and path_filter not in str(entry["path"]):
            continue
        filtered.append(entry)
    return filtered


def search(args: argparse.Namespace) -> int:
    """Search indexed files for ``args.query`` and print formatted matches.

    Returns 0 when at least one file matched and 1 otherwise, so the exit
    status is usable from the shell.
    """
    entries = merge_entries(args.scope)
    entries = filter_entries(entries, args.category, args.section, args.path_filter)
    pattern = compile_pattern(args.query, args.regex, args.case_sensitive)
    results = []
    for entry in entries:
        file_path = absolute_entry_path(entry)
        # NOTE(review): decode as UTF-8 explicitly rather than the platform
        # locale default (which mis-reads UTF-8 corpora on e.g. Windows);
        # errors="ignore" keeps any stray bytes non-fatal. Assumes the corpus
        # is UTF-8 text -- confirm against the ingestion path.
        lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()
        matches = []
        for idx, line in enumerate(lines, start=1):
            if pattern.search(line):
                matches.append((idx, line.strip()))
                # Cap per-file output so a single file cannot dominate results.
                if len(matches) >= args.matches_per_file:
                    break
        if matches:
            results.append((entry, matches))
    # Rank: core-canon entries first, then by match count (descending),
    # then date and lowercase title for a stable order.
    results.sort(
        key=lambda item: (
            0 if "core" in item[0].get("available_in", [args.scope]) else 1,
            -len(item[1]),
            str(item[0].get("date", "")),
            str(item[0]["title"]).lower(),
        )
    )
    if not results:
        print("no matches")
        return 1
    for idx, (entry, matches) in enumerate(results[: args.limit], start=1):
        scopes = ",".join(entry.get("available_in", [args.scope]))
        date = f" {entry['date']}" if entry.get("date") else ""
        section = f" section={entry['canon_section']}" if entry.get("canon_section") else ""
        category = f" category={entry['category']}" if entry.get("category") else ""
        print(f"[{idx}] scope={scopes}{section}{category}{date}")
        print(f"title: {entry['title']}")
        print(absolute_entry_path(entry))
        for line_no, line in matches:
            print(f"  L{line_no}: {line}")
        print()
    return 0


def list_files(args: argparse.Namespace) -> int:
    """Print the indexed files for a scope, ordered by date then title."""
    selected = filter_entries(
        merge_entries(args.scope), args.category, args.section, args.path_filter
    )
    selected.sort(key=lambda item: (str(item.get("date", "")), str(item["title"]).lower()))
    for position, entry in enumerate(selected[: args.limit], start=1):
        # Assemble the header line from only the fields that are present.
        header = [f"[{position}] scope={','.join(entry.get('available_in', [args.scope]))}"]
        if entry.get("canon_section"):
            header.append(f"section={entry['canon_section']}")
        if entry.get("category"):
            header.append(f"category={entry['category']}")
        if entry.get("date"):
            header.append(f"{entry['date']}")
        print(" ".join(header))
        print(f"title: {entry['title']}")
        print(absolute_entry_path(entry))
        print()
    return 0


def stats(_: argparse.Namespace) -> int:
    """Print per-index file counts and, when present, packed-output totals."""
    ensure_indexes()
    for label, index_path in (("full", FULL_INDEX_PATH), ("core", CORE_INDEX_PATH)):
        payload = load_index(index_path)
        print(f"{label}: {payload['file_count']} files")
        # Both breakdowns share the same rendering; only key/heading differ.
        for heading, bucket_key in (("categories", "by_category"), ("sections", "by_section")):
            bucket = payload.get(bucket_key, {})
            if not bucket:
                continue
            print(f"  {heading}:")
            for name, count in bucket.items():
                print(f"    {name}: {count}")
        print()
    if PACKED_SUMMARY_PATH.is_file():
        packed = json.loads(PACKED_SUMMARY_PATH.read_text())
        for label in ("full", "core"):
            info = packed.get(label)
            if info is None:
                continue
            print(f"packed {label}:")
            print(f"  txt: {info['txt_path']}")
            print(f"  html: {info['html_path']}")
            print(f"  total_clean_chars: {info['total_clean_chars']}")
            print(f"  rough_tokens_chars_div4: {info['rough_tokens_chars_div4']}")
            print()
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Assemble the CLI: rebuild, pack, search, files, and stats subcommands."""
    parser = argparse.ArgumentParser(description="Query the local askETC corpus.")
    commands = parser.add_subparsers(dest="command", required=True)

    # rebuild: no options of its own.
    cmd = commands.add_parser("rebuild", help="Rebuild the core canon mirror and both indexes.")
    cmd.set_defaults(func=rebuild)

    # pack: choose which context pack(s) to regenerate.
    cmd = commands.add_parser("pack", help="Generate LLM-ready TXT and HTML context packs.")
    cmd.set_defaults(func=pack)
    cmd.add_argument("--scope", choices=["core", "full", "all"], default="all")

    # search: full-text query with scope/metadata filters and output limits.
    cmd = commands.add_parser("search", help="Search the indexed corpus or core canon.")
    cmd.set_defaults(func=search)
    cmd.add_argument("query", help="Literal string or regex to search for.")
    cmd.add_argument("--scope", choices=["core", "full", "both"], default="core")
    cmd.add_argument("--regex", action="store_true", help="Interpret the query as a regular expression.")
    cmd.add_argument("--case-sensitive", action="store_true")
    cmd.add_argument("--category", help="Filter by source category.")
    cmd.add_argument("--section", help="Filter by core canon section.")
    cmd.add_argument("--path-filter", help="Only search files whose path contains this substring.")
    cmd.add_argument("--limit", type=int, default=10)
    cmd.add_argument("--matches-per-file", type=int, default=3)

    # files: listing variant of search (no query, higher default limit).
    cmd = commands.add_parser("files", help="List indexed files for a scope.")
    cmd.set_defaults(func=list_files)
    cmd.add_argument("--scope", choices=["core", "full", "both"], default="core")
    cmd.add_argument("--category", help="Filter by source category.")
    cmd.add_argument("--section", help="Filter by core canon section.")
    cmd.add_argument("--path-filter", help="Only include files whose path contains this substring.")
    cmd.add_argument("--limit", type=int, default=50)

    # stats: no options of its own.
    cmd = commands.add_parser("stats", help="Show index counts for the full corpus and core canon.")
    cmd.set_defaults(func=stats)

    return parser


def main() -> int:
    """CLI entry point: parse arguments and dispatch to the chosen handler.

    A FileNotFoundError from any subcommand is reported via die() and turned
    into a nonzero exit status instead of a traceback.
    """
    args = build_parser().parse_args()
    try:
        result = args.func(args)
    except FileNotFoundError as exc:
        return die(str(exc))
    return int(result)


if __name__ == "__main__":
    # Script entry point: exit status comes from main().
    sys.exit(main())