"""Pipeline subprocess runner — captures and parses stdout/stderr into structured events."""
from __future__ import annotations

import ast
import datetime
import json
import os
import queue
import re
import shutil
import subprocess
import threading
from pathlib import Path
from typing import IO, Optional

# Filesystem anchors: repo root (two directories above this file) and the run-log dir.
DPG_ROOT  = Path(__file__).resolve().parent.parent
LOGS_DIR  = DPG_ROOT / "runs" / "logs"

# ─── Regex patterns ────────────────────────────────────────────────────────────
# Each pattern recognises one shape of pipeline stdout line; PipelineRunner
# turns matches into structured events (see _parse_line / _parse_detail).

# Log lines: "HH:MM:SS | logger.name | LEVEL | message"
_LOG_PREFIX_RE = re.compile(r"^\d{2}:\d{2}:\d{2} \| [\w.]+ \| \w+ \| (.+)$")

# Orchestrator step number prints: "[01]" / "[02c]" / "[07]" …
_STEP_NUM_RE = re.compile(r"^\[(\d{2}[a-z]?)\] (.+)$")

# Run ID: "[run_id] 20260506_143045"
_RUN_ID_RE = re.compile(r"^\[run_id\] (\S+)$")

# Project done: "[done] Project root: /path"
_DONE_RE = re.compile(r"^\[done\] Project root: (.+)$")

# Run summary: "       Total time: 1234.56s  |  LLM calls: 17  |  Tokens: in=364,064 out=129,352"
_SUMMARY_RE = re.compile(
    r"Total time:\s*([\d,.]+)s\s*\|\s*LLM calls:\s*(\d+)\s*\|\s*Tokens: in=([\d,]+) out=([\d,]+)"
)

# LLM call completed (from logging): "[LLM] step=X | model=Y | attempt=1/1 | in=N | out=M | total=P tok | Qms"
# Step names may contain spaces ("IR generation", "React generation (page)") so use lazy .+?
# The "page=" field is optional (page-level React calls only).
_LLM_RE = re.compile(
    r"\[LLM\] step=(.+?) \| model=(\S+) \| attempt=(\d+)/(\d+)"
    r"(?:\s*\|\s*page=(\S+))?"
    r"\s*\|\s*in=([\d,]+)\s*\|\s*out=([\d,]+)\s*\|\s*total=([\d,]+) tok\s*\|\s*([\d,]+)ms"
)

# LLM call starting: "     *  claude-sonnet-4-5  home_page calling..."
_LLM_CALLING_RE = re.compile(r"^\s+\*\s+(\S+)\s+(.*?)\s+calling\.\.\.")

# Detail lines from orchestrator (indented with exactly five spaces)
_DETAIL_RE = re.compile(r"^     (.+)$")

# Backend info: "     system='WMS'  modules=9  endpoints=139"
_BACKEND_INFO_RE = re.compile(r"system='([^']+)'\s+modules=(\d+)\s+endpoints=(\d+)")

# Pages list: "     pages=['home_page', 'dashboard', ...]" — captured as a Python literal
_PAGES_RE = re.compile(r"pages=(\[.*?\])")

# DDL artifact in detail line
_DDL_RE = re.compile(r"DDL=(\S+)")

# PRD artifact in detail line
_PRD_RE = re.compile(r"(full_prd\.md|prd\.md)")

# Module names from backend: each module is logged separately
_MODULE_RE = re.compile(r"Generating module[:\s]+(\w+)", re.IGNORECASE)

# Sub-step progress prints from pipeline internals: "  ›  message"
_SUBSTEP_RE = re.compile(r"^\s+›\s+(.+)$")

# Step-02c: mantara + dalfin completion
_MANTARA_DONE_RE = re.compile(r"mantara_schema\.json \+ dalfin\.json")

# Step-07: DDL → database result (success / any of the failure-or-skip messages)
_DDL_DB_OK_RE   = re.compile(r"DDL deployed to database")
_DDL_DB_FAIL_RE = re.compile(r"DDL deployment (failed|skipped)|Step 07 skipped|Mantara/Dalfin skipped")

# management-tenant
_TENANT_RE        = re.compile(r"^\[tenant\] (.+)$")
_TENANT_RESULT_RE = re.compile(r"Result: (\w+)")

def _parse_int(s: str) -> int:
    return int(s.replace(",", ""))


class PipelineRunner:
    """Run the DPG pipeline (``main.py generate``) as a subprocess.

    A daemon reader thread consumes the merged stdout/stderr stream line by
    line, mirrors every line into a log file under ``LOGS_DIR``, and parses it
    into structured event dicts pushed onto :attr:`events`.  Callers (typically
    a UI loop) poll :meth:`drain` for pending events.
    """

    def __init__(self) -> None:
        # Producer: reader thread; consumer: drain(). queue.Queue is thread-safe.
        self.events: queue.Queue = queue.Queue()
        self._proc: Optional[subprocess.Popen] = None
        self._thread: Optional[threading.Thread] = None
        self._upload_tmp: Optional[Path] = None   # temp upload dir, removed in terminate()
        self.is_running = False
        self.return_code: Optional[int] = None
        self._log_file: Optional[IO[str]] = None  # open log file handle
        self._log_path: Optional[Path] = None     # current path (renamed once run_id is known)
        self._run_id: Optional[str] = None

    # ── Public API ─────────────────────────────────────────────────────────────

    @property
    def log_path(self) -> Optional[Path]:
        """Current log file path, or ``None`` before :meth:`start` was called."""
        return self._log_path

    def start(self, prompt: str, file_path: Optional[Path] = None, domain: str = "", domain_url: str = "", db_info: Optional[dict] = None, run_id: Optional[str] = None) -> None:
        """Spawn the pipeline subprocess and start streaming its output.

        No-op when a run is already in progress.  Empty / whitespace-only
        arguments are not forwarded as CLI flags.

        Args:
            prompt: Free-text generation prompt (``--prompt``).
            file_path: Optional input file (``--file``).
            domain: Optional domain name (``--domain``).
            domain_url: Optional domain URL (``--domain-url``).
            db_info: Optional DB connection info, JSON-encoded (``--db-info``).
            run_id: Optional explicit run id (``--run-id``).
        """
        if self.is_running:
            return

        cmd = ["uv", "run", "python", "main.py", "generate"]
        if domain and domain.strip():
            cmd += ["--domain", domain.strip()]
        if domain_url and domain_url.strip():
            cmd += ["--domain-url", domain_url.strip()]
        if db_info:
            cmd += ["--db-info", json.dumps(db_info)]
        if prompt and prompt.strip():
            cmd += ["--prompt", prompt.strip()]
        if file_path:
            cmd += ["--file", str(file_path)]
        if run_id:
            cmd += ["--run-id", run_id]

        env = os.environ.copy()
        env.pop("VIRTUAL_ENV", None)  # prevent uv from seeing a stale venv path
        env["PYTHONUNBUFFERED"] = "1"  # force line-by-line output; without this logs only appear after the buffer fills

        # Open the log file *before* spawning so a failing open cannot leak a
        # running subprocess; renamed to {run_id}.log once the run_id is seen.
        LOGS_DIR.mkdir(parents=True, exist_ok=True)
        ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        self._log_path = LOGS_DIR / f"tmp_{ts}.log"
        self._log_file = self._log_path.open("w", encoding="utf-8", buffering=1)
        self._log_file.write(f"# DPG pipeline run — started {datetime.datetime.now().isoformat()}\n")
        self._log_file.write(f"# cmd: {' '.join(cmd)}\n\n")

        try:
            self._proc = subprocess.Popen(
                cmd,
                cwd=str(DPG_ROOT),
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr into the same stream
                text=True,
                bufsize=1,                 # line-buffered pipe
                env=env,
            )
        except Exception:
            self._close_log()  # don't leave a dangling log handle on spawn failure
            raise
        self.is_running = True
        self.return_code = None

        self._thread = threading.Thread(target=self._read_output, daemon=True)
        self._thread.start()

    def drain(self) -> list[dict]:
        """Return and remove all pending events (non-blocking)."""
        events: list[dict] = []
        try:
            while True:
                events.append(self.events.get_nowait())
        except queue.Empty:
            pass  # queue exhausted — normal exit
        return events

    def terminate(self) -> None:
        """Stop the subprocess (if running) and clean up log/upload resources.

        Sends SIGTERM and waits briefly for exit, escalating to SIGKILL if the
        pipeline ignores the signal.  The reader thread still emits its final
        ``finished`` event once the pipe drains.
        """
        if self._proc and self.is_running:
            self._proc.terminate()
            try:
                self._proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self._proc.kill()  # escalate: process ignored SIGTERM
        self.is_running = False
        self._close_log()
        if self._upload_tmp:
            shutil.rmtree(self._upload_tmp, ignore_errors=True)

    # ── Internal ───────────────────────────────────────────────────────────────

    def _close_log(self) -> None:
        """Write the end-of-run footer and close the log file (idempotent)."""
        if self._log_file:
            try:
                self._log_file.write(f"\n# ended {datetime.datetime.now().isoformat()}\n")
                self._log_file.close()
            except Exception:
                pass  # best effort — log teardown must never mask the real flow
            self._log_file = None

    def _rename_log(self, run_id: str) -> None:
        """Rename tmp_*.log → {run_id}.log once the run_id is known."""
        if not self._log_path or not self._log_path.exists():
            return
        new_path = LOGS_DIR / f"{run_id}.log"
        try:
            self._log_path.rename(new_path)
            self._log_path = new_path
        except Exception:
            pass  # non-fatal: keep old path

    def _read_output(self) -> None:
        """Reader-thread body: stream subprocess output until EOF.

        Every non-empty line is appended to the log file, forwarded verbatim
        as a ``log`` event, and fed to the structured parser.  A terminal
        ``finished`` (or ``error``) event is always emitted.
        """
        try:
            assert self._proc and self._proc.stdout
            for raw_line in self._proc.stdout:
                line = raw_line.rstrip()
                if line:
                    # Mirror the line into the log file.  terminate() may close
                    # the handle concurrently; tolerate that instead of letting
                    # the ValueError abort the loop and emit a bogus "error".
                    log = self._log_file
                    if log is not None:
                        try:
                            log.write(line + "\n")
                        except ValueError:
                            pass  # file closed by terminate(); drop the line
                    self.events.put({"type": "log", "text": line})
                    self._parse_line(line)
            self._proc.wait()
            self.return_code = self._proc.returncode
            self.is_running = False
            self._close_log()
            self.events.put({
                "type": "finished",
                "return_code": self._proc.returncode,
                "log_path": str(self._log_path) if self._log_path else None,
            })
        except Exception as exc:
            self.is_running = False
            self._close_log()
            self.events.put({"type": "error", "text": str(exc)})

    def _parse_line(self, line: str) -> None:
        """Parse one output line into at most one structured event.

        Strips the ``HH:MM:SS | logger | LEVEL |`` prefix if present, then
        tries the known line shapes in priority order; unmatched lines are
        silently ignored (they were already emitted as raw ``log`` events).
        """
        # Extract message body from logging prefix if present
        m = _LOG_PREFIX_RE.match(line)
        msg = m.group(1) if m else line

        # ── Run ID ─────────────────────────────────────────────────────────────
        m = _RUN_ID_RE.match(msg)
        if m:
            self._run_id = m.group(1)
            self._rename_log(self._run_id)  # tmp_*.log → {run_id}.log
            self.events.put({"type": "run_id", "run_id": m.group(1)})
            return

        # ── Project done ───────────────────────────────────────────────────────
        m = _DONE_RE.match(msg)
        if m:
            self.events.put({"type": "project_done", "project_root": m.group(1).strip()})
            return

        # ── Run summary ────────────────────────────────────────────────────────
        m = _SUMMARY_RE.search(msg)
        if m:
            self.events.put({
                "type": "run_summary",
                "duration_s":  float(m.group(1).replace(",", "")),
                "llm_calls":   int(m.group(2)),
                "total_input": _parse_int(m.group(3)),
                "total_output": _parse_int(m.group(4)),
            })
            return

        # ── LLM call completed ─────────────────────────────────────────────────
        m = _LLM_RE.search(msg)
        if m:
            self.events.put({
                "type":           "llm_call",
                "step":           m.group(1),
                "model":          m.group(2),
                "attempt":        int(m.group(3)),
                "total_attempts": int(m.group(4)),
                "page":           m.group(5),  # None when the optional page= field is absent
                "input_tokens":   _parse_int(m.group(6)),
                "output_tokens":  _parse_int(m.group(7)),
                "total_tokens":   _parse_int(m.group(8)),
                "duration_ms":    float(m.group(9).replace(",", "")),
            })
            return

        # ── LLM call starting ──────────────────────────────────────────────────
        m = _LLM_CALLING_RE.match(msg)
        if m:
            self.events.put({
                "type":  "llm_calling",
                "model": m.group(1),
                "label": m.group(2).strip(),
            })
            return

        # ── Orchestrator step prints: [01] / [02] … ────────────────────────────
        m = _STEP_NUM_RE.match(msg)
        if m:
            self.events.put({
                "type":     "step_print",
                "step_num": m.group(1),
                "message":  m.group(2).strip(),
            })
            # Step messages can also carry detail payloads (DDL=, pages=, …)
            self._parse_detail(m.group(2).strip())
            return

        # ── Sub-step progress prints: "  ›  message" ──────────────────────────
        # NOTE(review): matched against the raw line, not the prefix-stripped
        # msg — prefix stripping removes the leading indent the pattern needs.
        m = _SUBSTEP_RE.match(line)
        if m:
            self.events.put({"type": "substep", "message": m.group(1).strip()})
            return

        # ── management-tenant ─────────────────────────────────────────────────
        m = _TENANT_RE.match(msg)
        if m:
            text = m.group(1)
            result_m = _TENANT_RESULT_RE.search(text)
            self.events.put({
                "type":   "tenant_update",
                "status": "done" if result_m else "running",
                "text":   text,
                "result": result_m.group(1) if result_m else None,
            })
            return

        # ── Indented detail lines ──────────────────────────────────────────────
        m = _DETAIL_RE.match(msg)
        if m:
            self._parse_detail(m.group(1))

    def _parse_detail(self, detail: str) -> None:
        """Emit structured events from detail / progress lines.

        Backend-info and pages matches are exclusive (early return); the DDL /
        PRD / module checks deliberately fall through so one line can emit
        several artifact events.
        """
        # Backend system info
        m = _BACKEND_INFO_RE.search(detail)
        if m:
            self.events.put({
                "type":      "backend_info",
                "system":    m.group(1),
                "modules":   int(m.group(2)),
                "endpoints": int(m.group(3)),
            })
            return

        # Pages list (step 04) — printed as a Python list literal
        m = _PAGES_RE.search(detail)
        if m:
            try:
                pages = ast.literal_eval(m.group(1))
                self.events.put({"type": "ir_pages", "pages": pages})
            except Exception:
                pass  # malformed/truncated literal — skip, keep parsing the stream
            return

        # DDL artifact
        m = _DDL_RE.search(detail)
        if m:
            self.events.put({"type": "artifact", "name": "ddl", "filename": m.group(1)})

        # PRD artifact
        m = _PRD_RE.search(detail)
        if m:
            self.events.put({"type": "artifact", "name": "prd"})

        # Module generation
        m = _MODULE_RE.search(detail)
        if m:
            self.events.put({"type": "api_module", "module": m.group(1)})

        # Step-02c: mantara + dalfin done
        if _MANTARA_DONE_RE.search(detail):
            self.events.put({"type": "mantara_done"})
            return

        # Step-07: DDL → database
        if _DDL_DB_OK_RE.search(detail):
            self.events.put({"type": "ddl_to_db", "success": True})
            return
        if _DDL_DB_FAIL_RE.search(detail):
            self.events.put({"type": "ddl_to_db", "success": False})