#!/usr/bin/env python3
"""
main.py — PDF / PPTX / Image → PRD + schema.json + schema.sql

Supported inputs:
    .pdf          → pages rendered as PNGs
    .pptx         → embedded images extracted per slide
    .png/.jpg/.jpeg/.webp/.gif/.bmp  → used directly
    folder        → all image files inside it

Keys are loaded from .env automatically — no export commands needed.
Edit .env to set OPENAI_API_KEY and AWS Bedrock credentials.
"""

import base64
import json
import os
import subprocess
import sys
import time
from pathlib import Path

# ── Load .env before anything else ────────────────────────────────────────────
def _load_dotenv(env_path: Path) -> None:
    """Copy ``KEY=VALUE`` pairs from *env_path* into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are ignored.
    Keys and values are whitespace-stripped and surrounding single or double
    quotes are removed from the value.  Variables that already exist in the
    environment are left untouched.  A missing file is silently skipped.
    """
    if not env_path.is_file():
        return
    # "utf-8-sig" decodes as UTF-8 regardless of the platform locale and drops a
    # leading byte-order mark, which would otherwise become part of the first key.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if not key:
            # A line such as "=value" has no variable name; setting an empty
            # name in os.environ raises, so skip it instead of aborting import.
            continue
        os.environ.setdefault(key, value.strip().strip('"').strip("'"))


ROOT = Path(__file__).parent.resolve()
_env_path = ROOT / ".env"
_load_dotenv(_env_path)

# ── Paths ──────────────────────────────────────────────────────────────────────
# Per-stage directories, all located directly under ROOT.  They are used as the
# working directory of the subprocess that runs the corresponding step.
STEP02_DIR = ROOT.joinpath("step-02-vision-extraction")
STEP03_DIR = ROOT.joinpath("step-03-cir-consolidation")
STEP04_DIR = ROOT.joinpath("step-04-prd")
STEP05_DIR = ROOT.joinpath("step-05-schema")
# NOTE(review): "dll" in this folder name looks like a typo for "ddl"; the string
# has to match the directory on disk, so confirm before renaming anything.
STEP07_DIR = ROOT.joinpath("step-07-save-dll_to-db")
# Handed to child processes through the MANTARA_ENGINE_PATH environment variable.
MANTARA_DIR = ROOT.joinpath("mantara_v8")
# Parent folder under which every pipeline run gets its own sub-directory.
RUNS_ROOT = ROOT.joinpath("runs")

# Lower-case file suffixes (including the dot) that are accepted as images.
IMAGE_EXTS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".gif",
    ".bmp",
}


# ── Key validation ─────────────────────────────────────────────────────────────

def check_keys():
    """Validate API credentials found in the environment.

    Exits the process with status 1 when OPENAI_API_KEY is missing, empty, the
    literal placeholder ``sk-...`` or contains ``REPLACE``.

    Returns:
        True when Bedrock credentials are present (either
        AWS_BEARER_TOKEN_BEDROCK, or both AWS_ACCESS_KEY_ID and
        AWS_SECRET_ACCESS_KEY); otherwise prints a warning and returns False.
    """
    env = os.environ

    api_key = env.get("OPENAI_API_KEY", "")
    looks_unset = api_key in ("", "sk-...") or "REPLACE" in api_key
    if looks_unset:
        print("ERROR: OPENAI_API_KEY is not set in .env")
        print(f"  Edit {_env_path} and set OPENAI_API_KEY=sk-...")
        sys.exit(1)

    # A bearer token alone is sufficient; otherwise both halves of the
    # access-key pair are required.
    if env.get("AWS_BEARER_TOKEN_BEDROCK"):
        return True
    if env.get("AWS_ACCESS_KEY_ID") and env.get("AWS_SECRET_ACCESS_KEY"):
        return True

    print("WARNING: AWS Bedrock credentials not set — Step 05 Mantara will use OpenAI instead.")
    print(f"  Edit {_env_path} and set AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY")
    return False


# ── Input handlers ─────────────────────────────────────────────────────────────

def _to_data_uri(path: Path) -> str:
    """Return the file at *path* as a base64-encoded ``data:`` URI.

    The MIME type is chosen from the (case-insensitive) file suffix; any suffix
    that is not one of .jpg/.jpeg/.webp/.gif/.bmp is labelled ``image/png``.
    """
    suffix = path.suffix.lower()
    if suffix in (".jpg", ".jpeg"):
        mime = "image/jpeg"
    elif suffix == ".webp":
        mime = "image/webp"
    elif suffix == ".gif":
        mime = "image/gif"
    elif suffix == ".bmp":
        mime = "image/bmp"
    else:
        # Covers ".png" as well as every unrecognised suffix.
        mime = "image/png"

    payload = base64.b64encode(path.read_bytes()).decode()
    return "data:" + mime + ";base64," + payload


def pdf_to_images(pdf_path: Path, dpi: int = 144) -> list:
    """Render every page of a PDF as a PNG and return the pages as data URIs.

    Args:
        pdf_path: Location of the PDF file.
        dpi: Render resolution; the zoom factor passed to PyMuPDF is ``dpi / 72``.

    Returns:
        One ``data:image/png;base64,…`` string per page, in page order.

    Exits the process with status 1 when PyMuPDF (``fitz``) is not installed.
    """
    try:
        import fitz
    except ImportError:
        print("ERROR: pymupdf not installed.  pip install pymupdf")
        sys.exit(1)

    doc = fitz.Document(str(pdf_path))
    try:
        zoom = dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)
        total = len(doc)  # evaluated once instead of on every iteration
        uris = []
        for number, page in enumerate(doc, start=1):
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            encoded = base64.b64encode(pix.tobytes("png")).decode()
            uris.append(f"data:image/png;base64,{encoded}")
            # "\r" keeps the progress counter on a single terminal line.
            print(f"  page {number}/{total} rendered", end="\r")
    finally:
        # Release the underlying file handle even if rendering a page fails.
        doc.close()
    print()
    return uris


def pptx_to_images(pptx_path: Path) -> list:
    """Collect the pictures embedded in a PowerPoint file as data URIs.

    Only shapes yielded directly by ``slide.shapes`` that are ``Picture``
    instances are considered.  Each picture is emitted with its own content
    type (``image/png`` when the library reports none).  A warning is printed
    when the deck contains no such pictures.

    Exits the process with status 1 when python-pptx is not installed.
    """
    try:
        from pptx import Presentation
        from pptx.shapes.picture import Picture
    except ImportError:
        print("ERROR: python-pptx not installed.  pip install python-pptx")
        sys.exit(1)

    deck = Presentation(str(pptx_path))
    encoded_images = []
    for slide_no, slide in enumerate(deck.slides, start=1):
        pictures = [candidate for candidate in slide.shapes if isinstance(candidate, Picture)]
        for picture in pictures:
            embedded = picture.image
            content_type = embedded.content_type or "image/png"
            payload = base64.b64encode(embedded.blob).decode()
            encoded_images.append(f"data:{content_type};base64,{payload}")
        # "\r" rewrites the same terminal line for each slide.
        print(f"  slide {slide_no}/{len(deck.slides)} processed", end="\r")
    print()

    if not encoded_images:
        print("WARNING: no embedded images in PPTX. Try exporting slides as PNGs first.")
    return encoded_images


def load_input(input_path: str) -> tuple:
    """Load *input_path* and return ``(name, list_of_data_uris)``.

    Accepted inputs are a folder of images, a ``.pdf``, a ``.pptx`` or a single
    image file whose suffix is in ``IMAGE_EXTS``.  ``name`` is the final path
    component of the resolved input.

    Exits the process with status 1 when the path does not exist, when a folder
    contains no image files, or when the file type is unsupported.
    """
    p = Path(input_path).resolve()
    if not p.exists():
        print(f"ERROR: not found: {p}")
        sys.exit(1)

    # Directories are handled first so that a folder whose name happens to end
    # in ".pdf" or an image suffix is not mistaken for a single file.
    if p.is_dir():
        print("[0/5] Loading images from folder…")
        files = sorted(
            f for f in p.iterdir()
            # is_file() skips sub-directories that merely carry an image suffix.
            if f.is_file() and f.suffix.lower() in IMAGE_EXTS
        )
        if not files:
            print(f"ERROR: no images in {p}")
            sys.exit(1)
        return p.name, [_to_data_uri(f) for f in files]

    ext = p.suffix.lower()
    if ext == ".pdf":
        print("[0/5] Rendering PDF pages…")
        return p.name, pdf_to_images(p)
    if ext == ".pptx":
        print("[0/5] Extracting PPTX images…")
        return p.name, pptx_to_images(p)
    if ext in IMAGE_EXTS:
        print("[0/5] Loading image…")
        return p.name, [_to_data_uri(p)]

    print(f"ERROR: unsupported type '{ext}'. Use .pdf, .pptx, .png, .jpg or a folder.")
    sys.exit(1)


# ── Pipeline ───────────────────────────────────────────────────────────────────

def create_run_dir(name: str, images: list, domain: str) -> Path:
    """Create a fresh run directory and write its ``input.json``.

    The directory is ``RUNS_ROOT/<epoch-milliseconds>-pdf`` and is created
    together with a ``cir`` sub-directory.  ``input.json`` records the run id,
    a prompt mentioning *name* and the number of images, the images
    themselves, a null ``figma_url`` and the domain (null when *domain* is
    empty).

    Returns:
        Path of the run directory.
    """
    millis = int(time.time() * 1000)
    run_id = str(millis) + "-pdf"
    run_dir = RUNS_ROOT / run_id
    run_dir.joinpath("cir").mkdir(parents=True, exist_ok=True)

    # Key order is kept as-is because it determines the layout of the JSON file.
    payload = {
        "run_id": run_id,
        "prompt": f"UI flows from {name} ({len(images)} pages)",
        "images": images,
        "figma_url": None,
        "domain": domain or None,
    }
    serialized = json.dumps(payload, indent=2)
    run_dir.joinpath("input.json").write_text(serialized)
    return run_dir


def run_step(label: str, cwd: Path, module_args: list, extra_env: dict | None = None) -> bool:
    """Run one pipeline step as ``python -m <module_args…>`` and report success.

    Args:
        label: Heading printed between two horizontal rules before the step runs.
        cwd: Working directory for the child process.
        module_args: Module name followed by its command-line arguments.
        extra_env: Optional variables layered on top of the inherited
            environment (applied after MANTARA_ENGINE_PATH is set).

    Returns:
        True when the child process exits with status 0.
    """
    rule = "─" * 56
    print(f"\n{rule}\n  {label}\n{rule}")

    child_env = dict(os.environ)
    child_env["MANTARA_ENGINE_PATH"] = str(MANTARA_DIR)
    if extra_env:
        child_env.update(extra_env)

    command = [sys.executable, "-m"] + module_args
    completed = subprocess.run(command, cwd=str(cwd), env=child_env)
    return completed.returncode == 0


def run_pipeline(input_path: str, domain: str = "", data_uris: list | None = None) -> Path | None:
    """Run the whole pipeline for one input and return its run directory.

    Args:
        input_path: File or folder to process.  When *data_uris* is given it is
            only used to derive the display name ("images" if empty).
        domain: Optional domain hint stored in ``input.json``; empty means
            auto-detect.
        data_uris: Pre-encoded image data URIs.  When not None, loading from
            *input_path* is skipped and these are used as-is.

    Returns:
        The run directory on success, or None when Step 02, 03 or 04 fails or
        Step 05 produces no ``schema.json``.  A Step 07 failure only prints a
        warning and does not change the return value.

    The process exits (via ``check_keys`` / ``load_input``) when the OpenAI key
    is missing or the input cannot be loaded.
    """
    has_bedrock = check_keys()

    if data_uris is not None:
        name = Path(input_path).name if input_path else "images"
        images = data_uris
    else:
        name, images = load_input(input_path)
    print(f"      {len(images)} pages/images loaded")

    run_dir = create_run_dir(name, images, domain)
    print(f"\n  Input:  {name}")
    print(f"  Domain: {domain or '(auto-detect)'}")
    print(f"  Run:    {run_dir.name}")

    # Step 02 — Vision extraction (OpenAI gpt-4o)
    print("\n[1/5] Step 02 — Vision extraction (gpt-4o)…")
    if not run_step("Step 02 — Vision Extraction", STEP02_DIR,
                    ["pipeline.runner", "--run-dir", str(run_dir), "--real"]):
        print("\nERROR: Step 02 failed.")
        return None

    # Step 03 — CIR consolidation
    print("\n[2/5] Step 03 — CIR consolidation…")
    if not run_step("Step 03 — CIR Consolidation", STEP03_DIR,
                    ["pipeline.runner", "--run-dir", str(run_dir)]):
        print("\nERROR: Step 03 failed.")
        return None

    # Step 04 — PRD generation
    print("\n[3/5] Step 04 — PRD generation…")
    if not run_step("Step 04 — PRD Generation", STEP04_DIR,
                    ["pipeline.runner", "--run-dir", str(run_dir)]):
        print("\nERROR: Step 04 failed.")
        return None

    # Step 05 — Mantara schema (Bedrock → Claude, else OpenAI fallback)
    print("\n[4/5] Step 05 — Mantara schema (Claude via Bedrock)…" if has_bedrock
          else "\n[4/5] Step 05 — Mantara schema (OpenAI fallback)…")
    step05_env = {"MANTARA_BACKEND": "bedrock" if has_bedrock else "openai"}
    step05_ok = run_step("Step 05 — Mantara v8 Schema", STEP05_DIR,
                         ["pipeline.runner", "--run-dir", str(run_dir)],
                         extra_env=step05_env)
    schema_json = run_dir / "schema" / "schema.json"
    schema_sql  = run_dir / "schema" / "schema.sql"
    # Success of Step 05 is judged by the presence of schema.json rather than by
    # the exit status alone: a non-zero exit with a schema on disk is tolerated.
    if not schema_json.exists():
        print("\nERROR: Step 05 failed — schema.json not produced.")
        return None
    if not step05_ok:
        # Only warn when the step actually reported a problem; a clean run
        # must not be flagged.
        print("\nWARNING: Step 05 completed with validation warnings — schema was generated.")

    # Step 07 — Save DDL to database.  This step is launched as a script
    # (main.py) rather than with "-m", so it cannot go through run_step().
    print("\n[5/5] Step 07 — Save DDL to database…")
    env = os.environ.copy()
    env["MANTARA_ENGINE_PATH"] = str(MANTARA_DIR)
    step07_ok = subprocess.run(
        [sys.executable, str(STEP07_DIR / "main.py"),
         "--schema-path", str(schema_sql)],
        cwd=str(STEP07_DIR),
        env=env,
    ).returncode == 0
    if not step07_ok:
        print("\nWARNING: Step 07 failed — DDL was not deployed to the database.")

    print(f"\n{'═'*56}")
    print("  ✅  Done!")
    print(f"  full_prd.md: {run_dir / 'prd' / 'full_prd.md'}  ← single combined file")
    print(f"  schema.json: {schema_json}")
    print(f"  schema.sql:  {schema_sql}")
    if step07_ok:
        print("  db:          schema deployed to database ✅")
    print(f"{'═'*56}\n")
    return run_dir


# ── Entry point (currently disabled) — uncomment the block below and edit INPUT_FILE and DOMAIN to run as a script ──

# if __name__ == "__main__":

#     INPUT_FILE = "/Users/rijan/Downloads/LLMatica-Forge/ASN_Flow_Final_Combined.pdf"
#     DOMAIN     = "warehouse"

#     sys.exit(0 if run_pipeline(INPUT_FILE, domain=DOMAIN) is not None else 1)
