"""OpenAI backend — Structured Outputs for schema generation.

This is the default backend for Mantara v1. It uses the OpenAI API with
client.beta.chat.completions.parse() to guarantee schema-compliant JSON.

For v2 (local models), create a new backend implementing SchemaGenerator.
"""

import sys
import time

from openai import OpenAI, APITimeoutError, RateLimitError, APIConnectionError, APIStatusError

from config import (
    OPENAI_API_KEY, MODEL, MAX_TOKENS, TEMPERATURE,
    TIMEOUT_SECONDS, MAX_RETRIES,
)
from models import MantaraSchema


# Transient failures worth retrying with exponential backoff. Anything else
# (e.g. APIStatusError: auth failure, bad request) is treated as fatal.
_RETRYABLE = (APITimeoutError, RateLimitError, APIConnectionError)


def _log(msg: str):
    print(f"  [openai] {msg}", file=sys.stderr)


class OpenAIBackend:
    """OpenAI Structured Outputs backend.

    Uses client.beta.chat.completions.parse() so the SDK returns a
    schema-validated MantaraSchema instance directly.
    """

    def generate(self, system_prompt: str, user_input: str, model: str | None = None) -> MantaraSchema:
        """Call OpenAI with Structured Outputs and return a validated MantaraSchema.

        Retries up to MAX_RETRIES times on transient errors (timeouts, rate
        limits, connection failures) with exponential backoff (2s, 4s, 8s, ...).

        Args:
            system_prompt: System message steering the model.
            user_input: User message content.
            model: Optional model override; falls back to config MODEL.

        Returns:
            The parsed MantaraSchema from the model's structured output.

        Raises:
            ValueError: If the model refused the request or produced no
                structured output.
            RuntimeError: On a non-retryable API error, or once all retry
                attempts are exhausted.
        """
        client = OpenAI(api_key=OPENAI_API_KEY, timeout=TIMEOUT_SECONDS)
        use_model = model or MODEL
        total_attempts = 1 + MAX_RETRIES
        last_error: Exception | None = None

        for attempt in range(total_attempts):
            try:
                # monotonic clock: immune to wall-clock jumps (NTP, DST),
                # which would corrupt a time.time()-based latency measurement
                start = time.monotonic()

                completion = client.beta.chat.completions.parse(
                    model=use_model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_input},
                    ],
                    response_format=MantaraSchema,
                    max_tokens=MAX_TOKENS,
                    temperature=TEMPERATURE,
                )

                elapsed = round(time.monotonic() - start, 1)

                usage = completion.usage
                if usage:
                    _log(
                        f"model={use_model}  "
                        f"prompt_tokens={usage.prompt_tokens}  "
                        f"completion_tokens={usage.completion_tokens}  "
                        f"total_tokens={usage.total_tokens}  "
                        f"latency={elapsed}s"
                    )

                message = completion.choices[0].message

                # A refusal means the model declined on safety grounds;
                # surface it rather than returning a partial/empty schema.
                if message.refusal:
                    raise ValueError(f"Model refused the request: {message.refusal}")

                if message.parsed is None:
                    raise ValueError("Model returned no structured output")

                return message.parsed

            except _RETRYABLE as e:
                last_error = e
                if attempt < MAX_RETRIES:
                    # Exponential backoff: 2, 4, 8, ... seconds.
                    wait = 2 ** (attempt + 1)
                    _log(
                        f"Transient error (attempt {attempt + 1}/{total_attempts}): "
                        f"{type(e).__name__}: {e}  — retrying in {wait}s"
                    )
                    time.sleep(wait)
                else:
                    _log(f"All {total_attempts} attempts failed.")

            except APIStatusError as e:
                # Non-retryable HTTP-level error (auth, bad request, ...).
                raise RuntimeError(
                    f"OpenAI API error ({e.status_code}): {e.message}"
                ) from e

        # Chain from last_error so the original transient failure's
        # traceback is preserved (consistent with the APIStatusError branch).
        raise RuntimeError(
            f"Failed after {total_attempts} attempts. "
            f"Last error: {type(last_error).__name__}: {last_error}"
        ) from last_error
