#!/usr/bin/env python3
"""
uncloseai. - Python Client using OpenAI SDK
A Python client library for OpenAI-compatible APIs with streaming support
Compatible with vLLM, Ollama, and OpenAI-compatible endpoints
"""

import itertools
import os
from typing import Dict, Iterator, List, Optional

import requests
from openai import OpenAI


class uncloseai:
    """Client for OpenAI-compatible API endpoints using OpenAI SDK"""

    def __init__(
        self,
        model_endpoints: Optional[List[str]] = None,
        tts_endpoints: Optional[List[str]] = None,
        api_key: str = "dummy-key",
        timeout: int = 30
    ):
        """
        Initialize uncloseai. client with automatic model discovery

        Args:
            model_endpoints: List of model endpoint URLs (defaults to MODEL_ENDPOINT_* env vars)
            tts_endpoints: List of TTS endpoint URLs (defaults to TTS_ENDPOINT_* env vars)
            api_key: API key for authentication (default: "dummy-key")
            timeout: Request timeout in seconds
        """
        self.timeout = timeout
        self.api_key = api_key
        self.models: List[Dict] = []

        # Fall back to environment discovery when endpoints are not provided
        if model_endpoints is None:
            model_endpoints = self._discover_env_endpoints("MODEL_ENDPOINT")
        if tts_endpoints is None:
            tts_endpoints = self._discover_env_endpoints("TTS_ENDPOINT")

        # Discover models from each endpoint
        for endpoint in model_endpoints:
            self._discover_models_from_endpoint(endpoint)

        self.tts_endpoints: List[str] = tts_endpoints

    def _discover_env_endpoints(self, prefix: str) -> List[str]:
        """Discover endpoints from environment variables like PREFIX_1, PREFIX_2, ..."""
        endpoints = []
        # Probe numbered variables in order and stop at the first unset one,
        # so variables must be numbered consecutively starting at 1
        for i in itertools.count(1):
            endpoint = os.getenv(f"{prefix}_{i}")
            if not endpoint:
                break
            endpoints.append(endpoint)
        return endpoints
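    # Example (illustrative shell setup; the URLs and ports are assumptions):
    #   export MODEL_ENDPOINT_1="http://localhost:8000/v1"
    #   export MODEL_ENDPOINT_2="http://localhost:8001/v1"
    #   export TTS_ENDPOINT_1="http://localhost:8880/v1"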

    def _discover_models_from_endpoint(self, endpoint: str) -> None:
        """Discover available models from an endpoint's /models listing"""
        try:
            response = requests.get(f"{endpoint.rstrip('/')}/models", timeout=10)
            if response.status_code == 200:
                data = response.json()
                for model in data.get("data", []):
                    model_id = model["id"]

                    # Filter out modelperm-* and chatcmpl-* bookkeeping entries
                    if model_id.startswith(("modelperm-", "chatcmpl-")):
                        continue

                    self.models.append({
                        "id": model_id,
                        "endpoint": endpoint,
                        # vLLM reports max_model_len; fall back to 8192 otherwise
                        "max_tokens": model.get("max_model_len", 8192)
                    })
        except (requests.RequestException, ValueError):
            # Silently skip unreachable endpoints and malformed responses
            pass
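    # For reference, a typical /models payload looks like (illustrative
    # example; "max_model_len" is a vLLM extension, hence the 8192 fallback):
    #   {"object": "list",
    #    "data": [{"id": "meta-llama/Llama-3.1-8B-Instruct",
    #              "object": "model", "max_model_len": 8192}]}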

    def list_models(self) -> List[Dict]:
        """Return list of discovered models with their metadata"""
        return self.models.copy()

    def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        max_tokens: int = 100,
        temperature: float = 0.7,
        **kwargs
    ) -> Dict:
        """
        Non-streaming chat completion

        Args:
            messages: List of message dicts with 'role' and 'content'
            model: Model ID (defaults to first available model)
            max_tokens: Maximum tokens in response
            temperature: Sampling temperature
            **kwargs: Additional parameters to pass to the API

        Returns:
            Response dict with 'choices' containing the completion
        """
        model_info = self._get_model_info(model)

        client = OpenAI(
            base_url=model_info['endpoint'],
            api_key=self.api_key,
            timeout=self.timeout
        )

        response = client.chat.completions.create(
            model=model_info["id"],
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs
        )

        # Convert OpenAI response to dict format
        return {
            "id": response.id,
            "model": response.model,
            "choices": [
                {
                    "index": choice.index,
                    "message": {
                        "role": choice.message.role,
                        "content": choice.message.content
                    },
                    "finish_reason": choice.finish_reason
                }
                for choice in response.choices
            ],
            "usage": {
                "prompt_tokens": response.usage.prompt_tokens,
                "completion_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens
            }
        }
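    # Example (sketch): the returned dict keeps the OpenAI response shape,
    # so token accounting is available whenever the server reports usage:
    #   reply = client.chat(messages=[{"role": "user", "content": "Hi"}])
    #   tokens = reply["usage"]["total_tokens"] if reply["usage"] else None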

    def chat_stream(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        max_tokens: int = 500,
        temperature: float = 0.7,
        **kwargs
    ) -> Iterator[str]:
        """
        Streaming chat completion using OpenAI SDK

        Args:
            messages: List of message dicts with 'role' and 'content'
            model: Model ID (defaults to first available model)
            max_tokens: Maximum tokens in response
            temperature: Sampling temperature
            **kwargs: Additional parameters to pass to the API

        Yields:
            Content strings as they arrive
        """
        model_info = self._get_model_info(model)

        client = OpenAI(
            base_url=model_info['endpoint'],
            api_key=self.api_key,
            timeout=self.timeout
        )

        stream = client.chat.completions.create(
            model=model_info["id"],
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
            **kwargs
        )

        for chunk in stream:
            # Guard against chunks with no choices (e.g. a trailing usage
            # chunk from some servers) before indexing into the delta
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
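    # Example (sketch; "msgs" is a placeholder message list): the generator
    # can be printed incrementally or joined into a single string:
    #   full_reply = "".join(client.chat_stream(messages=msgs))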

    def tts(
        self,
        text: str,
        voice: str = "alloy",
        model: str = "tts-1",
        output_file: str = "speech.mp3"
    ) -> str:
        """
        Generate speech from text

        Args:
            text: Input text to convert to speech
            voice: Voice name (alloy, echo, fable, onyx, nova, shimmer)
            model: TTS model (tts-1 or tts-1-hd)
            output_file: Path to save the audio file

        Returns:
            Path to the saved audio file
        """
        if not self.tts_endpoints:
            raise ValueError("No TTS endpoints available")

        # Use the first configured TTS endpoint
        endpoint = self.tts_endpoints[0]

        client = OpenAI(
            base_url=endpoint,
            api_key=self.api_key,
            timeout=self.timeout
        )

        with client.audio.speech.with_streaming_response.create(
            model=model,
            voice=voice,
            input=text
        ) as response:
            response.stream_to_file(output_file)

        return output_file
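    # Example (sketch; voice and model names follow the OpenAI TTS API):
    #   client.tts(text="Testing, one two.", voice="nova",
    #              model="tts-1-hd", output_file="test.mp3")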

    def _get_model_info(self, model: Optional[str] = None) -> Dict:
        """Get model info by ID or return first available model"""
        if not self.models:
            raise ValueError("No models available. Check endpoint configuration.")

        if model is None:
            return self.models[0]

        for m in self.models:
            if m["id"] == model:
                return m

        raise ValueError(f"Model '{model}' not found in discovered models")


# Demo usage when run as script
if __name__ == "__main__":
    print("=== uncloseai. Python Client (OpenAI SDK) ===\n")

    # Initialize client (auto-discovers from environment)
    client = uncloseai()

    if not client.models:
        print("ERROR: No models discovered. Set environment variables:")
        print("  MODEL_ENDPOINT_1, MODEL_ENDPOINT_2, etc.")
        raise SystemExit(1)

    print(f"Discovered {len(client.models)} model(s)")
    for model in client.models:
        print(f"  - {model['id']} (max_tokens: {model['max_tokens']})")
    print()

    # Non-streaming chat example
    print("=== Non-Streaming Chat ===")
    response = client.chat(
        messages=[
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "Explain quantum computing in one sentence."}
        ],
        max_tokens=100
    )
    print(f"Model: {response['model']}")
    print(f"Response: {response['choices'][0]['message']['content']}\n")

    # Streaming chat example
    print("=== Streaming Chat ===")
    if len(client.models) > 1:
        model_id = client.models[1]["id"]
    else:
        model_id = None

    print(f"Model: {model_id or client.models[0]['id']}")
    print("Response: ", end="", flush=True)

    for content in client.chat_stream(
        messages=[
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "Write a Python function to check if a number is prime"}
        ],
        model=model_id,
        max_tokens=200
    ):
        print(content, end="", flush=True)

    print("\n")

    # TTS example
    if client.tts_endpoints:
        print("=== TTS Speech Generation ===")
        output_path = client.tts(
            text="Hello from uncloseai. Python client with OpenAI SDK! This demonstrates text to speech with streaming support.",
            voice="alloy",
            output_file="speech.mp3"
        )

        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path)
            print(f"[OK] Speech file created: {output_path} ({file_size} bytes)\n")

    print("=== Examples Complete ===")
