"""
Mock and lightweight LLM clients for demo purposes.
"""

import asyncio
import random
from typing import Any


class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"

        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))

        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]

        return random.sample(base_steps, min(num_steps, len(base_steps)))


class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazy load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate response using Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format prompt
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in async)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)

            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }

        except Exception as e:
            # Fallback to mock on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using HF model."""
        try:
            client = self._get_client()

            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}

Reasoning steps (one per line):
1."""

            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove numbering
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)

            return steps[:num_steps] if steps else ["Analysis in progress"]

        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
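

# --- Example usage (illustrative sketch, not part of the module's required API) ---
# A minimal async driver showing how both clients expose the same
# generate()/generate_reasoning_steps() interface. The demo prompts and the
# choice of MockLLMClient as the default are assumptions for illustration;
# swap in HuggingFaceClient(...) if huggingface_hub is installed.
if __name__ == "__main__":

    async def _demo() -> None:
        client = MockLLMClient()

        # Single-shot generation: returns response text, confidence, and a token estimate.
        result = await client.generate("Should we split the monolith into microservices?")
        print(f"Response ({result['confidence']:.2f}): {result['response']}")

        # Multi-step reasoning: returns a short list of step strings.
        steps = await client.generate_reasoning_steps("Design a distributed rate limiter", num_steps=3)
        for i, step in enumerate(steps, start=1):
            print(f"{i}. {step}")

    asyncio.run(_demo())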