| """ | |
| Mock and lightweight LLM clients for demo purposes. | |
| """ | |
| import asyncio | |
| import random | |
| from typing import Any | |

class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"
        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))
        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]
        return random.sample(base_steps, min(num_steps, len(base_steps)))

class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazily load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a response using the Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format prompt
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in async)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)
            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }
        except Exception as e:
            # Fall back to the mock client on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using the HF model."""
        try:
            client = self._get_client()
            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}
Reasoning steps (one per line):
1."""
            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove leading numbering such as "1."
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)
            return steps[:num_steps] if steps else ["Analysis in progress"]
        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
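

# ---------------------------------------------------------------------------
# Illustrative usage sketch (an assumption added for clarity, not part of the
# original module's API): shows how a caller might exercise both clients.
# MockLLMClient needs no network access; HuggingFaceClient calls the Hugging
# Face Inference API and falls back to the mock client on any error.
# ---------------------------------------------------------------------------
async def _demo() -> None:
    mock = MockLLMClient()

    # Generate a canned answer and its mock confidence score.
    result = await mock.generate("Should we split our monolith into microservices?")
    print("response:", result["response"])
    print("confidence:", result["confidence"])

    # Generate a handful of mock reasoning steps.
    for step in await mock.generate_reasoning_steps("Design a distributed rate limiter", num_steps=3):
        print("-", step)

    # Uncomment to try the real Inference API instead (requires the
    # huggingface_hub package and, for gated models, an HF token configured
    # in the environment).
    # hf = HuggingFaceClient()
    # print(await hf.generate("Compare SQL and NoSQL for write-heavy workloads"))


if __name__ == "__main__":
    asyncio.run(_demo())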