"""
Mock and lightweight LLM clients for demo purposes.
"""
import asyncio
import random
from typing import Any


class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"
        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))
        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]
        return random.sample(base_steps, min(num_steps, len(base_steps)))


class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazy load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate response using Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format prompt
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in async)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)

            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }
        except Exception as e:
            # Fallback to mock on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using HF model."""
        try:
            client = self._get_client()
            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}
Reasoning steps (one per line):
1."""
            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove numbering
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)

            return steps[:num_steps] if steps else ["Analysis in progress"]
        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
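

# Illustrative usage sketch (an assumption, not part of the original module):
# shows how a caller might drive these clients from a script. The MockLLMClient
# path needs no dependencies; the HuggingFaceClient path additionally assumes
# `huggingface_hub` is installed and the Inference API is reachable, otherwise
# it falls back to the mock client as implemented above.
if __name__ == "__main__":

    async def _demo() -> None:
        mock = MockLLMClient()

        # Generate a canned answer; the category is picked from keywords in the prompt.
        result = await mock.generate("Should we split the monolith into microservices?")
        print("mock response:", result["response"])
        print("confidence:", result["confidence"])

        # Generate a few mock reasoning steps for a query.
        steps = await mock.generate_reasoning_steps("How do we rate limit a distributed API?", num_steps=3)
        for step in steps:
            print("step:", step)

    asyncio.run(_demo())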