"""
Mock and lightweight LLM clients for demo purposes.
"""
import asyncio
import random
from typing import Any
class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }
    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"
        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }
    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))
        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]
        return random.sample(base_steps, min(num_steps, len(base_steps)))
class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazy load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client
    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate response using Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format prompt
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in async)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)
            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }
        except Exception as e:
            # Fallback to mock on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)
    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using HF model."""
        try:
            client = self._get_client()
            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}
Reasoning steps (one per line):
1."""
            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove numbering
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)
            return steps[:num_steps] if steps else ["Analysis in progress"]
        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
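

# Illustrative usage sketch: runs the offline MockLLMClient end to end so the
# demo works without network access or huggingface_hub installed.
# HuggingFaceClient exposes the same generate()/generate_reasoning_steps()
# interface and can be swapped in when those are available. The example query
# and context strings below are placeholders, not part of the original module.
if __name__ == "__main__":

    async def _demo() -> None:
        client = MockLLMClient()

        # Single-turn generation: returns response text, confidence, and a token estimate
        result = await client.generate(
            "Should we split the monolith into microservices?",
            context="Team of five engineers, single Postgres database",
        )
        print(result["response"])
        print("confidence:", result["confidence"], "tokens:", result["tokens_used"])

        # Mock chain-of-thought style breakdown
        for step in await client.generate_reasoning_steps("How do we add rate limiting?", num_steps=3):
            print("-", step)

    asyncio.run(_demo())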