"""
Mock and lightweight LLM clients for demo purposes.
"""
import asyncio
import random
from typing import Any


class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category from keywords in the prompt
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"

        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))

        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]
        # Sample a subset of steps while preserving their logical order
        # (random.sample alone would scramble the sequence of the reasoning chain)
        num_steps = min(num_steps, len(base_steps))
        chosen = sorted(random.sample(range(len(base_steps)), num_steps))
        return [base_steps[i] for i in chosen]


class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazy load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a response using the Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format the prompt, including optional context
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in a thread for async use)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)

            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }
        except Exception as e:
            # Fall back to the mock client on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using the HF model."""
        try:
            client = self._get_client()
            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}
Reasoning steps (one per line):
1."""
            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from the response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove leading numbering such as "1." or "2."
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)
            return steps[:num_steps] if steps else ["Analysis in progress"]
        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
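

# --- Illustrative usage (a minimal sketch, not part of the original module) ---
# The `_demo` coroutine and the example prompts below are hypothetical; they only
# show how the clients above might be exercised for manual testing. Swap in
# HuggingFaceClient(...) to call the real Inference API instead of the mock.
async def _demo() -> None:
    client = MockLLMClient()
    result = await client.generate(
        "Should we split our monolith into microservices?",
        context="Team of 6 engineers, single Postgres database",
    )
    print(f"{result['response']} (confidence={result['confidence']})")
    for step in await client.generate_reasoning_steps("How do we scale our database tier?", num_steps=3):
        print(f"- {step}")


if __name__ == "__main__":
    asyncio.run(_demo())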