"""
Mock and lightweight LLM clients for demo purposes.
"""
import asyncio
import random
from typing import Any


class MockLLMClient:
    """Mock LLM client that generates plausible demo responses."""

    def __init__(self):
        self.response_templates = {
            "architecture": [
                "Consider scalability requirements and team expertise",
                "Evaluate coupling, deployment complexity, and operational overhead",
                "Balance between development speed and long-term maintainability",
            ],
            "optimization": [
                "Profile first to identify actual bottlenecks",
                "Consider memory-mapped files and streaming processing",
                "Implement parallel processing with appropriate chunk sizes",
            ],
            "database": [
                "Consider data consistency requirements and query patterns",
                "Evaluate write-heavy vs read-heavy workload characteristics",
                "Plan for horizontal scaling and data distribution strategies",
            ],
            "distributed": [
                "Implement proper failure detection and recovery mechanisms",
                "Use circuit breakers and bulkhead patterns for resilience",
                "Consider eventual consistency vs strong consistency trade-offs",
            ],
            "default": [
                "Break down the problem into smaller components",
                "Consider trade-offs between different approaches",
                "Evaluate based on specific use case requirements",
            ],
        }

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a mock response based on the prompt and optional context."""
        # Simulate processing time
        await asyncio.sleep(random.uniform(0.1, 0.3))

        # Determine response category from keywords in the prompt
        prompt_lower = prompt.lower()
        if "architecture" in prompt_lower or "microservice" in prompt_lower or "monolith" in prompt_lower:
            category = "architecture"
        elif "optim" in prompt_lower or "performance" in prompt_lower or "process" in prompt_lower:
            category = "optimization"
        elif "database" in prompt_lower or "sql" in prompt_lower or "nosql" in prompt_lower:
            category = "database"
        elif "distribut" in prompt_lower or "fault" in prompt_lower or "rate limit" in prompt_lower:
            category = "distributed"
        else:
            category = "default"

        templates = self.response_templates[category]

        # Generate response with some randomness
        response = random.choice(templates)
        confidence = random.uniform(0.6, 0.95)

        # Add more detail based on prompt length (simulating "understanding")
        if len(prompt) > 100:
            confidence = min(0.95, confidence + 0.1)
            response += f". Additionally, {random.choice(self.response_templates['default'])}"

        # Lightly incorporate context to simulate conditioning
        context_snippet = context.strip()
        if context_snippet:
            confidence = min(0.99, confidence + 0.05)
            response += f" (context signal: {context_snippet[:60]}{'...' if len(context_snippet) > 60 else ''})"

        return {
            "response": response,
            "confidence": round(confidence, 3),
            "tokens_used": len(prompt.split()) * 2 + len(response.split()),
        }

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate mock reasoning steps."""
        await asyncio.sleep(random.uniform(0.05, 0.15))

        base_steps = [
            f"Analyzing query: '{query[:50]}...'",
            "Identifying key requirements and constraints",
            "Evaluating potential approaches",
            "Considering trade-offs and implications",
            "Synthesizing recommendations based on analysis",
            "Validating conclusions against requirements",
        ]
        # Sample a subset of steps while preserving their logical order
        # (random.sample alone would scramble the sequence of the reasoning chain)
        num_steps = min(num_steps, len(base_steps))
        chosen = sorted(random.sample(range(len(base_steps)), num_steps))
        return [base_steps[i] for i in chosen]


class HuggingFaceClient:
    """Lightweight Hugging Face Inference API client."""

    def __init__(self, model_id: str = "mistralai/Mistral-7B-Instruct-v0.2"):
        """Initialize with a Hugging Face model.

        Args:
            model_id: The model ID on Hugging Face Hub
        """
        self.model_id = model_id
        self._client = None

    def _get_client(self):
        """Lazy load the HF client."""
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(model=self.model_id)
            except ImportError:
                raise ImportError("huggingface_hub not installed. Install with: pip install huggingface_hub")
        return self._client

    async def generate(self, prompt: str, context: str = "") -> dict[str, Any]:
        """Generate a response using the Hugging Face Inference API."""
        try:
            client = self._get_client()

            # Format the prompt, including optional context
            if context:
                full_prompt = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
            else:
                full_prompt = f"Question: {prompt}\n\nProvide a concise, technical answer:\n\nAnswer:"

            # Call HF Inference API (sync call wrapped in a thread for async use)
            response_text = await asyncio.to_thread(
                client.text_generation, full_prompt, max_new_tokens=150, temperature=0.7, do_sample=True
            )

            # Estimate confidence based on response characteristics
            confidence = min(0.95, 0.6 + len(response_text) / 500)

            return {
                "response": response_text.strip(),
                "confidence": round(confidence, 3),
                "tokens_used": len(full_prompt.split()) + len(response_text.split()),
            }
        except Exception as e:
            # Fall back to the mock client on error
            print(f"HF Inference error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate(prompt, context)

    async def generate_reasoning_steps(self, query: str, num_steps: int = 3) -> list[str]:
        """Generate reasoning steps using the HF model."""
        try:
            client = self._get_client()
            prompt = f"""Break down this question into {num_steps} reasoning steps:
Question: {query}
Reasoning steps (one per line):
1."""
            response = await asyncio.to_thread(client.text_generation, prompt, max_new_tokens=200, temperature=0.5)

            # Parse steps from the response
            lines = response.strip().split("\n")
            steps = []
            for line in lines:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Remove leading numbering such as "1." or "2."
                    if line[0].isdigit() and "." in line[:3]:
                        line = line.split(".", 1)[1].strip()
                    steps.append(line)
            return steps[:num_steps] if steps else ["Analysis in progress"]
        except Exception as e:
            print(f"HF reasoning error: {e}. Falling back to mock.")
            mock = MockLLMClient()
            return await mock.generate_reasoning_steps(query, num_steps)
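

# --- Illustrative usage (a minimal sketch, not part of the original module) ---
# The `_demo` coroutine and the example prompts below are hypothetical; they only
# show how the clients above might be exercised for manual testing. Swap in
# HuggingFaceClient(...) to call the real Inference API instead of the mock.
async def _demo() -> None:
    client = MockLLMClient()
    result = await client.generate(
        "Should we split our monolith into microservices?",
        context="Team of 6 engineers, single Postgres database",
    )
    print(f"{result['response']} (confidence={result['confidence']})")
    for step in await client.generate_reasoning_steps("How do we scale our database tier?", num_steps=3):
        print(f"- {step}")


if __name__ == "__main__":
    asyncio.run(_demo())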