Arif committed on
Commit 09c07f9 · 1 Parent(s): 697bc47

Updated config file so the LLM uses Docker Model Runner (Ollama-style) for better reproducibility
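For context, everything is read from .env.local. A minimal, illustrative .env.local matching the defaults introduced in this commit might look like the sketch below (variable names come from the Field(env=...) declarations in config.py; the values are examples, not requirements):

    # .env.local (illustrative values only)
    FASTAPI_ENV=development
    FASTAPI_DEBUG=false
    LOG_LEVEL=INFO
    DEBUG=false                  # false = Docker Model Runner, true = MLX
    LLM_MODE=docker_model_runner
    MODEL_RUNNER_URL=http://host.docker.internal:11434/v1
    MODEL_NAME=llama3.2:1B-Q4_0
    DOCKER_TIMEOUT=300
    MAX_FILE_SIZE_MB=50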

backend/app/config.py CHANGED
@@ -2,55 +2,114 @@
 Configuration for LLM Data Analyzer
 Supports both MLX (local) and Docker Model Runner modes
 All values from .env.local - NO hardcoded defaults
+Follows Metis pattern for portability
 """
 import logging
+import os
 from functools import lru_cache
 from pydantic_settings import BaseSettings
+from pydantic import Field
+
 
 logger = logging.getLogger(__name__)
 
+
 # Conditional MLX import
 HAS_MLX = False
 
+
 class Settings(BaseSettings):
-    """Main settings - all from .env.local"""
+    """Main settings - all from .env.local with sensible defaults"""
 
     # ===== CORE SETTINGS =====
-    fastapi_env: str
-    fastapi_debug: bool
-    log_level: str
+    fastapi_env: str = Field(default="development", env="FASTAPI_ENV")
+    fastapi_debug: bool = Field(default=False, env="FASTAPI_DEBUG")
+    log_level: str = Field(default="INFO", env="LOG_LEVEL")
 
     # ===== LLM MODE SELECTION =====
     # True = Use MLX locally (macOS Apple Silicon)
     # False = Use Docker Model Runner
-    debug: bool
+    debug: bool = Field(default=False, env="DEBUG")
+    llm_mode: str = Field(
+        default="docker_model_runner",
+        env="LLM_MODE",
+        description="'mlx', 'docker_model_runner', or 'mock'"
+    )
 
     # ===== MLX MODE (DEBUG=true) =====
-    llm_model_name_mlx: str
-    llm_max_tokens: int
-    llm_temperature: float
-    llm_device: str
+    llm_model_name_mlx: str = Field(
+        default="mlx-community/Llama-3.2-3B-Instruct-4bit",
+        env="LLM_MODEL_NAME_MLX",
+        description="MLX model from HuggingFace"
+    )
+    llm_max_tokens: int = Field(
+        default=512,
+        env="LLM_MAX_TOKENS",
+        description="Max tokens for generation"
+    )
+    llm_temperature: float = Field(
+        default=0.7,
+        env="LLM_TEMPERATURE",
+        description="Temperature for sampling (0.0-1.0)"
+    )
+    llm_device: str = Field(
+        default="auto",
+        env="LLM_DEVICE",
+        description="MLX device: 'auto', 'cpu', 'gpu'"
+    )
 
     # ===== DOCKER MODEL RUNNER MODE (DEBUG=false) =====
-    docker_model_runner_url: str
-    llm_model_name_docker: str
-    docker_timeout: int
+    # Metis pattern: stateless HTTP API to DMR on host
+    model_runner_url: str = Field(
+        default="http://host.docker.internal:11434/v1",
+        env="MODEL_RUNNER_URL",
+        description="Docker Model Runner API endpoint (from containers use host.docker.internal)"
+    )
+    model_name: str = Field(
+        default="llama3.2:1B-Q4_0",
+        env="MODEL_NAME",
+        description="Model name as shown in 'docker model ls'"
+    )
+    docker_timeout: int = Field(
+        default=300,
+        env="DOCKER_TIMEOUT",
+        description="Timeout for Docker Model Runner requests (seconds)"
+    )
+
+    # ===== BACKWARDS COMPATIBILITY (deprecated) =====
+    # These are kept for backwards compatibility but use new names above
+    @property
+    def docker_model_runner_url(self) -> str:
+        """Backwards compatible alias for model_runner_url"""
+        return self.model_runner_url
+
+    @property
+    def llm_model_name_docker(self) -> str:
+        """Backwards compatible alias for model_name"""
+        return self.model_name
 
     # ===== DATA PROCESSING =====
-    max_file_size_mb: int
+    max_file_size_mb: int = Field(
+        default=50,
+        env="MAX_FILE_SIZE_MB",
+        description="Maximum file upload size in MB"
+    )
 
-    # Hardcoded (lists can't be parsed from env vars)
+    # Hardcoded (lists can't be parsed from env vars easily)
     supported_file_types: list = ["csv", "xlsx", "xls"]
 
     class Config:
         env_file = ".env.local"
         case_sensitive = False
+        extra = "allow"
+
 
 @lru_cache
 def get_settings():
     """Get cached settings from .env.local"""
     return Settings()
 
+
 # Check if MLX is available (only needed for DEBUG=true)
 try:
     import mlx.core
@@ -62,8 +121,9 @@ except ImportError:
     HAS_MLX = False
     logger.warning("⚠️ MLX not available (will use Docker Model Runner or mock)")
 
+
 settings = get_settings()
 
-# Export both settings and MLX availability
-__all__ = ["settings", "get_settings", "HAS_MLX"]
 
+# Export both settings and MLX availability
+__all__ = ["settings", "get_settings", "HAS_MLX"]
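A quick sketch of how the updated settings would be consumed elsewhere in the backend, assuming the package root makes this module importable as app.config (the deprecated attribute names keep working through the property aliases):

    # sketch only - assumes the module is importable as app.config
    from app.config import settings, HAS_MLX

    print(settings.model_runner_url)          # new name, e.g. http://host.docker.internal:11434/v1
    print(settings.docker_model_runner_url)   # deprecated alias, same value via @property
    print(settings.model_name)                # e.g. llama3.2:1B-Q4_0

    if not HAS_MLX:
        print("MLX unavailable - Docker Model Runner (or mock) will be used")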
backend/app/services/llm_service.py CHANGED
@@ -6,12 +6,15 @@ Dual-mode LLM Service
 """
 import asyncio
 import logging
+import os
 from abc import ABC, abstractmethod
 from typing import List, Optional
 import httpx
 
+
 logger = logging.getLogger(__name__)
 
+
 # Import MLX conditionally
 try:
     from mlx_lm import load
@@ -21,6 +24,7 @@ except ImportError:
     HAS_MLX = False
 
 
+
 class BaseLLMService(ABC):
     """Abstract base class for LLM services"""
 
@@ -43,7 +47,7 @@ class BaseLLMService(ABC):
         pass
 
     async def chat(self, messages: List[dict], system_prompt: str = None) -> str:
-        """Chat interface"""
+        """Chat interface - converts chat format to prompt format"""
         prompt = self._build_prompt(messages, system_prompt)
         return await self.generate(prompt)
 
@@ -63,6 +67,7 @@ class BaseLLMService(ABC):
         return "".join(prompt_parts)
 
 
+
 class LLMServiceMLX(BaseLLMService):
     """MLX implementation for Apple Silicon (DEBUG=true)"""
 
@@ -134,17 +139,33 @@ class LLMServiceMLX(BaseLLMService):
 
 
 
+
 class LLMServiceDockerModelRunner(BaseLLMService):
-    """Docker Model Runner implementation - OpenAI-compatible API"""
+    """Docker Model Runner implementation - OpenAI-compatible API
+
+    Uses stateless HTTP calls to DMR running on host machine.
+    Optimal for Apple Silicon GPU acceleration via llama.cpp Metal backend.
+    """
 
-    def __init__(self, model_name: str, max_tokens: int, temperature: float, docker_url: str, timeout: int = 300):
+    def __init__(
+        self,
+        model_name: str,
+        max_tokens: int,
+        temperature: float,
+        docker_url: str,
+        timeout: int = 300
+    ):
         super().__init__(model_name, max_tokens, temperature)
         self.docker_url = docker_url.rstrip("/")  # Remove trailing slash
         self.timeout = timeout
         self.client = None
 
     async def load_model(self) -> bool:
-        """Initialize Docker Model Runner connection"""
+        """Initialize Docker Model Runner connection
+
+        Tests connectivity to the DMR HTTP API endpoint.
+        DMR itself handles model loading on the host.
+        """
         if self.is_loaded:
             return True
 
@@ -156,8 +177,10 @@ class LLMServiceDockerModelRunner(BaseLLMService):
             response = await self.client.get(f"{self.docker_url}/models")
 
             if response.status_code == 200:
-                self.is_loaded = True
+                models = response.json()
                 self.logger.info(f"✅ Docker Model Runner connected")
+                self.logger.info(f"📋 Available models: {models}")
+                self.is_loaded = True
                 return True
             else:
                 self.logger.error(f"❌ Docker Model Runner returned {response.status_code}")
@@ -167,13 +190,17 @@
             return False
 
     async def generate(self, prompt: str) -> str:
-        """Generate with Docker Model Runner (OpenAI-compatible API)"""
+        """Generate with Docker Model Runner (OpenAI-compatible API)
+
+        Makes HTTP request to DMR at host.docker.internal:11434
+        Model inference happens on host GPU (Apple Metal backend)
+        """
         if not self.is_loaded:
             raise RuntimeError("Docker Model Runner not connected")
 
         try:
             payload = {
-                "model": self.model_name,  # "ai/llama3.2:1B-Q4_0"
+                "model": self.model_name,
                 "messages": [{"role": "user", "content": prompt}],
                 "temperature": self.temperature,
                 "max_tokens": self.max_tokens,
@@ -238,13 +265,30 @@ class LLMServiceMock(BaseLLMService):
         return f"Mock response: I processed your prompt about '{prompt[:40]}...' - please note I'm in mock mode with no real LLM."
 
 
-def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict = None, settings=None) -> BaseLLMService:
+
+def get_llm_service(debug: bool = None, mlx_config: dict = None, docker_config: dict = None, settings=None) -> BaseLLMService:
     """
     Factory function to get appropriate LLM service
-    Fallback chain: MLX → Docker Model Runner → Mock
+
+    Fallback chain: MLX (DEBUG=true) → Docker Model Runner → Mock
+
+    Args:
+        debug: Force DEBUG mode (True=MLX, False=Docker). If None, reads from env/settings
+        mlx_config: Manual MLX config dict
+        docker_config: Manual Docker config dict
+        settings: Pydantic Settings object with llm config
+
+    Returns:
+        BaseLLMService: One of MLX, DockerModelRunner, or Mock implementation
     """
 
-    # Try MLX first
+    # Determine debug mode
+    if debug is None:
+        debug = os.getenv("DEBUG", "false").lower() == "true"
+        if hasattr(settings, "debug"):
+            debug = settings.debug
+
+    # Try MLX first (if DEBUG=true)
     if debug and HAS_MLX:
         try:
             config = mlx_config or {
@@ -253,31 +297,46 @@ def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict =
                 "temperature": 0.7,
                 "device": "auto"
             }
-            logger.info("📌 Mode: MLX (DEBUG=true)")
+            logger.info("📌 Mode: MLX (DEBUG=true) with Apple Silicon GPU")
             return LLMServiceMLX(**config)
         except Exception as e:
-            logger.warning(f"⚠️ MLX failed: {e}")
+            logger.warning(f"⚠️ MLX failed: {e}, falling back to Docker Model Runner")
 
-    # Try Docker Model Runner
+    # Try Docker Model Runner (Metis pattern)
    docker_url = None
    if docker_config:
        docker_url = docker_config.get("docker_url")
    elif settings:
-        docker_url = settings.docker_model_runner_url
+        docker_url = getattr(settings, "model_runner_url", None)
+    else:
+        docker_url = os.getenv("MODEL_RUNNER_URL")
 
    if docker_url:
        try:
-            config = docker_config or {
-                "model_name": settings.llm_model_name_docker if settings else "llama2",
-                "max_tokens": settings.llm_max_tokens if settings else 512,
-                "temperature": settings.llm_temperature if settings else 0.7,
+            model_name = None
+            if docker_config:
+                model_name = docker_config.get("model_name")
+            elif settings:
+                model_name = getattr(settings, "model_name", None)
+            else:
+                model_name = os.getenv("MODEL_NAME", "llama3.2:1B-Q4_0")
+
+            config = {
+                "model_name": model_name,
+                "max_tokens": (docker_config or {}).get("max_tokens",
+                    getattr(settings, "llm_max_tokens", 512) if settings else 512),
+                "temperature": (docker_config or {}).get("temperature",
+                    getattr(settings, "llm_temperature", 0.7) if settings else 0.7),
                 "docker_url": docker_url,
-                "timeout": settings.docker_timeout if settings else 300
+                "timeout": (docker_config or {}).get("timeout",
+                    getattr(settings, "docker_timeout", 300) if settings else 300)
            }
            logger.info(f"📌 Mode: Docker Model Runner at {docker_url}")
+           logger.info(f"📌 Model: {config['model_name']}")
+           logger.info(f"✅ Using host GPU acceleration (llama.cpp Metal backend)")
            return LLMServiceDockerModelRunner(**config)
        except Exception as e:
-            logger.warning(f"⚠️ Docker Model Runner failed: {e}")
+            logger.warning(f"⚠️ Docker Model Runner failed: {e}, falling back to Mock")
 
    # Fallback to mock
    logger.warning("⚠️ Using MOCK mode (no LLM available)")
@@ -285,4 +344,4 @@ def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict =
        model_name="mock",
        max_tokens=512,
        temperature=0.7
-    )
+    )
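Putting the two files together, a minimal driver for the new fallback chain might look like the sketch below (names come straight from the diff; the import paths assume the backend package root exposes these modules as app.config and app.services.llm_service, and FastAPI startup wiring is left out):

    # sketch only - exercises get_llm_service() with the pydantic settings
    import asyncio

    from app.config import settings
    from app.services.llm_service import get_llm_service

    async def main():
        service = get_llm_service(settings=settings)   # MLX -> Docker Model Runner -> Mock
        if await service.load_model():                 # for DMR this only checks the /models endpoint
            answer = await service.generate("Summarize the uploaded CSV in one sentence.")
            print(answer)

    asyncio.run(main())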