vachaspathi committed
Commit bfbe388 · verified · 1 Parent(s): 9c8efa2

Update app.py

Files changed (1): app.py (+49, -17)
app.py CHANGED
@@ -1,7 +1,16 @@
 # app.py — MCP server using DeepSeek via Hugging Face transformers (or fallback)
 # - Put this file next to config.py (see example below)
-# - It loads the model in LOCAL_MODEL (e.g., a DeepSeek HF checkpoint) via transformers.pipeline
-# - If the model cannot be loaded (no transformers / OOM / missing weights), it falls back to a small CPU model or rule-based responder
+# - Supports LOCAL_MODEL values like:
+#     "deepseek/deepseek-r1-0528" or "deepseek/deepseek-r1-0528:free"
+#   If a ":revision" suffix is present, it will be passed as the `revision=` argument
+#   to transformers.from_pretrained so HF validation is satisfied.
+#
+# - It loads the model via transformers.pipeline if available; otherwise falls back
+#   to google/flan-t5-small or rule-based responder.
+#
+# - Developer instruction: when a user pastes a local path (e.g. /mnt/data/...), the
+#   chat handler passes the path unchanged to process_document(); tool invocation
+#   normalization converts file_path -> file_url (file://...) and optionally file_b64.
 
 from mcp.server.fastmcp import FastMCP
 from typing import Optional, List, Tuple, Any, Dict
@@ -39,7 +48,7 @@ try:
         CLIENT_SECRET,
         REFRESH_TOKEN,
         API_BASE,
-        LOCAL_MODEL,  # e.g. "deepseek-ai/deepseek-r1-7b" or smaller/distilled variant
+        LOCAL_MODEL,  # e.g. "deepseek/deepseek-r1-7b" or "deepseek/deepseek-r1-0528:free"
     )
 except Exception as e:
     raise SystemExit(
@@ -125,16 +134,31 @@ def _normalize_local_path_args(args: Any) -> Any:
     return args
 
 # ----------------------------
-# DeepSeek / HF model loader
+# DeepSeek / HF model loader (with revision parsing)
 # ----------------------------
 LLM_PIPELINE = None
 TOKENIZER = None
 LOADED_MODEL_NAME = None
 
+def _parse_model_and_revision(model_string: str) -> Tuple[str, Optional[str]]:
+    """
+    Accepts model strings like:
+      - 'owner/repo'
+      - 'owner/repo:revision'
+    Returns (repo_id, revision_or_none).
+    """
+    if ":" in model_string:
+        repo_id, revision = model_string.split(":", 1)
+        repo_id = repo_id.strip()
+        revision = revision.strip()
+        return repo_id, revision
+    return model_string, None
+
 def init_deepseek_model():
     """
     Try to load LOCAL_MODEL via transformers.pipeline.
-    If loading fails, try a fallback small model (distilgpt2 or flan-t5-small if seq2seq).
+    If a ':revision' is present, pass revision=... to from_pretrained to avoid HF repo-id validation errors.
+    If loading fails, try a fallback small model (flan-t5-small or distilgpt2).
     """
     global LLM_PIPELINE, TOKENIZER, LOADED_MODEL_NAME
 
@@ -149,30 +173,38 @@ def init_deepseek_model():
         return
 
     try:
-        tokenizer_name = LOCAL_TOKENIZER or LOCAL_MODEL
-        model_name = LOCAL_MODEL
-        LOADED_MODEL_NAME = model_name
+        model_string = LOCAL_MODEL
+        repo_id, revision = _parse_model_and_revision(model_string)
+        tokenizer_name = LOCAL_TOKENIZER or repo_id
+        model_name_for_logging = f"{repo_id}" + (f" (rev={revision})" if revision else "")
+        LOADED_MODEL_NAME = model_name_for_logging
 
         # If model looks like seq2seq (T5/flan) use text2text; else causal
         seq2seq_keywords = ["flan", "t5", "seq2seq"]
-        if any(k in model_name.lower() for k in seq2seq_keywords):
-            TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+        if any(k in repo_id.lower() for k in seq2seq_keywords):
+            if revision:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True, revision=revision)
+                model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, revision=revision)
+            else:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
+                model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
             LLM_PIPELINE = pipeline("text2text-generation", model=model, tokenizer=TOKENIZER)
-            logger.info("Loaded seq2seq model: %s", model_name)
+            logger.info("Loaded seq2seq model: %s", model_name_for_logging)
         else:
-            TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
-            model = AutoModelForCausalLM.from_pretrained(model_name)
+            if revision:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True, revision=revision)
+                model = AutoModelForCausalLM.from_pretrained(repo_id, revision=revision)
+            else:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
+                model = AutoModelForCausalLM.from_pretrained(repo_id)
             LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
-            logger.info("Loaded causal model: %s", model_name)
+            logger.info("Loaded causal model: %s", model_name_for_logging)
 
     except Exception as e:
         logger.error("Failed to load requested model '%s': %s", LOCAL_MODEL, e)
         traceback.print_exc()
         # Try a small CPU-friendly fallback
-        fallback = None
         try:
-            # prefer an instruction-friendly small model if possible
             fallback = "google/flan-t5-small"
             if "flan" in fallback:
                 TOKENIZER = AutoTokenizer.from_pretrained(fallback, use_fast=True)
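
The new _parse_model_and_revision helper splits only on the first ':'; a quick illustration of the two accepted forms, reusing the repo ids from the header comment:

# Illustration of the committed helper's behavior; repo ids taken from the header comment.
assert _parse_model_and_revision("deepseek/deepseek-r1-0528") == ("deepseek/deepseek-r1-0528", None)
assert _parse_model_and_revision("deepseek/deepseek-r1-0528:free") == ("deepseek/deepseek-r1-0528", "free")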
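
The header says to put app.py next to config.py. A minimal sketch of such a config, assuming placeholder values and only the names visible in this diff (a real config may define more, e.g. CLIENT_ID, and LOCAL_TOKENIZER may live elsewhere):

# config.py — hypothetical minimal sketch; every value below is a placeholder.
CLIENT_SECRET = "your-client-secret"
REFRESH_TOKEN = "your-refresh-token"
API_BASE = "https://api.example.com"

# Model id, optionally with a ":revision" suffix that init_deepseek_model() splits off.
LOCAL_MODEL = "deepseek/deepseek-r1-0528:free"
# Optional; when falsy the loader uses the model repo id for the tokenizer as well.
LOCAL_TOKENIZER = None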
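
The header also mentions that tool-invocation normalization converts file_path into file_url (file://...) and optionally file_b64; that code sits outside this diff, so the following is only a hypothetical sketch of the described conversion, assuming the arguments arrive as a dict:

import base64
from pathlib import Path

def _file_path_to_url_args(args: dict, inline_b64: bool = False) -> dict:
    # Hypothetical helper; the real _normalize_local_path_args in app.py may differ.
    path = args.pop("file_path", None)
    if path:
        p = Path(path)
        # e.g. "/mnt/data/report.pdf" -> "file:///mnt/data/report.pdf"
        args["file_url"] = p.resolve().as_uri()
        if inline_b64 and p.is_file():
            args["file_b64"] = base64.b64encode(p.read_bytes()).decode("ascii")
    return args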