ACloudCenter committed
Commit 541dc83 · 1 Parent(s): f86dcfb

Fix: Add back model.llm.disable_adapter()
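For context: `disable_adapter()` on a PEFT-style LoRA wrapper is a context manager that temporarily routes generation through the base weights instead of the fine-tuned adapter, so the text-only Q&A is answered by the base LLM rather than the speech-adapted path. A minimal sketch of the pattern this commit restores, assuming `model.llm` is a PEFT-wrapped language model (the model setup is outside this diff, and the helper name below is illustrative):

import torch

def answer_with_base_llm(model, prompt, max_new_tokens=256):
    # disable_adapter() deactivates the LoRA adapter only inside the `with`
    # block, so model.generate() runs against the base LLM weights.
    with torch.inference_mode(), model.llm.disable_adapter():
        output_ids = model.generate(
            prompts=[[{"role": "user", "content": prompt}]],
            max_new_tokens=max_new_tokens,
        )
    # Same decoding call the diff uses to turn token IDs back into text.
    return model.tokenizer.ids_to_text(output_ids[0].cpu())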

Files changed (1)
  1. app.py (+13 -8)
app.py CHANGED
@@ -60,7 +60,7 @@ def transcribe_audio(audio_filepath):
     return transcript, transcript, initial_message
 
 
-# Simple Q&A function
+# Simple Q&A function - adapted from working version
 @spaces.GPU
 def transcript_qa(transcript, question, history):
     if not transcript:
@@ -69,22 +69,27 @@ def transcript_qa(transcript, question, history):
     if not question:
         return history, ""
 
-    # Add user message to history
+    # Add user message to history first
     history = history + [{"role": "user", "content": question}]
 
     with torch.inference_mode(), model.llm.disable_adapter():
-        prompt = f"Based on this transcript, answer the question:\n\nTranscript: {transcript}\n\nQuestion: {question}"
         output_ids = model.generate(
-            prompts=[[{"role": "user", "content": prompt}]],
+            prompts=[[{"role": "user", "content": f"{question}\n\n{transcript}"}]],
            max_new_tokens=256,
         )
 
-    ans = model.tokenizer.ids_to_text(output_ids[0].cpu())
-    ans = ans.split("<|im_start|>assistant")[-1] if "<|im_start|>assistant" in ans else ans
-    ans = ans.replace("<|im_end|>", "").strip()
+    # Convert output IDs to text and extract answer
+    answer = model.tokenizer.ids_to_text(output_ids[0].cpu())
+    answer = answer.split("<|im_start|>assistant")[-1]
+
+    # Remove thinking tags if present
+    if "<think>" in answer:
+        _, answer = answer.split("</think>")
+
+    answer = answer.strip()
 
     # Add assistant response to history
-    history = history + [{"role": "assistant", "content": ans}]
+    history = history + [{"role": "assistant", "content": answer}]
 
     return history, "" # Return updated history and clear input
 
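Since `transcript_qa` takes and returns an OpenAI-style messages list and clears its text input, it presumably feeds a Gradio messages-format chatbot. A hedged sketch of that wiring (the UI code is not part of this diff, so every component name below is an assumption):

import gradio as gr

with gr.Blocks() as demo:
    transcript_state = gr.State("")        # assumed: filled by transcribe_audio
    chatbot = gr.Chatbot(type="messages")  # expects [{"role": ..., "content": ...}] dicts
    question_box = gr.Textbox(label="Ask about the transcript")

    # transcript_qa(transcript, question, history) -> (history, "") lines up
    # with outputs=[chatbot, question_box]: updated history, cleared textbox.
    question_box.submit(
        transcript_qa,
        inputs=[transcript_state, question_box, chatbot],
        outputs=[chatbot, question_box],
    )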