ACloudCenter committed
Commit 541dc83 · 1 Parent(s): f86dcfb

Fix: Add back model.llm.disable_adapter()
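For context: `disable_adapter()` on a PEFT-style LoRA wrapper is a context manager that temporarily routes generation through the base weights instead of the fine-tuned adapter, so the text-only Q&A is answered by the base LLM rather than the speech-adapted path. A minimal sketch of the pattern this commit restores, assuming `model.llm` is a PEFT-wrapped language model (the model setup is outside this diff, and the helper name below is illustrative):

import torch

def answer_with_base_llm(model, prompt, max_new_tokens=256):
    # disable_adapter() deactivates the LoRA adapter only inside the `with`
    # block, so model.generate() runs against the base LLM weights.
    with torch.inference_mode(), model.llm.disable_adapter():
        output_ids = model.generate(
            prompts=[[{"role": "user", "content": prompt}]],
            max_new_tokens=max_new_tokens,
        )
    # Same decoding call the diff uses to turn token IDs back into text.
    return model.tokenizer.ids_to_text(output_ids[0].cpu())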

Files changed (1)
  1. app.py (+13 -8)
app.py CHANGED
@@ -60,7 +60,7 @@ def transcribe_audio(audio_filepath):
     return transcript, transcript, initial_message
 
 
-# Simple Q&A function
+# Simple Q&A function - adapted from working version
 @spaces.GPU
 def transcript_qa(transcript, question, history):
     if not transcript:
@@ -69,22 +69,27 @@ def transcript_qa(transcript, question, history):
     if not question:
         return history, ""
 
-    # Add user message to history
+    # Add user message to history first
     history = history + [{"role": "user", "content": question}]
 
     with torch.inference_mode(), model.llm.disable_adapter():
-        prompt = f"Based on this transcript, answer the question:\n\nTranscript: {transcript}\n\nQuestion: {question}"
         output_ids = model.generate(
-            prompts=[[{"role": "user", "content": prompt}]],
+            prompts=[[{"role": "user", "content": f"{question}\n\n{transcript}"}]],
            max_new_tokens=256,
         )
 
-    ans = model.tokenizer.ids_to_text(output_ids[0].cpu())
-    ans = ans.split("<|im_start|>assistant")[-1] if "<|im_start|>assistant" in ans else ans
-    ans = ans.replace("<|im_end|>", "").strip()
+    # Convert output IDs to text and extract answer
+    answer = model.tokenizer.ids_to_text(output_ids[0].cpu())
+    answer = answer.split("<|im_start|>assistant")[-1]
+
+    # Remove thinking tags if present
+    if "<think>" in answer:
+        _, answer = answer.split("</think>")
+
+    answer = answer.strip()
 
     # Add assistant response to history
-    history = history + [{"role": "assistant", "content": ans}]
+    history = history + [{"role": "assistant", "content": answer}]
 
     return history, "" # Return updated history and clear input
 
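Since `transcript_qa` takes and returns an OpenAI-style messages list and clears its text input, it presumably feeds a Gradio messages-format chatbot. A hedged sketch of that wiring (the UI code is not part of this diff, so every component name below is an assumption):

import gradio as gr

with gr.Blocks() as demo:
    transcript_state = gr.State("")        # assumed: filled by transcribe_audio
    chatbot = gr.Chatbot(type="messages")  # expects [{"role": ..., "content": ...}] dicts
    question_box = gr.Textbox(label="Ask about the transcript")

    # transcript_qa(transcript, question, history) -> (history, "") lines up
    # with outputs=[chatbot, question_box]: updated history, cleared textbox.
    question_box.submit(
        transcript_qa,
        inputs=[transcript_state, question_box, chatbot],
        outputs=[chatbot, question_box],
    )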