Spaces · Runtime error

Commit b4f488c · 1 Parent(s): bf79fdd
Fix: Mismatch between qa fn and call
app.py CHANGED
@@ -51,7 +51,7 @@ def transcribe_audio(audio_filepath):
             prompts=[[{"role": "user", "content": f"Transcribe the following: {model.audio_locator_tag}"}]],
             audios=audio.to(device),
             audio_lens=torch.tensor([audio_lens]).to(device),
-            max_new_tokens=
+            max_new_tokens=2048,
         )

     # Convert output IDs to text
@@ -60,7 +60,7 @@ def transcribe_audio(audio_filepath):
     return transcript, transcript, initial_message


-# Simple Q&A function
+# Simple Q&A function
 @spaces.GPU
 def transcript_qa(transcript, question, history):
     if not transcript:
@@ -69,33 +69,27 @@ def transcript_qa(transcript, question, history):
     if not question:
         return history, ""

-    # Add user message to history
+    # Add user message to history
     history = history + [{"role": "user", "content": question}]

     with torch.inference_mode(), model.llm.disable_adapter():
         output_ids = model.generate(
             prompts=[[{"role": "user", "content": f"{question}\n\n{transcript}"}]],
-            max_new_tokens=
+            max_new_tokens=2048,
         )

-
-
-    answer = answer.split("<|im_start|>assistant")[-1]
+    ans = model.tokenizer.ids_to_text(output_ids[0].cpu())
+    ans = ans.split("<|im_start|>assistant")[-1]  # get rid of the prompt

-
-
-
-
-    if len(parts) > 1:
-        answer = parts[-1]  # Get text after thinking
-    else:
-        # If no closing tag, try to get text after opening tag
-        answer = answer.split("<think>")[0]  # Get text before thinking
+    if "<think>" in ans:
+        if "</think>" in ans:
+            ans = ans.split("<think>")[-1]
+            _, ans = ans.split("</think>")  # get rid of the thinking

-
+    ans = ans.strip()

     # Add assistant response to history
-    history = history + [{"role": "assistant", "content":
+    history = history + [{"role": "assistant", "content": ans}]

     return history, ""  # Return updated history and clear input
@@ -140,6 +134,9 @@ with gr.Blocks(theme=theme) as demo:
         bubble_full_width=False
     )

+    def user(user_message, history: list):
+        return "", history + [{"role": "user", "content": user_message}]
+
     with gr.Row():
         question_input = gr.Textbox(
             label="",
|