subhash4face committed on
Commit
036c510
·
verified ·
1 Parent(s): dbbbc60
Files changed (1) hide show
  1. app.py +65 -12
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import io
3
  import json
@@ -325,16 +326,45 @@ def format_tool_log(tool_name, reason, meta, output, style="A"):
325
  "detailed": format_tool_log(tool_name, reason, meta, output, style="B"),
326
  }
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
329
  gr.Markdown("# Accessibility Voice Agent — MCP Tools")
330
 
331
  with gr.Row():
332
  with gr.Column(scale=3):
333
- chatbox = gr.Chatbot(label="Assistant", elem_id="chatbox")
 
334
  user_input = gr.Textbox(placeholder="Type or press the microphone to speak...", show_label=False)
335
 
336
  with gr.Row():
337
- mic = gr.Audio(source="microphone", type="filepath", label="Record voice (press to record)")
 
338
  send_btn = gr.Button("Send")
339
 
340
  with gr.Accordion("Advanced / Tools", open=False):
@@ -355,6 +385,8 @@ with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
355
  # Callbacks
356
  def on_send_text(text, chat_history, mic_file, style):
357
  tools_entries = []
 
 
358
  if mic_file:
359
  # transcribe audio
360
  tr = transcribe_audio_tool(mic_file)
@@ -362,9 +394,10 @@ with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
362
  log = format_tool_log("transcribe_audio", "User provided microphone audio", tr.meta or {}, tr.content, style)
363
  tools_entries.append(log)
364
  else:
365
- user_text = text
366
- chat_history = chat_history or []
367
- chat_history.append((user_text, "..."))
 
368
 
369
  # demo assistant behavior
370
  if user_text and user_text.strip().lower().startswith("describe image:"):
@@ -382,7 +415,8 @@ with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
382
  else:
383
  assistant = "I heard: " + (user_text or "(empty)")
384
 
385
- chat_history[-1] = (user_text, assistant)
 
386
 
387
  # update tools panel content
388
  panel_html = ''
@@ -393,7 +427,10 @@ with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
393
  else:
394
  for e in tools_entries:
395
  panel_html += f"<pre style='background:#f1f5f9;border-radius:6px;padding:8px;margin-bottom:8px;'>{e}</pre>"
396
- return chat_history, tools_log, gr.update(value=panel_html)
 
 
 
397
 
398
  send_btn.click(on_send_text, inputs=[user_input, chatbox, mic, log_style], outputs=[chatbox, tools_log, tools_panel])
399
 
@@ -415,14 +452,30 @@ with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
415
 
416
  def on_describe_image(file_obj, style):
417
  if not file_obj:
418
- return "No file uploaded"
419
- # file_obj may be a tempfile object or path
420
- path = getattr(file_obj, 'name', file_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  res = describe_image_tool(path)
422
  log = format_tool_log("describe_image", "User uploaded an image for description", res.meta or {}, res.content, style)
423
  panel_html = f"<pre style='background:#ecfdf5;padding:8px;border-radius:6px;'>{log}</pre>"
424
- # show result in chatbox as assistant reply
425
- return [("<image uploaded>", res.content)], gr.update(value=panel_html)
 
 
426
 
427
  img_btn.click(on_describe_image, inputs=[img_upload, log_style], outputs=[chatbox, tools_panel])
428
 
 
1
+
2
  import os
3
  import io
4
  import json
 
326
  "detailed": format_tool_log(tool_name, reason, meta, output, style="B"),
327
  }
328
 
329
# Conversion helpers for chat history between 'messages' (gradio new) and tuple list used in logic
def messages_to_tuples(messages):
    """Convert Chatbot 'messages'-style history into (user, assistant) tuples.

    Accepts a list whose items may be:
      * dicts like ``{"role": "user"|"assistant", "content": "..."}``
        (gradio ``type="messages"`` format; a missing role defaults to "user",
        and any non-"user" role is treated as assistant),
      * 2-item lists/tuples (legacy tuple format, passed through unchanged),
      * anything else (stringified and treated as an assistant reply).

    Returns a list of ``(user_text, assistant_text)`` tuples; one side of each
    tuple is "" when the message came from a single role. ``None`` or an empty
    list yields ``[]``.
    """
    tuples = []
    if not messages:
        return tuples
    for m in messages:
        if isinstance(m, dict):
            role = m.get("role", "user")
            content = m.get("content", "")
            # NOTE: the original used a side-effecting conditional expression
            # (`append(...) if cond else append(...)`); a plain if/else is the
            # idiomatic, readable form and behaves identically.
            if role == "user":
                tuples.append((content, ""))
            else:
                tuples.append(("", content))
        elif isinstance(m, (list, tuple)) and len(m) == 2:
            tuples.append((m[0], m[1]))
        else:
            # fallback: treat as assistant reply
            tuples.append(("", str(m)))
    return tuples
346
+
347
def tuples_to_messages(tuples):
    """Flatten (user_text, assistant_text) pairs into Chatbot 'messages' dicts.

    Each tuple contributes up to two dicts — a "user" entry then an
    "assistant" entry — and either side is dropped when its text is falsy
    (empty string / None), matching gradio's ``type="messages"`` format.
    """
    return [
        {"role": role, "content": text}
        for user_text, assistant_text in tuples
        for role, text in (("user", user_text), ("assistant", assistant_text))
        if text
    ]
355
+
356
  with gr.Blocks(css=".gradio-container {background:#f7fafc}") as demo:
357
  gr.Markdown("# Accessibility Voice Agent — MCP Tools")
358
 
359
  with gr.Row():
360
  with gr.Column(scale=3):
361
+ # Set type='messages' to avoid the deprecation warning, and convert inside handlers.
362
+ chatbox = gr.Chatbot(label="Assistant", elem_id="chatbox", type="messages")
363
  user_input = gr.Textbox(placeholder="Type or press the microphone to speak...", show_label=False)
364
 
365
  with gr.Row():
366
+ # Some gradio versions don't accept 'source' kw; remove it to be broadly compatible.
367
+ mic = gr.Audio(type="filepath", label="Record voice (press to record)")
368
  send_btn = gr.Button("Send")
369
 
370
  with gr.Accordion("Advanced / Tools", open=False):
 
385
  # Callbacks
386
  def on_send_text(text, chat_history, mic_file, style):
387
  tools_entries = []
388
+ # convert incoming chat_history (messages) into tuples for internal logic
389
+ tuples = messages_to_tuples(chat_history)
390
  if mic_file:
391
  # transcribe audio
392
  tr = transcribe_audio_tool(mic_file)
 
394
  log = format_tool_log("transcribe_audio", "User provided microphone audio", tr.meta or {}, tr.content, style)
395
  tools_entries.append(log)
396
  else:
397
+ user_text = text or ""
398
+
399
+ # Append user message to tuples and placeholder assistant
400
+ tuples.append((user_text, "..."))
401
 
402
  # demo assistant behavior
403
  if user_text and user_text.strip().lower().startswith("describe image:"):
 
415
  else:
416
  assistant = "I heard: " + (user_text or "(empty)")
417
 
418
+ # replace placeholder assistant
419
+ tuples[-1] = (tuples[-1][0], assistant)
420
 
421
  # update tools panel content
422
  panel_html = ''
 
427
  else:
428
  for e in tools_entries:
429
  panel_html += f"<pre style='background:#f1f5f9;border-radius:6px;padding:8px;margin-bottom:8px;'>{e}</pre>"
430
+
431
+ # convert back to messages for gr.Chatbot
432
+ new_messages = tuples_to_messages(tuples)
433
+ return new_messages, gr.update(value="\n".join(tools_entries) or "Ready."), gr.update(value=panel_html)
434
 
435
  send_btn.click(on_send_text, inputs=[user_input, chatbox, mic, log_style], outputs=[chatbox, tools_log, tools_panel])
436
 
 
452
 
453
def on_describe_image(file_obj, style):
    """Describe an uploaded image and return (chat messages, tools-panel update).

    ``file_obj`` may arrive in several shapes depending on the gradio version
    and component config: a plain path string (``type="filepath"``), a
    tempfile-like object exposing ``.name``, a dict carrying ``tmp_path``, or
    a raw file-like object exposing ``.read()``. Resolve a filesystem path
    from whichever shape we got, then run the describe-image tool.

    Returns:
        (messages, panel_update): a two-message chat history for the Chatbot
        and a ``gr.update`` carrying the formatted tool log HTML. On failure,
        an empty history plus an error message in the panel.
    """
    if not file_obj:
        return [], gr.update(value="No file uploaded")
    # BUGFIX: a plain string path previously fell through to the .read()
    # fallback (strings have no .name attribute) and always errored out.
    if isinstance(file_obj, str):
        path = file_obj
    else:
        path = getattr(file_obj, 'name', None)
    # If it's a temporary file dict (from gr.File), it might carry 'tmp_path'
    if isinstance(file_obj, dict) and 'tmp_path' in file_obj:
        path = file_obj['tmp_path']
    if not path:
        # Last resort: object exposes read(); persist the bytes to a temp file.
        # Broad except is deliberate best-effort — report, don't crash the UI.
        try:
            contents = file_obj.read()
            tmp_path = "/tmp/gr_uploaded_image.jpg"
            with open(tmp_path, "wb") as f:
                f.write(contents)
            path = tmp_path
        except Exception as e:
            return [], gr.update(value=f"Failed to read uploaded file: {e}")

    res = describe_image_tool(path)
    log = format_tool_log("describe_image", "User uploaded an image for description", res.meta or {}, res.content, style)
    panel_html = f"<pre style='background:#ecfdf5;padding:8px;border-radius:6px;'>{log}</pre>"

    # Return as messages for chatbox
    messages = [{"role":"user","content":"<image uploaded>"}, {"role":"assistant","content":res.content}]
    return messages, gr.update(value=panel_html)
479
 
480
  img_btn.click(on_describe_image, inputs=[img_upload, log_style], outputs=[chatbox, tools_panel])
481