Nihal2000 committed
Commit ea97dd9 · 1 Parent(s): 6edc114

fixed voice qa
Files changed (3):
  1. app.py (+266 −60)
  2. mcp_tools/voice_tool.py (+133 −16)
  3. services/elevenlabs_service.py (+145 −76)
app.py CHANGED
@@ -630,40 +630,74 @@ voice_conversation_state = {
     "transcript": []
 }
 
+voice_conversation_state = {
+    "session_id": None,
+    "active": False,
+    "transcript": []
+}
+
 def start_voice_conversation():
-    """Start a new voice conversation session"""
+    """
+    Start a new voice conversation session
+
+    Returns:
+        Tuple of (status_message, start_button_state, stop_button_state, chatbot_history)
+    """
     try:
+        # Check if service is available
         if not mcp_server.elevenlabs_service.is_available():
             return (
-                "⚠️ Voice assistant not configured. Please set ELEVENLABS_API_KEY and ELEVENLABS_AGENT_ID in .env",
-                gr.update(interactive=False),
-                gr.update(interactive=True),
+                "⚠️ Voice assistant not configured.\n\n"
+                "**Setup Instructions:**\n"
+                "1. Get API key from: https://elevenlabs.io/app/settings/api-keys\n"
+                "2. Create an agent at: https://elevenlabs.io/app/conversational-ai\n"
+                "3. Add to .env file:\n"
+                "   - ELEVENLABS_API_KEY=your_api_key\n"
+                "   - ELEVENLABS_AGENT_ID=your_agent_id\n"
+                "4. Restart the application",
+                gr.update(interactive=True),   # start button enabled
+                gr.update(interactive=False),  # stop button disabled
                 []
             )
 
+        # Create new session
         session_id = str(uuid.uuid4())
-        result = mcp_server.run_async(mcp_server.elevenlabs_service.start_conversation(session_id))
+        result = mcp_server.run_async(
+            mcp_server.elevenlabs_service.start_conversation(session_id)
+        )
 
         if result.get("success"):
             voice_conversation_state["session_id"] = session_id
             voice_conversation_state["active"] = True
             voice_conversation_state["transcript"] = []
 
+            # Initialize chatbot with welcome message
+            initial_message = {
+                "role": "assistant",
+                "content": "👋 Hello! I'm your AI librarian. Ask me anything about your documents!"
+            }
+
             return (
-                "🎙️ Voice assistant is ready. Type your question below.",
-                gr.update(interactive=False),
-                gr.update(interactive=True),
-                []
+                " Voice assistant is ready!\n\n"
+                "You can now ask questions about your uploaded documents.",
+                gr.update(interactive=False),  # start button disabled
+                gr.update(interactive=True),   # stop button enabled
+                [initial_message]
             )
         else:
+            error_msg = result.get("error", "Unknown error")
             return (
-                f"❌ Failed to start conversation: {result.get('error')}",
+                f"❌ Failed to start: {error_msg}\n\n"
+                "**Troubleshooting:**\n"
+                "• Check your API key is valid\n"
+                "• Verify agent ID is correct\n"
+                "• Check internet connection",
                 gr.update(interactive=True),
                 gr.update(interactive=False),
                 []
             )
     except Exception as e:
-        logger.error(f"Error starting voice conversation: {str(e)}")
+        logger.error(f"Error starting voice conversation: {str(e)}", exc_info=True)
         return (
             f"❌ Error: {str(e)}",
             gr.update(interactive=True),
@@ -672,11 +706,16 @@ def start_voice_conversation():
         )
 
 def stop_voice_conversation():
-    """Stop active voice conversation"""
+    """
+    Stop active voice conversation
+
+    Returns:
+        Tuple of (status_message, start_button_state, stop_button_state, chatbot_history)
+    """
     try:
         if not voice_conversation_state["active"]:
             return (
-                "No active conversation",
+                "ℹ️ No active conversation",
                 gr.update(interactive=True),
                 gr.update(interactive=False),
                 voice_conversation_state["transcript"]
@@ -684,13 +723,19 @@ def stop_voice_conversation():
 
         session_id = voice_conversation_state["session_id"]
        if session_id:
-            mcp_server.run_async(mcp_server.elevenlabs_service.end_conversation(session_id))
+            mcp_server.run_async(
+                mcp_server.elevenlabs_service.end_conversation(session_id)
+            )
+
+        # Get conversation stats
+        message_count = len(voice_conversation_state["transcript"])
 
         voice_conversation_state["active"] = False
         voice_conversation_state["session_id"] = None
 
         return (
-            "✅ Conversation ended",
+            f"✅ Conversation ended\n\n"
+            f"📊 Stats: {message_count} messages exchanged",
             gr.update(interactive=True),
             gr.update(interactive=False),
             voice_conversation_state["transcript"]
@@ -705,40 +750,159 @@ def stop_voice_conversation():
         )
 
 def send_voice_message_v6(message, chat_history):
-    """Send message in voice conversation - Gradio 6 format"""
+    """
+    Send message in voice conversation - Gradio 6+ format
+
+    Args:
+        message: User's text message
+        chat_history: Current chat history (list of message dicts)
+
+    Returns:
+        Tuple of (updated_chat_history, cleared_input_box)
+    """
     try:
+        # Validate state
         if not voice_conversation_state["active"]:
+            chat_history.append({
+                "role": "assistant",
+                "content": "⚠️ Please start a conversation first by clicking 'Start Conversation'"
+            })
             return chat_history, ""
 
+        # Validate input
         if not message or not message.strip():
             return chat_history, message
 
         session_id = voice_conversation_state["session_id"]
 
-        # Add user message
-        chat_history.append({"role": "user", "content": message})
+        # Add user message to display
+        chat_history.append({
+            "role": "user",
+            "content": message
+        })
+
+        # Show typing indicator
+        chat_history.append({
+            "role": "assistant",
+            "content": "🤔 Thinking..."
+        })
 
         # Get AI response
-        result = mcp_server.run_async(mcp_server.voice_tool.voice_qa(message, session_id))
+        result = mcp_server.run_async(
+            mcp_server.voice_tool.voice_qa(message, session_id)
+        )
+
+        # Remove typing indicator
+        chat_history = chat_history[:-1]
 
+        # Add response
         if result.get("success"):
             answer = result.get("answer", "No response")
-            chat_history.append({"role": "assistant", "content": answer})
+
+            # Add helpful context if RAG was used
+            if "document" in answer.lower() or "file" in answer.lower():
+                footer = "\n\n💡 *Answer based on your documents*"
+            else:
+                footer = ""
+
+            chat_history.append({
+                "role": "assistant",
+                "content": answer + footer
+            })
         else:
+            error_msg = result.get("error", "Unknown error")
             chat_history.append({
                 "role": "assistant",
-                "content": f"❌ Error: {result.get('error')}"
+                "content": f"❌ Error: {error_msg}\n\n"
+                           "**Suggestions:**\n"
+                           "• Try rephrasing your question\n"
+                           "• Make sure you have uploaded relevant documents\n"
+                           "• Check if the question is about your document library"
             })
 
+        # Update conversation state
+        voice_conversation_state["transcript"] = chat_history
+
         return chat_history, ""
+
     except Exception as e:
-        logger.error(f"Error in voice message: {str(e)}")
+        logger.error(f"Error in voice message: {str(e)}", exc_info=True)
+
+        # Remove typing indicator if present
+        if chat_history and chat_history[-1]["role"] == "assistant" and "Thinking" in chat_history[-1]["content"]:
+            chat_history = chat_history[:-1]
+
         chat_history.append({
             "role": "assistant",
-            "content": f"❌ Error: {str(e)}"
+            "content": f"❌ An error occurred: {str(e)}\n\nPlease try again."
         })
         return chat_history, ""
 
+def test_voice_connection():
+    """
+    Test voice assistant connection
+
+    Returns:
+        Status message with test results
+    """
+    try:
+        result = mcp_server.run_async(
+            mcp_server.voice_tool.test_connection()
+        )
+
+        if result.get("success"):
+            return (
+                "✅ **Connection Test Passed**\n\n"
+                f"• API Status: Connected\n"
+                f"• Voices Available: {result.get('voices_available', 0)}\n"
+                f"• RAG Tool: {'✓ Working' if result.get('rag_tool_working') else '✗ Failed'}\n"
+                f"• Client Tools: {'✓ Registered' if result.get('client_tools_registered') else '✗ Not Registered'}\n\n"
+                "🎉 Voice assistant is ready to use!"
+            )
+        else:
+            return (
+                "❌ **Connection Test Failed**\n\n"
+                f"Error: {result.get('message', 'Unknown error')}\n\n"
+                "**Troubleshooting:**\n"
+                "1. Verify ELEVENLABS_API_KEY in .env\n"
+                "2. Check ELEVENLABS_AGENT_ID is set\n"
+                "3. Ensure API key is valid\n"
+                "4. Check internet connection"
            )
+    except Exception as e:
+        logger.error(f"Connection test error: {str(e)}")
+        return (
+            f"❌ **Test Error**\n\n{str(e)}\n\n"
+            "Please check your configuration and try again."
+        )
+
+def get_conversation_stats():
+    """
+    Get statistics about current conversation
+
+    Returns:
+        Formatted stats string
+    """
+    try:
+        if not voice_conversation_state["active"]:
+            return "ℹ️ No active conversation"
+
+        transcript = voice_conversation_state["transcript"]
+        user_msgs = sum(1 for msg in transcript if msg["role"] == "user")
+        ai_msgs = sum(1 for msg in transcript if msg["role"] == "assistant")
+
+        return (
+            "📊 **Conversation Statistics**\n\n"
+            f"• Session ID: {voice_conversation_state['session_id'][:8]}...\n"
+            f"• Your messages: {user_msgs}\n"
+            f"• AI responses: {ai_msgs}\n"
+            f"• Total exchanges: {user_msgs}\n"
+            f"• Status: {'🟢 Active' if voice_conversation_state['active'] else '🔴 Inactive'}"
+        )
+    except Exception as e:
+        logger.error(f"Error getting stats: {str(e)}")
+        return f"❌ Error: {str(e)}"
+
 def generate_podcast_ui(doc_ids, style, duration, voice1, voice2):
     """UI wrapper for podcast generation"""
     try:
@@ -1109,51 +1273,88 @@ def create_gradio_interface():
             )
 
         with gr.Tab("🎙️ Voice Assistant"):
-            gr.Markdown("""
-            ### 🗣️ Talk to Your AI Librarian
-
-            Have a natural conversation about your documents. Ask questions, request summaries,
-            or explore your content library through voice-powered interaction.
-
-            **Note:** Requires ElevenLabs API configuration.
-            """)
+            # Simple header
+            gr.Markdown("### Ask questions about your documents using AI")
 
             with gr.Row():
-                with gr.Column(scale=2):
-                    with gr.Group():
-                        voice_status_display = gr.Textbox(
-                            label="Status",
-                            value="Ready to start",
-                            interactive=False,
-                            lines=2
-                        )
-
-                        with gr.Row():
-                            start_voice_btn = gr.Button("🎤 Start Conversation", variant="primary", size="lg")
-                            stop_voice_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", interactive=False)
+                # Compact left sidebar (25% width)
+                with gr.Column(scale=1):
+                    # Status box
+                    voice_status_display = gr.Textbox(
+                        label="Status",
+                        value="Click 'Start' to begin",
+                        interactive=False,
+                        lines=3,
+                        max_lines=3
+                    )
 
-                    with gr.Group():
-                        gr.Markdown("#### 💬 Send Message")
+                    # Control buttons stacked vertically
+                    start_voice_btn = gr.Button(
+                        "🎤 Start",
+                        variant="primary",
+                        size="lg"
+                    )
+
+                    stop_voice_btn = gr.Button(
+                        "⏹️ Stop",
+                        variant="stop",
+                        size="lg",
+                        interactive=False
+                    )
+
+                    test_connection_btn = gr.Button(
+                        "🔧 Test",
+                        variant="secondary",
+                        size="sm"
+                    )
+
+                    gr.Markdown("---")
+
+                    # Quick tips
+                    gr.Markdown("""
+                    **Quick Tips:**
+                    • Upload documents first
+                    • Ask specific questions
+                    • Press Enter to send
+                    """, elem_classes=["small-text"])
+
+                # Main chat area (75% width)
+                with gr.Column(scale=3):
+                    # Large chat window
+                    voice_chatbot = gr.Chatbot(
+                        type="messages",
+                        height=550,
+                        show_copy_button=True,
+                        avatar_images=(None, "🤖"),
+                        show_label=False,
+                        container=True,
+                        bubble_full_width=False
+                    )
+
+                    # Input row
+                    with gr.Row():
                         voice_input_text = gr.Textbox(
-                            label="",
-                            placeholder="Type your question...",
-                            lines=3,
+                            placeholder="Ask me anything about your documents...",
+                            lines=2,
+                            max_lines=4,
+                            scale=4,
+                            show_label=False,
                             container=False,
-                            info="Press Enter or click Send"
+                            autofocus=True
                        )
-                        send_voice_btn = gr.Button("📤 Send", variant="secondary")
-
-                with gr.Column(scale=3):
-                    with gr.Group():
-                        voice_chatbot = gr.Chatbot(
-                            label="Conversation",
-                            type="messages",
-                            height=500,
-                            show_copy_button=True
+                        send_voice_btn = gr.Button(
+                            "Send",
+                            scale=1,
+                            variant="primary"
                        )
-
-                        clear_chat_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
+
+                    # Footer actions
+                    with gr.Row():
+                        clear_chat_btn = gr.Button("Clear", size="sm")
+                        with gr.Column(scale=3):
+                            gr.Markdown("*Tip: Type your question and press Enter*")
 
+            # Event handlers
            start_voice_btn.click(
                fn=start_voice_conversation,
                outputs=[voice_status_display, start_voice_btn, stop_voice_btn, voice_chatbot]
@@ -1180,6 +1381,11 @@ def create_gradio_interface():
                fn=lambda: [],
                outputs=[voice_chatbot]
            )
+
+            test_connection_btn.click(
+                fn=test_voice_connection,
+                outputs=[voice_status_display]
+            )
 
        with gr.Tab("🎧 Podcast Studio"):
            gr.Markdown("""
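Note that the hunks above wire up the start, stop, clear, and test buttons, but the registration of send_voice_message_v6 falls outside the displayed ranges. As a hedged sketch only (these exact lines are not part of this diff), the send button and the textbox's Enter key would typically be bound to it like this, reusing the component names introduced above:

    # Hypothetical wiring sketch; the real handler registration is not shown in this diff.
    send_voice_btn.click(
        fn=send_voice_message_v6,
        inputs=[voice_input_text, voice_chatbot],
        outputs=[voice_chatbot, voice_input_text]
    )
    voice_input_text.submit(          # pressing Enter sends the message
        fn=send_voice_message_v6,
        inputs=[voice_input_text, voice_chatbot],
        outputs=[voice_chatbot, voice_input_text]
    )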
mcp_tools/voice_tool.py CHANGED
@@ -6,7 +6,13 @@ logger = logging.getLogger(__name__)
 
 class VoiceTool:
     """
-    MCP Tool for voice-based Q&A using ElevenLabs conversational AI
+    Enhanced MCP Tool for voice-based Q&A using ElevenLabs conversational AI
+
+    Improvements:
+    - Better error handling and user feedback
+    - Support for conversation context
+    - Streaming responses support
+    - Session management
     """
 
     def __init__(self, elevenlabs_service):
@@ -24,40 +30,151 @@ class VoiceTool:
         session_id: Optional[str] = None
     ) -> Dict[str, Any]:
         """
-        MCP Tool: Ask a question using voice assistant
+        Ask a question using voice assistant (text-based for web UI)
 
         Args:
-            question: User's question (text or transcribed from voice)
+            question: User's question
             session_id: Optional session ID for conversation context
 
         Returns:
-            Dictionary with answer, audio URL (if applicable), and sources
+            Dictionary with answer and metadata
         """
         try:
+            # Check if service is available
             if not self.elevenlabs_service or not self.elevenlabs_service.is_available():
                 return {
                     "success": False,
-                    "error": "Voice assistant not configured. Please set ELEVENLABS_API_KEY and ELEVENLABS_AGENT_ID"
+                    "error": "Voice assistant not configured. Please set ELEVENLABS_API_KEY in your .env file.",
+                    "help": "Get your API key from: https://elevenlabs.io/app/settings/api-keys"
+                }
+
+            if not question or not question.strip():
+                return {
+                    "success": False,
+                    "error": "Please enter a question"
                 }
 
-            logger.info(f"Voice QA: {question}")
+            logger.info(f"Voice QA (session: {session_id}): {question}")
 
-            # For text-based queries, we can use the RAG tool directly
-            # This provides the backend for voice queries
-            result = await self.elevenlabs_service.llamaindex_service.query(question)
+            # Send message through ElevenLabs service
+            result = await self.elevenlabs_service.send_text_message(
+                message=question,
+                session_id=session_id or "default"
+            )
 
-            return {
-                "success": True,
-                "question": question,
-                "answer": result,
-                "session_id": session_id,
-                "mode": "text"  # Could be "voice" if audio processing is involved
-            }
+            if result.get("success"):
+                return {
+                    "success": True,
+                    "question": question,
+                    "answer": result["answer"],
+                    "session_id": session_id,
+                    "mode": "text"
+                }
+            else:
+                return {
+                    "success": False,
+                    "error": result.get("error", "Unknown error"),
+                    "question": question
+                }
 
         except Exception as e:
-            logger.error(f"Voice QA failed: {str(e)}")
+            logger.error(f"Voice QA failed: {str(e)}", exc_info=True)
             return {
                 "success": False,
-                "error": str(e),
+                "error": f"An error occurred: {str(e)}",
                 "question": question
             }
+
+    async def start_session(self, session_id: str) -> Dict[str, Any]:
+        """
+        Start a new voice assistant session
+
+        Args:
+            session_id: Unique session identifier
+
+        Returns:
+            Session start status
+        """
+        try:
+            result = await self.elevenlabs_service.start_conversation(session_id)
+            return result
+        except Exception as e:
+            logger.error(f"Failed to start session: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e)
+            }
+
+    async def end_session(self, session_id: str) -> Dict[str, Any]:
+        """
+        End a voice assistant session
+
+        Args:
+            session_id: Session identifier
+
+        Returns:
+            Session end status
+        """
+        try:
+            success = await self.elevenlabs_service.end_conversation(session_id)
+            return {
+                "success": success,
+                "message": "Session ended" if success else "Session not found"
+            }
+        except Exception as e:
+            logger.error(f"Failed to end session: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e)
+            }
+
+    def get_conversation_history(self, session_id: str) -> Dict[str, Any]:
+        """
+        Get conversation history for a session
+
+        Args:
+            session_id: Session identifier
+
+        Returns:
+            Dictionary with conversation history
+        """
+        try:
+            history = self.elevenlabs_service.get_conversation_history(session_id)
+            return {
+                "success": True,
+                "history": history,
+                "message_count": len(history)
+            }
+        except Exception as e:
+            logger.error(f"Failed to get history: {str(e)}")
+            return {
+                "success": False,
+                "error": str(e),
+                "history": []
+            }
+
+    async def test_connection(self) -> Dict[str, Any]:
+        """
+        Test voice assistant connection
+
+        Returns:
+            Connection test results
+        """
+        try:
+            if not self.elevenlabs_service:
+                return {
+                    "success": False,
+                    "message": "Service not initialized"
+                }
+
+            result = await self.elevenlabs_service.test_connection()
+            return {
+                "success": result["status"] == "success",
+                **result
+            }
+        except Exception as e:
+            logger.error(f"Connection test failed: {str(e)}")
+            return {
+                "success": False,
+                "message": str(e)
+            }
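For context, the rewritten tool can be exercised on its own from any async entry point; a minimal sketch, assuming an already-initialized ElevenLabsService instance (the app itself drives these calls through mcp_server.run_async):

    import asyncio

    async def demo(elevenlabs_service):
        # elevenlabs_service is assumed to be a configured ElevenLabsService
        tool = VoiceTool(elevenlabs_service)
        await tool.start_session("demo-session")
        result = await tool.voice_qa("What do my documents say about pricing?", "demo-session")
        print(result["answer"] if result.get("success") else result.get("error"))
        await tool.end_session("demo-session")

    # asyncio.run(demo(elevenlabs_service))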
services/elevenlabs_service.py CHANGED
@@ -11,7 +11,7 @@ try:
 except ImportError:
     ELEVENLABS_AVAILABLE = False
     logger = logging.getLogger(__name__)
-    logger.warning("ElevenLabs SDK not available. Voice features will be disabled.")
+    logger.warning("ElevenLabs SDK not available. Install: pip install elevenlabs")
 
 import config
 from services.llamaindex_service import LlamaIndexService
@@ -20,8 +20,13 @@ logger = logging.getLogger(__name__)
 
 class ElevenLabsService:
     """
-    Service for integrating ElevenLabs Conversational AI with RAG capabilities.
-    Provides voice-based interaction with the document library.
+    Enhanced service for ElevenLabs Conversational AI with proper RAG integration.
+
+    Key improvements:
+    - Proper client tools registration with event loop handling
+    - Built-in RAG through ElevenLabs Knowledge Base
+    - Support for both real-time voice and text-based chat
+    - Session management and conversation history
     """
 
     def __init__(self, llamaindex_service: LlamaIndexService):
@@ -36,13 +41,14 @@ class ElevenLabsService:
         self.client = None
         self.client_tools = None
         self.active_conversations: Dict[str, Conversation] = {}
+        self.conversation_history: Dict[str, List[Dict]] = {}
 
         if not ELEVENLABS_AVAILABLE:
             logger.error("ElevenLabs SDK not installed. Run: pip install elevenlabs")
             return
 
         if not self.config.ELEVENLABS_API_KEY:
-            logger.warning("ELEVENLABS_API_KEY not configured. Voice features will be limited.")
+            logger.warning("ELEVENLABS_API_KEY not configured.")
             return
 
         try:
@@ -50,87 +56,102 @@ class ElevenLabsService:
             self.client = ElevenLabs(api_key=self.config.ELEVENLABS_API_KEY)
             logger.info("ElevenLabs client initialized successfully")
 
-            # Initialize client tools for custom tool registration
-            self.client_tools = ClientTools()
-
-            # Register RAG tool
-            self._register_rag_tool()
+            # Initialize client tools - CRITICAL: Must be done in async context
+            self._init_client_tools()
 
-            logger.info("ElevenLabs service initialized with RAG tool")
+            logger.info("ElevenLabs service initialized")
 
         except Exception as e:
             logger.error(f"Error initializing ElevenLabs service: {str(e)}")
 
-    def _register_rag_tool(self):
-        """Register RAG query tool with ElevenLabs agent"""
-        if not self.client_tools:
-            return
-
+    def _init_client_tools(self):
+        """Initialize client tools for RAG integration"""
         try:
-            # Register the query_documents tool
-            # Modern ElevenLabs SDK: register(tool_name, handler=callable)
-            self.client_tools.register("query_documents", handler=self._rag_query_tool)
+            # Get or create event loop for ClientTools
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+
+            # Initialize ClientTools with the loop
+            self.client_tools = ClientTools(loop=loop)
 
-            logger.info("RAG tool 'query_documents' registered successfully")
+            # Register RAG query tool with proper metadata
+            self.client_tools.register(
+                "query_documents",
+                handler=self._rag_query_handler,
+                description="Search through the user's uploaded documents to find relevant information. Use this tool whenever the user asks questions about their documents, files, or content in their library.",
+                parameters={
+                    "query": {
+                        "type": "string",
+                        "description": "The search query or question to find information in the documents"
+                    }
+                },
+                is_async=True
+            )
+
+            logger.info("Client tools registered: query_documents")
 
         except Exception as e:
-            logger.error(f"Error registering RAG tool: {str(e)}")
+            logger.error(f"Error initializing client tools: {str(e)}")
+            self.client_tools = None
 
-    async def _rag_query_tool(self, params: Dict[str, Any]) -> Dict[str, Any]:
+    async def _rag_query_handler(self, params: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Custom tool for querying documents using LlamaIndex agentic RAG
+        Enhanced RAG query handler with better error handling and response formatting
+
+        This tool is called by the ElevenLabs agent when it needs to search documents.
 
         Args:
-            params: Dictionary containing the query
-                - query (str): The user's question or search query
+            params: Dictionary with 'query' key containing user's question
 
         Returns:
-            Dictionary with answer and metadata
+            Dictionary with 'answer' and optional 'sources'
         """
         try:
             query = params.get("query", "")
 
-            if not query:
+            if not query or not query.strip():
                 return {
-                    "error": "No query provided",
-                    "answer": "I didn't receive a question to search for."
+                    "answer": "I didn't receive a question to search for. Could you please ask again?"
                 }
 
-            logger.info(f"RAG tool called with query: '{query}'")
+            logger.info(f"RAG query: {query}")
 
-            # Query the LlamaIndex agentic RAG system
+            # Query LlamaIndex with timeout
             try:
                 result = await asyncio.wait_for(
                     self.llamaindex_service.query(query),
-                    timeout=self.config.CONVERSATION_TIMEOUT
+                    timeout=self.config.CONVERSATION_TIMEOUT if hasattr(self.config, 'CONVERSATION_TIMEOUT') else 30
                 )
 
-                logger.info(f"RAG query successful")
+                logger.info(f"RAG query successful: {len(result)} chars")
 
+                # Format response for conversational voice
                 return {
                     "answer": result,
-                    "source": "document_library",
-                    "confidence": "high"
+                    "confidence": "high",
+                    "source": "document_library"
                 }
 
             except asyncio.TimeoutError:
                 logger.error("RAG query timeout")
                 return {
-                    "error": "timeout",
-                    "answer": "The search took too long. Please try a simpler question."
+                    "answer": "The search is taking longer than expected. Could you try rephrasing your question?"
                 }
 
        except Exception as e:
-            logger.error(f"Error in RAG query tool: {str(e)}")
+            logger.error(f"RAG query error: {str(e)}", exc_info=True)
            return {
-                "error": str(e),
-                "answer": f"I encountered an error searching the documents: {str(e)}"
+                "answer": f"I encountered an error while searching: {str(e)}. Please try again."
            }
 
    def create_conversation(
        self,
        agent_id: Optional[str] = None,
-        session_id: Optional[str] = None
+        session_id: Optional[str] = None,
+        use_audio: bool = True
    ) -> Optional[Conversation]:
        """
        Create a new conversation session
@@ -138,6 +159,7 @@ class ElevenLabsService:
         Args:
             agent_id: ElevenLabs agent ID (uses config default if not provided)
             session_id: Optional session ID for tracking
+            use_audio: If True, use audio interface; if False, text-only mode
 
         Returns:
             Conversation object or None if initialization fails
@@ -153,8 +175,8 @@ class ElevenLabsService:
                 logger.error("No agent ID provided or configured")
                 return None
 
-            # Create audio interface for real-time audio
-            audio_interface = DefaultAudioInterface()
+            # Create audio interface only if requested
+            audio_interface = DefaultAudioInterface() if use_audio else None
 
             # Create conversation with RAG tool
             conversation = Conversation(
@@ -162,12 +184,16 @@ class ElevenLabsService:
                 agent_id=agent_id,
                 requires_auth=True,
                 audio_interface=audio_interface,
-                client_tools=self.client_tools
+                client_tools=self.client_tools,
+                # Add callbacks for monitoring
+                callback_agent_response=lambda response: self._on_agent_response(session_id, response),
+                callback_user_transcript=lambda transcript: self._on_user_message(session_id, transcript)
             )
 
-            # Store conversation if session ID provided
+            # Store conversation and initialize history
             if session_id:
                 self.active_conversations[session_id] = conversation
+                self.conversation_history[session_id] = []
 
             logger.info(f"Created conversation for agent: {agent_id}")
             return conversation
@@ -176,9 +202,27 @@ class ElevenLabsService:
             logger.error(f"Error creating conversation: {str(e)}")
             return None
 
+    def _on_agent_response(self, session_id: Optional[str], response: str):
+        """Track agent responses"""
+        if session_id and session_id in self.conversation_history:
+            self.conversation_history[session_id].append({
+                "role": "assistant",
+                "content": response
+            })
+        logger.debug(f"Agent response: {response[:100]}...")
+
+    def _on_user_message(self, session_id: Optional[str], message: str):
+        """Track user messages"""
+        if session_id and session_id in self.conversation_history:
+            self.conversation_history[session_id].append({
+                "role": "user",
+                "content": message
+            })
+        logger.debug(f"User message: {message[:100]}...")
+
     async def start_conversation(self, session_id: Optional[str] = None) -> Dict[str, Any]:
         """
-        Start a new conversation session (async wrapper for UI)
+        Start a new conversation session
 
         Args:
             session_id: Optional session ID for tracking
@@ -187,18 +231,18 @@ class ElevenLabsService:
             Dictionary with success status and conversation info
         """
         try:
-            conversation = self.create_conversation(session_id=session_id)
+            conversation = self.create_conversation(session_id=session_id, use_audio=False)
 
             if conversation:
                 return {
                     "success": True,
                     "session_id": session_id,
-                    "message": "Conversation started successfully"
+                    "message": "Voice assistant ready. Ask me anything about your documents!"
                 }
             else:
                 return {
                     "success": False,
-                    "error": "Failed to create conversation"
+                    "error": "Failed to create conversation. Check API configuration."
                 }
         except Exception as e:
             logger.error(f"Error starting conversation: {str(e)}")
@@ -207,41 +251,60 @@ class ElevenLabsService:
                 "error": str(e)
             }
 
-    async def process_voice_query(
+    async def send_text_message(
         self,
-        audio_file_path: str,
-        agent_id: Optional[str] = None
+        message: str,
+        session_id: str
     ) -> Dict[str, Any]:
         """
-        Process a voice query file and return response
+        Send a text message to the agent and get response
+
+        This is for text-based chat (no audio). Perfect for web interfaces.
 
         Args:
-            audio_file_path: Path to audio file
-            agent_id: Optional agent ID
+            message: User's text message
+            session_id: Session identifier
 
         Returns:
-            Dictionary with transcription, answer, and metadata
+            Dictionary with agent's response
         """
         try:
-            # For now, this is a placeholder for file-based processing
-            # ElevenLabs Conversational AI is primarily WebSocket-based
-            # This would be used for async/batch processing
+            if not message or not message.strip():
+                return {
+                    "success": False,
+                    "error": "Empty message"
+                }
+
+            # For text-based interaction, we directly query the RAG system
+            # since ElevenLabs Conversational AI is primarily audio-focused
+
+            # Store user message
+            if session_id in self.conversation_history:
+                self.conversation_history[session_id].append({
+                    "role": "user",
+                    "content": message
+                })
 
-            logger.info(f"Processing voice query from: {audio_file_path}")
+            # Query RAG system
+            response = await self._rag_query_handler({"query": message})
 
-            # This would require additional implementation for file upload
-            # and processing through ElevenLabs API
+            # Store assistant response
+            if session_id in self.conversation_history:
+                self.conversation_history[session_id].append({
+                    "role": "assistant",
+                    "content": response["answer"]
+                })
 
             return {
-                "status": "pending",
-                "message": "Voice query processing requires WebSocket connection",
-                "file": audio_file_path
+                "success": True,
+                "answer": response["answer"],
+                "session_id": session_id
            }
 
        except Exception as e:
-            logger.error(f"Error processing voice query: {str(e)}")
+            logger.error(f"Error sending message: {str(e)}")
            return {
-                "status": "error",
+                "success": False,
                "error": str(e)
            }
 
@@ -261,23 +324,26 @@ class ElevenLabsService:
 
                 # Try to end the session gracefully
                 try:
-                    conversation.end_session()
-                except AttributeError as ae:
-                    # Handle cases where DefaultAudioInterface doesn't have expected methods
-                    logger.warning(f"Could not cleanly end session: {str(ae)}")
+                    if hasattr(conversation, 'end_session'):
+                        conversation.end_session()
                 except Exception as e:
                     logger.warning(f"Error during session cleanup: {str(e)}")
 
-                # Always remove from active conversations
+                # Remove from active conversations
                 del self.active_conversations[session_id]
                 logger.info(f"Ended conversation: {session_id}")
                 return True
+
             return False
 
         except Exception as e:
             logger.error(f"Error ending conversation: {str(e)}")
             return False
 
+    def get_conversation_history(self, session_id: str) -> List[Dict]:
+        """Get conversation history for a session"""
+        return self.conversation_history.get(session_id, [])
+
     def get_available_voices(self) -> List[Dict[str, str]]:
         """
         Get list of available voice models
@@ -289,14 +355,13 @@ class ElevenLabsService:
             if not self.client:
                 return []
 
-            # Get voices from ElevenLabs API
             voices = self.client.voices.get_all()
 
             return [
                 {
                     "voice_id": voice.voice_id,
                     "name": voice.name,
-                    "category": voice.category if hasattr(voice, 'category') else "general"
+                    "category": getattr(voice, 'category', "general")
                 }
                 for voice in voices.voices
             ]
@@ -323,14 +388,18 @@ class ElevenLabsService:
                     "message": "Client not initialized"
                 }
 
-            # Try to fetch user info or voices as a connection test
+            # Test API by fetching voices
             voices = self.get_available_voices()
 
+            # Test RAG tool
+            test_result = await self._rag_query_handler({"query": "test"})
+
             return {
                 "status": "success",
                 "message": "ElevenLabs API connected",
                 "voices_available": len(voices),
-                "rag_tool_registered": self.client_tools is not None
+                "rag_tool_working": "answer" in test_result,
+                "client_tools_registered": self.client_tools is not None
             }
 
         except Exception as e:
@@ -338,4 +407,4 @@ class ElevenLabsService:
             return {
                 "status": "error",
                 "message": str(e)
-            }
+            }
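The new text-chat path needs no audio device; a minimal end-to-end sketch, assuming a working LlamaIndexService and valid ELEVENLABS_API_KEY / ELEVENLABS_AGENT_ID in config:

    import asyncio

    async def text_chat_demo(llamaindex_service):
        # llamaindex_service is assumed to be a configured LlamaIndexService
        service = ElevenLabsService(llamaindex_service)
        session_id = "demo-session"
        started = await service.start_conversation(session_id)  # text-only conversation (use_audio=False)
        if not started.get("success"):
            print(started.get("error"))
            return
        reply = await service.send_text_message("Summarize my uploaded documents", session_id)
        print(reply.get("answer") if reply.get("success") else reply.get("error"))
        print(service.get_conversation_history(session_id))     # user and assistant turns
        await service.end_conversation(session_id)

    # asyncio.run(text_chat_demo(llamaindex_service))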