Nihal2000 committed on
Commit
e062f2f
·
1 Parent(s): da24e2c

Refactor: Restore MCP architecture - Import from mcp_server.py, remove duplicate code, rename to AI Digital Library Assistant

Browse files
__pycache__/config.cpython-313.pyc ADDED
Binary file (3.69 kB). View file
 
app.py CHANGED
@@ -3,7 +3,6 @@ import os
3
  import asyncio
4
  import json
5
  import logging
6
- import tempfile
7
  import uuid
8
  from datetime import datetime
9
  from pathlib import Path
@@ -13,301 +12,77 @@ import nest_asyncio
13
  # Apply nest_asyncio to handle nested event loops in Gradio
14
  nest_asyncio.apply()
15
 
16
- # Import our custom modules
17
- from mcp_tools.ingestion_tool import IngestionTool
18
- from mcp_tools.search_tool import SearchTool
19
- from mcp_tools.generative_tool import GenerativeTool
20
- from services.vector_store_service import VectorStoreService
21
- from services.document_store_service import DocumentStoreService
22
- from services.embedding_service import EmbeddingService
23
- from services.llm_service import LLMService
24
- from services.ocr_service import OCRService
25
- from core.models import SearchResult, Document
26
- import config
 
 
 
 
 
 
 
27
 
28
  # Setup logging
29
  logging.basicConfig(level=logging.INFO)
30
  logger = logging.getLogger(__name__)
31
- # Import our custom modules
32
- from mcp_tools.ingestion_tool import IngestionTool
33
- from mcp_tools.search_tool import SearchTool
34
- from mcp_tools.generative_tool import GenerativeTool
35
- from services.vector_store_service import VectorStoreService
36
- from services.document_store_service import DocumentStoreService
37
- from services.embedding_service import EmbeddingService
38
- from services.llm_service import LLMService
39
- from services.ocr_service import OCRService
40
- from core.models import SearchResult, Document
41
- import config
42
- from services.llamaindex_service import LlamaIndexService
43
- from services.elevenlabs_service import ElevenLabsService
44
- from services.podcast_generator_service import PodcastGeneratorService
45
- from mcp_tools.voice_tool import VoiceTool
46
- from mcp_tools.podcast_tool import PodcastTool
47
 
48
- # Setup logging
49
- logging.basicConfig(level=logging.INFO)
50
- logger = logging.getLogger(__name__)
51
-
52
- class ContentOrganizerMCPServer:
53
- def __init__(self):
54
- # Initialize services
55
- logger.info("Initializing Content Organizer MCP Server...")
56
- self.vector_store = VectorStoreService()
57
- self.document_store = DocumentStoreService()
58
- self.embedding_service = EmbeddingService()
59
- self.llm_service = LLMService()
60
- self.ocr_service = OCRService()
61
- self.llamaindex_service = LlamaIndexService(self.document_store)
62
-
63
- # Initialize ElevenLabs voice service
64
- self.elevenlabs_service = ElevenLabsService(self.llamaindex_service)
65
-
66
- # Initialize Podcast Generator
67
- self.podcast_generator = PodcastGeneratorService(
68
- llamaindex_service=self.llamaindex_service,
69
- llm_service=self.llm_service
70
- )
71
-
72
- # Initialize tools
73
- self.ingestion_tool = IngestionTool(
74
- vector_store=self.vector_store,
75
- document_store=self.document_store,
76
- embedding_service=self.embedding_service,
77
- ocr_service=self.ocr_service
78
- )
79
- self.search_tool = SearchTool(
80
- vector_store=self.vector_store,
81
- embedding_service=self.embedding_service,
82
- document_store=self.document_store
83
- )
84
- self.generative_tool = GenerativeTool(
85
- llm_service=self.llm_service,
86
- search_tool=self.search_tool
87
- )
88
- self.voice_tool = VoiceTool(self.elevenlabs_service)
89
- self.podcast_tool = PodcastTool(self.podcast_generator)
90
-
91
 
92
- # Track processing status
93
- self.processing_status = {}
94
-
95
- # Document cache for quick access
96
- self.document_cache = {}
97
- logger.info("Content Organizer MCP Server initialized successfully!")
98
-
99
- def run_async(self, coro):
100
- """Helper to run async functions in Gradio"""
101
- try:
102
- loop = asyncio.get_event_loop()
103
- except RuntimeError:
104
- loop = asyncio.new_event_loop()
105
- asyncio.set_event_loop(loop)
106
- if loop.is_running():
107
- # If loop is already running, create a task
108
- import concurrent.futures
109
- with concurrent.futures.ThreadPoolExecutor() as executor:
110
- future = executor.submit(asyncio.run, coro)
111
- return future.result()
112
- else:
113
- return loop.run_until_complete(coro)
114
-
115
- async def ingest_document_async(self, file_path: str, file_type: str) -> Dict[str, Any]:
116
- """MCP Tool: Ingest and process a document"""
117
- try:
118
- task_id = str(uuid.uuid4())
119
- self.processing_status[task_id] = {"status": "processing", "progress": 0}
120
- result = await self.ingestion_tool.process_document(file_path, file_type, task_id)
121
- if result.get("success"):
122
- self.processing_status[task_id] = {"status": "completed", "progress": 100}
123
- doc_id = result.get("document_id")
124
- if doc_id:
125
- doc = await self.document_store.get_document(doc_id)
126
- if doc:
127
- self.document_cache[doc_id] = doc
128
- return result
129
- else:
130
- self.processing_status[task_id] = {"status": "failed", "error": result.get("error")}
131
- return result
132
- except Exception as e:
133
- logger.error(f"Document ingestion failed: {str(e)}")
134
- return {"success": False, "error": str(e), "message": "Failed to process document"}
135
-
136
- async def get_document_content_async(self, document_id: str) -> Optional[str]:
137
- """Get document content by ID"""
138
- try:
139
- # Check cache first
140
- if document_id in self.document_cache:
141
- return self.document_cache[document_id].content
142
-
143
- # Get from store
144
- doc = await self.document_store.get_document(document_id)
145
- if doc:
146
- self.document_cache[document_id] = doc
147
- return doc.content
148
- return None
149
- except Exception as e:
150
- logger.error(f"Error getting document content: {str(e)}")
151
- return None
152
-
153
- async def semantic_search_async(self, query: str, top_k: int = 5, filters: Optional[Dict] = None) -> Dict[str, Any]:
154
- """MCP Tool: Perform semantic search"""
155
- try:
156
- results = await self.search_tool.search(query, top_k, filters)
157
- return {"success": True, "query": query, "results": [result.to_dict() for result in results], "total_results": len(results)}
158
- except Exception as e:
159
- logger.error(f"Semantic search failed: {str(e)}")
160
- return {"success": False, "error": str(e), "query": query, "results": []}
161
-
162
- async def summarize_content_async(self, content: str = None, document_id: str = None, style: str = "concise") -> Dict[str, Any]:
163
- try:
164
- if document_id and document_id != "none":
165
- content = await self.get_document_content_async(document_id)
166
- if not content:
167
- return {"success": False, "error": f"Document {document_id} not found"}
168
- if not content or not content.strip():
169
- return {"success": False, "error": "No content provided for summarization"}
170
- max_content_length = 4000
171
- if len(content) > max_content_length:
172
- content = content[:max_content_length] + "..."
173
- summary = await self.generative_tool.summarize(content, style)
174
- return {"success": True, "summary": summary, "original_length": len(content), "summary_length": len(summary), "style": style, "document_id": document_id}
175
- except Exception as e:
176
- logger.error(f"Summarization failed: {str(e)}")
177
- return {"success": False, "error": str(e)}
178
-
179
- async def generate_tags_async(self, content: str = None, document_id: str = None, max_tags: int = 5) -> Dict[str, Any]:
180
- """MCP Tool: Generate tags for content"""
181
- try:
182
- if document_id and document_id != "none":
183
- content = await self.get_document_content_async(document_id)
184
- if not content:
185
- return {"success": False, "error": f"Document {document_id} not found"}
186
- if not content or not content.strip():
187
- return {"success": False, "error": "No content provided for tag generation"}
188
- tags = await self.generative_tool.generate_tags(content, max_tags)
189
- if document_id and document_id != "none" and tags:
190
- await self.document_store.update_document_metadata(document_id, {"tags": tags})
191
- return {"success": True, "tags": tags, "content_length": len(content), "document_id": document_id}
192
- except Exception as e:
193
- logger.error(f"Tag generation failed: {str(e)}")
194
- return {"success": False, "error": str(e)}
195
- async def generate_podcast_async(
196
- self,
197
- document_ids: List[str],
198
- style: str = "conversational",
199
- duration_minutes: int = 10,
200
- host1_voice: str = "Rachel",
201
- host2_voice: str = "Adam"
202
- ) -> Dict[str, Any]:
203
- """Generate podcast from documents"""
204
- try:
205
- result = await self.podcast_tool.generate_podcast(
206
- document_ids=document_ids,
207
- style=style,
208
- duration_minutes=duration_minutes,
209
- host1_voice=host1_voice,
210
- host2_voice=host2_voice
211
- )
212
- return result
213
- except Exception as e:
214
- logger.error(f"Podcast generation failed: {str(e)}")
215
- return {"success": False, "error": str(e)}
216
-
217
- async def answer_question_async(self, question: str, context_filter: Optional[Dict] = None) -> Dict[str, Any]:
218
- try:
219
- search_results = await self.search_tool.search(question, top_k=5, filters=context_filter)
220
- if not search_results:
221
- return {"success": False, "error": "No relevant context found in your documents. Please make sure you have uploaded relevant documents.", "question": question}
222
- answer = await self.generative_tool.answer_question(question, search_results)
223
- return {"success": True, "question": question, "answer": answer, "sources": [result.to_dict() for result in search_results], "confidence": "high" if len(search_results) >= 3 else "medium"}
224
- except Exception as e:
225
- logger.error(f"Question answering failed: {str(e)}")
226
- return {"success": False, "error": str(e), "question": question}
227
-
228
- async def generate_outline_async(self, topic: str, num_sections: int = 5, detail_level: str = "medium") -> Dict[str, Any]:
229
- try:
230
- outline = await self.generative_tool.generate_outline(topic, num_sections, detail_level)
231
- return {"success": True, "result": outline}
232
- except Exception as e:
233
- return {"success": False, "error": str(e)}
234
-
235
- async def explain_concept_async(self, concept: str, audience: str = "general", length: str = "medium") -> Dict[str, Any]:
236
- try:
237
- explanation = await self.generative_tool.explain_concept(concept, audience, length)
238
- return {"success": True, "result": explanation}
239
- except Exception as e:
240
- return {"success": False, "error": str(e)}
241
-
242
- async def paraphrase_text_async(self, text: str, style: str = "formal") -> Dict[str, Any]:
243
- try:
244
- paraphrase = await self.generative_tool.paraphrase_text(text, style)
245
- return {"success": True, "result": paraphrase}
246
- except Exception as e:
247
- return {"success": False, "error": str(e)}
248
-
249
- async def categorize_content_async(self, content: str, categories: List[str]) -> Dict[str, Any]:
250
- try:
251
- category = await self.generative_tool.categorize(content, categories)
252
- return {"success": True, "result": category}
253
- except Exception as e:
254
- return {"success": False, "error": str(e)}
255
-
256
- async def extract_key_insights_async(self, content: str, num_insights: int = 5) -> Dict[str, Any]:
257
- try:
258
- insights = await self.generative_tool.extract_key_insights(content, num_insights)
259
- return {"success": True, "result": "\n".join([f"- {insight}" for insight in insights])}
260
- except Exception as e:
261
- return {"success": False, "error": str(e)}
262
-
263
- async def generate_questions_async(self, content: str, question_type: str = "comprehension", num_questions: int = 5) -> Dict[str, Any]:
264
- try:
265
- questions = await self.generative_tool.generate_questions(content, question_type, num_questions)
266
- return {"success": True, "result": "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])}
267
- except Exception as e:
268
- return {"success": False, "error": str(e)}
269
-
270
- async def extract_key_information_async(self, content: str) -> Dict[str, Any]:
271
- try:
272
- info = await self.llm_service.extract_key_information(content)
273
- return {"success": True, "result": json.dumps(info, indent=2)}
274
- except Exception as e:
275
- return {"success": False, "error": str(e)}
276
-
277
- def list_documents_sync(self, limit: int = 100, offset: int = 0) -> Dict[str, Any]:
278
- try:
279
- documents = self.run_async(self.document_store.list_documents(limit, offset))
280
- return {"success": True, "documents": [doc.to_dict() for doc in documents], "total": len(documents)}
281
- except Exception as e:
282
- return {"success": False, "error": str(e)}
283
-
284
- mcp_server = ContentOrganizerMCPServer()
285
 
286
  def get_document_list():
 
287
  try:
288
- result = mcp_server.list_documents_sync(limit=100)
289
- if result["success"]:
290
- if result["documents"]:
291
- doc_list_str = "πŸ“š Documents in Library:\n\n"
292
- for i, doc_item in enumerate(result["documents"], 1):
293
- doc_list_str += f"{i}. {doc_item['filename']} (ID: {doc_item['id'][:8]}...)\n"
294
- doc_list_str += f" Type: {doc_item['doc_type']}, Size: {doc_item['file_size']} bytes\n"
295
- if doc_item.get('tags'):
296
- doc_list_str += f" Tags: {', '.join(doc_item['tags'])}\n"
297
- doc_list_str += f" Created: {doc_item['created_at'][:10]}\n\n"
298
- return doc_list_str
299
- else:
300
- return "No documents in library yet. Upload some documents to get started!"
301
  else:
302
- return f"Error loading documents: {result['error']}"
303
  except Exception as e:
 
304
  return f"Error: {str(e)}"
305
 
306
  def get_document_choices():
 
307
  try:
308
- result = mcp_server.list_documents_sync(limit=100)
309
- if result["success"] and result["documents"]:
310
- choices = [(f"{doc['filename']} ({doc['id'][:8]}...)", doc['id']) for doc in result["documents"]]
311
  logger.info(f"Generated {len(choices)} document choices")
312
  return choices
313
  return []
@@ -316,6 +91,7 @@ def get_document_choices():
316
  return []
317
 
318
  def refresh_library():
 
319
  doc_list_refreshed = get_document_list()
320
  doc_choices_refreshed = get_document_choices()
321
  logger.info(f"Refreshing library. Found {len(doc_choices_refreshed)} choices.")
@@ -327,6 +103,7 @@ def refresh_library():
327
  )
328
 
329
  def upload_and_process_file(file):
 
330
  if file is None:
331
  doc_list_initial = get_document_list()
332
  doc_choices_initial = get_document_choices()
@@ -338,9 +115,10 @@ def upload_and_process_file(file):
338
  )
339
  try:
340
  file_path = file.name if hasattr(file, 'name') else str(file)
341
- file_type = Path(file_path).suffix.lower().strip('.') # Ensure suffix is clean
342
  logger.info(f"Processing file: {file_path}, type: {file_type}")
343
- result = mcp_server.run_async(mcp_server.ingest_document_async(file_path, file_type))
 
344
 
345
  doc_list_updated = get_document_list()
346
  doc_choices_updated = get_document_choices()
@@ -374,114 +152,165 @@ def upload_and_process_file(file):
374
  gr.update(choices=doc_choices_error)
375
  )
376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  def perform_search(query, top_k):
 
378
  if not query.strip():
379
  return "Please enter a search query"
380
  try:
381
- result = mcp_server.run_async(mcp_server.semantic_search_async(query, int(top_k)))
382
- if result["success"]:
383
- if result["results"]:
384
- output_str = f"πŸ” Found {result['total_results']} results for: '{query}'\n\n"
385
- for i, res_item in enumerate(result["results"], 1):
386
- output_str += f"Result {i}:\n"
387
- output_str += f"πŸ“Š Relevance Score: {res_item['score']:.3f}\n"
388
- output_str += f"πŸ“„ Content: {res_item['content'][:300]}...\n"
389
- if 'document_filename' in res_item.get('metadata', {}):
390
- output_str += f"πŸ“ Source: {res_item['metadata']['document_filename']}\n"
391
- output_str += f"πŸ”— Document ID: {res_item.get('document_id', 'Unknown')}\n"
392
- output_str += "-" * 80 + "\n\n"
393
- return output_str
394
- else:
395
- return f"No results found for: '{query}'\n\nMake sure you have uploaded relevant documents first."
396
  else:
397
- return f"❌ Search failed: {result['error']}"
398
  except Exception as e:
399
  logger.error(f"Search error: {str(e)}")
400
  return f"❌ Error: {str(e)}"
401
 
 
 
 
 
402
  def update_options_visibility(task):
403
  """Update visibility of options based on selected task"""
404
  return (
405
- gr.update(visible=task == "Summarize"), # summary_style
406
- gr.update(visible=task == "Generate Outline"), # outline_sections
407
- gr.update(visible=task == "Generate Outline"), # outline_detail
408
- gr.update(visible=task == "Explain Concept"), # explain_audience
409
- gr.update(visible=task == "Explain Concept"), # explain_length
410
- gr.update(visible=task == "Paraphrase"), # paraphrase_style
411
- gr.update(visible=task == "Categorize"), # categories_input
412
- gr.update(visible=task in ["Key Insights", "Generate Questions"]), # num_items
413
- gr.update(visible=task == "Generate Questions") # question_type
414
  )
415
 
 
 
 
 
 
 
 
 
 
 
 
416
  def execute_content_task(task, doc_choice, custom_text,
417
  summary_style, outline_sections, outline_detail,
418
  explain_audience, explain_length,
419
  paraphrase_style, categories_input,
420
  num_items, question_type):
 
421
  try:
422
  # Get content
423
  content = ""
424
  if custom_text and custom_text.strip():
425
  content = custom_text
426
  elif doc_choice and doc_choice != "none":
427
- content = mcp_server.run_async(mcp_server.get_document_content_async(doc_choice))
428
  if not content:
429
  return "❌ Error: Document not found or empty"
430
  else:
431
  if task == "Generate Outline":
432
- content = custom_text # Topic is passed as text
433
  else:
434
  return "⚠️ Please select a document or enter text"
435
 
436
  # Execute task
437
- result = {"success": False, "error": "Unknown task"}
438
-
439
  if task == "Summarize":
440
- result = mcp_server.run_async(mcp_server.summarize_content_async(content=content, style=summary_style))
441
- if result["success"]:
442
- return f"πŸ“ Summary ({summary_style}):\n\n{result['summary']}"
443
 
444
  elif task == "Generate Outline":
445
- # For outline, content is the topic
446
- result = mcp_server.run_async(mcp_server.generate_outline_async(content, int(outline_sections), outline_detail))
447
- if result["success"]:
448
- return f"πŸ“ Outline for '{content}':\n\n{result['result']}"
449
 
450
  elif task == "Explain Concept":
451
- # For explain, content is the concept
452
- result = mcp_server.run_async(mcp_server.explain_concept_async(content, explain_audience, explain_length))
453
- if result["success"]:
454
- return f"πŸ’‘ Explanation ({explain_audience}):\n\n{result['result']}"
455
 
456
  elif task == "Paraphrase":
457
- result = mcp_server.run_async(mcp_server.paraphrase_text_async(content, paraphrase_style))
458
- if result["success"]:
459
- return f"πŸ”„ Paraphrased Text ({paraphrase_style}):\n\n{result['result']}"
460
 
461
  elif task == "Categorize":
462
  categories = [c.strip() for c in categories_input.split(',')] if categories_input else []
463
- result = mcp_server.run_async(mcp_server.categorize_content_async(content, categories))
464
- if result["success"]:
465
- return f"🏷️ Category:\n\n{result['result']}"
466
 
467
  elif task == "Key Insights":
468
- result = mcp_server.run_async(mcp_server.extract_key_insights_async(content, int(num_items)))
469
- if result["success"]:
470
- return f"πŸ” Key Insights:\n\n{result['result']}"
471
 
472
  elif task == "Generate Questions":
473
- result = mcp_server.run_async(mcp_server.generate_questions_async(content, question_type, int(num_items)))
474
- if result["success"]:
475
- return f"❓ Generated Questions ({question_type}):\n\n{result['result']}"
476
 
477
  elif task == "Extract Key Info":
478
- result = mcp_server.run_async(mcp_server.extract_key_information_async(content))
479
- if result["success"]:
480
- return f"πŸ“Š Key Information:\n\n{result['result']}"
481
 
482
- if not result["success"]:
483
- return f"❌ Error: {result.get('error', 'Unknown error')}"
484
-
485
  return "βœ… Task completed"
486
 
487
  except Exception as e:
@@ -489,105 +318,73 @@ def execute_content_task(task, doc_choice, custom_text,
489
  return f"❌ Error: {str(e)}"
490
 
491
  def generate_tags_for_document(doc_choice, custom_text, max_tags):
 
492
  try:
493
- logger.info(f"Generate tags called with doc_choice: {doc_choice}, type: {type(doc_choice)}")
494
  document_id = doc_choice if doc_choice and doc_choice != "none" and doc_choice != "" else None
495
 
496
  if custom_text and custom_text.strip():
497
  logger.info("Using custom text for tag generation")
498
- result = mcp_server.run_async(mcp_server.generate_tags_async(content=custom_text, max_tags=int(max_tags)))
 
 
499
  elif document_id:
500
  logger.info(f"Generating tags for document: {document_id}")
501
- result = mcp_server.run_async(mcp_server.generate_tags_async(document_id=document_id, max_tags=int(max_tags)))
 
 
 
 
 
 
 
502
  else:
503
  return "Please select a document from the dropdown or enter text to generate tags"
504
 
505
- if result["success"]:
506
- tags_str = ", ".join(result["tags"])
507
  output_str = f"🏷️ Generated Tags:\n\n{tags_str}\n\n"
508
  output_str += f"πŸ“Š Statistics:\n"
509
- output_str += f"- Content length: {result['content_length']} characters\n"
510
- output_str += f"- Number of tags: {len(result['tags'])}\n"
511
- if result.get('document_id'):
512
- output_str += f"- Document ID: {result['document_id']}\n"
513
  output_str += f"\nβœ… Tags have been saved to the document."
514
  return output_str
515
  else:
516
- return f"❌ Tag generation failed: {result['error']}"
517
  except Exception as e:
518
  logger.error(f"Tag generation error: {str(e)}")
519
  return f"❌ Error: {str(e)}"
520
 
521
  def ask_question(question):
 
522
  if not question.strip():
523
  return "Please enter a question"
524
  try:
525
- result = mcp_server.run_async(mcp_server.answer_question_async(question))
526
- if result["success"]:
527
- output_str = f"❓ Question: {result['question']}\n\n"
528
- output_str += f"πŸ’‘ Answer:\n{result['answer']}\n\n"
529
- output_str += f"🎯 Confidence: {result['confidence']}\n\n"
530
- output_str += f"πŸ“š Sources Used ({len(result['sources'])}):\n"
531
- for i, source_item in enumerate(result['sources'], 1):
532
- filename = source_item.get('metadata', {}).get('document_filename', 'Unknown')
533
- output_str += f"\n{i}. πŸ“„ {filename}\n"
534
- output_str += f" πŸ“ Excerpt: {source_item['content'][:150]}...\n"
535
- output_str += f" πŸ“Š Relevance: {source_item['score']:.3f}\n"
536
- return output_str
537
- else:
538
- return f"❌ {result.get('error', 'Failed to answer question')}"
 
 
539
  except Exception as e:
540
  return f"❌ Error: {str(e)}"
541
 
542
- def delete_document_from_library(document_id):
543
- if not document_id:
544
- doc_list_current = get_document_list()
545
- doc_choices_current = get_document_choices()
546
- return (
547
- "No document selected to delete.",
548
- doc_list_current,
549
- gr.update(choices=doc_choices_current),
550
- gr.update(choices=doc_choices_current),
551
- gr.update(choices=doc_choices_current)
552
- )
553
- try:
554
- delete_doc_store_result = mcp_server.run_async(mcp_server.document_store.delete_document(document_id))
555
- delete_vec_store_result = mcp_server.run_async(mcp_server.vector_store.delete_document(document_id))
556
 
557
- msg = ""
558
- if delete_doc_store_result:
559
- msg += f"πŸ—‘οΈ Document {document_id[:8]}... deleted from document store. "
560
- else:
561
- msg += f"❌ Failed to delete document {document_id[:8]}... from document store. "
562
-
563
- if delete_vec_store_result:
564
- msg += "Embeddings deleted from vector store."
565
- else:
566
- msg += "Failed to delete embeddings from vector store (or no embeddings existed)."
567
-
568
-
569
- doc_list_updated = get_document_list()
570
- doc_choices_updated = get_document_choices()
571
- return (
572
- msg,
573
- doc_list_updated,
574
- gr.update(choices=doc_choices_updated),
575
- gr.update(choices=doc_choices_updated),
576
- gr.update(choices=doc_choices_updated)
577
- )
578
- except Exception as e:
579
- logger.error(f"Error deleting document: {str(e)}")
580
- doc_list_error = get_document_list()
581
- doc_choices_error = get_document_choices()
582
- return (
583
- f"❌ Error deleting document: {str(e)}",
584
- doc_list_error,
585
- gr.update(choices=doc_choices_error),
586
- gr.update(choices=doc_choices_error),
587
- gr.update(choices=doc_choices_error)
588
- )
589
-
590
- # Voice conversation state - global scope
591
  voice_conversation_state = {
592
  "session_id": None,
593
  "active": False,
@@ -597,16 +394,16 @@ voice_conversation_state = {
597
  def start_voice_conversation():
598
  """Start a new voice conversation session"""
599
  try:
600
- if not mcp_server.elevenlabs_service.is_available():
601
  return (
602
  "⚠️ Voice assistant not configured. Please set ELEVENLABS_API_KEY and ELEVENLABS_AGENT_ID in .env",
603
  gr.update(interactive=False),
604
  gr.update(interactive=True),
605
- ""
606
  )
607
 
608
  session_id = str(uuid.uuid4())
609
- result = mcp_server.run_async(mcp_server.elevenlabs_service.start_conversation(session_id))
610
 
611
  if result.get("success"):
612
  voice_conversation_state["session_id"] = session_id
@@ -635,7 +432,6 @@ def start_voice_conversation():
635
  []
636
  )
637
 
638
-
639
  def stop_voice_conversation():
640
  """Stop active voice conversation"""
641
  try:
@@ -649,7 +445,7 @@ def stop_voice_conversation():
649
 
650
  session_id = voice_conversation_state["session_id"]
651
  if session_id:
652
- mcp_server.run_async(mcp_server.elevenlabs_service.end_conversation(session_id))
653
 
654
  voice_conversation_state["active"] = False
655
  voice_conversation_state["session_id"] = None
@@ -669,52 +465,6 @@ def stop_voice_conversation():
669
  voice_conversation_state["transcript"]
670
  )
671
 
672
-
673
- def send_voice_message(message):
674
- """Send a text message in voice conversation"""
675
- try:
676
- if not voice_conversation_state["active"]:
677
- return ("Please start a conversation first", "", format_transcript(voice_conversation_state["transcript"]))
678
-
679
- if not message or not message.strip():
680
- return ("Please enter a message", message, format_transcript(voice_conversation_state["transcript"]))
681
-
682
- session_id = voice_conversation_state["session_id"]
683
- voice_conversation_state["transcript"].append({"role": "user", "content": message})
684
-
685
- result = mcp_server.run_async(mcp_server.voice_tool.voice_qa(message, session_id))
686
-
687
- if result.get("success"):
688
- answer = result.get("answer", "No response")
689
- voice_conversation_state["transcript"].append({"role": "assistant", "content": answer})
690
- return ("βœ… Response received", "", format_transcript(voice_conversation_state["transcript"]))
691
- else:
692
- return (f"❌ Error: {result.get('error')}", message, format_transcript(voice_conversation_state["transcript"]))
693
- except Exception as e:
694
- logger.error(f"Error sending message: {str(e)}")
695
- return (f"❌ Error: {str(e)}", message, format_transcript(voice_conversation_state["transcript"]))
696
-
697
- def format_transcript(transcript):
698
- """Format conversation transcript for display"""
699
- if not transcript:
700
- return "No conversation yet. Start talking to the AI librarian!"
701
-
702
- formatted = ""
703
- for msg in transcript:
704
- role = msg["role"]
705
- content = msg["content"]
706
- if role == "user":
707
- formatted += f"πŸ‘€ **You:** {content}\n\n"
708
- else:
709
- formatted += f"πŸ€– **AI Librarian:** {content}\n\n"
710
- formatted += "---\n\n"
711
- return formatted
712
-
713
- def clear_voice_transcript():
714
- """Clear conversation transcript"""
715
- voice_conversation_state["transcript"] = []
716
- return ""
717
-
718
  def send_voice_message_v6(message, chat_history):
719
  """Send message in voice conversation - Gradio 6 format"""
720
  try:
@@ -726,11 +476,11 @@ def send_voice_message_v6(message, chat_history):
726
 
727
  session_id = voice_conversation_state["session_id"]
728
 
729
- # Add user message in Gradio 6 format
730
  chat_history.append({"role": "user", "content": message})
731
 
732
  # Get AI response
733
- result = mcp_server.run_async(mcp_server.voice_tool.voice_qa(message, session_id))
734
 
735
  if result.get("success"):
736
  answer = result.get("answer", "No response")
@@ -750,6 +500,10 @@ def send_voice_message_v6(message, chat_history):
750
  })
751
  return chat_history, ""
752
 
 
 
 
 
753
  def generate_podcast_ui(doc_ids, style, duration, voice1, voice2):
754
  """UI wrapper for podcast generation"""
755
  try:
@@ -758,8 +512,8 @@ def generate_podcast_ui(doc_ids, style, duration, voice1, voice2):
758
 
759
  logger.info(f"Generating podcast: {len(doc_ids)} docs, {style}, {duration}min")
760
 
761
- result = mcp_server.run_async(
762
- mcp_server.generate_podcast_async(
763
  document_ids=doc_ids,
764
  style=style,
765
  duration_minutes=int(duration),
@@ -787,41 +541,42 @@ def generate_podcast_ui(doc_ids, style, duration, voice1, voice2):
787
  logger.error(f"Podcast UI error: {str(e)}")
788
  return (f"❌ Error: {str(e)}", None, "An error occurred", "")
789
 
 
 
 
 
790
  def load_dashboard_stats():
791
- """Load dashboard statistics for the UI"""
792
  try:
793
- # Get document list
794
- docs_result = mcp_server.list_documents_sync(limit=1000)
795
- doc_count = 0
796
  total_chunks = 0
797
  total_size = 0
798
  recent_data = []
799
 
800
- if docs_result.get("success"):
801
- documents = docs_result.get("documents", [])
802
- doc_count = len(documents)
803
- total_chunks = sum(doc.get("metadata", {}).get("chunk_count", 0) for doc in documents)
804
- total_size = sum(doc.get("file_size", 0) for doc in documents)
805
  storage_mb = round(total_size / (1024 * 1024), 2) if total_size > 0 else 0.0
806
 
807
  # Get recent 5 documents
808
  recent = documents[:5]
809
  recent_data = [
810
  [
811
- doc.get("filename", "Unknown"),
812
- doc.get("doc_type", "unknown"),
813
- doc.get("created_at", "")[:10] if doc.get("created_at") else "N/A",
814
- f"{doc.get('file_size', 0)} bytes"
815
  ]
816
  for doc in recent
817
  ]
818
  else:
819
  storage_mb = 0.0
820
 
821
- # Service status indicators
822
- vector_stat = "βœ… Online" if getattr(mcp_server, "vector_store", None) else "❌ Offline"
823
- llm_stat = "βœ… Ready" if getattr(mcp_server, "llm_service", None) else "❌ Offline"
824
- voice_stat = "βœ… Ready" if (getattr(mcp_server, "elevenlabs_service", None) and mcp_server.elevenlabs_service.is_available()) else "⚠️ Configure API Key"
825
 
826
  return (
827
  doc_count,
@@ -836,8 +591,14 @@ def load_dashboard_stats():
836
  logger.error(f"Error loading dashboard stats: {str(e)}")
837
  return (0, 0, 0.0, [], "❌ Error", "❌ Error", "❌ Error")
838
 
 
 
 
 
839
  def create_gradio_interface():
840
- # Create custom theme with modern aesthetics
 
 
841
  custom_theme = gr.themes.Soft(
842
  primary_hue=gr.themes.colors.indigo,
843
  secondary_hue=gr.themes.colors.blue,
@@ -881,16 +642,19 @@ def create_gradio_interface():
881
 
882
  πŸ”— **For MCP Integration** (Claude Desktop, Cline, etc.):
883
  Add this endpoint to your MCP client configuration:
884
-
 
 
 
885
  πŸ’‘ **Powered by:** OpenAI, Mistral AI, Claude, ElevenLabs, LlamaIndex
886
  """)
 
887
  with gr.Tabs():
888
- # Dashboard Tab - New Landing Page
889
  with gr.Tab("🏠 Dashboard"):
890
  gr.Markdown("# Welcome to Your AI Library Assistant")
891
  gr.Markdown("*Your intelligent document management and analysis platform powered by AI*")
892
 
893
- # Quick Stats Section
894
  gr.Markdown("## πŸ“Š Quick Stats")
895
  with gr.Row():
896
  total_docs = gr.Number(
@@ -912,7 +676,6 @@ def create_gradio_interface():
912
  container=True
913
  )
914
 
915
- # Recent Activity Section
916
  gr.Markdown("## πŸ“Š Recent Activity")
917
  with gr.Group():
918
  recent_docs = gr.Dataframe(
@@ -924,8 +687,7 @@ def create_gradio_interface():
924
  label="Recently Added Documents"
925
  )
926
 
927
- # System Status Section
928
- gr.Markdown("## οΏ½ System Status")
929
  with gr.Row():
930
  vector_status = gr.Textbox(
931
  label="Vector Store",
@@ -946,16 +708,29 @@ def create_gradio_interface():
946
  container=True
947
  )
948
 
 
949
  with gr.Tab("πŸ“š Document Library"):
950
  with gr.Row():
951
  with gr.Column():
952
  gr.Markdown("### Your Document Collection")
953
- document_list_display = gr.Textbox(label="Documents in Library", value=get_document_list(), lines=20, interactive=False)
 
 
 
 
 
954
  refresh_btn_library = gr.Button("πŸ”„ Refresh Library", variant="secondary")
955
- delete_doc_dropdown_visible = gr.Dropdown(label="Select Document to Delete", choices=get_document_choices(), value=None, interactive=True, allow_custom_value=False)
 
 
 
 
 
 
956
  delete_btn = gr.Button("πŸ—‘οΈ Delete Selected Document", variant="stop")
957
  delete_output_display = gr.Textbox(label="Delete Status", visible=True)
958
 
 
959
  with gr.Tab("πŸ“„ Upload Documents"):
960
  gr.Markdown("""
961
  ### πŸ“₯ Add Documents to Library
@@ -974,7 +749,6 @@ def create_gradio_interface():
974
  )
975
 
976
  upload_btn_process = gr.Button("πŸš€ Upload & Process", variant="primary", size="lg")
977
-
978
 
979
  with gr.Group():
980
  upload_output_display = gr.Textbox(
@@ -990,7 +764,7 @@ def create_gradio_interface():
990
  visible=False
991
  )
992
 
993
-
994
  with gr.Tab("πŸ” Search Documents"):
995
  gr.Markdown("""
996
  ### πŸ”Ž Semantic Search
@@ -1024,8 +798,8 @@ def create_gradio_interface():
1024
  placeholder="Search results will appear here...",
1025
  show_copy_button=True
1026
  )
1027
-
1028
 
 
1029
  with gr.Tab("πŸ“ Content Studio"):
1030
  gr.Markdown("""
1031
  ### 🎨 Create & Analyze Content
@@ -1034,7 +808,6 @@ def create_gradio_interface():
1034
 
1035
  with gr.Row():
1036
  with gr.Column(scale=2):
1037
- # Source Selection with Group
1038
  with gr.Group():
1039
  gr.Markdown("#### πŸ“„ Content Source")
1040
  doc_dropdown_content = gr.Dropdown(
@@ -1054,7 +827,6 @@ def create_gradio_interface():
1054
  info="For outlines, enter a topic. For other tasks, paste text to analyze."
1055
  )
1056
 
1057
- # Task Configuration with Group
1058
  with gr.Group():
1059
  gr.Markdown("#### πŸ› οΈ Task Configuration")
1060
  task_dropdown = gr.Dropdown(
@@ -1069,7 +841,6 @@ def create_gradio_interface():
1069
  info="Choose the type of analysis to perform"
1070
  )
1071
 
1072
- # Dynamic Options with Accordion
1073
  with gr.Accordion("βš™οΈ Advanced Options", open=False):
1074
  summary_style_opt = gr.Dropdown(
1075
  label="Summary Style",
@@ -1135,7 +906,6 @@ def create_gradio_interface():
1135
  run_task_btn = gr.Button("πŸš€ Run Task", variant="primary", size="lg")
1136
 
1137
  with gr.Column(scale=3):
1138
- # Results with copy button and Group
1139
  with gr.Group():
1140
  gr.Markdown("#### πŸ“Š Result")
1141
  content_output_display = gr.Textbox(
@@ -1146,7 +916,7 @@ def create_gradio_interface():
1146
  container=False
1147
  )
1148
 
1149
- # Event Handlers
1150
  task_dropdown.change(
1151
  fn=update_options_visibility,
1152
  inputs=[task_dropdown],
@@ -1168,17 +938,36 @@ def create_gradio_interface():
1168
  outputs=[content_output_display]
1169
  )
1170
 
 
1171
  with gr.Tab("🏷️ Generate Tags"):
1172
  with gr.Row():
1173
  with gr.Column():
1174
  gr.Markdown("### Generate Document Tags")
1175
- doc_dropdown_tag_visible = gr.Dropdown(label="Select Document to Tag", choices=get_document_choices(), value=None, interactive=True, allow_custom_value=False)
1176
- tag_text_input = gr.Textbox(label="Or Paste Text to Generate Tags", placeholder="Paste any text here to generate tags...", lines=8)
1177
- max_tags_slider = gr.Slider(label="Number of Tags", minimum=3, maximum=15, value=5, step=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
1178
  tag_btn_action = gr.Button("🏷️ Generate Tags", variant="primary", size="lg")
1179
  with gr.Column():
1180
- tag_output_display = gr.Textbox(label="Generated Tags", lines=10, placeholder="Tags will appear here...")
 
 
 
 
1181
 
 
1182
  with gr.Tab("πŸŽ™οΈ Voice Assistant"):
1183
  gr.Markdown("""
1184
  ### πŸ—£οΈ Talk to Your AI Librarian
@@ -1191,7 +980,6 @@ def create_gradio_interface():
1191
 
1192
  with gr.Row():
1193
  with gr.Column(scale=2):
1194
- # Status and Controls
1195
  with gr.Group():
1196
  voice_status_display = gr.Textbox(
1197
  label="Status",
@@ -1204,7 +992,6 @@ def create_gradio_interface():
1204
  start_voice_btn = gr.Button("🎀 Start Conversation", variant="primary", size="lg")
1205
  stop_voice_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", interactive=False)
1206
 
1207
- # Message Input
1208
  with gr.Group():
1209
  gr.Markdown("#### πŸ’¬ Send Message")
1210
  voice_input_text = gr.Textbox(
@@ -1217,7 +1004,6 @@ def create_gradio_interface():
1217
  send_voice_btn = gr.Button("πŸ“€ Send", variant="secondary")
1218
 
1219
  with gr.Column(scale=3):
1220
- # Chat Interface with Gradio 6 Chatbot
1221
  with gr.Group():
1222
  voice_chatbot = gr.Chatbot(
1223
  label="Conversation",
@@ -1256,6 +1042,7 @@ def create_gradio_interface():
1256
  outputs=[voice_chatbot]
1257
  )
1258
 
 
1259
  with gr.Tab("🎧 Podcast Studio"):
1260
  gr.Markdown("""
1261
  ### πŸŽ™οΈ AI-Powered Podcast Generation
@@ -1268,7 +1055,6 @@ def create_gradio_interface():
1268
 
1269
  with gr.Row():
1270
  with gr.Column(scale=2):
1271
- # Configuration Panel
1272
  with gr.Group():
1273
  gr.Markdown("#### πŸ“š Select Content")
1274
 
@@ -1329,7 +1115,6 @@ def create_gradio_interface():
1329
  )
1330
 
1331
  with gr.Column(scale=3):
1332
- # Output Panel
1333
  with gr.Group():
1334
  gr.Markdown("#### 🎡 Generated Podcast")
1335
 
@@ -1364,17 +1149,27 @@ def create_gradio_interface():
1364
  ]
1365
  )
1366
 
 
1367
  with gr.Tab("❓ Ask Questions"):
1368
  with gr.Row():
1369
  with gr.Column():
1370
  gr.Markdown("""### Ask Questions About Your Documents
1371
  The AI will search through all your uploaded documents to find relevant information
1372
  and provide comprehensive answers with sources.""")
1373
- qa_question_input = gr.Textbox(label="Your Question", placeholder="Ask anything about your documents...", lines=3)
 
 
 
 
1374
  qa_btn_action = gr.Button("❓ Get Answer", variant="primary", size="lg")
1375
  with gr.Column():
1376
- qa_output_display = gr.Textbox(label="AI Answer", lines=20, placeholder="Answer will appear here with sources...")
 
 
 
 
1377
 
 
1378
  all_dropdowns_to_update = [delete_doc_dropdown_visible, doc_dropdown_content, doc_dropdown_tag_visible]
1379
 
1380
  refresh_outputs = [document_list_display] + [dd for dd in all_dropdowns_to_update]
@@ -1390,7 +1185,6 @@ def create_gradio_interface():
1390
  tag_btn_action.click(generate_tags_for_document, inputs=[doc_dropdown_tag_visible, tag_text_input, max_tags_slider], outputs=[tag_output_display])
1391
  qa_btn_action.click(ask_question, inputs=[qa_question_input], outputs=[qa_output_display])
1392
 
1393
-
1394
  # Load dashboard stats on interface load
1395
  interface.load(
1396
  fn=load_dashboard_stats,
@@ -1398,8 +1192,9 @@ def create_gradio_interface():
1398
  )
1399
 
1400
  interface.load(fn=refresh_library, outputs=refresh_outputs)
1401
- return interface
 
1402
 
1403
  if __name__ == "__main__":
1404
  gradio_interface = create_gradio_interface()
1405
- gradio_interface.launch()
 
3
  import asyncio
4
  import json
5
  import logging
 
6
  import uuid
7
  from datetime import datetime
8
  from pathlib import Path
 
12
  # Apply nest_asyncio to handle nested event loops in Gradio
13
  nest_asyncio.apply()
14
 
15
+ # Import services and tools from mcp_server
16
+ from mcp_server import (
17
+ # Services
18
+ vector_store_service,
19
+ document_store_service,
20
+ embedding_service_instance,
21
+ llm_service_instance,
22
+ ocr_service_instance,
23
+ llamaindex_service_instance,
24
+ elevenlabs_service_instance,
25
+ podcast_generator_instance,
26
+ # Tools
27
+ ingestion_tool_instance,
28
+ search_tool_instance,
29
+ generative_tool_instance,
30
+ voice_tool_instance,
31
+ podcast_tool_instance
32
+ )
33
 
34
  # Setup logging
35
  logging.basicConfig(level=logging.INFO)
36
  logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # ============================================================================
39
+ # HELPER FUNCTIONS FOR ASYNC EXECUTION
40
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
def run_async(coro):
    """Run a coroutine to completion from synchronous code and return its result.

    Gradio callbacks in this module are plain functions, while the services
    they call are coroutines; this helper bridges the two.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns. Exceptions raised by the coroutine
        propagate to the caller.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current loop for this thread (typical for worker threads).
        loop = None

    # A loop that was closed but never unset would make run_until_complete()
    # fail with "Event loop is closed"; replace it like a missing one.
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    if loop.is_running():
        # Already inside a running loop on this thread: execute the coroutine
        # on a fresh loop in a helper thread and block until it finishes.
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(asyncio.run, coro).result()

    return loop.run_until_complete(coro)
56
+
57
+ # ============================================================================
58
+ # DOCUMENT MANAGEMENT FUNCTIONS
59
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def get_document_list():
62
+ """Get formatted list of documents"""
63
  try:
64
+ documents = run_async(document_store_service.list_documents(limit=100))
65
+ if documents:
66
+ doc_list_str = "πŸ“š Documents in Library:\n\n"
67
+ for i, doc in enumerate(documents, 1):
68
+ doc_list_str += f"{i}. {doc.filename} (ID: {doc.id[:8]}...)\n"
69
+ doc_list_str += f" Type: {doc.doc_type}, Size: {doc.file_size} bytes\n"
70
+ if doc.metadata and doc.metadata.get('tags'):
71
+ doc_list_str += f" Tags: {', '.join(doc.metadata['tags'])}\n"
72
+ doc_list_str += f" Created: {doc.created_at[:10]}\n\n"
73
+ return doc_list_str
 
 
 
74
  else:
75
+ return "No documents in library yet. Upload some documents to get started!"
76
  except Exception as e:
77
+ logger.error(f"Error loading documents: {str(e)}")
78
  return f"Error: {str(e)}"
79
 
80
  def get_document_choices():
81
+ """Get document choices for dropdowns"""
82
  try:
83
+ documents = run_async(document_store_service.list_documents(limit=100))
84
+ if documents:
85
+ choices = [(f"{doc.filename} ({doc.id[:8]}...)", doc.id) for doc in documents]
86
  logger.info(f"Generated {len(choices)} document choices")
87
  return choices
88
  return []
 
91
  return []
92
 
93
  def refresh_library():
94
+ """Refresh library and update all dropdowns"""
95
  doc_list_refreshed = get_document_list()
96
  doc_choices_refreshed = get_document_choices()
97
  logger.info(f"Refreshing library. Found {len(doc_choices_refreshed)} choices.")
 
103
  )
104
 
105
  def upload_and_process_file(file):
106
+ """Upload and process a document file"""
107
  if file is None:
108
  doc_list_initial = get_document_list()
109
  doc_choices_initial = get_document_choices()
 
115
  )
116
  try:
117
  file_path = file.name if hasattr(file, 'name') else str(file)
118
+ file_type = Path(file_path).suffix.lower().strip('.')
119
  logger.info(f"Processing file: {file_path}, type: {file_type}")
120
+
121
+ result = run_async(ingestion_tool_instance.process_document(file_path, file_type))
122
 
123
  doc_list_updated = get_document_list()
124
  doc_choices_updated = get_document_choices()
 
152
  gr.update(choices=doc_choices_error)
153
  )
154
 
155
def _library_state_with_status(status):
    """Build the delete handler's 5-tuple: status text, fresh listing, three dropdown updates."""
    doc_list = get_document_list()
    doc_choices = get_document_choices()
    return (
        status,
        doc_list,
        gr.update(choices=doc_choices),
        gr.update(choices=doc_choices),
        gr.update(choices=doc_choices),
    )


def delete_document_from_library(document_id):
    """Delete a document from the document store and the vector store.

    Args:
        document_id: ID of the document to remove. A falsy value means
            nothing was selected.

    Returns:
        tuple: ``(status message, document listing, dropdown update,
        dropdown update, dropdown update)``. The listing and the dropdown
        choices are re-read after the attempt, whether it succeeded or not.
    """
    if not document_id:
        return _library_state_with_status("No document selected to delete.")

    try:
        delete_doc_store_result = run_async(document_store_service.delete_document(document_id))
        delete_vec_store_result = run_async(vector_store_service.delete_document(document_id))

        if delete_doc_store_result:
            store_part = f"πŸ—‘οΈ Document {document_id[:8]}... deleted from document store. "
        else:
            store_part = f"❌ Failed to delete document {document_id[:8]}... from document store. "

        if delete_vec_store_result:
            vector_part = "Embeddings deleted from vector store."
        else:
            # A falsy result is reported as a failure, but the message allows
            # for the document simply having had no embeddings.
            vector_part = "Failed to delete embeddings from vector store (or no embeddings existed)."

        return _library_state_with_status(store_part + vector_part)
    except Exception as e:
        logger.error(f"Error deleting document: {str(e)}")
        return _library_state_with_status(f"❌ Error deleting document: {str(e)}")
202
+
203
+ # ============================================================================
204
+ # SEARCH FUNCTIONS
205
+ # ============================================================================
206
+
207
  def perform_search(query, top_k):
208
+ """Perform semantic search"""
209
  if not query.strip():
210
  return "Please enter a search query"
211
  try:
212
+ results = run_async(search_tool_instance.search(query, int(top_k)))
213
+ if results:
214
+ output_str = f"πŸ” Found {len(results)} results for: '{query}'\n\n"
215
+ for i, result in enumerate(results, 1):
216
+ output_str += f"Result {i}:\n"
217
+ output_str += f"πŸ“Š Relevance Score: {result.score:.3f}\n"
218
+ output_str += f"πŸ“„ Content: {result.content[:300]}...\n"
219
+ if result.metadata and 'document_filename' in result.metadata:
220
+ output_str += f"πŸ“ Source: {result.metadata['document_filename']}\n"
221
+ output_str += f"πŸ”— Document ID: {result.document_id}\n"
222
+ output_str += "-" * 80 + "\n\n"
223
+ return output_str
 
 
 
224
  else:
225
+ return f"No results found for: '{query}'\n\nMake sure you have uploaded relevant documents first."
226
  except Exception as e:
227
  logger.error(f"Search error: {str(e)}")
228
  return f"❌ Error: {str(e)}"
229
 
230
+ # ============================================================================
231
+ # CONTENT STUDIO FUNCTIONS
232
+ # ============================================================================
233
+
234
  def update_options_visibility(task):
235
  """Update visibility of options based on selected task"""
236
  return (
237
+ gr.update(visible=task == "Summarize"),
238
+ gr.update(visible=task == "Generate Outline"),
239
+ gr.update(visible=task == "Generate Outline"),
240
+ gr.update(visible=task == "Explain Concept"),
241
+ gr.update(visible=task == "Explain Concept"),
242
+ gr.update(visible=task == "Paraphrase"),
243
+ gr.update(visible=task == "Categorize"),
244
+ gr.update(visible=task in ["Key Insights", "Generate Questions"]),
245
+ gr.update(visible=task == "Generate Questions")
246
  )
247
 
248
async def get_document_content(document_id: str) -> Optional[str]:
    """Fetch the stored text of a document.

    Args:
        document_id: ID to look up in ``document_store_service``.

    Returns:
        The document's ``content``, or ``None`` when the lookup yields
        nothing or raises (the error is logged, not propagated).
    """
    try:
        record = await document_store_service.get_document(document_id)
        return record.content if record else None
    except Exception as e:
        logger.error(f"Error getting document content: {str(e)}")
        return None
258
+
259
  def execute_content_task(task, doc_choice, custom_text,
260
  summary_style, outline_sections, outline_detail,
261
  explain_audience, explain_length,
262
  paraphrase_style, categories_input,
263
  num_items, question_type):
264
+ """Execute content analysis tasks"""
265
  try:
266
  # Get content
267
  content = ""
268
  if custom_text and custom_text.strip():
269
  content = custom_text
270
  elif doc_choice and doc_choice != "none":
271
+ content = run_async(get_document_content(doc_choice))
272
  if not content:
273
  return "❌ Error: Document not found or empty"
274
  else:
275
  if task == "Generate Outline":
276
+ content = custom_text
277
  else:
278
  return "⚠️ Please select a document or enter text"
279
 
280
  # Execute task
 
 
281
  if task == "Summarize":
282
+ summary = run_async(generative_tool_instance.summarize(content, summary_style))
283
+ return f"πŸ“ Summary ({summary_style}):\n\n{summary}"
 
284
 
285
  elif task == "Generate Outline":
286
+ outline = run_async(generative_tool_instance.generate_outline(content, int(outline_sections), outline_detail))
287
+ return f"πŸ“ Outline for '{content}':\n\n{outline}"
 
 
288
 
289
  elif task == "Explain Concept":
290
+ explanation = run_async(generative_tool_instance.explain_concept(content, explain_audience, explain_length))
291
+ return f"πŸ’‘ Explanation ({explain_audience}):\n\n{explanation}"
 
 
292
 
293
  elif task == "Paraphrase":
294
+ paraphrase = run_async(generative_tool_instance.paraphrase_text(content, paraphrase_style))
295
+ return f"πŸ”„ Paraphrased Text ({paraphrase_style}):\n\n{paraphrase}"
 
296
 
297
  elif task == "Categorize":
298
  categories = [c.strip() for c in categories_input.split(',')] if categories_input else []
299
+ category = run_async(generative_tool_instance.categorize(content, categories))
300
+ return f"🏷️ Category:\n\n{category}"
 
301
 
302
  elif task == "Key Insights":
303
+ insights = run_async(generative_tool_instance.extract_key_insights(content, int(num_items)))
304
+ return f"πŸ” Key Insights:\n\n" + "\n".join([f"- {insight}" for insight in insights])
 
305
 
306
  elif task == "Generate Questions":
307
+ questions = run_async(generative_tool_instance.generate_questions(content, question_type, int(num_items)))
308
+ return f"❓ Generated Questions ({question_type}):\n\n" + "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
 
309
 
310
  elif task == "Extract Key Info":
311
+ info = run_async(llm_service_instance.extract_key_information(content))
312
+ return f"πŸ“Š Key Information:\n\n{json.dumps(info, indent=2)}"
 
313
 
 
 
 
314
  return "βœ… Task completed"
315
 
316
  except Exception as e:
 
318
  return f"❌ Error: {str(e)}"
319
 
320
  def generate_tags_for_document(doc_choice, custom_text, max_tags):
321
+ """Generate tags for document or text"""
322
  try:
323
+ logger.info(f"Generate tags called with doc_choice: {doc_choice}")
324
  document_id = doc_choice if doc_choice and doc_choice != "none" and doc_choice != "" else None
325
 
326
  if custom_text and custom_text.strip():
327
  logger.info("Using custom text for tag generation")
328
+ tags = run_async(generative_tool_instance.generate_tags(custom_text, int(max_tags)))
329
+ content_length = len(custom_text)
330
+ doc_id_display = None
331
  elif document_id:
332
  logger.info(f"Generating tags for document: {document_id}")
333
+ content = run_async(get_document_content(document_id))
334
+ if not content:
335
+ return "❌ Error: Document not found or empty"
336
+ tags = run_async(generative_tool_instance.generate_tags(content, int(max_tags)))
337
+ if tags:
338
+ run_async(document_store_service.update_document_metadata(document_id, {"tags": tags}))
339
+ content_length = len(content)
340
+ doc_id_display = document_id
341
  else:
342
  return "Please select a document from the dropdown or enter text to generate tags"
343
 
344
+ if tags:
345
+ tags_str = ", ".join(tags)
346
  output_str = f"🏷️ Generated Tags:\n\n{tags_str}\n\n"
347
  output_str += f"πŸ“Š Statistics:\n"
348
+ output_str += f"- Content length: {content_length} characters\n"
349
+ output_str += f"- Number of tags: {len(tags)}\n"
350
+ if doc_id_display:
351
+ output_str += f"- Document ID: {doc_id_display}\n"
352
  output_str += f"\nβœ… Tags have been saved to the document."
353
  return output_str
354
  else:
355
+ return "❌ Tag generation failed"
356
  except Exception as e:
357
  logger.error(f"Tag generation error: {str(e)}")
358
  return f"❌ Error: {str(e)}"
359
 
360
  def ask_question(question):
361
+ """Ask question with RAG"""
362
  if not question.strip():
363
  return "Please enter a question"
364
  try:
365
+ search_results = run_async(search_tool_instance.search(question, top_k=5))
366
+ if not search_results:
367
+ return "❌ No relevant context found in your documents. Please make sure you have uploaded relevant documents."
368
+
369
+ answer = run_async(generative_tool_instance.answer_question(question, search_results))
370
+
371
+ output_str = f"❓ Question: {question}\n\n"
372
+ output_str += f"πŸ’‘ Answer:\n{answer}\n\n"
373
+ output_str += f"🎯 Confidence: {'high' if len(search_results) >= 3 else 'medium'}\n\n"
374
+ output_str += f"πŸ“š Sources Used ({len(search_results)}):\n"
375
+ for i, source in enumerate(search_results, 1):
376
+ filename = source.metadata.get('document_filename', 'Unknown') if source.metadata else 'Unknown'
377
+ output_str += f"\n{i}. πŸ“„ {filename}\n"
378
+ output_str += f" πŸ“ Excerpt: {source.content[:150]}...\n"
379
+ output_str += f" πŸ“Š Relevance: {source.score:.3f}\n"
380
+ return output_str
381
  except Exception as e:
382
  return f"❌ Error: {str(e)}"
383
 
384
+ # ============================================================================
385
+ # VOICE ASSISTANT FUNCTIONS
386
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  voice_conversation_state = {
389
  "session_id": None,
390
  "active": False,
 
394
  def start_voice_conversation():
395
  """Start a new voice conversation session"""
396
  try:
397
+ if not elevenlabs_service_instance.is_available():
398
  return (
399
  "⚠️ Voice assistant not configured. Please set ELEVENLABS_API_KEY and ELEVENLABS_AGENT_ID in .env",
400
  gr.update(interactive=False),
401
  gr.update(interactive=True),
402
+ []
403
  )
404
 
405
  session_id = str(uuid.uuid4())
406
+ result = run_async(elevenlabs_service_instance.start_conversation(session_id))
407
 
408
  if result.get("success"):
409
  voice_conversation_state["session_id"] = session_id
 
432
  []
433
  )
434
 
 
435
  def stop_voice_conversation():
436
  """Stop active voice conversation"""
437
  try:
 
445
 
446
  session_id = voice_conversation_state["session_id"]
447
  if session_id:
448
+ run_async(elevenlabs_service_instance.end_conversation(session_id))
449
 
450
  voice_conversation_state["active"] = False
451
  voice_conversation_state["session_id"] = None
 
465
  voice_conversation_state["transcript"]
466
  )
467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  def send_voice_message_v6(message, chat_history):
469
  """Send message in voice conversation - Gradio 6 format"""
470
  try:
 
476
 
477
  session_id = voice_conversation_state["session_id"]
478
 
479
+ # Add user message
480
  chat_history.append({"role": "user", "content": message})
481
 
482
  # Get AI response
483
+ result = run_async(voice_tool_instance.voice_qa(message, session_id))
484
 
485
  if result.get("success"):
486
  answer = result.get("answer", "No response")
 
500
  })
501
  return chat_history, ""
502
 
503
+ # ============================================================================
504
+ # PODCAST GENERATION FUNCTIONS
505
+ # ============================================================================
506
+
507
  def generate_podcast_ui(doc_ids, style, duration, voice1, voice2):
508
  """UI wrapper for podcast generation"""
509
  try:
 
512
 
513
  logger.info(f"Generating podcast: {len(doc_ids)} docs, {style}, {duration}min")
514
 
515
+ result = run_async(
516
+ podcast_tool_instance.generate_podcast(
517
  document_ids=doc_ids,
518
  style=style,
519
  duration_minutes=int(duration),
 
541
  logger.error(f"Podcast UI error: {str(e)}")
542
  return (f"❌ Error: {str(e)}", None, "An error occurred", "")
543
 
544
+ # ============================================================================
545
+ # DASHBOARD FUNCTIONS
546
+ # ============================================================================
547
+
548
  def load_dashboard_stats():
549
+ """Load dashboard statistics"""
550
  try:
551
+ documents = run_async(document_store_service.list_documents(limit=1000))
552
+ doc_count = len(documents) if documents else 0
 
553
  total_chunks = 0
554
  total_size = 0
555
  recent_data = []
556
 
557
+ if documents:
558
+ total_chunks = sum(doc.metadata.get("chunk_count", 0) for doc in documents if doc.metadata)
559
+ total_size = sum(doc.file_size for doc in documents)
 
 
560
  storage_mb = round(total_size / (1024 * 1024), 2) if total_size > 0 else 0.0
561
 
562
  # Get recent 5 documents
563
  recent = documents[:5]
564
  recent_data = [
565
  [
566
+ doc.filename,
567
+ doc.doc_type,
568
+ doc.created_at[:10] if doc.created_at else "N/A",
569
+ f"{doc.file_size} bytes"
570
  ]
571
  for doc in recent
572
  ]
573
  else:
574
  storage_mb = 0.0
575
 
576
+ # Service status
577
+ vector_stat = "βœ… Online" if vector_store_service else "❌ Offline"
578
+ llm_stat = "βœ… Ready" if llm_service_instance else "❌ Offline"
579
+ voice_stat = "βœ… Ready" if (elevenlabs_service_instance and elevenlabs_service_instance.is_available()) else "⚠️ Configure API Key"
580
 
581
  return (
582
  doc_count,
 
591
  logger.error(f"Error loading dashboard stats: {str(e)}")
592
  return (0, 0, 0.0, [], "❌ Error", "❌ Error", "❌ Error")
593
 
594
+ # ============================================================================
595
+ # GRADIO UI CREATION
596
+ # ============================================================================
597
+
598
  def create_gradio_interface():
599
+ """Create the Gradio interface"""
600
+
601
+ # Create custom theme
602
  custom_theme = gr.themes.Soft(
603
  primary_hue=gr.themes.colors.indigo,
604
  secondary_hue=gr.themes.colors.blue,
 
642
 
643
  πŸ”— **For MCP Integration** (Claude Desktop, Cline, etc.):
644
  Add this endpoint to your MCP client configuration:
645
+ ```
646
+ https://nihal2000-aidigitiallibrary assistant.hf.space/gradio_api/mcp/sse
647
+ ```
648
+
649
  πŸ’‘ **Powered by:** OpenAI, Mistral AI, Claude, ElevenLabs, LlamaIndex
650
  """)
651
+
652
  with gr.Tabs():
653
+ # Dashboard Tab
654
  with gr.Tab("🏠 Dashboard"):
655
  gr.Markdown("# Welcome to Your AI Library Assistant")
656
  gr.Markdown("*Your intelligent document management and analysis platform powered by AI*")
657
 
 
658
  gr.Markdown("## πŸ“Š Quick Stats")
659
  with gr.Row():
660
  total_docs = gr.Number(
 
676
  container=True
677
  )
678
 
 
679
  gr.Markdown("## πŸ“Š Recent Activity")
680
  with gr.Group():
681
  recent_docs = gr.Dataframe(
 
687
  label="Recently Added Documents"
688
  )
689
 
690
+ gr.Markdown("## βš™οΈ System Status")
 
691
  with gr.Row():
692
  vector_status = gr.Textbox(
693
  label="Vector Store",
 
708
  container=True
709
  )
710
 
711
+ # Document Library Tab
712
  with gr.Tab("πŸ“š Document Library"):
713
  with gr.Row():
714
  with gr.Column():
715
  gr.Markdown("### Your Document Collection")
716
+ document_list_display = gr.Textbox(
717
+ label="Documents in Library",
718
+ value=get_document_list(),
719
+ lines=20,
720
+ interactive=False
721
+ )
722
  refresh_btn_library = gr.Button("πŸ”„ Refresh Library", variant="secondary")
723
+ delete_doc_dropdown_visible = gr.Dropdown(
724
+ label="Select Document to Delete",
725
+ choices=get_document_choices(),
726
+ value=None,
727
+ interactive=True,
728
+ allow_custom_value=False
729
+ )
730
  delete_btn = gr.Button("πŸ—‘οΈ Delete Selected Document", variant="stop")
731
  delete_output_display = gr.Textbox(label="Delete Status", visible=True)
732
 
733
+ # Upload Documents Tab
734
  with gr.Tab("πŸ“„ Upload Documents"):
735
  gr.Markdown("""
736
  ### πŸ“₯ Add Documents to Library
 
749
  )
750
 
751
  upload_btn_process = gr.Button("πŸš€ Upload & Process", variant="primary", size="lg")
 
752
 
753
  with gr.Group():
754
  upload_output_display = gr.Textbox(
 
764
  visible=False
765
  )
766
 
767
+ # Search Documents Tab
768
  with gr.Tab("πŸ” Search Documents"):
769
  gr.Markdown("""
770
  ### πŸ”Ž Semantic Search
 
798
  placeholder="Search results will appear here...",
799
  show_copy_button=True
800
  )
 
801
 
802
+ # Content Studio Tab
803
  with gr.Tab("πŸ“ Content Studio"):
804
  gr.Markdown("""
805
  ### 🎨 Create & Analyze Content
 
808
 
809
  with gr.Row():
810
  with gr.Column(scale=2):
 
811
  with gr.Group():
812
  gr.Markdown("#### πŸ“„ Content Source")
813
  doc_dropdown_content = gr.Dropdown(
 
827
  info="For outlines, enter a topic. For other tasks, paste text to analyze."
828
  )
829
 
 
830
  with gr.Group():
831
  gr.Markdown("#### πŸ› οΈ Task Configuration")
832
  task_dropdown = gr.Dropdown(
 
841
  info="Choose the type of analysis to perform"
842
  )
843
 
 
844
  with gr.Accordion("βš™οΈ Advanced Options", open=False):
845
  summary_style_opt = gr.Dropdown(
846
  label="Summary Style",
 
906
  run_task_btn = gr.Button("πŸš€ Run Task", variant="primary", size="lg")
907
 
908
  with gr.Column(scale=3):
 
909
  with gr.Group():
910
  gr.Markdown("#### πŸ“Š Result")
911
  content_output_display = gr.Textbox(
 
916
  container=False
917
  )
918
 
919
+ # Event Handlers for Content Studio
920
  task_dropdown.change(
921
  fn=update_options_visibility,
922
  inputs=[task_dropdown],
 
938
  outputs=[content_output_display]
939
  )
940
 
941
+ # Generate Tags Tab
942
  with gr.Tab("🏷️ Generate Tags"):
943
  with gr.Row():
944
  with gr.Column():
945
  gr.Markdown("### Generate Document Tags")
946
+ doc_dropdown_tag_visible = gr.Dropdown(
947
+ label="Select Document to Tag",
948
+ choices=get_document_choices(),
949
+ value=None,
950
+ interactive=True,
951
+ allow_custom_value=False
952
+ )
953
+ tag_text_input = gr.Textbox(
954
+ label="Or Paste Text to Generate Tags",
955
+ placeholder="Paste any text here to generate tags...",
956
+ lines=8
957
+ )
958
+ max_tags_slider = gr.Slider(
959
+ label="Number of Tags",
960
+ minimum=3, maximum=15, value=5, step=1
961
+ )
962
  tag_btn_action = gr.Button("🏷️ Generate Tags", variant="primary", size="lg")
963
  with gr.Column():
964
+ tag_output_display = gr.Textbox(
965
+ label="Generated Tags",
966
+ lines=10,
967
+ placeholder="Tags will appear here..."
968
+ )
969
 
970
+ # Voice Assistant Tab
971
  with gr.Tab("πŸŽ™οΈ Voice Assistant"):
972
  gr.Markdown("""
973
  ### πŸ—£οΈ Talk to Your AI Librarian
 
980
 
981
  with gr.Row():
982
  with gr.Column(scale=2):
 
983
  with gr.Group():
984
  voice_status_display = gr.Textbox(
985
  label="Status",
 
992
  start_voice_btn = gr.Button("🎀 Start Conversation", variant="primary", size="lg")
993
  stop_voice_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", interactive=False)
994
 
 
995
  with gr.Group():
996
  gr.Markdown("#### πŸ’¬ Send Message")
997
  voice_input_text = gr.Textbox(
 
1004
  send_voice_btn = gr.Button("πŸ“€ Send", variant="secondary")
1005
 
1006
  with gr.Column(scale=3):
 
1007
  with gr.Group():
1008
  voice_chatbot = gr.Chatbot(
1009
  label="Conversation",
 
1042
  outputs=[voice_chatbot]
1043
  )
1044
 
1045
+ # Podcast Studio Tab
1046
  with gr.Tab("🎧 Podcast Studio"):
1047
  gr.Markdown("""
1048
  ### πŸŽ™οΈ AI-Powered Podcast Generation
 
1055
 
1056
  with gr.Row():
1057
  with gr.Column(scale=2):
 
1058
  with gr.Group():
1059
  gr.Markdown("#### πŸ“š Select Content")
1060
 
 
1115
  )
1116
 
1117
  with gr.Column(scale=3):
 
1118
  with gr.Group():
1119
  gr.Markdown("#### 🎡 Generated Podcast")
1120
 
 
1149
  ]
1150
  )
1151
 
1152
+ # Ask Questions Tab
1153
  with gr.Tab("❓ Ask Questions"):
1154
  with gr.Row():
1155
  with gr.Column():
1156
  gr.Markdown("""### Ask Questions About Your Documents
1157
  The AI will search through all your uploaded documents to find relevant information
1158
  and provide comprehensive answers with sources.""")
1159
+ qa_question_input = gr.Textbox(
1160
+ label="Your Question",
1161
+ placeholder="Ask anything about your documents...",
1162
+ lines=3
1163
+ )
1164
  qa_btn_action = gr.Button("❓ Get Answer", variant="primary", size="lg")
1165
  with gr.Column():
1166
+ qa_output_display = gr.Textbox(
1167
+ label="AI Answer",
1168
+ lines=20,
1169
+ placeholder="Answer will appear here with sources..."
1170
+ )
1171
 
1172
+ # Wire up all dropdown updates
1173
  all_dropdowns_to_update = [delete_doc_dropdown_visible, doc_dropdown_content, doc_dropdown_tag_visible]
1174
 
1175
  refresh_outputs = [document_list_display] + [dd for dd in all_dropdowns_to_update]
 
1185
  tag_btn_action.click(generate_tags_for_document, inputs=[doc_dropdown_tag_visible, tag_text_input, max_tags_slider], outputs=[tag_output_display])
1186
  qa_btn_action.click(ask_question, inputs=[qa_question_input], outputs=[qa_output_display])
1187
 
 
1188
  # Load dashboard stats on interface load
1189
  interface.load(
1190
  fn=load_dashboard_stats,
 
1192
  )
1193
 
1194
  interface.load(fn=refresh_library, outputs=refresh_outputs)
1195
+
1196
+ return interface
1197
 
1198
  if __name__ == "__main__":
      # Script entry point: build the Gradio interface, then serve it.
      gradio_interface = create_gradio_interface()
      # NOTE(review): mcp_server=True presumably enables the MCP endpoint that
      # the landing-page text advertises - confirm against the Gradio version in use.
      gradio_interface.launch(mcp_server=True)
data/podcasts/metadata_db.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []