Faham committed
Commit · b1acf7e
Parent(s): e2e4e08
UPDATE: codebase refactored to be more readable and optimized
- .gitignore +2 -3
- app.py +294 -973
- pyproject.toml +2 -0
- src/__init__.py +10 -0
- src/config/__init__.py +5 -0
- src/config/settings.py +153 -0
- src/models/__init__.py +8 -0
- src/models/audio_model.py +154 -0
- src/models/fused_model.py +176 -0
- src/models/text_model.py +81 -0
- src/models/vision_model.py +157 -0
- src/ui/__init__.py +7 -0
- src/ui/styles.py +97 -0
- src/utils/__init__.py +7 -0
- src/utils/file_handling.py +189 -0
- src/utils/preprocessing.py +467 -0
- src/utils/sentiment_mapping.py +71 -0
- simple_model_manager.py → src/utils/simple_model_manager.py +1 -1
- uv.lock +302 -0
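The refactor moves everything that used to live in the single app.py into the src/ packages listed above. The page-dispatch wiring of the new entry point is not visible in the truncated diff below, but given the render_* page functions it defines and the sidebar navigation the old file used, the new app.py presumably ends in something close to this sketch (main() and the exact page labels are assumptions, not committed code):

# Hypothetical sketch only: the render_* functions are defined in the new app.py
# shown further down; the dispatcher itself sits outside the visible diff.
import streamlit as st

PAGES = {
    "Home": render_home_page,
    "Text Sentiment": render_text_sentiment_page,
    "Audio Sentiment": render_audio_sentiment_page,
    "Vision Sentiment": render_vision_sentiment_page,
    "Fused Model": render_fused_model_page,
    "Max Fusion": render_max_fusion_page,
}

def main():
    # Same sidebar navigation the old monolithic app.py built inline
    st.sidebar.title("Sentiment Analysis")
    page = st.sidebar.selectbox("Choose a page:", list(PAGES.keys()))
    PAGES[page]()  # call the renderer for the selected page

main()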
.gitignore CHANGED

@@ -1,6 +1,6 @@
 # Model files
 *.pth
-
+model_weights/*.pth
 *.pt
 *.pkl
 *.h5

@@ -40,7 +40,6 @@ venv/
 env/
 ENV/
 .venv/
-.venv2/
 .env/

 # IDE

@@ -63,4 +62,4 @@ logs/
 .cache/
 .pytest_cache/

-
+model_weights/
app.py CHANGED

@@ -1,781 +1,67 @@
 import streamlit as st
 import pandas as pd
 from PIL import Image
-import
-...
-from
-...

-#
-...

 # Page configuration
 st.set_page_config(
-    page_title=
-    page_icon=
-    layout=
     initial_sidebar_state="expanded",
 )

-#
-st.markdown(
-    """
-<style>
-    .main-header {
-        font-size: 2.5rem;
-        font-weight: bold;
-        color: #1f77b4;
-        text-align: center;
-        margin-bottom: 2rem;
-    }
-    .model-card {
-        background-color: #f0f2f6;
-        padding: 1.5rem;
-        border-radius: 10px;
-        margin: 1rem 0;
-        border-left: 4px solid #1f77b4;
-    }
-    .result-box {
-        background-color: #e8f4fd;
-        padding: 1rem;
-        border-radius: 8px;
-        border: 1px solid #1f77b4;
-        margin: 1rem 0;
-    }
-    .upload-section {
-        background-color: #f8f9fa;
-        padding: 1.5rem;
-        border-radius: 10px;
-        border: 2px dashed #dee2e6;
-        text-align: center;
-        margin: 1rem 0;
-    }
-</style>
-""",
-    unsafe_allow_html=True,
-)
-
-
-# Initialize the Google Drive model manager
-@st.cache_resource
-def get_model_manager():
-    """Get the Google Drive model manager instance"""
-    try:
-        manager = SimpleModelManager()
-        return manager
-    except Exception as e:
-        st.error(f"Failed to initialize model manager: {e}")
-        return None
-
-
-# Global variables for models
-@st.cache_resource
-def load_vision_model():
-    """Load the pre-trained ResNet-50 vision sentiment model from Google Drive"""
-    try:
-        manager = get_model_manager()
-        if manager is None:
-            st.error("Model manager not available")
-            return None, None, None
-
-        # Load the model using the Google Drive manager
-        model, device, num_classes = manager.load_vision_model()
-
-        if model is None:
-            st.error("Failed to load vision model from Google Drive")
-            return None, None, None
-
-        st.success(f"Vision model loaded successfully with {num_classes} classes!")
-        return model, device, num_classes
-    except Exception as e:
-        st.error(f"Error loading vision model: {str(e)}")
-        return None, None, None
-
-
-@st.cache_data
-def get_vision_transforms():
-    """Get the image transforms used during FER2013 training"""
-    return transforms.Compose(
-        [
-            transforms.Resize(224),  # Match training: transforms.Resize(224)
-            transforms.CenterCrop(224),  # Match training: transforms.CenterCrop(224)
-            transforms.ToTensor(),
-            transforms.Normalize(
-                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-            ),  # ImageNet normalization
-        ]
-    )
-
-
-def detect_and_preprocess_face(image, crop_tightness=0.05):
-    """
-    Detect face in image, crop to face region, convert to grayscale, and resize to 224x224
-    to match FER2013 dataset format (grayscale converted to 3-channel RGB)
-
-    Args:
-        image: Input image (PIL Image or numpy array)
-        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
-    """
-    try:
-        import cv2
-        import numpy as np
-
-        # Convert PIL image to OpenCV format
-        if isinstance(image, Image.Image):
-            # Convert PIL to numpy array
-            img_array = np.array(image)
-            # Convert RGB to BGR for OpenCV
-            if len(img_array.shape) == 3:
-                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
-        else:
-            img_array = image
-
-        # Load face detection cascade
-        face_cascade = cv2.CascadeClassifier(
-            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
-        )
-
-        # Convert to grayscale for face detection (detection works better on grayscale)
-        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
-
-        # Detect faces
-        faces = face_cascade.detectMultiScale(
-            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
-        )
-
-        if len(faces) == 0:
-            st.warning("No face detected in the image. Using center crop instead.")
-            # Fallback: center crop and resize
-            if isinstance(image, Image.Image):
-                # Convert to RGB first
-                rgb_pil = image.convert("RGB")
-                # Center crop to square
-                width, height = rgb_pil.size
-                size = min(width, height)
-                left = (width - size) // 2
-                top = (height - size) // 2
-                right = left + size
-                bottom = top + size
-                cropped = rgb_pil.crop((left, top, right, bottom))
-                # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
-                resized = cropped.resize((224, 224), Image.Resampling.LANCZOS)
-
-                # Convert to grayscale and then to 3-channel RGB
-                gray_pil = resized.convert("L")
-                # Convert back to RGB (this replicates grayscale values to all 3 channels)
-                gray_rgb_pil = gray_pil.convert("RGB")
-                return gray_rgb_pil
-            else:
-                return None
-
-        # Get the largest face (assuming it's the main subject)
-        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])
-
-        # Add padding around the face based on user preference
-        padding_x = int(w * crop_tightness)
-        padding_y = int(h * crop_tightness)
-
-        # Ensure we don't go out of bounds
-        x1 = max(0, x - padding_x)
-        y1 = max(0, y - padding_y)
-        x2 = min(img_array.shape[1], x + w + padding_x)
-        y2 = min(img_array.shape[0], y + h + padding_y)
-
-        # Crop to face region
-        face_crop = img_array[y1:y2, x1:x2]
-
-        # Convert BGR to RGB first
-        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
-
-        # Convert to grayscale
-        face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)
-
-        # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
-        face_resized = cv2.resize(face_gray, (224, 224), interpolation=cv2.INTER_AREA)
-
-        # Convert grayscale to 3-channel RGB (replicate grayscale values)
-        face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)
-
-        # Convert back to PIL Image
-        face_pil = Image.fromarray(face_rgb_3channel)
-
-        return face_pil
-
-    except ImportError:
-        st.error(
-            "OpenCV not installed. Please install it with: pip install opencv-python"
-        )
-        st.info("Falling back to basic preprocessing...")
-        # Fallback: basic grayscale conversion and resize
-        if isinstance(image, Image.Image):
-            rgb_pil = image.convert("RGB")
-            resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
-            # Convert to grayscale and then to 3-channel RGB
-            gray_pil = resized.convert("L")
-            gray_rgb_pil = gray_pil.convert("RGB")
-            return gray_rgb_pil
-        return None
-    except Exception as e:
-        st.error(f"Error in face detection: {str(e)}")
-        st.info("Falling back to basic preprocessing...")
-        # Fallback: basic grayscale conversion and resize
-        if isinstance(image, Image.Image):
-            rgb_pil = image.convert("RGB")
-            resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
-            # Convert to grayscale and then to 3-channel RGB
-            gray_pil = resized.convert("L")
-            gray_rgb_pil = gray_pil.convert("RGB")
-            return gray_rgb_pil
-        return None
-
-
-def get_sentiment_mapping(num_classes):
-    """Get the sentiment mapping based on number of classes"""
-    if num_classes == 3:
-        return {0: "Negative", 1: "Neutral", 2: "Positive"}
-    elif num_classes == 4:
-        # Common 4-class emotion mapping
-        return {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"}
-    elif num_classes == 7:
-        # FER2013 7-class emotion mapping
-        return {
-            0: "Angry",
-            1: "Disgust",
-            2: "Fear",
-            3: "Happy",
-            4: "Sad",
-            5: "Surprise",
-            6: "Neutral",
-        }
-    else:
-        # Generic mapping for unknown number of classes
-        return {i: f"Class_{i}" for i in range(num_classes)}
-
-
-# Placeholder functions for model predictions
-def predict_text_sentiment(text):
-    """
-    Analyze text sentiment using TextBlob
-    """
-    if not text or text.strip() == "":
-        return "No text provided", 0.0
-
-    try:
-        from textblob import TextBlob
-
-        # Create TextBlob object
-        blob = TextBlob(text)
-
-        # Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
-        polarity = blob.sentiment.polarity
-
-        # Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
-        subjectivity = blob.sentiment.subjectivity
-
-        # Convert polarity to sentiment categories
-        if polarity > 0.1:
-            sentiment = "Positive"
-            confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
-        elif polarity < -0.1:
-            sentiment = "Negative"
-            confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
-        else:
-            sentiment = "Neutral"
-            confidence = 0.7 - abs(polarity) * 0.2
-
-        # Round confidence to 2 decimal places
-        confidence = round(confidence, 2)
-
-        return sentiment, confidence
-
-    except ImportError:
-        st.error("TextBlob not installed. Please install it with: pip install textblob")
-        return "TextBlob not available", 0.0
-    except Exception as e:
-        st.error(f"Error in text sentiment analysis: {str(e)}")
-        return "Error occurred", 0.0
-
-
-@st.cache_resource
-def load_audio_model():
-    """Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive"""
-    try:
-        manager = get_model_manager()
-        if manager is None:
-            st.error("Model manager not available")
-            return None, None, None, None
-
-        # Load the model using the Google Drive manager
-        model, device = manager.load_audio_model()
-
-        if model is None:
-            st.error("Failed to load audio model from Google Drive")
-            return None, None, None, None
-
-        # For Wav2Vec2 models, we need to determine the number of classes
-        # This is typically available in the model configuration
-        try:
-            num_classes = model.config.num_labels
-        except:
-            # Fallback: try to infer from the model
-            try:
-                num_classes = model.classifier.out_features
-            except:
-                num_classes = 3  # Default assumption
-
-        # Load feature extractor
-        from transformers import AutoFeatureExtractor
-
-        feature_extractor = AutoFeatureExtractor.from_pretrained(
-            "facebook/wav2vec2-base"
-        )
-
-        st.success(f"Audio model loaded successfully with {num_classes} classes!")
-        return model, device, num_classes, feature_extractor
-    except Exception as e:
-        st.error(f"Error loading audio model: {str(e)}")
-        return None, None, None, None
-
-
-def predict_audio_sentiment(audio_bytes):
-    """
-    Analyze audio sentiment using fine-tuned Wav2Vec2 model
-    Preprocessing matches CREMA-D + RAVDESS training specifications:
-    - Target sampling rate: 16kHz
-    - Max duration: 5.0 seconds
-    - Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
-    """
-    if audio_bytes is None:
-        return "No audio provided", 0.0
-
-    try:
-        # Load model if not already loaded
-        model, device, num_classes, feature_extractor = load_audio_model()
-        if model is None:
-            return "Model not loaded", 0.0
-
-        # Load and preprocess audio
-        import librosa
-        import tempfile
-
-        # Save audio bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            tmp_file.write(audio_bytes)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Load audio with librosa
-            audio, sr = librosa.load(tmp_file_path, sr=None)
-
-            # Resample to 16kHz if needed
-            if sr != 16000:
-                audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
-
-            # Preprocess with feature extractor (matching CREMA-D + RAVDESS training exactly)
-            # From training: max_length=int(max_duration_s * TARGET_SAMPLING_RATE) = 5.0 * 16000
-            inputs = feature_extractor(
-                audio,
-                sampling_rate=16000,
-                max_length=int(5.0 * 16000),  # 5 seconds max (matching training)
-                truncation=True,
-                padding="max_length",
-                return_tensors="pt",
-            )
-
-            # Move to device
-            input_values = inputs.input_values.to(device)
-
-            # Run inference
-            with torch.no_grad():
-                outputs = model(input_values)
-                probabilities = torch.softmax(outputs.logits, dim=1)
-                confidence, predicted = torch.max(probabilities, 1)
-
-            # Get sentiment mapping based on number of classes
-            if num_classes == 3:
-                sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
-            else:
-                # Generic mapping for unknown number of classes
-                sentiment_map = {i: f"Class_{i}" for i in range(num_classes)}
-
-            sentiment = sentiment_map[predicted.item()]
-            confidence_score = confidence.item()
-
-            return sentiment, confidence_score
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError as e:
-        st.error(f"Required library not installed: {str(e)}")
-        st.info("Please install: pip install librosa transformers")
-        return "Library not available", 0.0
-    except Exception as e:
-        st.error(f"Error in audio sentiment prediction: {str(e)}")
-        return "Error occurred", 0.0
-
-
-def predict_vision_sentiment(image, crop_tightness=0.05):
-    """
-    Load ResNet-50 and run inference for vision sentiment analysis
-
-    Args:
-        image: Input image (PIL Image or numpy array)
-        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
-    """
-    if image is None:
-        return "No image provided", 0.0
-
-    try:
-        # Load model if not already loaded
-        model, device, num_classes = load_vision_model()
-        if model is None:
-            return "Model not loaded", 0.0
-
-        # Preprocess image to match FER2013 format
-        st.info(
-            "Detecting face and preprocessing image to match training data format..."
-        )
-        preprocessed_image = detect_and_preprocess_face(image, crop_tightness=0.0)
-
-        if preprocessed_image is None:
-            return "Image preprocessing failed", 0.0
-
-        # Show preprocessed image
-        st.image(
-            preprocessed_image,
-            caption="Preprocessed Image (48x48 Grayscale → 3-channel RGB)",
-            width=200,
-        )
-
-        # Get transforms
-        transform = get_vision_transforms()
-
-        # Convert preprocessed image to tensor
-        image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
-
-        # Run inference
-        with torch.no_grad():
-            outputs = model(image_tensor)
-
-            # Debug: print output shape
-            st.info(f"Model output shape: {outputs.shape}")
-
-            probabilities = F.softmax(outputs, dim=1)
-            confidence, predicted = torch.max(probabilities, 1)
-
-        # Get sentiment mapping based on number of classes
-        sentiment_map = get_sentiment_mapping(num_classes)
-        sentiment = sentiment_map[predicted.item()]
-        confidence_score = confidence.item()
-
-        return sentiment, confidence_score
-
-    except Exception as e:
-        st.error(f"Error in vision sentiment prediction: {str(e)}")
-        st.error(
-            f"Model output shape mismatch. Expected {num_classes} classes but got different."
-        )
-        return "Error occurred", 0.0
-
-
-def predict_fused_sentiment(text=None, audio_bytes=None, image=None):
-    """
-    TODO: Implement ensemble/fusion logic combining all three models
-    This is a placeholder function for fused sentiment analysis
-    """
-    # Placeholder logic - replace with actual fusion implementation
-    results = []
-
-    if text:
-        text_sentiment, text_conf = predict_text_sentiment(text)
-        results.append((text_sentiment, text_conf))
-
-    if audio_bytes:
-        audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
-        results.append((audio_sentiment, audio_conf))
-
-    if image:
-        vision_sentiment, vision_conf = predict_vision_sentiment(image)
-        results.append((vision_sentiment, vision_conf))
-
-    if not results:
-        return "No inputs provided", 0.0
-
-    # Simple ensemble logic (replace with your fusion strategy)
-    sentiment_counts = {}
-    total_confidence = 0
-
-    for sentiment, confidence in results:
-        sentiment_counts[sentiment] = sentiment_counts.get(sentiment, 0) + 1
-        total_confidence += confidence
-
-    # Majority voting with confidence averaging
-    final_sentiment = max(sentiment_counts, key=sentiment_counts.get)
-    avg_confidence = total_confidence / len(results)
-
-    return final_sentiment, avg_confidence
-
-
-def extract_frames_from_video(video_file, max_frames=10):
-    """
-    Extract frames from video file for vision sentiment analysis
-
-    Args:
-        video_file: StreamlitUploadedFile or bytes
-        max_frames: Maximum number of frames to extract
-
-    Returns:
-        List of PIL Image objects
-    """
-    try:
-        import cv2
-        import numpy as np
-        import tempfile
-
-        # Save video bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
-            if hasattr(video_file, "getvalue"):
-                tmp_file.write(video_file.getvalue())
-            else:
-                tmp_file.write(video_file)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Open video with OpenCV
-            cap = cv2.VideoCapture(tmp_file_path)
-
-            if not cap.isOpened():
-                st.error("Could not open video file")
-                return []
-
-            frames = []
-            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-            fps = cap.get(cv2.CAP_PROP_FPS)
-            duration = total_frames / fps if fps > 0 else 0
-
-            st.info(
-                f"📹 Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
-            )
-
-            # Extract frames at strategic intervals
-            if total_frames > 0:
-                # Select frames: start, 25%, 50%, 75%, end
-                frame_indices = [
-                    0,
-                    int(total_frames * 0.25),
-                    int(total_frames * 0.5),
-                    int(total_frames * 0.75),
-                    total_frames - 1,
-                ]
-                frame_indices = list(set(frame_indices))  # Remove duplicates
-                frame_indices.sort()
-
-                for frame_idx in frame_indices:
-                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
-                    ret, frame = cap.read()
-                    if ret:
-                        # Convert BGR to RGB
-                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                        # Convert to PIL Image
-                        pil_image = Image.fromarray(frame_rgb)
-                        frames.append(pil_image)
-
-            cap.release()
-            return frames
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError:
-        st.error(
-            "OpenCV not installed. Please install it with: pip install opencv-python"
-        )
-        return []
-    except Exception as e:
-        st.error(f"Error extracting frames: {str(e)}")
-        return []
-
-
-def extract_audio_from_video(video_file):
-    """
-    Extract audio from video file for audio sentiment analysis
-
-    Args:
-        video_file: StreamlitUploadedFile or bytes
-
-    Returns:
-        Audio bytes in WAV format
-    """
-    try:
-        import tempfile
-
-        try:
-            from moviepy import VideoFileClip
-        except ImportError as e:
-            st.error(f"MoviePy import failed: {e}")
-            st.error(
-                "This usually means the Docker build failed to install moviepy properly"
-            )
-            return None
-
-        # Save video bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
-            if hasattr(video_file, "getvalue"):
-                tmp_file.write(video_file.getvalue())
-            else:
-                tmp_file.write(video_file)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Extract audio using moviepy
-            video = VideoFileClip(tmp_file_path)
-            audio = video.audio
-
-            if audio is None:
-                st.warning("No audio track found in video")
-                return None
-
-            # Save audio to temporary WAV file
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
-                audio_path = audio_file.name
-
-            # Export audio as WAV
-            audio.write_audiofile(audio_path, logger=None)
-
-            # Read the audio file and return bytes
-            with open(audio_path, "rb") as f:
-                audio_bytes = f.read()
-
-            # Clean up temporary audio file
-            try:
-                os.unlink(audio_path)
-            except (OSError, PermissionError):
-                # File might be in use, skip cleanup
-                pass
-
-            return audio_bytes
-
-        finally:
-            # Clean up temporary video file
-            try:
-                # Close video and audio objects first
-                if "video" in locals():
-                    video.close()
-                if "audio" in locals() and audio:
-                    audio.close()
-
-                # Wait a bit before trying to delete
-                import time
-
-                time.sleep(0.1)
-
-                os.unlink(tmp_file_path)
-            except (OSError, PermissionError):
-                # File might be in use, skip cleanup
-                pass
-
-    except ImportError:
-        st.error("MoviePy not installed. Please install it with: pip install moviepy")
-        return None
-    except Exception as e:
-        st.error(f"Error extracting audio: {str(e)}")
-        return None
-
-
-def transcribe_audio(audio_bytes):
-    """
-    Transcribe audio to text for text sentiment analysis
-
-    Args:
-        audio_bytes: Audio bytes in WAV format
-
-    Returns:
-        Transcribed text string
-    """
-    if audio_bytes is None:
-        return ""
-
-    try:
-        import tempfile
-        import speech_recognition as sr
-
-        # Save audio bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            tmp_file.write(audio_bytes)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Initialize recognizer
-            recognizer = sr.Recognizer()
-
-            # Load audio file
-            with sr.AudioFile(tmp_file_path) as source:
-                # Read audio data
-                audio_data = recognizer.record(source)
-
-            # Transcribe using Google Speech Recognition
-            try:
-                text = recognizer.recognize_google(audio_data)
-                return text
-            except sr.UnknownValueError:
-                st.warning("Speech could not be understood")
-                return ""
-            except sr.RequestError as e:
-                st.error(
-                    f"Could not request results from speech recognition service: {e}"
-                )
-                return ""
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError:
-        st.error(
-            "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
-        )
-        return ""
-    except Exception as e:
-        st.error(f"Error transcribing audio: {str(e)}")
-        return ""
-
-
-# Sidebar navigation
-st.sidebar.title("Sentiment Analysis")
-st.sidebar.markdown("---")
-
-# Navigation
-page = st.sidebar.selectbox(
-    "Choose a page:",
-    [
-        "Home",
-        "Text Sentiment",
-        "Audio Sentiment",
-        "Vision Sentiment",
-        "Fused Model",
-        "Max Fusion",
-    ],
-)
-
 st.markdown(
-    '<h1 class="main-header">
     unsafe_allow_html=True,
 )

 st.markdown(
     """
-    ...
     unsafe_allow_html=True,
 )
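All of the functions removed above reappear in the new src/ packages. The imports in the refactored app.py further down show where most of them now live; the entries marked "assumed" below are inferred and are not confirmed by the visible part of the diff:

# Where the removed app.py pieces now live, per the new imports shown below.
# predict_text_sentiment                      -> src/models/text_model.py
# load_audio_model, predict_audio_sentiment   -> src/models/audio_model.py
# load_vision_model, predict_vision_sentiment -> src/models/vision_model.py
# predict_fused_sentiment                     -> src/models/fused_model.py
# extract_frames_from_video, extract_audio_from_video,
# transcribe_audio                            -> src/utils/preprocessing.py
# sentiment label/emoji mapping               -> src/utils/sentiment_mapping.py
# file info/size helpers                      -> src/utils/file_handling.py
# page constants and CUSTOM_CSS               -> src/config/settings.py (assumed also src/ui/styles.py)
# detect_and_preprocess_face                  -> src/utils/preprocessing.py (assumed)
# SimpleModelManager                          -> src/utils/simple_model_manager.py (moved per the file list)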
@@ -784,105 +70,106 @@ if page == "Home":
-    ... (old inline Home-page HTML blocks)

@@ -910,28 +197,26 @@ elif page == "Text Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -969,7 +254,7 @@ elif page == "Audio Sentiment":
-            type=

@@ -979,12 +264,12 @@ elif page == "Audio Sentiment":
-            ... (old inline recording-help HTML)

@@ -1018,8 +303,10 @@ elif page == "Audio Sentiment":
-            st.info(
-                ...

@@ -1042,17 +329,18 @@ elif page == "Audio Sentiment":
-                sentiment_colors =
-                ... (old inline result-box HTML)

@@ -1061,8 +349,9 @@ elif page == "Audio Sentiment":
-    ...

@@ -1101,7 +390,7 @@ elif page == "Vision Sentiment":
-            type=

@@ -1115,9 +404,9 @@ elif page == "Vision Sentiment":
-                f"File: {

@@ -1140,33 +429,30 @@ elif page == "Vision Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box and camera-help HTML)

@@ -1210,21 +496,18 @@ elif page == "Vision Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -1234,8 +517,9 @@ elif page == "Vision Sentiment":
-    ...

@@ -1243,12 +527,12 @@ elif page == "Fused Model":
-        ... (old inline description HTML)

@@ -1282,7 +566,7 @@ elif page == "Fused Model":
-                type=

@@ -1325,7 +609,7 @@ elif page == "Fused Model":
-                type=

@@ -1421,16 +705,17 @@ elif page == "Fused Model":
-                sentiment_colors =
-                ... (old inline result-box HTML)

@@ -1438,21 +723,22 @@ elif page == "Fused Model":
-    ... (old inline Max Fusion intro HTML)

@@ -1476,25 +762,25 @@ elif page == "Max Fusion":
-            ... (old inline "coming soon" HTML)

@@ -1507,19 +793,19 @@ elif page == "Max Fusion":
-            ... (old inline upload-help HTML)
-            type=

@@ -1527,8 +813,6 @@ elif page == "Max Fusion":
-        # Video recording using streamlit-webrtc component - COMING SOON
-

@@ -1543,7 +827,7 @@ elif page == "Max Fusion":
-                type=

@@ -1561,11 +845,10 @@ elif page == "Max Fusion":
-            ...
-            st.info(f"File: {video_name} | Size: {file_size:.1f} KB")

@@ -1709,20 +992,17 @@ elif page == "Max Fusion":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -1740,13 +1020,54 @@ elif page == "Max Fusion":
-    ... (removed trailing lines)
+"""
+Refactored Sentiment Fused - Multimodal Sentiment Analysis Application
+
+This is the main entry point for the application, now using a modular structure.
+"""
+
 import streamlit as st
 import pandas as pd
 from PIL import Image
+import logging
+
+# Import our modular components
+from src.config.settings import (
+    APP_NAME,
+    APP_VERSION,
+    APP_ICON,
+    APP_LAYOUT,
+    CUSTOM_CSS,
+    SUPPORTED_IMAGE_FORMATS,
+    SUPPORTED_AUDIO_FORMATS,
+    SUPPORTED_VIDEO_FORMATS,
+)
+from src.models.text_model import predict_text_sentiment
+from src.models.audio_model import predict_audio_sentiment, load_audio_model
+from src.models.vision_model import predict_vision_sentiment, load_vision_model
+from src.models.fused_model import predict_fused_sentiment
+from src.utils.preprocessing import (
+    extract_frames_from_video,
+    extract_audio_from_video,
+    transcribe_audio,
+)
+from src.utils.file_handling import get_file_info, format_file_size
+from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 # Page configuration
 st.set_page_config(
+    page_title=APP_NAME,
+    page_icon=APP_ICON,
+    layout=APP_LAYOUT,
     initial_sidebar_state="expanded",
 )

+# Apply custom CSS
+st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
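src/config/settings.py itself is not part of the visible diff. Judging from the names imported above and the upload help texts used later in the file, it presumably looks roughly like this sketch (only the names are taken from the diff; the concrete values are illustrative assumptions):

# Sketch of src/config/settings.py, assumed from the imports in app.py above.
APP_NAME = "Sentiment Fused"          # assumed value
APP_VERSION = "1.0.0"                 # assumed value
APP_ICON = "🎭"                        # assumed value
APP_LAYOUT = "wide"                   # assumed value

# File-type whitelists passed to st.file_uploader(type=...); the lists mirror
# the "Supported formats" help strings used in the UI.
SUPPORTED_IMAGE_FORMATS = ["png", "jpg", "jpeg", "bmp", "tiff"]
SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "m4a", "flac"]
SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "wmv", "flv"]

# CUSTOM_CSS holds the <style> block (main-header, model-card, result-box,
# upload-section rules) that previously lived inline in app.py.
CUSTOM_CSS = "<style> /* ... */ </style>"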
+def render_home_page():
+    """Render the home page with model information."""
     st.markdown(
+        f'<h1 class="main-header">{APP_NAME}</h1>',
         unsafe_allow_html=True,
     )

     st.markdown(
         """
+        <div class="model-card">
+        <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
+        <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
     with col1:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Text Sentiment Model</h3>
+            <p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
+            <ul>
+                <li>Process any text input</li>
+                <li>Get sentiment classification (Positive/Negative/Neutral)</li>
+                <li>View confidence scores</li>
+                <li>Real-time NLP analysis</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     with col2:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Audio Sentiment Model</h3>
+            <p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
+            <ul>
+                <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
+                <li>Record audio directly with microphone (max 5s)</li>
+                <li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
+                <li>Listen to uploaded/recorded audio</li>
+                <li>Get sentiment predictions</li>
+                <li>Real-time audio analysis</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     with col3:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Vision Sentiment Model</h3>
+            <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
+            <ul>
+                <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
+                <li>Automatic face detection & preprocessing</li>
+                <li>Fixed 0% padding for tightest face crop</li>
+                <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
+                <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
+                <li>Preview original & preprocessed images</li>
+                <li>Get sentiment predictions</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     st.markdown(
         """
+        <div class="model-card">
+        <h3>Fused Model</h3>
+        <p>Combine predictions from all three models for enhanced accuracy</p>
+        <ul>
+            <li>Multi-modal input processing</li>
+            <li>Ensemble prediction strategies</li>
+            <li>Comprehensive sentiment analysis</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

     st.markdown(
         """
+        <div class="model-card">
+        <h3>🎬 Max Fusion</h3>
+        <p>Ultimate video-based sentiment analysis combining all three modalities</p>
+        <ul>
+            <li>🎥 Record or upload 5-second videos</li>
+            <li>🔍 Extract frames for vision analysis</li>
+            <li>🎵 Extract audio for vocal sentiment</li>
+            <li>📝 Transcribe audio for text analysis</li>
+            <li>🚀 Comprehensive multi-modal results</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

     st.markdown("---")
     st.markdown(
         """
+        <div style="text-align: center; color: #666;">
+        <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
+        <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
+        <p><strong>Models are now loaded from Google Drive automatically!</strong></p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

+
+def render_text_sentiment_page():
+    """Render the text sentiment analysis page."""
     st.title("Text Sentiment Analysis")
     st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
...
                 st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
+                    <p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
                 st.error("Please enter some text to analyze.")

+
+def render_audio_sentiment_page():
+    """Render the audio sentiment analysis page."""
     st.title("Audio Sentiment Analysis")
     st.markdown(
         "Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
...
         # File uploader
         uploaded_audio = st.file_uploader(
             "Choose an audio file",
+            type=SUPPORTED_AUDIO_FORMATS,
             help="Supported formats: WAV, MP3, M4A, FLAC",
         )
...
     else:  # Audio recording
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Audio Recording</h3>
+            <p>Record audio directly with your microphone (max 5 seconds).</p>
+            <p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
...
                 uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
             )
             # File info for uploaded files
+            file_info = get_file_info(uploaded_audio)
+            st.info(
+                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
+            )

         # Analyze button
         if st.button(
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Audio Source:</strong> {audio_name}</p>
+                    <p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
...
     else:
         st.info("Click the microphone button above to record audio for analysis.")

+
+def render_vision_sentiment_page():
+    """Render the vision sentiment analysis page."""
     st.title("Vision Sentiment Analysis")
     st.markdown(
         "Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
...
         # File uploader
         uploaded_image = st.file_uploader(
             "Choose an image file",
+            type=SUPPORTED_IMAGE_FORMATS,
             help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
         )
...
             )

             # File info
+            file_info = get_file_info(uploaded_image)
             st.info(
+                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
             )

             # Analyze button
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Image File:</strong> {uploaded_image.name}</p>
+                    <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )

     else:  # Camera capture
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Camera Capture</h3>
+            <p>Take a photo directly with your camera to analyze its sentiment.</p>
+            <p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Image Source:</strong> Camera Capture</p>
+                    <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
...
     elif input_method == "Take Photo with Camera" and "camera_photo" not in locals():
         st.info("Click the camera button above to take a photo for analysis.")

+
+def render_fused_model_page():
+    """Render the fused model analysis page."""
     st.title("Fused Model Analysis")
     st.markdown(
         "Combine predictions from all three models for enhanced sentiment analysis."
...
     st.markdown(
         """
+        <div class="model-card">
+        <h3>Multi-Modal Sentiment Analysis</h3>
+        <p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
+        using all three models combined.</p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
         if audio_input_method == "Upload File":
             uploaded_audio = st.file_uploader(
                 "Upload audio file (optional):",
+                type=SUPPORTED_AUDIO_FORMATS,
                 key="fused_audio",
             )
             audio_source = "uploaded_file"
...
         if image_input_method == "Upload File":
             uploaded_image = st.file_uploader(
                 "Upload image file (optional):",
+                type=SUPPORTED_IMAGE_FORMATS,
                 key="fused_image",
             )
...
                 st.dataframe(df, use_container_width=True)

                 # Final result display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
+                    <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Models Used:</strong> {len(results_data)}</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
...
                 "Please provide at least one input (text, audio, or image) for fused analysis."
             )

+
+def render_max_fusion_page():
+    """Render the max fusion page for video-based analysis."""
     st.title("Max Fusion - Multi-Modal Sentiment Analysis")
     st.markdown(
         """
+        <div class="model-card">
+        <h3>Ultimate Multi-Modal Sentiment Analysis</h3>
+        <p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
+        <ul>
+            <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
+            <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
+            <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
     with col2:
         st.markdown(
             """
+            <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
+            <h3>🚧 Coming Soon 🚧</h3>
+            <p>Video recording feature is under development</p>
+            <p>Use Upload Video File for now!</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
| 774 |
# Placeholder for future recording functionality
|
| 775 |
st.markdown(
|
| 776 |
"""
|
| 777 |
+
**Future Features:**
|
| 778 |
+
- Real-time video recording with camera
|
| 779 |
+
- Audio capture during recording
|
| 780 |
+
- Automatic frame extraction
|
| 781 |
+
- Live transcription
|
| 782 |
+
- WebRTC integration for low-latency streaming
|
| 783 |
+
"""
|
| 784 |
)
|
| 785 |
|
| 786 |
# Skip all the recording logic for now
|
|
|
|
| 793 |
# File upload option
|
| 794 |
st.markdown(
|
| 795 |
"""
|
| 796 |
+
<div class="upload-section">
|
| 797 |
+
<h4>📁 Upload Video File</h4>
|
| 798 |
+
<p>Upload a video file for comprehensive multimodal analysis.</p>
|
| 799 |
+
<p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
|
| 800 |
+
<p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
|
| 801 |
+
</div>
|
| 802 |
+
""",
|
| 803 |
unsafe_allow_html=True,
|
| 804 |
)
|
| 805 |
|
| 806 |
uploaded_video = st.file_uploader(
|
| 807 |
"Choose a video file",
|
| 808 |
+
type=SUPPORTED_VIDEO_FORMATS,
|
| 809 |
help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
|
| 810 |
)
|
| 811 |
|
|
|
|
| 813 |
video_name = uploaded_video.name if uploaded_video else None
|
| 814 |
video_file = uploaded_video
|
| 815 |
|
|
|
|
|
|
|
| 816 |
if video_file is not None:
|
| 817 |
# Display video or photo
|
| 818 |
if video_source == "camera_photo":
|
|
|
|
| 827 |
|
| 828 |
uploaded_audio = st.file_uploader(
|
| 829 |
"Upload audio file for audio analysis:",
|
| 830 |
+
type=SUPPORTED_AUDIO_FORMATS,
|
| 831 |
key="camera_audio",
|
| 832 |
help="Upload an audio file to complement the photo analysis",
|
| 833 |
)
|
|
|
|
| 845 |
else:
|
| 846 |
# For uploaded videos
|
| 847 |
st.video(video_file)
|
| 848 |
+
file_info = get_file_info(video_file)
|
| 849 |
+
st.info(
|
| 850 |
+
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
|
| 851 |
+
)
|
|
|
|
| 852 |
audio_bytes = None # Will be extracted from video
|
| 853 |
|
| 854 |
# Video Processing Pipeline
|
|
|
|
| 992 |
st.metric("📊 Overall Confidence", f"{confidence:.2f}")
|
| 993 |
|
| 994 |
# Color-coded sentiment display
|
| 995 |
+
sentiment_colors = get_sentiment_colors()
|
| 996 |
+
emoji = sentiment_colors.get(sentiment, "❓")
|
| 997 |
|
| 998 |
st.markdown(
|
| 999 |
f"""
|
| 1000 |
+
<div class="result-box">
|
| 1001 |
+
<h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
|
| 1002 |
+
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
|
| 1003 |
+
<p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
|
| 1004 |
+
<p><strong>Video Source:</strong> {video_name}</p>
|
| 1005 |
+
<p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
|
| 1006 |
</div>
|
| 1007 |
""",
|
| 1008 |
unsafe_allow_html=True,
|
|
|
|
| 1020 |
else:
|
| 1021 |
st.info("📁 Please upload a video file to begin Max Fusion analysis.")
|
| 1022 |
|
| 1023 |
+
|
| 1024 |
+
def main():
|
| 1025 |
+
"""Main application function."""
|
| 1026 |
+
# Sidebar navigation
|
| 1027 |
+
st.sidebar.title("Sentiment Analysis")
|
| 1028 |
+
st.sidebar.markdown("---")
|
| 1029 |
+
|
| 1030 |
+
# Navigation
|
| 1031 |
+
page = st.sidebar.selectbox(
|
| 1032 |
+
"Choose a page:",
|
| 1033 |
+
[
|
| 1034 |
+
"Home",
|
| 1035 |
+
"Text Sentiment",
|
| 1036 |
+
"Audio Sentiment",
|
| 1037 |
+
"Vision Sentiment",
|
| 1038 |
+
"Fused Model",
|
| 1039 |
+
"Max Fusion",
|
| 1040 |
+
],
|
| 1041 |
+
)
|
| 1042 |
+
|
| 1043 |
+
# Page routing
|
| 1044 |
+
if page == "Home":
|
| 1045 |
+
render_home_page()
|
| 1046 |
+
elif page == "Text Sentiment":
|
| 1047 |
+
render_text_sentiment_page()
|
| 1048 |
+
elif page == "Audio Sentiment":
|
| 1049 |
+
render_audio_sentiment_page()
|
| 1050 |
+
elif page == "Vision Sentiment":
|
| 1051 |
+
render_vision_sentiment_page()
|
| 1052 |
+
elif page == "Fused Model":
|
| 1053 |
+
render_fused_model_page()
|
| 1054 |
+
elif page == "Max Fusion":
|
| 1055 |
+
render_max_fusion_page()
|
| 1056 |
+
|
| 1057 |
+
# Footer
|
| 1058 |
+
st.markdown("---")
|
| 1059 |
+
st.markdown(
|
| 1060 |
+
"""
|
| 1061 |
+
<div style="text-align: center; color: #666; padding: 1rem;">
|
| 1062 |
+
<p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
|
| 1063 |
+
<p>Version: {version}</p>
|
| 1064 |
+
</div>
|
| 1065 |
+
""".format(
|
| 1066 |
+
version=APP_VERSION
|
| 1067 |
+
),
|
| 1068 |
+
unsafe_allow_html=True,
|
| 1069 |
+
)
|
| 1070 |
+
|
| 1071 |
+
|
| 1072 |
+
if __name__ == "__main__":
|
| 1073 |
+
main()
|
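Note: the Max Fusion page relies on a video-processing pipeline (frame extraction, audio extraction, transcription) implemented in src/utils/preprocessing.py, which is not shown in full in this diff. As a rough illustration of the frame-sampling idea behind MAX_VIDEO_FRAMES and VIDEO_FRAME_INTERVALS in src/config/settings.py, a minimal sketch could look like this (extract_frames is a hypothetical helper, not part of this commit):

import cv2
from PIL import Image

def extract_frames(video_path: str, intervals=(0, 0.25, 0.5, 0.75, 1.0)):
    """Return PIL images sampled at the given fractions of the video length."""
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    for fraction in intervals:
        index = min(int(total * fraction), max(total - 1, 0))
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        ok, frame = cap.read()
        if ok:
            # OpenCV decodes to BGR; convert to RGB before handing frames to PIL / the vision model.
            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    cap.release()
    return frames

Sampling a handful of frames at fixed fractions keeps per-video inference cheap while still covering the whole clip.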
pyproject.toml
CHANGED
|
@@ -11,4 +11,6 @@ dependencies = [
|
| 11 |
"speechrecognition>=3.10.0",
|
| 12 |
"streamlit-webrtc>=0.47.0",
|
| 13 |
"opencv-python-headless>=4.8.0",
|
| 14 |
+
"torch>=2.8.0",
|
| 15 |
+
"pillow>=11.3.0",
|
| 16 |
]
|
src/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
| 1 |
+
"""
|
| 2 |
+
Sentiment Fused - Multimodal Sentiment Analysis Package
|
| 3 |
+
|
| 4 |
+
A comprehensive package for analyzing sentiment from text, audio, and visual inputs
|
| 5 |
+
using state-of-the-art deep learning models.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "0.1.0"
|
| 9 |
+
__author__ = "iamfaham"
|
| 10 |
+
__description__ = "Multimodal Sentiment Analysis with Text, Audio, and Vision Models"
|
src/config/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
| 1 |
+
"""
|
| 2 |
+
Configuration package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .settings import *
|
src/config/settings.py
ADDED
|
@@ -0,0 +1,153 @@
|
| 1 |
+
"""
|
| 2 |
+
Centralized configuration settings for the Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Any
|
| 8 |
+
|
| 9 |
+
# Application Configuration
|
| 10 |
+
APP_NAME = "Multimodal Sentiment Analysis"
|
| 11 |
+
APP_VERSION = "0.1.0"
|
| 12 |
+
APP_ICON = "🧠"
|
| 13 |
+
APP_LAYOUT = "wide"
|
| 14 |
+
|
| 15 |
+
# Model Configuration
|
| 16 |
+
VISION_MODEL_CONFIG = {
|
| 17 |
+
"model_name": "resnet50",
|
| 18 |
+
"input_size": 224,
|
| 19 |
+
"num_classes": 7, # FER2013 default
|
| 20 |
+
"crop_tightness": 0.0, # No padding for tightest crop
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
AUDIO_MODEL_CONFIG = {
|
| 24 |
+
"model_name": "facebook/wav2vec2-base",
|
| 25 |
+
"target_sampling_rate": 16000,
|
| 26 |
+
"max_duration": 5.0,
|
| 27 |
+
"num_classes": 3, # Default sentiment classes
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
TEXT_MODEL_CONFIG = {
|
| 31 |
+
"model_name": "textblob",
|
| 32 |
+
"confidence_threshold": 0.1,
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# File Processing Configuration
|
| 36 |
+
SUPPORTED_IMAGE_FORMATS = ["png", "jpg", "jpeg", "bmp", "tiff"]
|
| 37 |
+
SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "m4a", "flac"]
|
| 38 |
+
SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "wmv", "flv"]
|
| 39 |
+
|
| 40 |
+
# Video Processing Configuration
|
| 41 |
+
MAX_VIDEO_FRAMES = 5
|
| 42 |
+
VIDEO_FRAME_INTERVALS = [0, 0.25, 0.5, 0.75, 1.0] # Frame extraction points
|
| 43 |
+
|
| 44 |
+
# Image Preprocessing Configuration
|
| 45 |
+
IMAGE_TRANSFORMS = {
|
| 46 |
+
"resize": 224,
|
| 47 |
+
"center_crop": 224,
|
| 48 |
+
"normalize_mean": [0.485, 0.456, 0.406],
|
| 49 |
+
"normalize_std": [0.229, 0.224, 0.225],
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
# Sentiment Mapping Configuration
|
| 53 |
+
SENTIMENT_MAPPINGS = {
|
| 54 |
+
3: {0: "Negative", 1: "Neutral", 2: "Positive"},
|
| 55 |
+
4: {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"},
|
| 56 |
+
7: {
|
| 57 |
+
0: "Angry",
|
| 58 |
+
1: "Disgust",
|
| 59 |
+
2: "Fear",
|
| 60 |
+
3: "Happy",
|
| 61 |
+
4: "Sad",
|
| 62 |
+
5: "Surprise",
|
| 63 |
+
6: "Neutral",
|
| 64 |
+
},
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
# UI Configuration
|
| 68 |
+
UI_COLORS = {
|
| 69 |
+
"primary": "#1f77b4",
|
| 70 |
+
"success": "#28a745",
|
| 71 |
+
"warning": "#ffc107",
|
| 72 |
+
"danger": "#dc3545",
|
| 73 |
+
"info": "#17a2b8",
|
| 74 |
+
"light": "#f8f9fa",
|
| 75 |
+
"dark": "#343a40",
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
# CSS Styles
|
| 79 |
+
CUSTOM_CSS = """
|
| 80 |
+
<style>
|
| 81 |
+
.main-header {
|
| 82 |
+
font-size: 2.5rem;
|
| 83 |
+
font-weight: bold;
|
| 84 |
+
color: #1f77b4;
|
| 85 |
+
text-align: center;
|
| 86 |
+
margin-bottom: 2rem;
|
| 87 |
+
}
|
| 88 |
+
.model-card {
|
| 89 |
+
background-color: #f0f2f6;
|
| 90 |
+
padding: 1.5rem;
|
| 91 |
+
border-radius: 10px;
|
| 92 |
+
margin: 1rem 0;
|
| 93 |
+
border-left: 4px solid #1f77b4;
|
| 94 |
+
}
|
| 95 |
+
.result-box {
|
| 96 |
+
background-color: #e8f4fd;
|
| 97 |
+
padding: 1rem;
|
| 98 |
+
border-radius: 8px;
|
| 99 |
+
border: 1px solid #1f77b4;
|
| 100 |
+
margin: 1rem 0;
|
| 101 |
+
}
|
| 102 |
+
.upload-section {
|
| 103 |
+
background-color: #f8f9fa;
|
| 104 |
+
padding: 1.5rem;
|
| 105 |
+
border-radius: 10px;
|
| 106 |
+
border: 2px dashed #dee2e6;
|
| 107 |
+
text-align: center;
|
| 108 |
+
margin: 1rem 0;
|
| 109 |
+
}
|
| 110 |
+
</style>
|
| 111 |
+
"""
|
| 112 |
+
|
| 113 |
+
# Paths
|
| 114 |
+
BASE_DIR = Path(__file__).parent.parent.parent
|
| 115 |
+
MODELS_DIR = BASE_DIR / "models"
|
| 116 |
+
SRC_DIR = BASE_DIR / "src"
|
| 117 |
+
UI_DIR = SRC_DIR / "ui"
|
| 118 |
+
|
| 119 |
+
# Environment Variables
|
| 120 |
+
ENV_VARS = {
|
| 121 |
+
"VISION_MODEL_DRIVE_ID": os.getenv("VISION_MODEL_DRIVE_ID", ""),
|
| 122 |
+
"AUDIO_MODEL_DRIVE_ID": os.getenv("AUDIO_MODEL_DRIVE_ID", ""),
|
| 123 |
+
"VISION_MODEL_FILENAME": os.getenv("VISION_MODEL_FILENAME", "resnet50_model.pth"),
|
| 124 |
+
"AUDIO_MODEL_FILENAME": os.getenv("AUDIO_MODEL_FILENAME", "wav2vec2_model.pth"),
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
# Logging Configuration
|
| 128 |
+
LOGGING_CONFIG = {
|
| 129 |
+
"level": "INFO",
|
| 130 |
+
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
| 131 |
+
"handlers": ["console", "file"],
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
# Cache Configuration
|
| 135 |
+
CACHE_CONFIG = {
|
| 136 |
+
"ttl": 3600, # 1 hour
|
| 137 |
+
"max_entries": 100,
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
|
| 142 |
+
"""Get sentiment mapping based on number of classes."""
|
| 143 |
+
return SENTIMENT_MAPPINGS.get(
|
| 144 |
+
num_classes, {i: f"Class_{i}" for i in range(num_classes)}
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def validate_environment() -> Dict[str, bool]:
|
| 149 |
+
"""Validate that required environment variables are set."""
|
| 150 |
+
validation = {}
|
| 151 |
+
for var_name, var_value in ENV_VARS.items():
|
| 152 |
+
validation[var_name] = bool(var_value)
|
| 153 |
+
return validation
|
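For reference, the two helpers at the end of settings.py can be exercised directly; the printed values below are illustrative and the environment-variable results depend on what is actually set:

from src.config.settings import get_sentiment_mapping, validate_environment

print(get_sentiment_mapping(3))   # {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
print(get_sentiment_mapping(5))   # unknown sizes fall back to {0: 'Class_0', ..., 4: 'Class_4'}
print(validate_environment())     # e.g. {'VISION_MODEL_DRIVE_ID': False, 'AUDIO_MODEL_DRIVE_ID': False, ...}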
src/models/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
| 1 |
+
"""
|
| 2 |
+
Model package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .vision_model import *
|
| 6 |
+
from .audio_model import *
|
| 7 |
+
from .text_model import *
|
| 8 |
+
from .fused_model import *
|
src/models/audio_model.py
ADDED
|
@@ -0,0 +1,154 @@
|
| 1 |
+
"""
|
| 2 |
+
Audio sentiment analysis model using fine-tuned Wav2Vec2.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
import os
|
| 11 |
+
from ..config.settings import AUDIO_MODEL_CONFIG
|
| 12 |
+
from ..utils.preprocessing import preprocess_audio_for_model
|
| 13 |
+
from ..utils.sentiment_mapping import get_sentiment_mapping
|
| 14 |
+
from src.utils.simple_model_manager import SimpleModelManager
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@st.cache_resource
|
| 20 |
+
def load_audio_model():
|
| 21 |
+
"""Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive."""
|
| 22 |
+
try:
|
| 23 |
+
manager = SimpleModelManager()
|
| 24 |
+
if manager is None:
|
| 25 |
+
logger.error("Model manager not available")
|
| 26 |
+
st.error("Model manager not available")
|
| 27 |
+
return None, None, None, None
|
| 28 |
+
|
| 29 |
+
# Load the model using the Google Drive manager
|
| 30 |
+
model, device = manager.load_audio_model()
|
| 31 |
+
|
| 32 |
+
if model is None:
|
| 33 |
+
logger.error("Failed to load audio model from Google Drive")
|
| 34 |
+
st.error("Failed to load audio model from Google Drive")
|
| 35 |
+
return None, None, None, None
|
| 36 |
+
|
| 37 |
+
# For Wav2Vec2 models, we need to determine the number of classes
|
| 38 |
+
# This is typically available in the model configuration
|
| 39 |
+
try:
|
| 40 |
+
num_classes = model.config.num_labels
|
| 41 |
+
except Exception:
|
| 42 |
+
# Fallback: try to infer from the model
|
| 43 |
+
try:
|
| 44 |
+
num_classes = model.classifier.out_features
|
| 45 |
+
except Exception:
|
| 46 |
+
num_classes = AUDIO_MODEL_CONFIG["num_classes"] # Default assumption
|
| 47 |
+
|
| 48 |
+
# Load feature extractor
|
| 49 |
+
from transformers import AutoFeatureExtractor
|
| 50 |
+
|
| 51 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
| 52 |
+
AUDIO_MODEL_CONFIG["model_name"]
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
logger.info(f"Audio model loaded successfully with {num_classes} classes!")
|
| 56 |
+
st.success(f"Audio model loaded successfully with {num_classes} classes!")
|
| 57 |
+
return model, device, num_classes, feature_extractor
|
| 58 |
+
except Exception as e:
|
| 59 |
+
logger.error(f"Error loading audio model: {str(e)}")
|
| 60 |
+
st.error(f"Error loading audio model: {str(e)}")
|
| 61 |
+
return None, None, None, None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def predict_audio_sentiment(audio_bytes: bytes) -> Tuple[str, float]:
|
| 65 |
+
"""
|
| 66 |
+
Analyze audio sentiment using fine-tuned Wav2Vec2 model.
|
| 67 |
+
|
| 68 |
+
Preprocessing matches CREMA-D + RAVDESS training specifications:
|
| 69 |
+
- Target sampling rate: 16kHz
|
| 70 |
+
- Max duration: 5.0 seconds
|
| 71 |
+
- Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
|
| 72 |
+
|
| 73 |
+
Args:
|
| 74 |
+
audio_bytes: Raw audio bytes
|
| 75 |
+
|
| 76 |
+
Returns:
|
| 77 |
+
Tuple of (sentiment, confidence)
|
| 78 |
+
"""
|
| 79 |
+
if audio_bytes is None:
|
| 80 |
+
return "No audio provided", 0.0
|
| 81 |
+
|
| 82 |
+
try:
|
| 83 |
+
# Load model if not already loaded
|
| 84 |
+
model, device, num_classes, feature_extractor = load_audio_model()
|
| 85 |
+
if model is None:
|
| 86 |
+
return "Model not loaded", 0.0
|
| 87 |
+
|
| 88 |
+
# Use our centralized preprocessing function
|
| 89 |
+
input_values = preprocess_audio_for_model(audio_bytes)
|
| 90 |
+
if input_values is None:
|
| 91 |
+
return "Preprocessing failed", 0.0
|
| 92 |
+
|
| 93 |
+
# Debug: Log the tensor shape
|
| 94 |
+
logger.info(f"Preprocessed audio tensor shape: {input_values.shape}")
|
| 95 |
+
|
| 96 |
+
# Ensure correct tensor shape: [batch_size, sequence_length]
|
| 97 |
+
if input_values.dim() == 1:
|
| 98 |
+
input_values = input_values.unsqueeze(0) # Add batch dimension if missing
|
| 99 |
+
elif input_values.dim() == 3:
|
| 100 |
+
# If we get [batch, sequence, channels], squeeze the channels
|
| 101 |
+
input_values = input_values.squeeze(-1)
|
| 102 |
+
|
| 103 |
+
logger.info(f"Final audio tensor shape: {input_values.shape}")
|
| 104 |
+
|
| 105 |
+
# Move to device
|
| 106 |
+
input_values = input_values.to(device)
|
| 107 |
+
|
| 108 |
+
# Run inference
|
| 109 |
+
with torch.no_grad():
|
| 110 |
+
outputs = model(input_values)
|
| 111 |
+
probabilities = torch.softmax(outputs.logits, dim=1)
|
| 112 |
+
confidence, predicted = torch.max(probabilities, 1)
|
| 113 |
+
|
| 114 |
+
# Get sentiment mapping based on number of classes
|
| 115 |
+
sentiment_map = get_sentiment_mapping(num_classes)
|
| 116 |
+
sentiment = sentiment_map[predicted.item()]
|
| 117 |
+
confidence_score = confidence.item()
|
| 118 |
+
|
| 119 |
+
logger.info(
|
| 120 |
+
f"Audio sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
|
| 121 |
+
)
|
| 122 |
+
return sentiment, confidence_score
|
| 123 |
+
|
| 124 |
+
except ImportError as e:
|
| 125 |
+
logger.error(f"Required library not installed: {str(e)}")
|
| 126 |
+
st.error(f"Required library not installed: {str(e)}")
|
| 127 |
+
st.info("Please install: pip install librosa transformers")
|
| 128 |
+
return "Library not available", 0.0
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error in audio sentiment prediction: {str(e)}")
|
| 131 |
+
st.error(f"Error in audio sentiment prediction: {str(e)}")
|
| 132 |
+
return "Error occurred", 0.0
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def get_audio_model_info() -> dict:
|
| 136 |
+
"""Get information about the audio sentiment model."""
|
| 137 |
+
return {
|
| 138 |
+
"model_name": AUDIO_MODEL_CONFIG["model_name"],
|
| 139 |
+
"description": "Fine-tuned Wav2Vec2 for audio sentiment analysis",
|
| 140 |
+
"capabilities": [
|
| 141 |
+
"Audio sentiment classification",
|
| 142 |
+
"Automatic audio preprocessing",
|
| 143 |
+
"CREMA-D + RAVDESS dataset compatibility",
|
| 144 |
+
"Real-time audio analysis",
|
| 145 |
+
],
|
| 146 |
+
"input_format": "Audio files (WAV, MP3, M4A, FLAC)",
|
| 147 |
+
"output_format": "Sentiment label + confidence score",
|
| 148 |
+
"preprocessing": {
|
| 149 |
+
"sampling_rate": f"{AUDIO_MODEL_CONFIG['target_sampling_rate']} Hz",
|
| 150 |
+
"max_duration": f"{AUDIO_MODEL_CONFIG['max_duration']} seconds",
|
| 151 |
+
"feature_extraction": "AutoFeatureExtractor",
|
| 152 |
+
"normalization": "Model-specific",
|
| 153 |
+
},
|
| 154 |
+
}
|
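Note: preprocess_audio_for_model is imported from src/utils/preprocessing.py, whose audio portion is not shown in this diff. A minimal sketch of what the docstring above describes (resample to 16 kHz, cap at 5 seconds, run the Wav2Vec2 feature extractor) could look like this; it assumes librosa is available, reloads the feature extractor for simplicity, and is not the project's actual implementation:

import io

import librosa
import torch
from transformers import AutoFeatureExtractor

def preprocess_audio_sketch(audio_bytes: bytes) -> torch.Tensor:
    # Decode and resample to the 16 kHz mono format the model was trained on.
    waveform, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
    max_samples = int(16000 * 5.0)   # 5-second cap, as in AUDIO_MODEL_CONFIG
    waveform = waveform[:max_samples]
    extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
    inputs = extractor(
        waveform,
        sampling_rate=16000,
        max_length=max_samples,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    return inputs.input_values       # shape: [1, sequence_length]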
src/models/fused_model.py
ADDED
|
@@ -0,0 +1,176 @@
|
| 1 |
+
"""
|
| 2 |
+
Fused sentiment analysis model combining text, audio, and vision models.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Tuple, Optional, List
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
from .text_model import predict_text_sentiment
|
| 10 |
+
from .audio_model import predict_audio_sentiment
|
| 11 |
+
from .vision_model import predict_vision_sentiment
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def predict_fused_sentiment(
|
| 17 |
+
text: Optional[str] = None,
|
| 18 |
+
audio_bytes: Optional[bytes] = None,
|
| 19 |
+
image: Optional[Image.Image] = None,
|
| 20 |
+
) -> Tuple[str, float]:
|
| 21 |
+
"""
|
| 22 |
+
Implement ensemble/fusion logic combining all three models.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
text: Input text for text sentiment analysis
|
| 26 |
+
audio_bytes: Audio bytes for audio sentiment analysis
|
| 27 |
+
image: Input image for vision sentiment analysis
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
Tuple of (fused_sentiment, overall_confidence)
|
| 31 |
+
"""
|
| 32 |
+
results = []
|
| 33 |
+
|
| 34 |
+
if text:
|
| 35 |
+
text_sentiment, text_conf = predict_text_sentiment(text)
|
| 36 |
+
results.append(("Text", text_sentiment, text_conf))
|
| 37 |
+
|
| 38 |
+
if audio_bytes:
|
| 39 |
+
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
|
| 40 |
+
results.append(("Audio", audio_sentiment, audio_conf))
|
| 41 |
+
|
| 42 |
+
if image:
|
| 43 |
+
vision_sentiment, vision_conf = predict_vision_sentiment(image)
|
| 44 |
+
results.append(("Vision", vision_sentiment, vision_conf))
|
| 45 |
+
|
| 46 |
+
if not results:
|
| 47 |
+
return "No inputs provided", 0.0
|
| 48 |
+
|
| 49 |
+
# Simple ensemble logic (can be enhanced with more sophisticated fusion strategies)
|
| 50 |
+
sentiment_counts = {}
|
| 51 |
+
total_confidence = 0
|
| 52 |
+
modality_weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35} # Weighted voting
|
| 53 |
+
|
| 54 |
+
for modality, sentiment, confidence in results:
|
| 55 |
+
if sentiment not in sentiment_counts:
|
| 56 |
+
sentiment_counts[sentiment] = {"count": 0, "weighted_conf": 0}
|
| 57 |
+
|
| 58 |
+
sentiment_counts[sentiment]["count"] += 1
|
| 59 |
+
weight = modality_weights.get(modality, 0.33)
|
| 60 |
+
sentiment_counts[sentiment]["weighted_conf"] += confidence * weight
|
| 61 |
+
total_confidence += confidence
|
| 62 |
+
|
| 63 |
+
# Weighted majority voting with confidence averaging
|
| 64 |
+
if sentiment_counts:
|
| 65 |
+
# Find sentiment with highest weighted confidence
|
| 66 |
+
final_sentiment = max(
|
| 67 |
+
sentiment_counts.keys(), key=lambda s: sentiment_counts[s]["weighted_conf"]
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Report overall confidence as the simple (unweighted) average of the individual confidences
|
| 71 |
+
avg_confidence = total_confidence / len(results)
|
| 72 |
+
|
| 73 |
+
logger.info(
|
| 74 |
+
f"Fused sentiment analysis completed: {final_sentiment} (confidence: {avg_confidence:.2f})"
|
| 75 |
+
)
|
| 76 |
+
logger.info(f"Individual results: {results}")
|
| 77 |
+
|
| 78 |
+
return final_sentiment, avg_confidence
|
| 79 |
+
else:
|
| 80 |
+
return "No valid predictions", 0.0
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def get_fusion_strategy_info() -> dict:
|
| 84 |
+
"""Get information about the fusion strategy."""
|
| 85 |
+
return {
|
| 86 |
+
"strategy_name": "Weighted Ensemble Fusion",
|
| 87 |
+
"description": "Combines predictions from text, audio, and vision models using weighted voting",
|
| 88 |
+
"modality_weights": {"Text": 0.3, "Audio": 0.35, "Vision": 0.35},
|
| 89 |
+
"fusion_method": "Weighted majority voting with confidence averaging",
|
| 90 |
+
"advantages": [
|
| 91 |
+
"Robust to individual model failures",
|
| 92 |
+
"Leverages complementary information from different modalities",
|
| 93 |
+
"Configurable modality weights",
|
| 94 |
+
"Real-time ensemble prediction",
|
| 95 |
+
],
|
| 96 |
+
"use_cases": [
|
| 97 |
+
"Multi-modal content analysis",
|
| 98 |
+
"Enhanced sentiment accuracy",
|
| 99 |
+
"Cross-validation of predictions",
|
| 100 |
+
"Comprehensive emotional understanding",
|
| 101 |
+
],
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def analyze_modality_agreement(
|
| 106 |
+
text: Optional[str] = None,
|
| 107 |
+
audio_bytes: Optional[bytes] = None,
|
| 108 |
+
image: Optional[Image.Image] = None,
|
| 109 |
+
) -> dict:
|
| 110 |
+
"""
|
| 111 |
+
Analyze agreement between different modalities.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
text: Input text
|
| 115 |
+
audio_bytes: Audio bytes
|
| 116 |
+
image: Input image
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
Dictionary containing agreement analysis
|
| 120 |
+
"""
|
| 121 |
+
results = {}
|
| 122 |
+
|
| 123 |
+
if text:
|
| 124 |
+
text_sentiment, text_conf = predict_text_sentiment(text)
|
| 125 |
+
results["text"] = {"sentiment": text_sentiment, "confidence": text_conf}
|
| 126 |
+
|
| 127 |
+
if audio_bytes:
|
| 128 |
+
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
|
| 129 |
+
results["audio"] = {"sentiment": audio_sentiment, "confidence": audio_conf}
|
| 130 |
+
|
| 131 |
+
if image:
|
| 132 |
+
vision_sentiment, vision_conf = predict_vision_sentiment(image)
|
| 133 |
+
results["vision"] = {"sentiment": vision_sentiment, "confidence": vision_conf}
|
| 134 |
+
|
| 135 |
+
if len(results) < 2:
|
| 136 |
+
return {"agreement_level": "insufficient_modalities", "details": results}
|
| 137 |
+
|
| 138 |
+
# Analyze agreement
|
| 139 |
+
sentiments = [result["sentiment"] for result in results.values()]
|
| 140 |
+
unique_sentiments = set(sentiments)
|
| 141 |
+
|
| 142 |
+
if len(unique_sentiments) == 1:
|
| 143 |
+
agreement_level = "perfect"
|
| 144 |
+
agreement_score = 1.0
|
| 145 |
+
elif len(unique_sentiments) == 2:
|
| 146 |
+
agreement_level = "partial"
|
| 147 |
+
agreement_score = 0.5
|
| 148 |
+
else:
|
| 149 |
+
agreement_level = "low"
|
| 150 |
+
agreement_score = 0.0
|
| 151 |
+
|
| 152 |
+
# Calculate confidence consistency
|
| 153 |
+
confidences = [result["confidence"] for result in results.values()]
|
| 154 |
+
confidence_std = sum(confidences) / len(confidences) if confidences else 0  # NOTE: this is the mean confidence, not a standard deviation
|
| 155 |
+
|
| 156 |
+
return {
|
| 157 |
+
"agreement_level": agreement_level,
|
| 158 |
+
"agreement_score": agreement_score,
|
| 159 |
+
"modalities_analyzed": len(results),
|
| 160 |
+
"sentiment_distribution": {s: sentiments.count(s) for s in unique_sentiments},
|
| 161 |
+
"confidence_consistency": confidence_std,
|
| 162 |
+
"individual_results": results,
|
| 163 |
+
"recommendation": _get_agreement_recommendation(agreement_level, len(results)),
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def _get_agreement_recommendation(agreement_level: str, num_modalities: int) -> str:
|
| 168 |
+
"""Get recommendation based on agreement level."""
|
| 169 |
+
if agreement_level == "perfect":
|
| 170 |
+
return "High confidence in prediction - all modalities agree"
|
| 171 |
+
elif agreement_level == "partial":
|
| 172 |
+
return "Moderate confidence - consider modality-specific factors"
|
| 173 |
+
elif agreement_level == "low":
|
| 174 |
+
return "Low confidence - modalities disagree, consider context"
|
| 175 |
+
else:
|
| 176 |
+
return "Insufficient data for reliable fusion"
|
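To make the weighted-voting step in predict_fused_sentiment concrete, here is a small standalone walk-through with made-up confidences (illustrative only, not part of the commit):

# Suppose the three models returned these (modality, sentiment, confidence) triples.
results = [("Text", "Positive", 0.80), ("Audio", "Neutral", 0.60), ("Vision", "Positive", 0.70)]
weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35}

# Accumulate confidence * weight per sentiment, like the weighted_conf field above:
#   Positive -> 0.80*0.30 + 0.70*0.35 = 0.485
#   Neutral  -> 0.60*0.35            = 0.210
scores = {}
for modality, sentiment, conf in results:
    scores[sentiment] = scores.get(sentiment, 0.0) + conf * weights[modality]

winner = max(scores, key=scores.get)                       # "Positive"
mean_conf = sum(c for _, _, c in results) / len(results)   # (0.80 + 0.60 + 0.70) / 3 = 0.70
print(winner, round(mean_conf, 2))                         # Positive 0.7

The reported confidence is the plain mean of the individual confidences, so a confidently wrong modality can still pull the overall score up or down; the weights only decide which sentiment wins.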
src/models/text_model.py
ADDED
|
@@ -0,0 +1,81 @@
|
| 1 |
+
"""
|
| 2 |
+
Text sentiment analysis model using TextBlob.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Tuple, Optional
|
| 7 |
+
from ..config.settings import TEXT_MODEL_CONFIG
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def predict_text_sentiment(text: str) -> Tuple[str, float]:
|
| 13 |
+
"""
|
| 14 |
+
Analyze text sentiment using TextBlob.
|
| 15 |
+
|
| 16 |
+
Args:
|
| 17 |
+
text: Input text to analyze
|
| 18 |
+
|
| 19 |
+
Returns:
|
| 20 |
+
Tuple of (sentiment, confidence)
|
| 21 |
+
"""
|
| 22 |
+
if not text or text.strip() == "":
|
| 23 |
+
return "No text provided", 0.0
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
from textblob import TextBlob
|
| 27 |
+
|
| 28 |
+
# Create TextBlob object
|
| 29 |
+
blob = TextBlob(text)
|
| 30 |
+
|
| 31 |
+
# Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
|
| 32 |
+
polarity = blob.sentiment.polarity
|
| 33 |
+
|
| 34 |
+
# Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
|
| 35 |
+
subjectivity = blob.sentiment.subjectivity
|
| 36 |
+
|
| 37 |
+
# Convert polarity to sentiment categories
|
| 38 |
+
confidence_threshold = TEXT_MODEL_CONFIG["confidence_threshold"]
|
| 39 |
+
|
| 40 |
+
if polarity > confidence_threshold:
|
| 41 |
+
sentiment = "Positive"
|
| 42 |
+
confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
|
| 43 |
+
elif polarity < -confidence_threshold:
|
| 44 |
+
sentiment = "Negative"
|
| 45 |
+
confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
|
| 46 |
+
else:
|
| 47 |
+
sentiment = "Neutral"
|
| 48 |
+
confidence = 0.7 - abs(polarity) * 0.2
|
| 49 |
+
|
| 50 |
+
# Round confidence to 2 decimal places
|
| 51 |
+
confidence = round(confidence, 2)
|
| 52 |
+
|
| 53 |
+
logger.info(
|
| 54 |
+
f"Text sentiment analysis completed: {sentiment} (confidence: {confidence})"
|
| 55 |
+
)
|
| 56 |
+
return sentiment, confidence
|
| 57 |
+
|
| 58 |
+
except ImportError:
|
| 59 |
+
logger.error(
|
| 60 |
+
"TextBlob not installed. Please install it with: pip install textblob"
|
| 61 |
+
)
|
| 62 |
+
return "TextBlob not available", 0.0
|
| 63 |
+
except Exception as e:
|
| 64 |
+
logger.error(f"Error in text sentiment analysis: {str(e)}")
|
| 65 |
+
return "Error occurred", 0.0
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def get_text_model_info() -> dict:
|
| 69 |
+
"""Get information about the text sentiment model."""
|
| 70 |
+
return {
|
| 71 |
+
"model_name": TEXT_MODEL_CONFIG["model_name"],
|
| 72 |
+
"description": "Natural Language Processing based sentiment analysis using TextBlob",
|
| 73 |
+
"capabilities": [
|
| 74 |
+
"Text sentiment classification (Positive/Negative/Neutral)",
|
| 75 |
+
"Confidence scoring",
|
| 76 |
+
"Real-time analysis",
|
| 77 |
+
"No external API required",
|
| 78 |
+
],
|
| 79 |
+
"input_format": "Plain text",
|
| 80 |
+
"output_format": "Sentiment label + confidence score",
|
| 81 |
+
}
|
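A quick way to exercise this module from a Python shell (requires TextBlob and the project dependencies to be installed; the exact confidence values vary with the input):

from src.models.text_model import predict_text_sentiment

print(predict_text_sentiment("I absolutely love this product!"))  # ('Positive', <confidence>)
print(predict_text_sentiment("This is the worst day ever."))      # ('Negative', <confidence>)
print(predict_text_sentiment(""))                                 # ('No text provided', 0.0)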
src/models/vision_model.py
ADDED
|
@@ -0,0 +1,157 @@
|
| 1 |
+
"""
|
| 2 |
+
Vision sentiment analysis model using fine-tuned ResNet-50.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from ..config.settings import VISION_MODEL_CONFIG
|
| 13 |
+
from ..utils.preprocessing import detect_and_preprocess_face, get_vision_transforms
|
| 14 |
+
from ..utils.sentiment_mapping import get_sentiment_mapping
|
| 15 |
+
from src.utils.simple_model_manager import SimpleModelManager
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@st.cache_resource
|
| 21 |
+
def get_model_manager():
|
| 22 |
+
"""Get the Google Drive model manager instance."""
|
| 23 |
+
try:
|
| 24 |
+
manager = SimpleModelManager()
|
| 25 |
+
return manager
|
| 26 |
+
except Exception as e:
|
| 27 |
+
logger.error(f"Failed to initialize model manager: {e}")
|
| 28 |
+
st.error(f"Failed to initialize model manager: {e}")
|
| 29 |
+
return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@st.cache_resource
|
| 33 |
+
def load_vision_model():
|
| 34 |
+
"""Load the pre-trained ResNet-50 vision sentiment model from Google Drive."""
|
| 35 |
+
try:
|
| 36 |
+
manager = get_model_manager()
|
| 37 |
+
if manager is None:
|
| 38 |
+
logger.error("Model manager not available")
|
| 39 |
+
st.error("Model manager not available")
|
| 40 |
+
return None, None, None
|
| 41 |
+
|
| 42 |
+
# Load the model using the Google Drive manager
|
| 43 |
+
model, device, num_classes = manager.load_vision_model()
|
| 44 |
+
|
| 45 |
+
if model is None:
|
| 46 |
+
logger.error("Failed to load vision model from Google Drive")
|
| 47 |
+
st.error("Failed to load vision model from Google Drive")
|
| 48 |
+
return None, None, None
|
| 49 |
+
|
| 50 |
+
logger.info(f"Vision model loaded successfully with {num_classes} classes!")
|
| 51 |
+
st.success(f"Vision model loaded successfully with {num_classes} classes!")
|
| 52 |
+
return model, device, num_classes
|
| 53 |
+
except Exception as e:
|
| 54 |
+
logger.error(f"Error loading vision model: {str(e)}")
|
| 55 |
+
st.error(f"Error loading vision model: {str(e)}")
|
| 56 |
+
return None, None, None
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def predict_vision_sentiment(
|
| 60 |
+
image: Image.Image, crop_tightness: float = None
|
| 61 |
+
) -> Tuple[str, float]:
|
| 62 |
+
"""
|
| 63 |
+
Load ResNet-50 and run inference for vision sentiment analysis.
|
| 64 |
+
|
| 65 |
+
Args:
|
| 66 |
+
image: Input image (PIL Image or numpy array)
|
| 67 |
+
crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
Tuple of (sentiment, confidence)
|
| 71 |
+
"""
|
| 72 |
+
if image is None:
|
| 73 |
+
return "No image provided", 0.0
|
| 74 |
+
|
| 75 |
+
try:
|
| 76 |
+
# Use default crop tightness if not specified
|
| 77 |
+
if crop_tightness is None:
|
| 78 |
+
crop_tightness = VISION_MODEL_CONFIG["crop_tightness"]
|
| 79 |
+
|
| 80 |
+
# Load model if not already loaded
|
| 81 |
+
model, device, num_classes = load_vision_model()
|
| 82 |
+
if model is None:
|
| 83 |
+
return "Model not loaded", 0.0
|
| 84 |
+
|
| 85 |
+
# Preprocess image to match FER2013 format
|
| 86 |
+
st.info(
|
| 87 |
+
"Detecting face and preprocessing image to match training data format..."
|
| 88 |
+
)
|
| 89 |
+
preprocessed_image = detect_and_preprocess_face(
|
| 90 |
+
image, crop_tightness=crop_tightness
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
if preprocessed_image is None:
|
| 94 |
+
return "Image preprocessing failed", 0.0
|
| 95 |
+
|
| 96 |
+
# Show preprocessed image
|
| 97 |
+
st.image(
|
| 98 |
+
preprocessed_image,
|
| 99 |
+
caption="Preprocessed Image (224x224 Grayscale → 3-channel RGB)",
|
| 100 |
+
width=200,
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# Get transforms
|
| 104 |
+
transform = get_vision_transforms()
|
| 105 |
+
|
| 106 |
+
# Convert preprocessed image to tensor
|
| 107 |
+
image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
|
| 108 |
+
|
| 109 |
+
# Run inference
|
| 110 |
+
with torch.no_grad():
|
| 111 |
+
outputs = model(image_tensor)
|
| 112 |
+
|
| 113 |
+
# Debug: print output shape
|
| 114 |
+
st.info(f"Model output shape: {outputs.shape}")
|
| 115 |
+
|
| 116 |
+
probabilities = F.softmax(outputs, dim=1)
|
| 117 |
+
confidence, predicted = torch.max(probabilities, 1)
|
| 118 |
+
|
| 119 |
+
# Get sentiment mapping based on number of classes
|
| 120 |
+
sentiment_map = get_sentiment_mapping(num_classes)
|
| 121 |
+
sentiment = sentiment_map[predicted.item()]
|
| 122 |
+
confidence_score = confidence.item()
|
| 123 |
+
|
| 124 |
+
logger.info(
|
| 125 |
+
f"Vision sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
|
| 126 |
+
)
|
| 127 |
+
return sentiment, confidence_score
|
| 128 |
+
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error in vision sentiment prediction: {str(e)}")
|
| 131 |
+
st.error(f"Error in vision sentiment prediction: {str(e)}")
|
| 132 |
+
st.error(
|
| 133 |
+
"If this is a shape mismatch, check that the model's output size matches the configured sentiment mapping."
|
| 134 |
+
)
|
| 135 |
+
return "Error occurred", 0.0
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def get_vision_model_info() -> dict:
|
| 139 |
+
"""Get information about the vision sentiment model."""
|
| 140 |
+
return {
|
| 141 |
+
"model_name": VISION_MODEL_CONFIG["model_name"],
|
| 142 |
+
"description": "Fine-tuned ResNet-50 for facial expression sentiment analysis",
|
| 143 |
+
"capabilities": [
|
| 144 |
+
"Facial expression recognition",
|
| 145 |
+
"Automatic face detection and cropping",
|
| 146 |
+
"FER2013 dataset format compatibility",
|
| 147 |
+
"Real-time image analysis",
|
| 148 |
+
],
|
| 149 |
+
"input_format": "Images (PNG, JPG, JPEG, BMP, TIFF)",
|
| 150 |
+
"output_format": "Sentiment label + confidence score",
|
| 151 |
+
"preprocessing": {
|
| 152 |
+
"face_detection": "OpenCV Haar Cascade",
|
| 153 |
+
"image_size": f"{VISION_MODEL_CONFIG['input_size']}x{VISION_MODEL_CONFIG['input_size']}",
|
| 154 |
+
"color_format": "Grayscale → 3-channel RGB",
|
| 155 |
+
"normalization": "ImageNet standard",
|
| 156 |
+
},
|
| 157 |
+
}
|
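Note: get_vision_transforms is imported from src/utils/preprocessing.py, whose body is not shown in this diff. Judging from the IMAGE_TRANSFORMS values in src/config/settings.py, it presumably builds a torchvision pipeline along these lines (a sketch under that assumption, not the actual implementation):

from torchvision import transforms

def vision_transforms_sketch():
    # Resize/center-crop to 224x224 and normalize with ImageNet statistics,
    # mirroring IMAGE_TRANSFORMS in src/config/settings.py.
    return transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])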
src/ui/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
| 1 |
+
"""
|
| 2 |
+
UI package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .components import *
|
| 6 |
+
from .pages import *
|
| 7 |
+
from .styles import *
|
src/ui/styles.py
ADDED
|
@@ -0,0 +1,97 @@
|
| 1 |
+
"""
|
| 2 |
+
UI styles and CSS for the Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from ..config.settings import CUSTOM_CSS, UI_COLORS
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_custom_css() -> str:
|
| 9 |
+
"""Get the custom CSS styles for the application."""
|
| 10 |
+
return CUSTOM_CSS
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_ui_colors() -> dict:
|
| 14 |
+
"""Get the UI color scheme."""
|
| 15 |
+
return UI_COLORS
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_sentiment_color_style(sentiment: str) -> str:
|
| 19 |
+
"""Get color style for different sentiment types."""
|
| 20 |
+
colors = {
|
| 21 |
+
"Positive": "color: #28a745;",
|
| 22 |
+
"Negative": "color: #dc3545;",
|
| 23 |
+
"Neutral": "color: #ffc107;",
|
| 24 |
+
"Angry": "color: #dc3545;",
|
| 25 |
+
"Sad": "color: #17a2b8;",
|
| 26 |
+
"Happy": "color: #28a745;",
|
| 27 |
+
"Fear": "color: #6f42c1;",
|
| 28 |
+
"Disgust": "color: #fd7e14;",
|
| 29 |
+
"Surprise": "color: #ffc107;",
|
| 30 |
+
}
|
| 31 |
+
return colors.get(sentiment, "color: #6c757d;")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_metric_style(metric_type: str = "default") -> str:
|
| 35 |
+
"""Get styling for different metric types."""
|
| 36 |
+
styles = {
|
| 37 |
+
"default": "background-color: #f8f9fa; padding: 1rem; border-radius: 8px;",
|
| 38 |
+
"success": "background-color: #d4edda; padding: 1rem; border-radius: 8px; border: 1px solid #c3e6cb;",
|
| 39 |
+
"warning": "background-color: #fff3cd; padding: 1rem; border-radius: 8px; border: 1px solid #ffeaa7;",
|
| 40 |
+
"error": "background-color: #f8d7da; padding: 1rem; border-radius: 8px; border: 1px solid #f5c6cb;",
|
| 41 |
+
"info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb;",
|
| 42 |
+
}
|
| 43 |
+
return styles.get(metric_type, styles["default"])
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_card_style(card_type: str = "default") -> str:
|
| 47 |
+
"""Get styling for different card types."""
|
| 48 |
+
styles = {
|
| 49 |
+
"default": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
|
| 50 |
+
"model": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
|
| 51 |
+
"result": "background-color: #e8f4fd; padding: 1rem; border-radius: 8px; border: 1px solid #1f77b4; margin: 1rem 0;",
|
| 52 |
+
"upload": "background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; border: 2px dashed #dee2e6; text-align: center; margin: 1rem 0;",
|
| 53 |
+
"info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb; margin: 1rem 0;",
|
| 54 |
+
}
|
| 55 |
+
return styles.get(card_type, styles["default"])
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_button_style(button_type: str = "primary") -> str:
|
| 59 |
+
"""Get styling for different button types."""
|
| 60 |
+
styles = {
|
| 61 |
+
"primary": "background-color: #1f77b4; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 62 |
+
"secondary": "background-color: #6c757d; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 63 |
+
"success": "background-color: #28a745; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 64 |
+
"warning": "background-color: #ffc107; color: black; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 65 |
+
"danger": "background-color: #dc3545; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 66 |
+
}
|
| 67 |
+
return styles.get(button_type, styles["primary"])
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_sidebar_style() -> str:
|
| 71 |
+
"""Get styling for the sidebar."""
|
| 72 |
+
return """
|
| 73 |
+
<style>
|
| 74 |
+
.css-1d391kg {
|
| 75 |
+
background-color: #f8f9fa;
|
| 76 |
+
}
|
| 77 |
+
.css-1d391kg .sidebar-content {
|
| 78 |
+
padding: 1rem;
|
| 79 |
+
}
|
| 80 |
+
</style>
|
| 81 |
+
"""
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def get_header_style() -> str:
|
| 85 |
+
"""Get styling for the main header."""
|
| 86 |
+
return """
|
| 87 |
+
<style>
|
| 88 |
+
.main-header {
|
| 89 |
+
font-size: 2.5rem;
|
| 90 |
+
font-weight: bold;
|
| 91 |
+
color: #1f77b4;
|
| 92 |
+
text-align: center;
|
| 93 |
+
margin-bottom: 2rem;
|
| 94 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
|
| 95 |
+
}
|
| 96 |
+
</style>
|
| 97 |
+
"""
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
| 1 |
+
"""
|
| 2 |
+
Utility functions package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .preprocessing import *
|
| 6 |
+
from .file_handling import *
|
| 7 |
+
from .sentiment_mapping import *
|
src/utils/file_handling.py
ADDED
|
@@ -0,0 +1,189 @@
|
| 1 |
+
"""
|
| 2 |
+
File handling utilities for different input types.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Optional, Union, Tuple
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from ..config.settings import (
|
| 12 |
+
SUPPORTED_IMAGE_FORMATS,
|
| 13 |
+
SUPPORTED_AUDIO_FORMATS,
|
| 14 |
+
SUPPORTED_VIDEO_FORMATS,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def validate_file_format(filename: str, supported_formats: list) -> bool:
|
| 21 |
+
"""
|
| 22 |
+
Validate if a file has a supported format.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
filename: Name of the file to validate
|
| 26 |
+
supported_formats: List of supported file extensions
|
| 27 |
+
|
| 28 |
+
Returns:
|
| 29 |
+
True if file format is supported, False otherwise
|
| 30 |
+
"""
|
| 31 |
+
if not filename:
|
| 32 |
+
return False
|
| 33 |
+
|
| 34 |
+
file_extension = Path(filename).suffix.lower().lstrip(".")
|
| 35 |
+
return file_extension in supported_formats
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def validate_image_file(filename: str) -> bool:
|
| 39 |
+
"""Validate if a file is a supported image format."""
|
| 40 |
+
return validate_file_format(filename, SUPPORTED_IMAGE_FORMATS)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def validate_audio_file(filename: str) -> bool:
|
| 44 |
+
"""Validate if a file is a supported audio format."""
|
| 45 |
+
return validate_file_format(filename, SUPPORTED_AUDIO_FORMATS)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def validate_video_file(filename: str) -> bool:
|
| 49 |
+
"""Validate if a file is a supported video format."""
|
| 50 |
+
return validate_file_format(filename, SUPPORTED_VIDEO_FORMATS)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def get_file_info(file_object) -> dict:
|
| 54 |
+
"""
|
| 55 |
+
Extract file information from a file object.
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
file_object: File object (e.g., StreamlitUploadedFile)
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
Dictionary containing file information
|
| 62 |
+
"""
|
| 63 |
+
try:
|
| 64 |
+
if hasattr(file_object, "getvalue"):
|
| 65 |
+
file_size = len(file_object.getvalue())
|
| 66 |
+
file_name = getattr(file_object, "name", "Unknown")
|
| 67 |
+
else:
|
| 68 |
+
file_size = len(file_object)
|
| 69 |
+
file_name = "Unknown"
|
| 70 |
+
|
| 71 |
+
file_extension = (
|
| 72 |
+
Path(file_name).suffix.lower().lstrip(".")
|
| 73 |
+
if file_name != "Unknown"
|
| 74 |
+
else "Unknown"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
return {
|
| 78 |
+
"name": file_name,
|
| 79 |
+
"size_bytes": file_size,
|
| 80 |
+
"size_kb": file_size / 1024,
|
| 81 |
+
"size_mb": file_size / (1024 * 1024),
|
| 82 |
+
"extension": file_extension,
|
| 83 |
+
"is_valid_image": (
|
| 84 |
+
validate_image_file(file_name) if file_extension != "Unknown" else False
|
| 85 |
+
),
|
| 86 |
+
"is_valid_audio": (
|
| 87 |
+
validate_audio_file(file_name) if file_extension != "Unknown" else False
|
| 88 |
+
),
|
| 89 |
+
"is_valid_video": (
|
| 90 |
+
validate_video_file(file_name) if file_extension != "Unknown" else False
|
| 91 |
+
),
|
| 92 |
+
}
|
| 93 |
+
except Exception as e:
|
| 94 |
+
logger.error(f"Error getting file info: {str(e)}")
|
| 95 |
+
return {
|
| 96 |
+
"name": "Unknown",
|
| 97 |
+
"size_bytes": 0,
|
| 98 |
+
"size_kb": 0,
|
| 99 |
+
"size_mb": 0,
|
| 100 |
+
"extension": "Unknown",
|
| 101 |
+
"is_valid_image": False,
|
| 102 |
+
"is_valid_audio": False,
|
| 103 |
+
"is_valid_video": False,
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def create_temp_file(
|
| 108 |
+
suffix: str = "", prefix: str = "temp_"
|
| 109 |
+
) -> Tuple[str, tempfile.NamedTemporaryFile]:
|
| 110 |
+
"""
|
| 111 |
+
Create a temporary file with proper cleanup handling.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
suffix: File extension suffix
|
| 115 |
+
prefix: File name prefix
|
| 116 |
+
|
| 117 |
+
Returns:
|
| 118 |
+
Tuple of (file_path, temp_file_object)
|
| 119 |
+
"""
|
| 120 |
+
temp_file = tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, delete=False)
|
| 121 |
+
return temp_file.name, temp_file
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def cleanup_temp_file(file_path: str) -> bool:
|
| 125 |
+
"""
|
| 126 |
+
Safely cleanup a temporary file.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
file_path: Path to the temporary file
|
| 130 |
+
|
| 131 |
+
Returns:
|
| 132 |
+
True if cleanup was successful, False otherwise
|
| 133 |
+
"""
|
| 134 |
+
try:
|
| 135 |
+
if os.path.exists(file_path):
|
| 136 |
+
os.unlink(file_path)
|
| 137 |
+
return True
|
| 138 |
+
return True
|
| 139 |
+
except (OSError, PermissionError) as e:
|
| 140 |
+
logger.warning(f"Could not delete temporary file {file_path}: {e}")
|
| 141 |
+
return False
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def format_file_size(size_bytes: int) -> str:
|
| 145 |
+
"""
|
| 146 |
+
Format file size in human-readable format.
|
| 147 |
+
|
| 148 |
+
Args:
|
| 149 |
+
size_bytes: File size in bytes
|
| 150 |
+
|
| 151 |
+
Returns:
|
| 152 |
+
Formatted file size string
|
| 153 |
+
"""
|
| 154 |
+
if size_bytes < 1024:
|
| 155 |
+
return f"{size_bytes} B"
|
| 156 |
+
elif size_bytes < 1024 * 1024:
|
| 157 |
+
return f"{size_bytes / 1024:.1f} KB"
|
| 158 |
+
elif size_bytes < 1024 * 1024 * 1024:
|
| 159 |
+
return f"{size_bytes / (1024 * 1024):.1f} MB"
|
| 160 |
+
else:
|
| 161 |
+
return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def safe_file_operation(operation_func, *args, **kwargs):
|
| 165 |
+
"""
|
| 166 |
+
Safely execute a file operation with proper error handling.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
operation_func: Function to execute
|
| 170 |
+
*args: Positional arguments for the function
|
| 171 |
+
**kwargs: Keyword arguments for the function
|
| 172 |
+
|
| 173 |
+
Returns:
|
| 174 |
+
Result of the operation or None if it fails
|
| 175 |
+
"""
|
| 176 |
+
try:
|
| 177 |
+
return operation_func(*args, **kwargs)
|
| 178 |
+
except FileNotFoundError as e:
|
| 179 |
+
logger.error(f"File not found: {e}")
|
| 180 |
+
return None
|
| 181 |
+
except PermissionError as e:
|
| 182 |
+
logger.error(f"Permission denied: {e}")
|
| 183 |
+
return None
|
| 184 |
+
except OSError as e:
|
| 185 |
+
logger.error(f"OS error: {e}")
|
| 186 |
+
return None
|
| 187 |
+
except Exception as e:
|
| 188 |
+
logger.error(f"Unexpected error in file operation: {e}")
|
| 189 |
+
return None
|
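Example usage of the validators and the size formatter above (illustrative only):

from src.utils.file_handling import validate_image_file, validate_video_file, format_file_size

print(validate_image_file("selfie.JPG"))  # True  - the extension check is case-insensitive
print(validate_video_file("clip.webm"))   # False - webm is not in SUPPORTED_VIDEO_FORMATS
print(format_file_size(3_500_000))        # '3.3 MB'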
src/utils/preprocessing.py
ADDED
|
@@ -0,0 +1,467 @@
|
"""
Preprocessing utilities for different input modalities.
"""

import os
import tempfile
import logging
from typing import List, Optional, Tuple, Union

try:
    from PIL import Image
    import numpy as np

    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    Image = None
    np = None

from ..config.settings import (
    IMAGE_TRANSFORMS,
    AUDIO_MODEL_CONFIG,
    VISION_MODEL_CONFIG,
    SUPPORTED_IMAGE_FORMATS,
    SUPPORTED_AUDIO_FORMATS,
)

# Add Any to typing imports
from typing import List, Optional, Tuple, Union, Any

# Add torch import for audio preprocessing
try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    torch = None

logger = logging.getLogger(__name__)


def detect_and_preprocess_face(
    image: Union[Image.Image, np.ndarray, Any], crop_tightness: float = 0.05
) -> Optional[Image.Image]:
    """
    Detect face in image, crop to face region, convert to grayscale, and resize.

    Args:
        image: Input image (PIL Image or numpy array)
        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)

    Returns:
        Preprocessed PIL Image or None if preprocessing fails
    """
    if not PIL_AVAILABLE:
        logger.error("PIL (Pillow) not available. Cannot process images.")
        return None

    try:
        import cv2

        # Convert PIL image to OpenCV format
        if isinstance(image, Image.Image):
            img_array = np.array(image)
            # Convert RGB to BGR for OpenCV
            if len(img_array.shape) == 3:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        else:
            img_array = image

        # Load face detection cascade
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )

        # Convert to grayscale for face detection
        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )

        if len(faces) == 0:
            logger.warning("No face detected in the image. Using center crop instead.")
            return _fallback_preprocessing(image)

        # Get the largest face (assuming it's the main subject)
        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])

        # Add padding around the face based on user preference
        padding_x = int(w * crop_tightness)
        padding_y = int(h * crop_tightness)

        # Ensure we don't go out of bounds
        x1 = max(0, x - padding_x)
        y1 = max(0, y - padding_y)
        x2 = min(img_array.shape[1], x + w + padding_x)
        y2 = min(img_array.shape[0], y + h + padding_y)

        # Crop to face region
        face_crop = img_array[y1:y2, x1:x2]

        # Convert BGR to RGB first
        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

        # Convert to grayscale
        face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)

        # Resize to target size
        target_size = IMAGE_TRANSFORMS["resize"]
        face_resized = cv2.resize(
            face_gray, (target_size, target_size), interpolation=cv2.INTER_AREA
        )

        # Convert grayscale to 3-channel RGB (replicate grayscale values)
        face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)

        # Convert back to PIL Image
        face_pil = Image.fromarray(face_rgb_3channel)
        return face_pil

    except ImportError:
        logger.error(
            "OpenCV not installed. Please install it with: pip install opencv-python"
        )
        return _fallback_preprocessing(image)
    except Exception as e:
        logger.error(f"Error in face detection: {str(e)}")
        return _fallback_preprocessing(image)


def _fallback_preprocessing(
    image: Union[Image.Image, np.ndarray, Any],
) -> Optional[Image.Image]:
    """Fallback preprocessing when face detection fails."""
    try:
        if isinstance(image, Image.Image):
            rgb_pil = image.convert("RGB")
            target_size = IMAGE_TRANSFORMS["resize"]
            resized = rgb_pil.resize(
                (target_size, target_size), Image.Resampling.LANCZOS
            )
            # Convert to grayscale and then to 3-channel RGB
            gray_pil = resized.convert("L")
            gray_rgb_pil = gray_pil.convert("RGB")
            return gray_rgb_pil
        return None
    except Exception as e:
        logger.error(f"Fallback preprocessing failed: {str(e)}")
        return None


def get_vision_transforms():
    """Get the image transforms used during training."""
    from torchvision import transforms

    return transforms.Compose(
        [
            transforms.Resize(IMAGE_TRANSFORMS["resize"]),
            transforms.CenterCrop(IMAGE_TRANSFORMS["center_crop"]),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=IMAGE_TRANSFORMS["normalize_mean"],
                std=IMAGE_TRANSFORMS["normalize_std"],
            ),
        ]
    )


def preprocess_audio_for_model(audio_bytes: bytes) -> Optional[torch.Tensor]:
    """
    Preprocess audio bytes for wav2vec2 model input using AutoFeatureExtractor.

    Args:
        audio_bytes: Raw audio bytes

    Returns:
        Preprocessed audio tensor ready for wav2vec2 model
    """
    if not TORCH_AVAILABLE:
        logger.error("PyTorch not available. Cannot process audio.")
        return None

    try:
        from transformers import AutoFeatureExtractor
        import librosa

        # Save audio bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file_path = tmp_file.name

        try:
            # Load and resample audio to target sampling rate
            audio, sr = librosa.load(
                tmp_file_path, sr=AUDIO_MODEL_CONFIG["target_sampling_rate"]
            )

            # Use AutoFeatureExtractor (same as training)
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                AUDIO_MODEL_CONFIG["model_name"]
            )

            # Calculate max length in samples (5 seconds * 16kHz = 80000 samples)
            max_length = int(
                AUDIO_MODEL_CONFIG["max_duration"]
                * AUDIO_MODEL_CONFIG["target_sampling_rate"]
            )

            logger.info(f"Audio length: {len(audio)} samples, max_length: {max_length}")

            inputs = feature_extractor(
                audio,
                sampling_rate=AUDIO_MODEL_CONFIG["target_sampling_rate"],
                max_length=max_length,
                truncation=True,
                padding="max_length",
                return_tensors="pt",
            )

            # Return tensor with correct shape for wav2vec2
            # The model expects: [batch_size, sequence_length]
            tensor = inputs.input_values

            # Log the tensor shape for debugging
            logger.info(f"Audio preprocessing output shape: {tensor.shape}")

            return tensor

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError as e:
        logger.error(f"Required library not installed: {str(e)}")
        raise ImportError("Please install: pip install transformers librosa torch")


def extract_frames_from_video(video_file, max_frames: int = 5) -> List[Any]:
    """
    Extract frames from video file for vision sentiment analysis.

    Args:
        video_file: Video file object
        max_frames: Maximum number of frames to extract

    Returns:
        List of PIL Image objects
    """
    try:
        import cv2

        # Save video bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            if hasattr(video_file, "getvalue"):
                tmp_file.write(video_file.getvalue())
            else:
                tmp_file.write(video_file)
            tmp_file_path = tmp_file.name

        try:
            # Open video with OpenCV
            cap = cv2.VideoCapture(tmp_file_path)

            if not cap.isOpened():
                logger.error("Could not open video file")
                return []

            frames = []
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            duration = total_frames / fps if fps > 0 else 0

            logger.info(
                f"Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
            )

            # Extract frames at strategic intervals
            if total_frames > 0:
                # Select frames: start, 25%, 50%, 75%, end
                frame_indices = [
                    0,
                    int(total_frames * 0.25),
                    int(total_frames * 0.5),
                    int(total_frames * 0.75),
                    total_frames - 1,
                ]
                frame_indices = list(set(frame_indices))  # Remove duplicates
                frame_indices.sort()

                for frame_idx in frame_indices:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                    ret, frame = cap.read()
                    if ret:
                        # Convert BGR to RGB
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # Convert to PIL Image
                        pil_image = Image.fromarray(frame_rgb)
                        frames.append(pil_image)

            cap.release()
            return frames

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "OpenCV not installed. Please install it with: pip install opencv-python"
        )
        return []
    except Exception as e:
        logger.error(f"Error extracting frames: {str(e)}")
        return []


def extract_audio_from_video(video_file) -> Optional[bytes]:
    """
    Extract audio from video file for audio sentiment analysis.

    Args:
        video_file: Video file object

    Returns:
        Audio bytes in WAV format or None if extraction fails
    """
    try:
        import tempfile

        try:
            from moviepy import VideoFileClip
        except ImportError as e:
            logger.error(f"MoviePy import failed: {e}")
            return None

        # Save video bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            if hasattr(video_file, "getvalue"):
                tmp_file.write(video_file.getvalue())
            else:
                tmp_file.write(video_file)
            tmp_file_path = tmp_file.name

        try:
            # Extract audio using moviepy
            video = VideoFileClip(tmp_file_path)
            audio = video.audio

            if audio is None:
                logger.warning("No audio track found in video")
                return None

            # Save audio to temporary WAV file
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
                audio_path = audio_file.name

            # Export audio as WAV
            audio.write_audiofile(audio_path, logger=None)

            # Read the audio file and return bytes
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()

            # Clean up temporary audio file
            try:
                os.unlink(audio_path)
            except (OSError, PermissionError):
                pass

            return audio_bytes

        finally:
            # Clean up temporary video file
            try:
                # Close video and audio objects first
                if "video" in locals():
                    video.close()
                if "audio" in locals() and audio:
                    audio.close()

                # Wait a bit before trying to delete
                import time

                time.sleep(0.1)

                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "MoviePy not installed. Please install it with: pip install moviepy"
        )
        return None
    except Exception as e:
        logger.error(f"Error extracting audio: {str(e)}")
        return None


def transcribe_audio(audio_bytes: bytes) -> str:
    """
    Transcribe audio to text for text sentiment analysis.

    Args:
        audio_bytes: Audio bytes in WAV format

    Returns:
        Transcribed text string
    """
    if audio_bytes is None:
        return ""

    try:
        import tempfile
        import speech_recognition as sr

        # Save audio bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file_path = tmp_file.name

        try:
            # Initialize recognizer
            recognizer = sr.Recognizer()

            # Load audio file
            with sr.AudioFile(tmp_file_path) as source:
                # Read audio data
                audio_data = recognizer.record(source)

            # Transcribe using Google Speech Recognition
            try:
                text = recognizer.recognize_google(audio_data)
                return text
            except sr.UnknownValueError:
                logger.warning("Speech could not be understood")
                return ""
            except sr.RequestError as e:
                logger.error(
                    f"Could not request results from speech recognition service: {e}"
                )
                return ""

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
        )
        return ""
    except Exception as e:
        logger.error(f"Error transcribing audio: {str(e)}")
        return ""
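As a rough sketch (not part of the commit) of how these preprocessing helpers chain together for a video upload; "clip.mp4" is a hypothetical local file used only for illustration.

from src.utils.preprocessing import (
    extract_frames_from_video,
    extract_audio_from_video,
    detect_and_preprocess_face,
    transcribe_audio,
)

with open("clip.mp4", "rb") as f:
    video_bytes = f.read()

frames = extract_frames_from_video(video_bytes)             # up to 5 PIL frames sampled across the clip
faces = [detect_and_preprocess_face(fr) for fr in frames]   # cropped, grayscale, resized face images (or fallbacks)
audio = extract_audio_from_video(video_bytes)               # WAV bytes, or None if there is no audio track
text = transcribe_audio(audio) if audio else ""             # transcript to feed the text sentiment model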
src/utils/sentiment_mapping.py
ADDED
@@ -0,0 +1,71 @@
"""
Sentiment mapping utilities for different model outputs.
"""

from typing import Dict
from ..config.settings import SENTIMENT_MAPPINGS


def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
    """
    Get the sentiment mapping based on number of classes.

    Args:
        num_classes: Number of output classes from the model

    Returns:
        Dictionary mapping class indices to sentiment labels
    """
    return SENTIMENT_MAPPINGS.get(
        num_classes, {i: f"Class_{i}" for i in range(num_classes)}
    )


def get_sentiment_colors() -> Dict[str, str]:
    """
    Get color-coded sentiment display mapping.

    Returns:
        Dictionary mapping sentiment labels to emoji indicators
    """
    return {
        "Positive": "🟢",
        "Negative": "🔴",
        "Neutral": "🟡",
        "Angry": "🔴",
        "Sad": "🔵",
        "Happy": "🟢",
        "Fear": "🟣",
        "Disgust": "🟠",
        "Surprise": "🟡",
    }


def format_sentiment_result(
    sentiment: str, confidence: float, input_info: str = "", model_name: str = ""
) -> str:
    """
    Format sentiment analysis result for display.

    Args:
        sentiment: Predicted sentiment label
        confidence: Confidence score
        input_info: Information about the input
        model_name: Name of the model used

    Returns:
        Formatted result string
    """
    colors = get_sentiment_colors()
    emoji = colors.get(sentiment, "❓")

    result = f"{emoji} Sentiment: {sentiment}\n"
    result += f"Confidence: {confidence:.2f}\n"

    if input_info:
        result += f"Input: {input_info}\n"

    if model_name:
        result += f"Model: {model_name}\n"

    return result
simple_model_manager.py → src/utils/simple_model_manager.py
RENAMED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 class SimpleModelManager:
     """Simple model manager that downloads models from Google Drive using gdown"""

-    def __init__(self, model_dir: str = "
+    def __init__(self, model_dir: str = "model_weights", cache_models: bool = True):
         """
         Initialize simple model manager
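A minimal sketch (not part of the commit) of constructing the manager with the new defaults:

from src.utils.simple_model_manager import SimpleModelManager

# With the new defaults, downloaded weights land in ./model_weights and are cached for reuse.
manager = SimpleModelManager()  # same as SimpleModelManager(model_dir="model_weights", cache_models=True)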
uv.lock
CHANGED
|
@@ -475,6 +475,15 @@ wheels = [
|
|
| 475 |
{ url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
|
| 476 |
]
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
[[package]]
|
| 479 |
name = "gdown"
|
| 480 |
version = "5.2.0"
|
|
@@ -602,6 +611,18 @@ wheels = [
|
|
| 602 |
{ url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
|
| 603 |
]
|
| 604 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
[[package]]
|
| 606 |
name = "jinja2"
|
| 607 |
version = "3.1.6"
|
|
@@ -728,6 +749,15 @@ wheels = [
|
|
| 728 |
{ url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
|
| 729 |
]
|
| 730 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
[[package]]
|
| 732 |
name = "narwhals"
|
| 733 |
version = "2.2.0"
|
|
@@ -737,6 +767,44 @@ wheels = [
|
|
| 737 |
{ url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
|
| 738 |
]
|
| 739 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 740 |
[[package]]
|
| 741 |
name = "numpy"
|
| 742 |
version = "2.0.2"
|
|
@@ -860,6 +928,132 @@ wheels = [
|
|
| 860 |
{ url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
|
| 861 |
]
|
| 862 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
[[package]]
|
| 864 |
name = "opencv-python-headless"
|
| 865 |
version = "4.12.0.88"
|
|
@@ -1446,9 +1640,11 @@ dependencies = [
|
|
| 1446 |
{ name = "gdown" },
|
| 1447 |
{ name = "moviepy" },
|
| 1448 |
{ name = "opencv-python-headless" },
|
|
|
|
| 1449 |
{ name = "python-dotenv" },
|
| 1450 |
{ name = "speechrecognition" },
|
| 1451 |
{ name = "streamlit-webrtc" },
|
|
|
|
| 1452 |
]
|
| 1453 |
|
| 1454 |
[package.metadata]
|
|
@@ -1456,9 +1652,20 @@ requires-dist = [
|
|
| 1456 |
{ name = "gdown", specifier = ">=5.2.0" },
|
| 1457 |
{ name = "moviepy", specifier = ">=2.2.1" },
|
| 1458 |
{ name = "opencv-python-headless", specifier = ">=4.8.0" },
|
|
|
|
| 1459 |
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
| 1460 |
{ name = "speechrecognition", specifier = ">=3.10.0" },
|
| 1461 |
{ name = "streamlit-webrtc", specifier = ">=0.47.0" },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1462 |
]
|
| 1463 |
|
| 1464 |
[[package]]
|
|
@@ -1570,6 +1777,18 @@ wheels = [
|
|
| 1570 |
{ url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
|
| 1571 |
]
|
| 1572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1573 |
[[package]]
|
| 1574 |
name = "tenacity"
|
| 1575 |
version = "9.1.2"
|
|
@@ -1588,6 +1807,63 @@ wheels = [
|
|
| 1588 |
{ url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
|
| 1589 |
]
|
| 1590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1591 |
[[package]]
|
| 1592 |
name = "tornado"
|
| 1593 |
version = "6.5.2"
|
|
@@ -1619,6 +1895,23 @@ wheels = [
|
|
| 1619 |
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
|
| 1620 |
]
|
| 1621 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1622 |
[[package]]
|
| 1623 |
name = "typing-extensions"
|
| 1624 |
version = "4.14.1"
|
|
@@ -1663,3 +1956,12 @@ wheels = [
|
|
| 1663 |
{ url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
|
| 1664 |
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
|
| 1665 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
{ url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
|
| 476 |
]
|
| 477 |
|
| 478 |
+
[[package]]
|
| 479 |
+
name = "fsspec"
|
| 480 |
+
version = "2025.7.0"
|
| 481 |
+
source = { registry = "https://pypi.org/simple" }
|
| 482 |
+
sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432, upload_time = "2025-07-15T16:05:21.19Z" }
|
| 483 |
+
wheels = [
|
| 484 |
+
{ url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload_time = "2025-07-15T16:05:19.529Z" },
|
| 485 |
+
]
|
| 486 |
+
|
| 487 |
[[package]]
|
| 488 |
name = "gdown"
|
| 489 |
version = "5.2.0"
|
|
|
|
| 611 |
{ url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
|
| 612 |
]
|
| 613 |
|
| 614 |
+
[[package]]
|
| 615 |
+
name = "importlib-metadata"
|
| 616 |
+
version = "8.7.0"
|
| 617 |
+
source = { registry = "https://pypi.org/simple" }
|
| 618 |
+
dependencies = [
|
| 619 |
+
{ name = "zipp", marker = "python_full_version < '3.10'" },
|
| 620 |
+
]
|
| 621 |
+
sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload_time = "2025-04-27T15:29:01.736Z" }
|
| 622 |
+
wheels = [
|
| 623 |
+
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload_time = "2025-04-27T15:29:00.214Z" },
|
| 624 |
+
]
|
| 625 |
+
|
| 626 |
[[package]]
|
| 627 |
name = "jinja2"
|
| 628 |
version = "3.1.6"
|
|
|
|
| 749 |
{ url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
|
| 750 |
]
|
| 751 |
|
| 752 |
+
[[package]]
|
| 753 |
+
name = "mpmath"
|
| 754 |
+
version = "1.3.0"
|
| 755 |
+
source = { registry = "https://pypi.org/simple" }
|
| 756 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload_time = "2023-03-07T16:47:11.061Z" }
|
| 757 |
+
wheels = [
|
| 758 |
+
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload_time = "2023-03-07T16:47:09.197Z" },
|
| 759 |
+
]
|
| 760 |
+
|
| 761 |
[[package]]
|
| 762 |
name = "narwhals"
|
| 763 |
version = "2.2.0"
|
|
|
|
| 767 |
{ url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
|
| 768 |
]
|
| 769 |
|
| 770 |
+
[[package]]
|
| 771 |
+
name = "networkx"
|
| 772 |
+
version = "3.2.1"
|
| 773 |
+
source = { registry = "https://pypi.org/simple" }
|
| 774 |
+
resolution-markers = [
|
| 775 |
+
"python_full_version < '3.10'",
|
| 776 |
+
]
|
| 777 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c4/80/a84676339aaae2f1cfdf9f418701dd634aef9cc76f708ef55c36ff39c3ca/networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", size = 2073928, upload_time = "2023-10-28T08:41:39.364Z" }
|
| 778 |
+
wheels = [
|
| 779 |
+
{ url = "https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2", size = 1647772, upload_time = "2023-10-28T08:41:36.945Z" },
|
| 780 |
+
]
|
| 781 |
+
|
| 782 |
+
[[package]]
|
| 783 |
+
name = "networkx"
|
| 784 |
+
version = "3.4.2"
|
| 785 |
+
source = { registry = "https://pypi.org/simple" }
|
| 786 |
+
resolution-markers = [
|
| 787 |
+
"python_full_version == '3.10.*'",
|
| 788 |
+
]
|
| 789 |
+
sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload_time = "2024-10-21T12:39:38.695Z" }
|
| 790 |
+
wheels = [
|
| 791 |
+
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload_time = "2024-10-21T12:39:36.247Z" },
|
| 792 |
+
]
|
| 793 |
+
|
| 794 |
+
[[package]]
|
| 795 |
+
name = "networkx"
|
| 796 |
+
version = "3.5"
|
| 797 |
+
source = { registry = "https://pypi.org/simple" }
|
| 798 |
+
resolution-markers = [
|
| 799 |
+
"python_full_version >= '3.13'",
|
| 800 |
+
"python_full_version == '3.12.*'",
|
| 801 |
+
"python_full_version == '3.11.*'",
|
| 802 |
+
]
|
| 803 |
+
sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload_time = "2025-05-29T11:35:07.804Z" }
|
| 804 |
+
wheels = [
|
| 805 |
+
{ url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload_time = "2025-05-29T11:35:04.961Z" },
|
| 806 |
+
]
|
| 807 |
+
|
| 808 |
[[package]]
|
| 809 |
name = "numpy"
|
| 810 |
version = "2.0.2"
|
|
|
|
| 928 |
{ url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
|
| 929 |
]
|
| 930 |
|
| 931 |
+
[[package]]
|
| 932 |
+
name = "nvidia-cublas-cu12"
|
| 933 |
+
version = "12.8.4.1"
|
| 934 |
+
source = { registry = "https://pypi.org/simple" }
|
| 935 |
+
wheels = [
|
| 936 |
+
{ url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload_time = "2025-03-07T01:44:31.254Z" },
|
| 937 |
+
]
|
| 938 |
+
|
| 939 |
+
[[package]]
|
| 940 |
+
name = "nvidia-cuda-cupti-cu12"
|
| 941 |
+
version = "12.8.90"
|
| 942 |
+
source = { registry = "https://pypi.org/simple" }
|
| 943 |
+
wheels = [
|
| 944 |
+
{ url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload_time = "2025-03-07T01:40:21.213Z" },
|
| 945 |
+
]
|
| 946 |
+
|
| 947 |
+
[[package]]
|
| 948 |
+
name = "nvidia-cuda-nvrtc-cu12"
|
| 949 |
+
version = "12.8.93"
|
| 950 |
+
source = { registry = "https://pypi.org/simple" }
|
| 951 |
+
wheels = [
|
| 952 |
+
{ url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload_time = "2025-03-07T01:42:13.562Z" },
|
| 953 |
+
]
|
| 954 |
+
|
| 955 |
+
[[package]]
|
| 956 |
+
name = "nvidia-cuda-runtime-cu12"
|
| 957 |
+
version = "12.8.90"
|
| 958 |
+
source = { registry = "https://pypi.org/simple" }
|
| 959 |
+
wheels = [
|
| 960 |
+
{ url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload_time = "2025-03-07T01:40:01.615Z" },
|
| 961 |
+
]
|
| 962 |
+
|
| 963 |
+
[[package]]
|
| 964 |
+
name = "nvidia-cudnn-cu12"
|
| 965 |
+
version = "9.10.2.21"
|
| 966 |
+
source = { registry = "https://pypi.org/simple" }
|
| 967 |
+
dependencies = [
|
| 968 |
+
{ name = "nvidia-cublas-cu12" },
|
| 969 |
+
]
|
| 970 |
+
wheels = [
|
| 971 |
+
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload_time = "2025-06-06T21:54:08.597Z" },
|
| 972 |
+
]
|
| 973 |
+
|
| 974 |
+
[[package]]
|
| 975 |
+
name = "nvidia-cufft-cu12"
|
| 976 |
+
version = "11.3.3.83"
|
| 977 |
+
source = { registry = "https://pypi.org/simple" }
|
| 978 |
+
dependencies = [
|
| 979 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 980 |
+
]
|
| 981 |
+
wheels = [
|
| 982 |
+
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload_time = "2025-03-07T01:45:27.821Z" },
|
| 983 |
+
]
|
| 984 |
+
|
| 985 |
+
[[package]]
|
| 986 |
+
name = "nvidia-cufile-cu12"
|
| 987 |
+
version = "1.13.1.3"
|
| 988 |
+
source = { registry = "https://pypi.org/simple" }
|
| 989 |
+
wheels = [
|
| 990 |
+
{ url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload_time = "2025-03-07T01:45:50.723Z" },
|
| 991 |
+
]
|
| 992 |
+
|
| 993 |
+
[[package]]
|
| 994 |
+
name = "nvidia-curand-cu12"
|
| 995 |
+
version = "10.3.9.90"
|
| 996 |
+
source = { registry = "https://pypi.org/simple" }
|
| 997 |
+
wheels = [
|
| 998 |
+
{ url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload_time = "2025-03-07T01:46:23.323Z" },
|
| 999 |
+
]
|
| 1000 |
+
|
| 1001 |
+
[[package]]
|
| 1002 |
+
name = "nvidia-cusolver-cu12"
|
| 1003 |
+
version = "11.7.3.90"
|
| 1004 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1005 |
+
dependencies = [
|
| 1006 |
+
{ name = "nvidia-cublas-cu12" },
|
| 1007 |
+
{ name = "nvidia-cusparse-cu12" },
|
| 1008 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 1009 |
+
]
|
| 1010 |
+
wheels = [
|
| 1011 |
+
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload_time = "2025-03-07T01:47:16.273Z" },
|
| 1012 |
+
]
|
| 1013 |
+
|
| 1014 |
+
[[package]]
|
| 1015 |
+
name = "nvidia-cusparse-cu12"
|
| 1016 |
+
version = "12.5.8.93"
|
| 1017 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1018 |
+
dependencies = [
|
| 1019 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 1020 |
+
]
|
| 1021 |
+
wheels = [
|
| 1022 |
+
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload_time = "2025-03-07T01:48:13.779Z" },
|
| 1023 |
+
]
|
| 1024 |
+
|
| 1025 |
+
[[package]]
|
| 1026 |
+
name = "nvidia-cusparselt-cu12"
|
| 1027 |
+
version = "0.7.1"
|
| 1028 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1029 |
+
wheels = [
|
| 1030 |
+
{ url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload_time = "2025-02-26T00:15:44.104Z" },
|
| 1031 |
+
]
|
| 1032 |
+
|
| 1033 |
+
[[package]]
|
| 1034 |
+
name = "nvidia-nccl-cu12"
|
| 1035 |
+
version = "2.27.3"
|
| 1036 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1037 |
+
wheels = [
|
| 1038 |
+
{ url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload_time = "2025-06-03T21:58:04.013Z" },
|
| 1039 |
+
]
|
| 1040 |
+
|
| 1041 |
+
[[package]]
|
| 1042 |
+
name = "nvidia-nvjitlink-cu12"
|
| 1043 |
+
version = "12.8.93"
|
| 1044 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1045 |
+
wheels = [
|
| 1046 |
+
{ url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload_time = "2025-03-07T01:49:55.661Z" },
|
| 1047 |
+
]
|
| 1048 |
+
|
| 1049 |
+
[[package]]
|
| 1050 |
+
name = "nvidia-nvtx-cu12"
|
| 1051 |
+
version = "12.8.90"
|
| 1052 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1053 |
+
wheels = [
|
| 1054 |
+
{ url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload_time = "2025-03-07T01:42:44.131Z" },
|
| 1055 |
+
]
|
| 1056 |
+
|
| 1057 |
[[package]]
|
| 1058 |
name = "opencv-python-headless"
|
| 1059 |
version = "4.12.0.88"
|
|
|
|
| 1640 |
{ name = "gdown" },
|
| 1641 |
{ name = "moviepy" },
|
| 1642 |
{ name = "opencv-python-headless" },
|
| 1643 |
+
{ name = "pillow" },
|
| 1644 |
{ name = "python-dotenv" },
|
| 1645 |
{ name = "speechrecognition" },
|
| 1646 |
{ name = "streamlit-webrtc" },
|
| 1647 |
+
{ name = "torch" },
|
| 1648 |
]
|
| 1649 |
|
| 1650 |
[package.metadata]
|
|
|
|
| 1652 |
{ name = "gdown", specifier = ">=5.2.0" },
|
| 1653 |
{ name = "moviepy", specifier = ">=2.2.1" },
|
| 1654 |
{ name = "opencv-python-headless", specifier = ">=4.8.0" },
|
| 1655 |
+
{ name = "pillow", specifier = ">=11.3.0" },
|
| 1656 |
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
| 1657 |
{ name = "speechrecognition", specifier = ">=3.10.0" },
|
| 1658 |
{ name = "streamlit-webrtc", specifier = ">=0.47.0" },
|
| 1659 |
+
{ name = "torch", specifier = ">=2.8.0" },
|
| 1660 |
+
]
|
| 1661 |
+
|
| 1662 |
+
[[package]]
|
| 1663 |
+
name = "setuptools"
|
| 1664 |
+
version = "80.9.0"
|
| 1665 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1666 |
+
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload_time = "2025-05-27T00:56:51.443Z" }
|
| 1667 |
+
wheels = [
|
| 1668 |
+
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload_time = "2025-05-27T00:56:49.664Z" },
|
| 1669 |
]
|
| 1670 |
|
| 1671 |
[[package]]
|
|
|
|
| 1777 |
{ url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
|
| 1778 |
]
|
| 1779 |
|
| 1780 |
+
[[package]]
|
| 1781 |
+
name = "sympy"
|
| 1782 |
+
version = "1.14.0"
|
| 1783 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1784 |
+
dependencies = [
|
| 1785 |
+
{ name = "mpmath" },
|
| 1786 |
+
]
|
| 1787 |
+
sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload_time = "2025-04-27T18:05:01.611Z" }
|
| 1788 |
+
wheels = [
|
| 1789 |
+
{ url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload_time = "2025-04-27T18:04:59.103Z" },
|
| 1790 |
+
]
|
| 1791 |
+
|
| 1792 |
[[package]]
|
| 1793 |
name = "tenacity"
|
| 1794 |
version = "9.1.2"
|
|
|
|
| 1807 |
{ url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
|
| 1808 |
]
|
| 1809 |
|
| 1810 |
+
[[package]]
|
| 1811 |
+
name = "torch"
|
| 1812 |
+
version = "2.8.0"
|
| 1813 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1814 |
+
dependencies = [
|
| 1815 |
+
{ name = "filelock" },
|
| 1816 |
+
{ name = "fsspec" },
|
| 1817 |
+
{ name = "jinja2" },
|
| 1818 |
+
{ name = "networkx", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
| 1819 |
+
{ name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
| 1820 |
+
{ name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
| 1821 |
+
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1822 |
+
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1823 |
+
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1824 |
+
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1825 |
+
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1826 |
+
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1827 |
+
{ name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1828 |
+
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1829 |
+
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1830 |
+
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1831 |
+
{ name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1832 |
+
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1833 |
+
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1834 |
+
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1835 |
+
{ name = "setuptools", marker = "python_full_version >= '3.12'" },
|
| 1836 |
+
{ name = "sympy" },
|
| 1837 |
+
{ name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1838 |
+
{ name = "typing-extensions" },
|
| 1839 |
+
]
|
| 1840 |
+
wheels = [
|
| 1841 |
+
{ url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload_time = "2025-08-06T14:53:15.852Z" },
|
| 1842 |
+
{ url = "https://files.pythonhosted.org/packages/70/1c/58da560016f81c339ae14ab16c98153d51c941544ae568da3cb5b1ceb572/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011", size = 888025420, upload_time = "2025-08-06T14:54:18.014Z" },
|
| 1843 |
+
{ url = "https://files.pythonhosted.org/packages/70/87/f69752d0dd4ba8218c390f0438130c166fa264a33b7025adb5014b92192c/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46", size = 241363614, upload_time = "2025-08-06T14:53:31.496Z" },
|
| 1844 |
+
{ url = "https://files.pythonhosted.org/packages/ef/d6/e6d4c57e61c2b2175d3aafbfb779926a2cfd7c32eeda7c543925dceec923/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760", size = 73611154, upload_time = "2025-08-06T14:53:10.919Z" },
|
| 1845 |
+
{ url = "https://files.pythonhosted.org/packages/8f/c4/3e7a3887eba14e815e614db70b3b529112d1513d9dae6f4d43e373360b7f/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710", size = 102073391, upload_time = "2025-08-06T14:53:20.937Z" },
|
| 1846 |
+
{ url = "https://files.pythonhosted.org/packages/5a/63/4fdc45a0304536e75a5e1b1bbfb1b56dd0e2743c48ee83ca729f7ce44162/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b", size = 888063640, upload_time = "2025-08-06T14:55:05.325Z" },
|
| 1847 |
+
{ url = "https://files.pythonhosted.org/packages/84/57/2f64161769610cf6b1c5ed782bd8a780e18a3c9d48931319f2887fa9d0b1/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa", size = 241366752, upload_time = "2025-08-06T14:53:38.692Z" },
|
| 1848 |
+
{ url = "https://files.pythonhosted.org/packages/a4/5e/05a5c46085d9b97e928f3f037081d3d2b87fb4b4195030fc099aaec5effc/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916", size = 73621174, upload_time = "2025-08-06T14:53:25.44Z" },
|
| 1849 |
+
{ url = "https://files.pythonhosted.org/packages/49/0c/2fd4df0d83a495bb5e54dca4474c4ec5f9c62db185421563deeb5dabf609/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705", size = 101906089, upload_time = "2025-08-06T14:53:52.631Z" },
|
| 1850 |
+
{ url = "https://files.pythonhosted.org/packages/99/a8/6acf48d48838fb8fe480597d98a0668c2beb02ee4755cc136de92a0a956f/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c", size = 887913624, upload_time = "2025-08-06T14:56:44.33Z" },
|
| 1851 |
+
{ url = "https://files.pythonhosted.org/packages/af/8a/5c87f08e3abd825c7dfecef5a0f1d9aa5df5dd0e3fd1fa2f490a8e512402/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e", size = 241326087, upload_time = "2025-08-06T14:53:46.503Z" },
| 1852 | + { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload_time = "2025-08-06T14:53:57.144Z" },
| 1853 | + { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload_time = "2025-08-06T14:54:01.526Z" },
| 1854 | + { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload_time = "2025-08-06T14:55:50.78Z" },
| 1855 | + { url = "https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload_time = "2025-08-06T14:54:45.293Z" },
| 1856 | + { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload_time = "2025-08-06T14:54:34.769Z" },
| 1857 | + { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload_time = "2025-08-06T14:54:39.047Z" },
| 1858 | + { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload_time = "2025-08-06T14:56:18.286Z" },
| 1859 | + { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload_time = "2025-08-06T14:55:22.945Z" },
| 1860 | + { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload_time = "2025-08-06T14:55:28.645Z" },
| 1861 | + { url = "https://files.pythonhosted.org/packages/5b/b0/a321f27270049baa12f5c3fb0d6ceea005634787e3af9a8d75dce8306b0a/torch-2.8.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:da6afa31c13b669d4ba49d8a2169f0db2c3ec6bec4af898aa714f401d4c38904", size = 102059214, upload_time = "2025-08-06T14:55:33.433Z" },
| 1862 | + { url = "https://files.pythonhosted.org/packages/fd/dd/1630cb51b10d3d2e97db95e5a84c32def81fc26b005bce6fc880b0e6db81/torch-2.8.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:06fcee8000e5c62a9f3e52a688b9c5abb7c6228d0e56e3452983416025c41381", size = 888024302, upload_time = "2025-08-06T14:57:28.23Z" },
| 1863 | + { url = "https://files.pythonhosted.org/packages/b9/dc/1f1f621afe15e3c496e1e8f94f8903f75f87e7d642d5a985e92210cc208d/torch-2.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5128fe752a355d9308e56af1ad28b15266fe2da5948660fad44de9e3a9e36e8c", size = 241249338, upload_time = "2025-08-06T14:57:05.669Z" },
| 1864 | + { url = "https://files.pythonhosted.org/packages/ae/95/ae26263aceb3d57b821179f827d0e321373ed49423e603dd5906ab14a730/torch-2.8.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e9f071f5b52a9f6970dc8a919694b27a91ae9dc08898b2b988abbef5eddfd1ae", size = 73610795, upload_time = "2025-08-06T14:57:11.513Z" },
| 1865 | + ]
| 1866 | +
| 1867 |   [[package]]
| 1868 |   name = "tornado"
| 1869 |   version = "6.5.2"
| ... |
| 1895 |   { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
| 1896 |   ]
| 1897 |
| 1898 | + [[package]]
| 1899 | + name = "triton"
| 1900 | + version = "3.4.0"
| 1901 | + source = { registry = "https://pypi.org/simple" }
| 1902 | + dependencies = [
| 1903 | + { name = "importlib-metadata", marker = "python_full_version < '3.10'" },
| 1904 | + { name = "setuptools" },
| 1905 | + ]
| 1906 | + wheels = [
| 1907 | + { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload_time = "2025-07-30T19:58:21.715Z" },
| 1908 | + { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload_time = "2025-07-30T19:58:29.908Z" },
| 1909 | + { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload_time = "2025-07-30T19:58:37.081Z" },
| 1910 | + { url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload_time = "2025-07-30T19:58:44.017Z" },
| 1911 | + { url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload_time = "2025-07-30T19:58:51.171Z" },
| 1912 | + { url = "https://files.pythonhosted.org/packages/12/34/1251beb5a3cb93f3950ebe68732752014646003ef6eb11eb5f1a37ca78cd/triton-3.4.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98e5c1442eaeabae2e2452ae765801bd53cd4ce873cab0d1bdd59a32ab2d9397", size = 155430799, upload_time = "2025-07-30T19:58:57.664Z" },
| 1913 | + ]
| 1914 | +
| 1915 |   [[package]]
| 1916 |   name = "typing-extensions"
| 1917 |   version = "4.14.1"
| ... |
| 1956 |   { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
| 1957 |   { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
| 1958 |   ]
| 1959 | +
| 1960 | + [[package]]
| 1961 | + name = "zipp"
| 1962 | + version = "3.23.0"
| 1963 | + source = { registry = "https://pypi.org/simple" }
| 1964 | + sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload_time = "2025-06-08T17:06:39.4Z" }
| 1965 | + wheels = [
| 1966 | + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload_time = "2025-06-08T17:06:38.034Z" },
| 1967 | + ]
|