Faham committed
Commit · b1acf7e
Parent(s): e2e4e08
UPDATE: codebase refactored to be more readable and optimized
- .gitignore +2 -3
- app.py +294 -973
- pyproject.toml +2 -0
- src/__init__.py +10 -0
- src/config/__init__.py +5 -0
- src/config/settings.py +153 -0
- src/models/__init__.py +8 -0
- src/models/audio_model.py +154 -0
- src/models/fused_model.py +176 -0
- src/models/text_model.py +81 -0
- src/models/vision_model.py +157 -0
- src/ui/__init__.py +7 -0
- src/ui/styles.py +97 -0
- src/utils/__init__.py +7 -0
- src/utils/file_handling.py +189 -0
- src/utils/preprocessing.py +467 -0
- src/utils/sentiment_mapping.py +71 -0
- simple_model_manager.py → src/utils/simple_model_manager.py +1 -1
- uv.lock +302 -0
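The refactor moves everything that used to live in the single app.py into the src/ packages listed above. The page-dispatch wiring of the new entry point is not visible in the truncated diff below, but given the render_* page functions it defines and the sidebar navigation the old file used, the new app.py presumably ends in something close to this sketch (main() and the exact page labels are assumptions, not committed code):

# Hypothetical sketch only: the render_* functions are defined in the new app.py
# shown further down; the dispatcher itself sits outside the visible diff.
import streamlit as st

PAGES = {
    "Home": render_home_page,
    "Text Sentiment": render_text_sentiment_page,
    "Audio Sentiment": render_audio_sentiment_page,
    "Vision Sentiment": render_vision_sentiment_page,
    "Fused Model": render_fused_model_page,
    "Max Fusion": render_max_fusion_page,
}

def main():
    # Same sidebar navigation the old monolithic app.py built inline
    st.sidebar.title("Sentiment Analysis")
    page = st.sidebar.selectbox("Choose a page:", list(PAGES.keys()))
    PAGES[page]()  # call the renderer for the selected page

main()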
.gitignore CHANGED

@@ -1,6 +1,6 @@
 # Model files
 *.pth
-
+model_weights/*.pth
 *.pt
 *.pkl
 *.h5

@@ -40,7 +40,6 @@ venv/
 env/
 ENV/
 .venv/
-.venv2/
 .env/

 # IDE

@@ -63,4 +62,4 @@ logs/
 .cache/
 .pytest_cache/

-
+model_weights/
app.py CHANGED

@@ -1,781 +1,67 @@
 import streamlit as st
 import pandas as pd
 from PIL import Image
-import
-...
-from
-...

-#
-...

 # Page configuration
 st.set_page_config(
-    page_title=
-    page_icon=
-    layout=
     initial_sidebar_state="expanded",
 )

-#
-st.markdown(
-    """
-<style>
-    .main-header {
-        font-size: 2.5rem;
-        font-weight: bold;
-        color: #1f77b4;
-        text-align: center;
-        margin-bottom: 2rem;
-    }
-    .model-card {
-        background-color: #f0f2f6;
-        padding: 1.5rem;
-        border-radius: 10px;
-        margin: 1rem 0;
-        border-left: 4px solid #1f77b4;
-    }
-    .result-box {
-        background-color: #e8f4fd;
-        padding: 1rem;
-        border-radius: 8px;
-        border: 1px solid #1f77b4;
-        margin: 1rem 0;
-    }
-    .upload-section {
-        background-color: #f8f9fa;
-        padding: 1.5rem;
-        border-radius: 10px;
-        border: 2px dashed #dee2e6;
-        text-align: center;
-        margin: 1rem 0;
-    }
-</style>
-""",
-    unsafe_allow_html=True,
-)
-
-
-# Initialize the Google Drive model manager
-@st.cache_resource
-def get_model_manager():
-    """Get the Google Drive model manager instance"""
-    try:
-        manager = SimpleModelManager()
-        return manager
-    except Exception as e:
-        st.error(f"Failed to initialize model manager: {e}")
-        return None
-
-
-# Global variables for models
-@st.cache_resource
-def load_vision_model():
-    """Load the pre-trained ResNet-50 vision sentiment model from Google Drive"""
-    try:
-        manager = get_model_manager()
-        if manager is None:
-            st.error("Model manager not available")
-            return None, None, None
-
-        # Load the model using the Google Drive manager
-        model, device, num_classes = manager.load_vision_model()
-
-        if model is None:
-            st.error("Failed to load vision model from Google Drive")
-            return None, None, None
-
-        st.success(f"Vision model loaded successfully with {num_classes} classes!")
-        return model, device, num_classes
-    except Exception as e:
-        st.error(f"Error loading vision model: {str(e)}")
-        return None, None, None
-
-
-@st.cache_data
-def get_vision_transforms():
-    """Get the image transforms used during FER2013 training"""
-    return transforms.Compose(
-        [
-            transforms.Resize(224),  # Match training: transforms.Resize(224)
-            transforms.CenterCrop(224),  # Match training: transforms.CenterCrop(224)
-            transforms.ToTensor(),
-            transforms.Normalize(
-                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-            ),  # ImageNet normalization
-        ]
-    )
-
-
-def detect_and_preprocess_face(image, crop_tightness=0.05):
-    """
-    Detect face in image, crop to face region, convert to grayscale, and resize to 224x224
-    to match FER2013 dataset format (grayscale converted to 3-channel RGB)
-
-    Args:
-        image: Input image (PIL Image or numpy array)
-        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
-    """
-    try:
-        import cv2
-        import numpy as np
-
-        # Convert PIL image to OpenCV format
-        if isinstance(image, Image.Image):
-            # Convert PIL to numpy array
-            img_array = np.array(image)
-            # Convert RGB to BGR for OpenCV
-            if len(img_array.shape) == 3:
-                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
-        else:
-            img_array = image
-
-        # Load face detection cascade
-        face_cascade = cv2.CascadeClassifier(
-            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
-        )
-
-        # Convert to grayscale for face detection (detection works better on grayscale)
-        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
-
-        # Detect faces
-        faces = face_cascade.detectMultiScale(
-            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
-        )
-
-        if len(faces) == 0:
-            st.warning("No face detected in the image. Using center crop instead.")
-            # Fallback: center crop and resize
-            if isinstance(image, Image.Image):
-                # Convert to RGB first
-                rgb_pil = image.convert("RGB")
-                # Center crop to square
-                width, height = rgb_pil.size
-                size = min(width, height)
-                left = (width - size) // 2
-                top = (height - size) // 2
-                right = left + size
-                bottom = top + size
-                cropped = rgb_pil.crop((left, top, right, bottom))
-                # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
-                resized = cropped.resize((224, 224), Image.Resampling.LANCZOS)
-
-                # Convert to grayscale and then to 3-channel RGB
-                gray_pil = resized.convert("L")
-                # Convert back to RGB (this replicates grayscale values to all 3 channels)
-                gray_rgb_pil = gray_pil.convert("RGB")
-                return gray_rgb_pil
-            else:
-                return None
-
-        # Get the largest face (assuming it's the main subject)
-        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])
-
-        # Add padding around the face based on user preference
-        padding_x = int(w * crop_tightness)
-        padding_y = int(h * crop_tightness)
-
-        # Ensure we don't go out of bounds
-        x1 = max(0, x - padding_x)
-        y1 = max(0, y - padding_y)
-        x2 = min(img_array.shape[1], x + w + padding_x)
-        y2 = min(img_array.shape[0], y + h + padding_y)
-
-        # Crop to face region
-        face_crop = img_array[y1:y2, x1:x2]
-
-        # Convert BGR to RGB first
-        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
-
-        # Convert to grayscale
-        face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)
-
-        # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
-        face_resized = cv2.resize(face_gray, (224, 224), interpolation=cv2.INTER_AREA)
-
-        # Convert grayscale to 3-channel RGB (replicate grayscale values)
-        face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)
-
-        # Convert back to PIL Image
-        face_pil = Image.fromarray(face_rgb_3channel)
-
-        return face_pil
-
-    except ImportError:
-        st.error(
-            "OpenCV not installed. Please install it with: pip install opencv-python"
-        )
-        st.info("Falling back to basic preprocessing...")
-        # Fallback: basic grayscale conversion and resize
-        if isinstance(image, Image.Image):
-            rgb_pil = image.convert("RGB")
-            resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
-            # Convert to grayscale and then to 3-channel RGB
-            gray_pil = resized.convert("L")
-            gray_rgb_pil = gray_pil.convert("RGB")
-            return gray_rgb_pil
-        return None
-    except Exception as e:
-        st.error(f"Error in face detection: {str(e)}")
-        st.info("Falling back to basic preprocessing...")
-        # Fallback: basic grayscale conversion and resize
-        if isinstance(image, Image.Image):
-            rgb_pil = image.convert("RGB")
-            resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
-            # Convert to grayscale and then to 3-channel RGB
-            gray_pil = resized.convert("L")
-            gray_rgb_pil = gray_pil.convert("RGB")
-            return gray_rgb_pil
-        return None
-
-
-def get_sentiment_mapping(num_classes):
-    """Get the sentiment mapping based on number of classes"""
-    if num_classes == 3:
-        return {0: "Negative", 1: "Neutral", 2: "Positive"}
-    elif num_classes == 4:
-        # Common 4-class emotion mapping
-        return {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"}
-    elif num_classes == 7:
-        # FER2013 7-class emotion mapping
-        return {
-            0: "Angry",
-            1: "Disgust",
-            2: "Fear",
-            3: "Happy",
-            4: "Sad",
-            5: "Surprise",
-            6: "Neutral",
-        }
-    else:
-        # Generic mapping for unknown number of classes
-        return {i: f"Class_{i}" for i in range(num_classes)}
-
-
-# Placeholder functions for model predictions
-def predict_text_sentiment(text):
-    """
-    Analyze text sentiment using TextBlob
-    """
-    if not text or text.strip() == "":
-        return "No text provided", 0.0
-
-    try:
-        from textblob import TextBlob
-
-        # Create TextBlob object
-        blob = TextBlob(text)
-
-        # Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
-        polarity = blob.sentiment.polarity
-
-        # Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
-        subjectivity = blob.sentiment.subjectivity
-
-        # Convert polarity to sentiment categories
-        if polarity > 0.1:
-            sentiment = "Positive"
-            confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
-        elif polarity < -0.1:
-            sentiment = "Negative"
-            confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
-        else:
-            sentiment = "Neutral"
-            confidence = 0.7 - abs(polarity) * 0.2
-
-        # Round confidence to 2 decimal places
-        confidence = round(confidence, 2)
-
-        return sentiment, confidence
-
-    except ImportError:
-        st.error("TextBlob not installed. Please install it with: pip install textblob")
-        return "TextBlob not available", 0.0
-    except Exception as e:
-        st.error(f"Error in text sentiment analysis: {str(e)}")
-        return "Error occurred", 0.0
-
-
-@st.cache_resource
-def load_audio_model():
-    """Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive"""
-    try:
-        manager = get_model_manager()
-        if manager is None:
-            st.error("Model manager not available")
-            return None, None, None, None
-
-        # Load the model using the Google Drive manager
-        model, device = manager.load_audio_model()
-
-        if model is None:
-            st.error("Failed to load audio model from Google Drive")
-            return None, None, None, None
-
-        # For Wav2Vec2 models, we need to determine the number of classes
-        # This is typically available in the model configuration
-        try:
-            num_classes = model.config.num_labels
-        except:
-            # Fallback: try to infer from the model
-            try:
-                num_classes = model.classifier.out_features
-            except:
-                num_classes = 3  # Default assumption
-
-        # Load feature extractor
-        from transformers import AutoFeatureExtractor
-
-        feature_extractor = AutoFeatureExtractor.from_pretrained(
-            "facebook/wav2vec2-base"
-        )
-
-        st.success(f"Audio model loaded successfully with {num_classes} classes!")
-        return model, device, num_classes, feature_extractor
-    except Exception as e:
-        st.error(f"Error loading audio model: {str(e)}")
-        return None, None, None, None
-
-
-def predict_audio_sentiment(audio_bytes):
-    """
-    Analyze audio sentiment using fine-tuned Wav2Vec2 model
-    Preprocessing matches CREMA-D + RAVDESS training specifications:
-    - Target sampling rate: 16kHz
-    - Max duration: 5.0 seconds
-    - Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
-    """
-    if audio_bytes is None:
-        return "No audio provided", 0.0
-
-    try:
-        # Load model if not already loaded
-        model, device, num_classes, feature_extractor = load_audio_model()
-        if model is None:
-            return "Model not loaded", 0.0
-
-        # Load and preprocess audio
-        import librosa
-        import tempfile
-
-        # Save audio bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            tmp_file.write(audio_bytes)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Load audio with librosa
-            audio, sr = librosa.load(tmp_file_path, sr=None)
-
-            # Resample to 16kHz if needed
-            if sr != 16000:
-                audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
-
-            # Preprocess with feature extractor (matching CREMA-D + RAVDESS training exactly)
-            # From training: max_length=int(max_duration_s * TARGET_SAMPLING_RATE) = 5.0 * 16000
-            inputs = feature_extractor(
-                audio,
-                sampling_rate=16000,
-                max_length=int(5.0 * 16000),  # 5 seconds max (matching training)
-                truncation=True,
-                padding="max_length",
-                return_tensors="pt",
-            )
-
-            # Move to device
-            input_values = inputs.input_values.to(device)
-
-            # Run inference
-            with torch.no_grad():
-                outputs = model(input_values)
-                probabilities = torch.softmax(outputs.logits, dim=1)
-                confidence, predicted = torch.max(probabilities, 1)
-
-            # Get sentiment mapping based on number of classes
-            if num_classes == 3:
-                sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
-            else:
-                # Generic mapping for unknown number of classes
-                sentiment_map = {i: f"Class_{i}" for i in range(num_classes)}
-
-            sentiment = sentiment_map[predicted.item()]
-            confidence_score = confidence.item()
-
-            return sentiment, confidence_score
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError as e:
-        st.error(f"Required library not installed: {str(e)}")
-        st.info("Please install: pip install librosa transformers")
-        return "Library not available", 0.0
-    except Exception as e:
-        st.error(f"Error in audio sentiment prediction: {str(e)}")
-        return "Error occurred", 0.0
-
-
-def predict_vision_sentiment(image, crop_tightness=0.05):
-    """
-    Load ResNet-50 and run inference for vision sentiment analysis
-
-    Args:
-        image: Input image (PIL Image or numpy array)
-        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
-    """
-    if image is None:
-        return "No image provided", 0.0
-
-    try:
-        # Load model if not already loaded
-        model, device, num_classes = load_vision_model()
-        if model is None:
-            return "Model not loaded", 0.0
-
-        # Preprocess image to match FER2013 format
-        st.info(
-            "Detecting face and preprocessing image to match training data format..."
-        )
-        preprocessed_image = detect_and_preprocess_face(image, crop_tightness=0.0)
-
-        if preprocessed_image is None:
-            return "Image preprocessing failed", 0.0
-
-        # Show preprocessed image
-        st.image(
-            preprocessed_image,
-            caption="Preprocessed Image (48x48 Grayscale → 3-channel RGB)",
-            width=200,
-        )
-
-        # Get transforms
-        transform = get_vision_transforms()
-
-        # Convert preprocessed image to tensor
-        image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
-
-        # Run inference
-        with torch.no_grad():
-            outputs = model(image_tensor)
-
-            # Debug: print output shape
-            st.info(f"Model output shape: {outputs.shape}")
-
-            probabilities = F.softmax(outputs, dim=1)
-            confidence, predicted = torch.max(probabilities, 1)
-
-        # Get sentiment mapping based on number of classes
-        sentiment_map = get_sentiment_mapping(num_classes)
-        sentiment = sentiment_map[predicted.item()]
-        confidence_score = confidence.item()
-
-        return sentiment, confidence_score
-
-    except Exception as e:
-        st.error(f"Error in vision sentiment prediction: {str(e)}")
-        st.error(
-            f"Model output shape mismatch. Expected {num_classes} classes but got different."
-        )
-        return "Error occurred", 0.0
-
-
-def predict_fused_sentiment(text=None, audio_bytes=None, image=None):
-    """
-    TODO: Implement ensemble/fusion logic combining all three models
-    This is a placeholder function for fused sentiment analysis
-    """
-    # Placeholder logic - replace with actual fusion implementation
-    results = []
-
-    if text:
-        text_sentiment, text_conf = predict_text_sentiment(text)
-        results.append((text_sentiment, text_conf))
-
-    if audio_bytes:
-        audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
-        results.append((audio_sentiment, audio_conf))
-
-    if image:
-        vision_sentiment, vision_conf = predict_vision_sentiment(image)
-        results.append((vision_sentiment, vision_conf))
-
-    if not results:
-        return "No inputs provided", 0.0
-
-    # Simple ensemble logic (replace with your fusion strategy)
-    sentiment_counts = {}
-    total_confidence = 0
-
-    for sentiment, confidence in results:
-        sentiment_counts[sentiment] = sentiment_counts.get(sentiment, 0) + 1
-        total_confidence += confidence
-
-    # Majority voting with confidence averaging
-    final_sentiment = max(sentiment_counts, key=sentiment_counts.get)
-    avg_confidence = total_confidence / len(results)
-
-    return final_sentiment, avg_confidence
-
-
-def extract_frames_from_video(video_file, max_frames=10):
-    """
-    Extract frames from video file for vision sentiment analysis
-
-    Args:
-        video_file: StreamlitUploadedFile or bytes
-        max_frames: Maximum number of frames to extract
-
-    Returns:
-        List of PIL Image objects
-    """
-    try:
-        import cv2
-        import numpy as np
-        import tempfile
-
-        # Save video bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
-            if hasattr(video_file, "getvalue"):
-                tmp_file.write(video_file.getvalue())
-            else:
-                tmp_file.write(video_file)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Open video with OpenCV
-            cap = cv2.VideoCapture(tmp_file_path)
-
-            if not cap.isOpened():
-                st.error("Could not open video file")
-                return []
-
-            frames = []
-            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-            fps = cap.get(cv2.CAP_PROP_FPS)
-            duration = total_frames / fps if fps > 0 else 0
-
-            st.info(
-                f"📹 Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
-            )
-
-            # Extract frames at strategic intervals
-            if total_frames > 0:
-                # Select frames: start, 25%, 50%, 75%, end
-                frame_indices = [
-                    0,
-                    int(total_frames * 0.25),
-                    int(total_frames * 0.5),
-                    int(total_frames * 0.75),
-                    total_frames - 1,
-                ]
-                frame_indices = list(set(frame_indices))  # Remove duplicates
-                frame_indices.sort()
-
-                for frame_idx in frame_indices:
-                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
-                    ret, frame = cap.read()
-                    if ret:
-                        # Convert BGR to RGB
-                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                        # Convert to PIL Image
-                        pil_image = Image.fromarray(frame_rgb)
-                        frames.append(pil_image)
-
-            cap.release()
-            return frames
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError:
-        st.error(
-            "OpenCV not installed. Please install it with: pip install opencv-python"
-        )
-        return []
-    except Exception as e:
-        st.error(f"Error extracting frames: {str(e)}")
-        return []
-
-
-def extract_audio_from_video(video_file):
-    """
-    Extract audio from video file for audio sentiment analysis
-
-    Args:
-        video_file: StreamlitUploadedFile or bytes
-
-    Returns:
-        Audio bytes in WAV format
-    """
-    try:
-        import tempfile
-
-        try:
-            from moviepy import VideoFileClip
-        except ImportError as e:
-            st.error(f"MoviePy import failed: {e}")
-            st.error(
-                "This usually means the Docker build failed to install moviepy properly"
-            )
-            return None
-
-        # Save video bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
-            if hasattr(video_file, "getvalue"):
-                tmp_file.write(video_file.getvalue())
-            else:
-                tmp_file.write(video_file)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Extract audio using moviepy
-            video = VideoFileClip(tmp_file_path)
-            audio = video.audio
-
-            if audio is None:
-                st.warning("No audio track found in video")
-                return None
-
-            # Save audio to temporary WAV file
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
-                audio_path = audio_file.name
-
-            # Export audio as WAV
-            audio.write_audiofile(audio_path, logger=None)
-
-            # Read the audio file and return bytes
-            with open(audio_path, "rb") as f:
-                audio_bytes = f.read()
-
-            # Clean up temporary audio file
-            try:
-                os.unlink(audio_path)
-            except (OSError, PermissionError):
-                # File might be in use, skip cleanup
-                pass
-
-            return audio_bytes
-
-        finally:
-            # Clean up temporary video file
-            try:
-                # Close video and audio objects first
-                if "video" in locals():
-                    video.close()
-                if "audio" in locals() and audio:
-                    audio.close()
-
-                # Wait a bit before trying to delete
-                import time
-
-                time.sleep(0.1)
-
-                os.unlink(tmp_file_path)
-            except (OSError, PermissionError):
-                # File might be in use, skip cleanup
-                pass
-
-    except ImportError:
-        st.error("MoviePy not installed. Please install it with: pip install moviepy")
-        return None
-    except Exception as e:
-        st.error(f"Error extracting audio: {str(e)}")
-        return None
-
-
-def transcribe_audio(audio_bytes):
-    """
-    Transcribe audio to text for text sentiment analysis
-
-    Args:
-        audio_bytes: Audio bytes in WAV format
-
-    Returns:
-        Transcribed text string
-    """
-    if audio_bytes is None:
-        return ""
-
-    try:
-        import tempfile
-        import speech_recognition as sr
-
-        # Save audio bytes to temporary file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            tmp_file.write(audio_bytes)
-            tmp_file_path = tmp_file.name
-
-        try:
-            # Initialize recognizer
-            recognizer = sr.Recognizer()
-
-            # Load audio file
-            with sr.AudioFile(tmp_file_path) as source:
-                # Read audio data
-                audio_data = recognizer.record(source)
-
-            # Transcribe using Google Speech Recognition
-            try:
-                text = recognizer.recognize_google(audio_data)
-                return text
-            except sr.UnknownValueError:
-                st.warning("Speech could not be understood")
-                return ""
-            except sr.RequestError as e:
-                st.error(
-                    f"Could not request results from speech recognition service: {e}"
-                )
-                return ""
-
-        finally:
-            # Clean up temporary file
-            os.unlink(tmp_file_path)
-
-    except ImportError:
-        st.error(
-            "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
-        )
-        return ""
-    except Exception as e:
-        st.error(f"Error transcribing audio: {str(e)}")
-        return ""
-
-
-# Sidebar navigation
-st.sidebar.title("Sentiment Analysis")
-st.sidebar.markdown("---")
-
-# Navigation
-page = st.sidebar.selectbox(
-    "Choose a page:",
-    [
-        "Home",
-        "Text Sentiment",
-        "Audio Sentiment",
-        "Vision Sentiment",
-        "Fused Model",
-        "Max Fusion",
-    ],
-)
-
 st.markdown(
-    '<h1 class="main-header">
     unsafe_allow_html=True,
 )

 st.markdown(
     """
-    ...
     unsafe_allow_html=True,
 )
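All of the functions removed above reappear in the new src/ packages. The imports in the refactored app.py further down show where most of them now live; the entries marked "assumed" below are inferred and are not confirmed by the visible part of the diff:

# Where the removed app.py pieces now live, per the new imports shown below.
# predict_text_sentiment                      -> src/models/text_model.py
# load_audio_model, predict_audio_sentiment   -> src/models/audio_model.py
# load_vision_model, predict_vision_sentiment -> src/models/vision_model.py
# predict_fused_sentiment                     -> src/models/fused_model.py
# extract_frames_from_video, extract_audio_from_video,
# transcribe_audio                            -> src/utils/preprocessing.py
# sentiment label/emoji mapping               -> src/utils/sentiment_mapping.py
# file info/size helpers                      -> src/utils/file_handling.py
# page constants and CUSTOM_CSS               -> src/config/settings.py (assumed also src/ui/styles.py)
# detect_and_preprocess_face                  -> src/utils/preprocessing.py (assumed)
# SimpleModelManager                          -> src/utils/simple_model_manager.py (moved per the file list)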
@@ -784,105 +70,106 @@ if page == "Home":
-    ... (old inline Home-page HTML blocks)

@@ -910,28 +197,26 @@ elif page == "Text Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -969,7 +254,7 @@ elif page == "Audio Sentiment":
-            type=

@@ -979,12 +264,12 @@ elif page == "Audio Sentiment":
-            ... (old inline recording-help HTML)

@@ -1018,8 +303,10 @@ elif page == "Audio Sentiment":
-            st.info(
-                ...

@@ -1042,17 +329,18 @@ elif page == "Audio Sentiment":
-                sentiment_colors =
-                ... (old inline result-box HTML)

@@ -1061,8 +349,9 @@ elif page == "Audio Sentiment":
-    ...

@@ -1101,7 +390,7 @@ elif page == "Vision Sentiment":
-            type=

@@ -1115,9 +404,9 @@ elif page == "Vision Sentiment":
-                f"File: {

@@ -1140,33 +429,30 @@ elif page == "Vision Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box and camera-help HTML)

@@ -1210,21 +496,18 @@ elif page == "Vision Sentiment":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -1234,8 +517,9 @@ elif page == "Vision Sentiment":
-    ...

@@ -1243,12 +527,12 @@ elif page == "Fused Model":
-        ... (old inline description HTML)

@@ -1282,7 +566,7 @@ elif page == "Fused Model":
-                type=

@@ -1325,7 +609,7 @@ elif page == "Fused Model":
-                type=

@@ -1421,16 +705,17 @@ elif page == "Fused Model":
-                sentiment_colors =
-                ... (old inline result-box HTML)

@@ -1438,21 +723,22 @@ elif page == "Fused Model":
-    ... (old inline Max Fusion intro HTML)

@@ -1476,25 +762,25 @@ elif page == "Max Fusion":
-            ... (old inline "coming soon" HTML)

@@ -1507,19 +793,19 @@ elif page == "Max Fusion":
-            ... (old inline upload-help HTML)
-            type=

@@ -1527,8 +813,6 @@ elif page == "Max Fusion":
-        # Video recording using streamlit-webrtc component - COMING SOON
-

@@ -1543,7 +827,7 @@ elif page == "Max Fusion":
-                type=

@@ -1561,11 +845,10 @@ elif page == "Max Fusion":
-            ...
-            st.info(f"File: {video_name} | Size: {file_size:.1f} KB")

@@ -1709,20 +992,17 @@ elif page == "Max Fusion":
-                sentiment_colors =
-                    ...
-                    "Negative": "🔴",
-                    "Neutral": "🟡",
-                }
-                ... (old inline result-box HTML)

@@ -1740,13 +1020,54 @@ elif page == "Max Fusion":
-    ... (removed trailing lines)
+"""
+Refactored Sentiment Fused - Multimodal Sentiment Analysis Application
+
+This is the main entry point for the application, now using a modular structure.
+"""
+
 import streamlit as st
 import pandas as pd
 from PIL import Image
+import logging
+
+# Import our modular components
+from src.config.settings import (
+    APP_NAME,
+    APP_VERSION,
+    APP_ICON,
+    APP_LAYOUT,
+    CUSTOM_CSS,
+    SUPPORTED_IMAGE_FORMATS,
+    SUPPORTED_AUDIO_FORMATS,
+    SUPPORTED_VIDEO_FORMATS,
+)
+from src.models.text_model import predict_text_sentiment
+from src.models.audio_model import predict_audio_sentiment, load_audio_model
+from src.models.vision_model import predict_vision_sentiment, load_vision_model
+from src.models.fused_model import predict_fused_sentiment
+from src.utils.preprocessing import (
+    extract_frames_from_video,
+    extract_audio_from_video,
+    transcribe_audio,
+)
+from src.utils.file_handling import get_file_info, format_file_size
+from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 # Page configuration
 st.set_page_config(
+    page_title=APP_NAME,
+    page_icon=APP_ICON,
+    layout=APP_LAYOUT,
     initial_sidebar_state="expanded",
 )

+# Apply custom CSS
+st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
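src/config/settings.py itself is not part of the visible diff. Judging from the names imported above and the upload help texts used later in the file, it presumably looks roughly like this sketch (only the names are taken from the diff; the concrete values are illustrative assumptions):

# Sketch of src/config/settings.py, assumed from the imports in app.py above.
APP_NAME = "Sentiment Fused"          # assumed value
APP_VERSION = "1.0.0"                 # assumed value
APP_ICON = "🎭"                        # assumed value
APP_LAYOUT = "wide"                   # assumed value

# File-type whitelists passed to st.file_uploader(type=...); the lists mirror
# the "Supported formats" help strings used in the UI.
SUPPORTED_IMAGE_FORMATS = ["png", "jpg", "jpeg", "bmp", "tiff"]
SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "m4a", "flac"]
SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "wmv", "flv"]

# CUSTOM_CSS holds the <style> block (main-header, model-card, result-box,
# upload-section rules) that previously lived inline in app.py.
CUSTOM_CSS = "<style> /* ... */ </style>"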
+def render_home_page():
+    """Render the home page with model information."""
     st.markdown(
+        f'<h1 class="main-header">{APP_NAME}</h1>',
         unsafe_allow_html=True,
     )

     st.markdown(
         """
+        <div class="model-card">
+        <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
+        <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
     with col1:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Text Sentiment Model</h3>
+            <p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
+            <ul>
+                <li>Process any text input</li>
+                <li>Get sentiment classification (Positive/Negative/Neutral)</li>
+                <li>View confidence scores</li>
+                <li>Real-time NLP analysis</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     with col2:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Audio Sentiment Model</h3>
+            <p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
+            <ul>
+                <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
+                <li>Record audio directly with microphone (max 5s)</li>
+                <li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
+                <li>Listen to uploaded/recorded audio</li>
+                <li>Get sentiment predictions</li>
+                <li>Real-time audio analysis</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     with col3:
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Vision Sentiment Model</h3>
+            <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
+            <ul>
+                <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
+                <li>Automatic face detection & preprocessing</li>
+                <li>Fixed 0% padding for tightest face crop</li>
+                <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
+                <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
+                <li>Preview original & preprocessed images</li>
+                <li>Get sentiment predictions</li>
+            </ul>
+            </div>
+            """,
             unsafe_allow_html=True,
         )

     st.markdown(
         """
+        <div class="model-card">
+        <h3>Fused Model</h3>
+        <p>Combine predictions from all three models for enhanced accuracy</p>
+        <ul>
+            <li>Multi-modal input processing</li>
+            <li>Ensemble prediction strategies</li>
+            <li>Comprehensive sentiment analysis</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

     st.markdown(
         """
+        <div class="model-card">
+        <h3>🎬 Max Fusion</h3>
+        <p>Ultimate video-based sentiment analysis combining all three modalities</p>
+        <ul>
+            <li>🎥 Record or upload 5-second videos</li>
+            <li>🔍 Extract frames for vision analysis</li>
+            <li>🎵 Extract audio for vocal sentiment</li>
+            <li>📝 Transcribe audio for text analysis</li>
+            <li>🚀 Comprehensive multi-modal results</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

     st.markdown("---")
     st.markdown(
         """
+        <div style="text-align: center; color: #666;">
+        <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
+        <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
+        <p><strong>Models are now loaded from Google Drive automatically!</strong></p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )

+
+def render_text_sentiment_page():
+    """Render the text sentiment analysis page."""
     st.title("Text Sentiment Analysis")
     st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
...
                 st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
+                    <p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
                 st.error("Please enter some text to analyze.")

+
+def render_audio_sentiment_page():
+    """Render the audio sentiment analysis page."""
     st.title("Audio Sentiment Analysis")
     st.markdown(
         "Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
...
         # File uploader
         uploaded_audio = st.file_uploader(
             "Choose an audio file",
+            type=SUPPORTED_AUDIO_FORMATS,
             help="Supported formats: WAV, MP3, M4A, FLAC",
         )
...
     else:  # Audio recording
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Audio Recording</h3>
+            <p>Record audio directly with your microphone (max 5 seconds).</p>
+            <p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
...
                 uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
             )
             # File info for uploaded files
+            file_info = get_file_info(uploaded_audio)
+            st.info(
+                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
+            )

         # Analyze button
         if st.button(
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Audio Source:</strong> {audio_name}</p>
+                    <p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
...
     else:
         st.info("Click the microphone button above to record audio for analysis.")

+
+def render_vision_sentiment_page():
+    """Render the vision sentiment analysis page."""
     st.title("Vision Sentiment Analysis")
     st.markdown(
         "Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
...
         # File uploader
         uploaded_image = st.file_uploader(
             "Choose an image file",
+            type=SUPPORTED_IMAGE_FORMATS,
             help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
         )
...
             )

             # File info
+            file_info = get_file_info(uploaded_image)
             st.info(
+                f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
             )

             # Analyze button
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Image File:</strong> {uploaded_image.name}</p>
+                    <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )

     else:  # Camera capture
         st.markdown(
             """
+            <div class="model-card">
+            <h3>Camera Capture</h3>
+            <p>Take a photo directly with your camera to analyze its sentiment.</p>
+            <p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
...
                     st.metric("Confidence", f"{confidence:.2f}")

                 # Color-coded sentiment display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Sentiment: {sentiment}</h4>
+                    <p><strong>Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Image Source:</strong> Camera Capture</p>
+                    <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
...
     elif input_method == "Take Photo with Camera" and "camera_photo" not in locals():
         st.info("Click the camera button above to take a photo for analysis.")

+
+def render_fused_model_page():
+    """Render the fused model analysis page."""
     st.title("Fused Model Analysis")
     st.markdown(
         "Combine predictions from all three models for enhanced sentiment analysis."
...
     st.markdown(
         """
+        <div class="model-card">
+        <h3>Multi-Modal Sentiment Analysis</h3>
+        <p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
+        using all three models combined.</p>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
         if audio_input_method == "Upload File":
             uploaded_audio = st.file_uploader(
                 "Upload audio file (optional):",
+                type=SUPPORTED_AUDIO_FORMATS,
                 key="fused_audio",
             )
             audio_source = "uploaded_file"
...
         if image_input_method == "Upload File":
             uploaded_image = st.file_uploader(
                 "Upload image file (optional):",
+                type=SUPPORTED_IMAGE_FORMATS,
                 key="fused_image",
             )
...
                 st.dataframe(df, use_container_width=True)

                 # Final result display
+                sentiment_colors = get_sentiment_colors()
+                emoji = sentiment_colors.get(sentiment, "❓")

                 st.markdown(
                     f"""
+                    <div class="result-box">
+                    <h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
+                    <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
+                    <p><strong>Models Used:</strong> {len(results_data)}</p>
+                    </div>
+                    """,
                     unsafe_allow_html=True,
                 )
             else:
...
                 "Please provide at least one input (text, audio, or image) for fused analysis."
             )

+
+def render_max_fusion_page():
+    """Render the max fusion page for video-based analysis."""
     st.title("Max Fusion - Multi-Modal Sentiment Analysis")
     st.markdown(
         """
+        <div class="model-card">
+        <h3>Ultimate Multi-Modal Sentiment Analysis</h3>
+        <p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
+        <ul>
+            <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
+            <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
+            <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
+        </ul>
+        </div>
+        """,
         unsafe_allow_html=True,
     )
...
     with col2:
         st.markdown(
             """
+            <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
+            <h3>🚧 Coming Soon 🚧</h3>
+            <p>Video recording feature is under development</p>
+            <p>Use Upload Video File for now!</p>
+            </div>
+            """,
             unsafe_allow_html=True,
         )
| 774 |
# Placeholder for future recording functionality
|
| 775 |
st.markdown(
|
| 776 |
"""
|
| 777 |
+
**Future Features:**
|
| 778 |
+
- Real-time video recording with camera
|
| 779 |
+
- Audio capture during recording
|
| 780 |
+
- Automatic frame extraction
|
| 781 |
+
- Live transcription
|
| 782 |
+
- WebRTC integration for low-latency streaming
|
| 783 |
+
"""
|
| 784 |
)
|
| 785 |
|
| 786 |
# Skip all the recording logic for now
|
|
|
|
| 793 |
# File upload option
|
| 794 |
st.markdown(
|
| 795 |
"""
|
| 796 |
+
<div class="upload-section">
|
| 797 |
+
<h4>📁 Upload Video File</h4>
|
| 798 |
+
<p>Upload a video file for comprehensive multimodal analysis.</p>
|
| 799 |
+
<p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
|
| 800 |
+
<p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
|
| 801 |
+
</div>
|
| 802 |
+
""",
|
| 803 |
unsafe_allow_html=True,
|
| 804 |
)
|
| 805 |
|
| 806 |
uploaded_video = st.file_uploader(
|
| 807 |
"Choose a video file",
|
| 808 |
+
type=SUPPORTED_VIDEO_FORMATS,
|
| 809 |
help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
|
| 810 |
)
|
| 811 |
|
|
|
|
| 813 |
video_name = uploaded_video.name if uploaded_video else None
|
| 814 |
video_file = uploaded_video
|
| 815 |
|
|
|
|
|
|
|
| 816 |
if video_file is not None:
|
| 817 |
# Display video or photo
|
| 818 |
if video_source == "camera_photo":
|
|
|
|
| 827 |
|
| 828 |
uploaded_audio = st.file_uploader(
|
| 829 |
"Upload audio file for audio analysis:",
|
| 830 |
+
type=SUPPORTED_AUDIO_FORMATS,
|
| 831 |
key="camera_audio",
|
| 832 |
help="Upload an audio file to complement the photo analysis",
|
| 833 |
)
|
|
|
|
| 845 |
else:
|
| 846 |
# For uploaded videos
|
| 847 |
st.video(video_file)
|
| 848 |
+
file_info = get_file_info(video_file)
|
| 849 |
+
st.info(
|
| 850 |
+
f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
|
| 851 |
+
)
|
|
|
|
| 852 |
audio_bytes = None # Will be extracted from video
|
| 853 |
|
| 854 |
# Video Processing Pipeline
|
|
|
|
| 992 |
st.metric("📊 Overall Confidence", f"{confidence:.2f}")
|
| 993 |
|
| 994 |
# Color-coded sentiment display
|
| 995 |
+
sentiment_colors = get_sentiment_colors()
|
| 996 |
+
emoji = sentiment_colors.get(sentiment, "❓")
|
| 997 |
|
| 998 |
st.markdown(
|
| 999 |
f"""
|
| 1000 |
+
<div class="result-box">
|
| 1001 |
+
<h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
|
| 1002 |
+
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
|
| 1003 |
+
<p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
|
| 1004 |
+
<p><strong>Video Source:</strong> {video_name}</p>
|
| 1005 |
+
<p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
|
| 1006 |
</div>
|
| 1007 |
""",
|
| 1008 |
unsafe_allow_html=True,
|
|
|
|
| 1020 |
else:
|
| 1021 |
st.info("📁 Please upload a video file to begin Max Fusion analysis.")
|
| 1022 |
|
| 1023 |
+
|
| 1024 |
+
def main():
|
| 1025 |
+
"""Main application function."""
|
| 1026 |
+
# Sidebar navigation
|
| 1027 |
+
st.sidebar.title("Sentiment Analysis")
|
| 1028 |
+
st.sidebar.markdown("---")
|
| 1029 |
+
|
| 1030 |
+
# Navigation
|
| 1031 |
+
page = st.sidebar.selectbox(
|
| 1032 |
+
"Choose a page:",
|
| 1033 |
+
[
|
| 1034 |
+
"Home",
|
| 1035 |
+
"Text Sentiment",
|
| 1036 |
+
"Audio Sentiment",
|
| 1037 |
+
"Vision Sentiment",
|
| 1038 |
+
"Fused Model",
|
| 1039 |
+
"Max Fusion",
|
| 1040 |
+
],
|
| 1041 |
+
)
|
| 1042 |
+
|
| 1043 |
+
# Page routing
|
| 1044 |
+
if page == "Home":
|
| 1045 |
+
render_home_page()
|
| 1046 |
+
elif page == "Text Sentiment":
|
| 1047 |
+
render_text_sentiment_page()
|
| 1048 |
+
elif page == "Audio Sentiment":
|
| 1049 |
+
render_audio_sentiment_page()
|
| 1050 |
+
elif page == "Vision Sentiment":
|
| 1051 |
+
render_vision_sentiment_page()
|
| 1052 |
+
elif page == "Fused Model":
|
| 1053 |
+
render_fused_model_page()
|
| 1054 |
+
elif page == "Max Fusion":
|
| 1055 |
+
render_max_fusion_page()
|
| 1056 |
+
|
| 1057 |
+
# Footer
|
| 1058 |
+
st.markdown("---")
|
| 1059 |
+
st.markdown(
|
| 1060 |
+
"""
|
| 1061 |
+
<div style="text-align: center; color: #666; padding: 1rem;">
|
| 1062 |
+
<p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
|
| 1063 |
+
<p>Version: {version}</p>
|
| 1064 |
+
</div>
|
| 1065 |
+
""".format(
|
| 1066 |
+
version=APP_VERSION
|
| 1067 |
+
),
|
| 1068 |
+
unsafe_allow_html=True,
|
| 1069 |
+
)
|
| 1070 |
+
|
| 1071 |
+
|
| 1072 |
+
if __name__ == "__main__":
|
| 1073 |
+
main()
|
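Note: the Max Fusion page relies on a video-processing pipeline (frame extraction, audio extraction, transcription) implemented in src/utils/preprocessing.py, which is not shown in full in this diff. As a rough illustration of the frame-sampling idea behind MAX_VIDEO_FRAMES and VIDEO_FRAME_INTERVALS in src/config/settings.py, a minimal sketch could look like this (extract_frames is a hypothetical helper, not part of this commit):

import cv2
from PIL import Image

def extract_frames(video_path: str, intervals=(0, 0.25, 0.5, 0.75, 1.0)):
    """Return PIL images sampled at the given fractions of the video length."""
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    for fraction in intervals:
        index = min(int(total * fraction), max(total - 1, 0))
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        ok, frame = cap.read()
        if ok:
            # OpenCV decodes to BGR; convert to RGB before handing frames to PIL / the vision model.
            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    cap.release()
    return frames

Sampling a handful of frames at fixed fractions keeps per-video inference cheap while still covering the whole clip.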
pyproject.toml
CHANGED
|
@@ -11,4 +11,6 @@ dependencies = [
|
| 11 |
"speechrecognition>=3.10.0",
|
| 12 |
"streamlit-webrtc>=0.47.0",
|
| 13 |
"opencv-python-headless>=4.8.0",
|
| 14 |
+
"torch>=2.8.0",
|
| 15 |
+
"pillow>=11.3.0",
|
| 16 |
]
|
src/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
| 1 |
+
"""
|
| 2 |
+
Sentiment Fused - Multimodal Sentiment Analysis Package
|
| 3 |
+
|
| 4 |
+
A comprehensive package for analyzing sentiment from text, audio, and visual inputs
|
| 5 |
+
using state-of-the-art deep learning models.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "0.1.0"
|
| 9 |
+
__author__ = "iamfaham"
|
| 10 |
+
__description__ = "Multimodal Sentiment Analysis with Text, Audio, and Vision Models"
|
src/config/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
| 1 |
+
"""
|
| 2 |
+
Configuration package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .settings import *
|
src/config/settings.py
ADDED
|
@@ -0,0 +1,153 @@
|
| 1 |
+
"""
|
| 2 |
+
Centralized configuration settings for the Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Any
|
| 8 |
+
|
| 9 |
+
# Application Configuration
|
| 10 |
+
APP_NAME = "Multimodal Sentiment Analysis"
|
| 11 |
+
APP_VERSION = "0.1.0"
|
| 12 |
+
APP_ICON = "🧠"
|
| 13 |
+
APP_LAYOUT = "wide"
|
| 14 |
+
|
| 15 |
+
# Model Configuration
|
| 16 |
+
VISION_MODEL_CONFIG = {
|
| 17 |
+
"model_name": "resnet50",
|
| 18 |
+
"input_size": 224,
|
| 19 |
+
"num_classes": 7, # FER2013 default
|
| 20 |
+
"crop_tightness": 0.0, # No padding for tightest crop
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
AUDIO_MODEL_CONFIG = {
|
| 24 |
+
"model_name": "facebook/wav2vec2-base",
|
| 25 |
+
"target_sampling_rate": 16000,
|
| 26 |
+
"max_duration": 5.0,
|
| 27 |
+
"num_classes": 3, # Default sentiment classes
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
TEXT_MODEL_CONFIG = {
|
| 31 |
+
"model_name": "textblob",
|
| 32 |
+
"confidence_threshold": 0.1,
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# File Processing Configuration
|
| 36 |
+
SUPPORTED_IMAGE_FORMATS = ["png", "jpg", "jpeg", "bmp", "tiff"]
|
| 37 |
+
SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "m4a", "flac"]
|
| 38 |
+
SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "wmv", "flv"]
|
| 39 |
+
|
| 40 |
+
# Video Processing Configuration
|
| 41 |
+
MAX_VIDEO_FRAMES = 5
|
| 42 |
+
VIDEO_FRAME_INTERVALS = [0, 0.25, 0.5, 0.75, 1.0] # Frame extraction points
|
| 43 |
+
|
| 44 |
+
# Image Preprocessing Configuration
|
| 45 |
+
IMAGE_TRANSFORMS = {
|
| 46 |
+
"resize": 224,
|
| 47 |
+
"center_crop": 224,
|
| 48 |
+
"normalize_mean": [0.485, 0.456, 0.406],
|
| 49 |
+
"normalize_std": [0.229, 0.224, 0.225],
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
# Sentiment Mapping Configuration
|
| 53 |
+
SENTIMENT_MAPPINGS = {
|
| 54 |
+
3: {0: "Negative", 1: "Neutral", 2: "Positive"},
|
| 55 |
+
4: {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"},
|
| 56 |
+
7: {
|
| 57 |
+
0: "Angry",
|
| 58 |
+
1: "Disgust",
|
| 59 |
+
2: "Fear",
|
| 60 |
+
3: "Happy",
|
| 61 |
+
4: "Sad",
|
| 62 |
+
5: "Surprise",
|
| 63 |
+
6: "Neutral",
|
| 64 |
+
},
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
# UI Configuration
|
| 68 |
+
UI_COLORS = {
|
| 69 |
+
"primary": "#1f77b4",
|
| 70 |
+
"success": "#28a745",
|
| 71 |
+
"warning": "#ffc107",
|
| 72 |
+
"danger": "#dc3545",
|
| 73 |
+
"info": "#17a2b8",
|
| 74 |
+
"light": "#f8f9fa",
|
| 75 |
+
"dark": "#343a40",
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
# CSS Styles
|
| 79 |
+
CUSTOM_CSS = """
|
| 80 |
+
<style>
|
| 81 |
+
.main-header {
|
| 82 |
+
font-size: 2.5rem;
|
| 83 |
+
font-weight: bold;
|
| 84 |
+
color: #1f77b4;
|
| 85 |
+
text-align: center;
|
| 86 |
+
margin-bottom: 2rem;
|
| 87 |
+
}
|
| 88 |
+
.model-card {
|
| 89 |
+
background-color: #f0f2f6;
|
| 90 |
+
padding: 1.5rem;
|
| 91 |
+
border-radius: 10px;
|
| 92 |
+
margin: 1rem 0;
|
| 93 |
+
border-left: 4px solid #1f77b4;
|
| 94 |
+
}
|
| 95 |
+
.result-box {
|
| 96 |
+
background-color: #e8f4fd;
|
| 97 |
+
padding: 1rem;
|
| 98 |
+
border-radius: 8px;
|
| 99 |
+
border: 1px solid #1f77b4;
|
| 100 |
+
margin: 1rem 0;
|
| 101 |
+
}
|
| 102 |
+
.upload-section {
|
| 103 |
+
background-color: #f8f9fa;
|
| 104 |
+
padding: 1.5rem;
|
| 105 |
+
border-radius: 10px;
|
| 106 |
+
border: 2px dashed #dee2e6;
|
| 107 |
+
text-align: center;
|
| 108 |
+
margin: 1rem 0;
|
| 109 |
+
}
|
| 110 |
+
</style>
|
| 111 |
+
"""
|
| 112 |
+
|
| 113 |
+
# Paths
|
| 114 |
+
BASE_DIR = Path(__file__).parent.parent.parent
|
| 115 |
+
MODELS_DIR = BASE_DIR / "models"
|
| 116 |
+
SRC_DIR = BASE_DIR / "src"
|
| 117 |
+
UI_DIR = SRC_DIR / "ui"
|
| 118 |
+
|
| 119 |
+
# Environment Variables
|
| 120 |
+
ENV_VARS = {
|
| 121 |
+
"VISION_MODEL_DRIVE_ID": os.getenv("VISION_MODEL_DRIVE_ID", ""),
|
| 122 |
+
"AUDIO_MODEL_DRIVE_ID": os.getenv("AUDIO_MODEL_DRIVE_ID", ""),
|
| 123 |
+
"VISION_MODEL_FILENAME": os.getenv("VISION_MODEL_FILENAME", "resnet50_model.pth"),
|
| 124 |
+
"AUDIO_MODEL_FILENAME": os.getenv("AUDIO_MODEL_FILENAME", "wav2vec2_model.pth"),
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
# Logging Configuration
|
| 128 |
+
LOGGING_CONFIG = {
|
| 129 |
+
"level": "INFO",
|
| 130 |
+
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
| 131 |
+
"handlers": ["console", "file"],
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
# Cache Configuration
|
| 135 |
+
CACHE_CONFIG = {
|
| 136 |
+
"ttl": 3600, # 1 hour
|
| 137 |
+
"max_entries": 100,
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
|
| 142 |
+
"""Get sentiment mapping based on number of classes."""
|
| 143 |
+
return SENTIMENT_MAPPINGS.get(
|
| 144 |
+
num_classes, {i: f"Class_{i}" for i in range(num_classes)}
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def validate_environment() -> Dict[str, bool]:
|
| 149 |
+
"""Validate that required environment variables are set."""
|
| 150 |
+
validation = {}
|
| 151 |
+
for var_name, var_value in ENV_VARS.items():
|
| 152 |
+
validation[var_name] = bool(var_value)
|
| 153 |
+
return validation
|
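For reference, the two helpers at the end of settings.py can be exercised directly; the printed values below are illustrative and the environment-variable results depend on what is actually set:

from src.config.settings import get_sentiment_mapping, validate_environment

print(get_sentiment_mapping(3))   # {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
print(get_sentiment_mapping(5))   # unknown sizes fall back to {0: 'Class_0', ..., 4: 'Class_4'}
print(validate_environment())     # e.g. {'VISION_MODEL_DRIVE_ID': False, 'AUDIO_MODEL_DRIVE_ID': False, ...}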
src/models/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
| 1 |
+
"""
|
| 2 |
+
Model package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .vision_model import *
|
| 6 |
+
from .audio_model import *
|
| 7 |
+
from .text_model import *
|
| 8 |
+
from .fused_model import *
|
src/models/audio_model.py
ADDED
|
@@ -0,0 +1,154 @@
|
| 1 |
+
"""
|
| 2 |
+
Audio sentiment analysis model using fine-tuned Wav2Vec2.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
import torch
|
| 9 |
+
from PIL import Image
|
| 10 |
+
import os
|
| 11 |
+
from ..config.settings import AUDIO_MODEL_CONFIG
|
| 12 |
+
from ..utils.preprocessing import preprocess_audio_for_model
|
| 13 |
+
from ..utils.sentiment_mapping import get_sentiment_mapping
|
| 14 |
+
from src.utils.simple_model_manager import SimpleModelManager
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@st.cache_resource
|
| 20 |
+
def load_audio_model():
|
| 21 |
+
"""Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive."""
|
| 22 |
+
try:
|
| 23 |
+
manager = SimpleModelManager()
|
| 24 |
+
if manager is None:
|
| 25 |
+
logger.error("Model manager not available")
|
| 26 |
+
st.error("Model manager not available")
|
| 27 |
+
return None, None, None, None
|
| 28 |
+
|
| 29 |
+
# Load the model using the Google Drive manager
|
| 30 |
+
model, device = manager.load_audio_model()
|
| 31 |
+
|
| 32 |
+
if model is None:
|
| 33 |
+
logger.error("Failed to load audio model from Google Drive")
|
| 34 |
+
st.error("Failed to load audio model from Google Drive")
|
| 35 |
+
return None, None, None, None
|
| 36 |
+
|
| 37 |
+
# For Wav2Vec2 models, we need to determine the number of classes
|
| 38 |
+
# This is typically available in the model configuration
|
| 39 |
+
try:
|
| 40 |
+
num_classes = model.config.num_labels
|
| 41 |
+
except Exception:
|
| 42 |
+
# Fallback: try to infer from the model
|
| 43 |
+
try:
|
| 44 |
+
num_classes = model.classifier.out_features
|
| 45 |
+
except Exception:
|
| 46 |
+
num_classes = AUDIO_MODEL_CONFIG["num_classes"] # Default assumption
|
| 47 |
+
|
| 48 |
+
# Load feature extractor
|
| 49 |
+
from transformers import AutoFeatureExtractor
|
| 50 |
+
|
| 51 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
| 52 |
+
AUDIO_MODEL_CONFIG["model_name"]
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
logger.info(f"Audio model loaded successfully with {num_classes} classes!")
|
| 56 |
+
st.success(f"Audio model loaded successfully with {num_classes} classes!")
|
| 57 |
+
return model, device, num_classes, feature_extractor
|
| 58 |
+
except Exception as e:
|
| 59 |
+
logger.error(f"Error loading audio model: {str(e)}")
|
| 60 |
+
st.error(f"Error loading audio model: {str(e)}")
|
| 61 |
+
return None, None, None, None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def predict_audio_sentiment(audio_bytes: bytes) -> Tuple[str, float]:
|
| 65 |
+
"""
|
| 66 |
+
Analyze audio sentiment using fine-tuned Wav2Vec2 model.
|
| 67 |
+
|
| 68 |
+
Preprocessing matches CREMA-D + RAVDESS training specifications:
|
| 69 |
+
- Target sampling rate: 16kHz
|
| 70 |
+
- Max duration: 5.0 seconds
|
| 71 |
+
- Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
|
| 72 |
+
|
| 73 |
+
Args:
|
| 74 |
+
audio_bytes: Raw audio bytes
|
| 75 |
+
|
| 76 |
+
Returns:
|
| 77 |
+
Tuple of (sentiment, confidence)
|
| 78 |
+
"""
|
| 79 |
+
if audio_bytes is None:
|
| 80 |
+
return "No audio provided", 0.0
|
| 81 |
+
|
| 82 |
+
try:
|
| 83 |
+
# Load model if not already loaded
|
| 84 |
+
model, device, num_classes, feature_extractor = load_audio_model()
|
| 85 |
+
if model is None:
|
| 86 |
+
return "Model not loaded", 0.0
|
| 87 |
+
|
| 88 |
+
# Use our centralized preprocessing function
|
| 89 |
+
input_values = preprocess_audio_for_model(audio_bytes)
|
| 90 |
+
if input_values is None:
|
| 91 |
+
return "Preprocessing failed", 0.0
|
| 92 |
+
|
| 93 |
+
# Debug: Log the tensor shape
|
| 94 |
+
logger.info(f"Preprocessed audio tensor shape: {input_values.shape}")
|
| 95 |
+
|
| 96 |
+
# Ensure correct tensor shape: [batch_size, sequence_length]
|
| 97 |
+
if input_values.dim() == 1:
|
| 98 |
+
input_values = input_values.unsqueeze(0) # Add batch dimension if missing
|
| 99 |
+
elif input_values.dim() == 3:
|
| 100 |
+
# If we get [batch, sequence, channels], squeeze the channels
|
| 101 |
+
input_values = input_values.squeeze(-1)
|
| 102 |
+
|
| 103 |
+
logger.info(f"Final audio tensor shape: {input_values.shape}")
|
| 104 |
+
|
| 105 |
+
# Move to device
|
| 106 |
+
input_values = input_values.to(device)
|
| 107 |
+
|
| 108 |
+
# Run inference
|
| 109 |
+
with torch.no_grad():
|
| 110 |
+
outputs = model(input_values)
|
| 111 |
+
probabilities = torch.softmax(outputs.logits, dim=1)
|
| 112 |
+
confidence, predicted = torch.max(probabilities, 1)
|
| 113 |
+
|
| 114 |
+
# Get sentiment mapping based on number of classes
|
| 115 |
+
sentiment_map = get_sentiment_mapping(num_classes)
|
| 116 |
+
sentiment = sentiment_map[predicted.item()]
|
| 117 |
+
confidence_score = confidence.item()
|
| 118 |
+
|
| 119 |
+
logger.info(
|
| 120 |
+
f"Audio sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
|
| 121 |
+
)
|
| 122 |
+
return sentiment, confidence_score
|
| 123 |
+
|
| 124 |
+
except ImportError as e:
|
| 125 |
+
logger.error(f"Required library not installed: {str(e)}")
|
| 126 |
+
st.error(f"Required library not installed: {str(e)}")
|
| 127 |
+
st.info("Please install: pip install librosa transformers")
|
| 128 |
+
return "Library not available", 0.0
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error in audio sentiment prediction: {str(e)}")
|
| 131 |
+
st.error(f"Error in audio sentiment prediction: {str(e)}")
|
| 132 |
+
return "Error occurred", 0.0
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def get_audio_model_info() -> dict:
|
| 136 |
+
"""Get information about the audio sentiment model."""
|
| 137 |
+
return {
|
| 138 |
+
"model_name": AUDIO_MODEL_CONFIG["model_name"],
|
| 139 |
+
"description": "Fine-tuned Wav2Vec2 for audio sentiment analysis",
|
| 140 |
+
"capabilities": [
|
| 141 |
+
"Audio sentiment classification",
|
| 142 |
+
"Automatic audio preprocessing",
|
| 143 |
+
"CREMA-D + RAVDESS dataset compatibility",
|
| 144 |
+
"Real-time audio analysis",
|
| 145 |
+
],
|
| 146 |
+
"input_format": "Audio files (WAV, MP3, M4A, FLAC)",
|
| 147 |
+
"output_format": "Sentiment label + confidence score",
|
| 148 |
+
"preprocessing": {
|
| 149 |
+
"sampling_rate": f"{AUDIO_MODEL_CONFIG['target_sampling_rate']} Hz",
|
| 150 |
+
"max_duration": f"{AUDIO_MODEL_CONFIG['max_duration']} seconds",
|
| 151 |
+
"feature_extraction": "AutoFeatureExtractor",
|
| 152 |
+
"normalization": "Model-specific",
|
| 153 |
+
},
|
| 154 |
+
}
|
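Note: preprocess_audio_for_model is imported from src/utils/preprocessing.py, whose audio portion is not shown in this diff. A minimal sketch of what the docstring above describes (resample to 16 kHz, cap at 5 seconds, run the Wav2Vec2 feature extractor) could look like this; it assumes librosa is available, reloads the feature extractor for simplicity, and is not the project's actual implementation:

import io

import librosa
import torch
from transformers import AutoFeatureExtractor

def preprocess_audio_sketch(audio_bytes: bytes) -> torch.Tensor:
    # Decode and resample to the 16 kHz mono format the model was trained on.
    waveform, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
    max_samples = int(16000 * 5.0)   # 5-second cap, as in AUDIO_MODEL_CONFIG
    waveform = waveform[:max_samples]
    extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
    inputs = extractor(
        waveform,
        sampling_rate=16000,
        max_length=max_samples,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    return inputs.input_values       # shape: [1, sequence_length]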
src/models/fused_model.py
ADDED
|
@@ -0,0 +1,176 @@
|
| 1 |
+
"""
|
| 2 |
+
Fused sentiment analysis model combining text, audio, and vision models.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Tuple, Optional, List
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
from .text_model import predict_text_sentiment
|
| 10 |
+
from .audio_model import predict_audio_sentiment
|
| 11 |
+
from .vision_model import predict_vision_sentiment
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def predict_fused_sentiment(
|
| 17 |
+
text: Optional[str] = None,
|
| 18 |
+
audio_bytes: Optional[bytes] = None,
|
| 19 |
+
image: Optional[Image.Image] = None,
|
| 20 |
+
) -> Tuple[str, float]:
|
| 21 |
+
"""
|
| 22 |
+
Implement ensemble/fusion logic combining all three models.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
text: Input text for text sentiment analysis
|
| 26 |
+
audio_bytes: Audio bytes for audio sentiment analysis
|
| 27 |
+
image: Input image for vision sentiment analysis
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
Tuple of (fused_sentiment, overall_confidence)
|
| 31 |
+
"""
|
| 32 |
+
results = []
|
| 33 |
+
|
| 34 |
+
if text:
|
| 35 |
+
text_sentiment, text_conf = predict_text_sentiment(text)
|
| 36 |
+
results.append(("Text", text_sentiment, text_conf))
|
| 37 |
+
|
| 38 |
+
if audio_bytes:
|
| 39 |
+
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
|
| 40 |
+
results.append(("Audio", audio_sentiment, audio_conf))
|
| 41 |
+
|
| 42 |
+
if image:
|
| 43 |
+
vision_sentiment, vision_conf = predict_vision_sentiment(image)
|
| 44 |
+
results.append(("Vision", vision_sentiment, vision_conf))
|
| 45 |
+
|
| 46 |
+
if not results:
|
| 47 |
+
return "No inputs provided", 0.0
|
| 48 |
+
|
| 49 |
+
# Simple ensemble logic (can be enhanced with more sophisticated fusion strategies)
|
| 50 |
+
sentiment_counts = {}
|
| 51 |
+
total_confidence = 0
|
| 52 |
+
modality_weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35} # Weighted voting
|
| 53 |
+
|
| 54 |
+
for modality, sentiment, confidence in results:
|
| 55 |
+
if sentiment not in sentiment_counts:
|
| 56 |
+
sentiment_counts[sentiment] = {"count": 0, "weighted_conf": 0}
|
| 57 |
+
|
| 58 |
+
sentiment_counts[sentiment]["count"] += 1
|
| 59 |
+
weight = modality_weights.get(modality, 0.33)
|
| 60 |
+
sentiment_counts[sentiment]["weighted_conf"] += confidence * weight
|
| 61 |
+
total_confidence += confidence
|
| 62 |
+
|
| 63 |
+
# Weighted majority voting with confidence averaging
|
| 64 |
+
if sentiment_counts:
|
| 65 |
+
# Find sentiment with highest weighted confidence
|
| 66 |
+
final_sentiment = max(
|
| 67 |
+
sentiment_counts.keys(), key=lambda s: sentiment_counts[s]["weighted_conf"]
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Report overall confidence as the simple (unweighted) average of the individual confidences
|
| 71 |
+
avg_confidence = total_confidence / len(results)
|
| 72 |
+
|
| 73 |
+
logger.info(
|
| 74 |
+
f"Fused sentiment analysis completed: {final_sentiment} (confidence: {avg_confidence:.2f})"
|
| 75 |
+
)
|
| 76 |
+
logger.info(f"Individual results: {results}")
|
| 77 |
+
|
| 78 |
+
return final_sentiment, avg_confidence
|
| 79 |
+
else:
|
| 80 |
+
return "No valid predictions", 0.0
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def get_fusion_strategy_info() -> dict:
|
| 84 |
+
"""Get information about the fusion strategy."""
|
| 85 |
+
return {
|
| 86 |
+
"strategy_name": "Weighted Ensemble Fusion",
|
| 87 |
+
"description": "Combines predictions from text, audio, and vision models using weighted voting",
|
| 88 |
+
"modality_weights": {"Text": 0.3, "Audio": 0.35, "Vision": 0.35},
|
| 89 |
+
"fusion_method": "Weighted majority voting with confidence averaging",
|
| 90 |
+
"advantages": [
|
| 91 |
+
"Robust to individual model failures",
|
| 92 |
+
"Leverages complementary information from different modalities",
|
| 93 |
+
"Configurable modality weights",
|
| 94 |
+
"Real-time ensemble prediction",
|
| 95 |
+
],
|
| 96 |
+
"use_cases": [
|
| 97 |
+
"Multi-modal content analysis",
|
| 98 |
+
"Enhanced sentiment accuracy",
|
| 99 |
+
"Cross-validation of predictions",
|
| 100 |
+
"Comprehensive emotional understanding",
|
| 101 |
+
],
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def analyze_modality_agreement(
|
| 106 |
+
text: Optional[str] = None,
|
| 107 |
+
audio_bytes: Optional[bytes] = None,
|
| 108 |
+
image: Optional[Image.Image] = None,
|
| 109 |
+
) -> dict:
|
| 110 |
+
"""
|
| 111 |
+
Analyze agreement between different modalities.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
text: Input text
|
| 115 |
+
audio_bytes: Audio bytes
|
| 116 |
+
image: Input image
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
Dictionary containing agreement analysis
|
| 120 |
+
"""
|
| 121 |
+
results = {}
|
| 122 |
+
|
| 123 |
+
if text:
|
| 124 |
+
text_sentiment, text_conf = predict_text_sentiment(text)
|
| 125 |
+
results["text"] = {"sentiment": text_sentiment, "confidence": text_conf}
|
| 126 |
+
|
| 127 |
+
if audio_bytes:
|
| 128 |
+
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
|
| 129 |
+
results["audio"] = {"sentiment": audio_sentiment, "confidence": audio_conf}
|
| 130 |
+
|
| 131 |
+
if image:
|
| 132 |
+
vision_sentiment, vision_conf = predict_vision_sentiment(image)
|
| 133 |
+
results["vision"] = {"sentiment": vision_sentiment, "confidence": vision_conf}
|
| 134 |
+
|
| 135 |
+
if len(results) < 2:
|
| 136 |
+
return {"agreement_level": "insufficient_modalities", "details": results}
|
| 137 |
+
|
| 138 |
+
# Analyze agreement
|
| 139 |
+
sentiments = [result["sentiment"] for result in results.values()]
|
| 140 |
+
unique_sentiments = set(sentiments)
|
| 141 |
+
|
| 142 |
+
if len(unique_sentiments) == 1:
|
| 143 |
+
agreement_level = "perfect"
|
| 144 |
+
agreement_score = 1.0
|
| 145 |
+
elif len(unique_sentiments) == 2:
|
| 146 |
+
agreement_level = "partial"
|
| 147 |
+
agreement_score = 0.5
|
| 148 |
+
else:
|
| 149 |
+
agreement_level = "low"
|
| 150 |
+
agreement_score = 0.0
|
| 151 |
+
|
| 152 |
+
# Calculate confidence consistency
|
| 153 |
+
confidences = [result["confidence"] for result in results.values()]
|
| 154 |
+
confidence_std = sum(confidences) / len(confidences) if confidences else 0  # NOTE: this is the mean confidence, not a standard deviation
|
| 155 |
+
|
| 156 |
+
return {
|
| 157 |
+
"agreement_level": agreement_level,
|
| 158 |
+
"agreement_score": agreement_score,
|
| 159 |
+
"modalities_analyzed": len(results),
|
| 160 |
+
"sentiment_distribution": {s: sentiments.count(s) for s in unique_sentiments},
|
| 161 |
+
"confidence_consistency": confidence_std,
|
| 162 |
+
"individual_results": results,
|
| 163 |
+
"recommendation": _get_agreement_recommendation(agreement_level, len(results)),
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def _get_agreement_recommendation(agreement_level: str, num_modalities: int) -> str:
|
| 168 |
+
"""Get recommendation based on agreement level."""
|
| 169 |
+
if agreement_level == "perfect":
|
| 170 |
+
return "High confidence in prediction - all modalities agree"
|
| 171 |
+
elif agreement_level == "partial":
|
| 172 |
+
return "Moderate confidence - consider modality-specific factors"
|
| 173 |
+
elif agreement_level == "low":
|
| 174 |
+
return "Low confidence - modalities disagree, consider context"
|
| 175 |
+
else:
|
| 176 |
+
return "Insufficient data for reliable fusion"
|
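To make the weighted-voting step in predict_fused_sentiment concrete, here is a small standalone walk-through with made-up confidences (illustrative only, not part of the commit):

# Suppose the three models returned these (modality, sentiment, confidence) triples.
results = [("Text", "Positive", 0.80), ("Audio", "Neutral", 0.60), ("Vision", "Positive", 0.70)]
weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35}

# Accumulate confidence * weight per sentiment, like the weighted_conf field above:
#   Positive -> 0.80*0.30 + 0.70*0.35 = 0.485
#   Neutral  -> 0.60*0.35            = 0.210
scores = {}
for modality, sentiment, conf in results:
    scores[sentiment] = scores.get(sentiment, 0.0) + conf * weights[modality]

winner = max(scores, key=scores.get)                       # "Positive"
mean_conf = sum(c for _, _, c in results) / len(results)   # (0.80 + 0.60 + 0.70) / 3 = 0.70
print(winner, round(mean_conf, 2))                         # Positive 0.7

The reported confidence is the plain mean of the individual confidences, so a confidently wrong modality can still pull the overall score up or down; the weights only decide which sentiment wins.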
src/models/text_model.py
ADDED
|
@@ -0,0 +1,81 @@
|
| 1 |
+
"""
|
| 2 |
+
Text sentiment analysis model using TextBlob.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Tuple, Optional
|
| 7 |
+
from ..config.settings import TEXT_MODEL_CONFIG
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def predict_text_sentiment(text: str) -> Tuple[str, float]:
|
| 13 |
+
"""
|
| 14 |
+
Analyze text sentiment using TextBlob.
|
| 15 |
+
|
| 16 |
+
Args:
|
| 17 |
+
text: Input text to analyze
|
| 18 |
+
|
| 19 |
+
Returns:
|
| 20 |
+
Tuple of (sentiment, confidence)
|
| 21 |
+
"""
|
| 22 |
+
if not text or text.strip() == "":
|
| 23 |
+
return "No text provided", 0.0
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
from textblob import TextBlob
|
| 27 |
+
|
| 28 |
+
# Create TextBlob object
|
| 29 |
+
blob = TextBlob(text)
|
| 30 |
+
|
| 31 |
+
# Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
|
| 32 |
+
polarity = blob.sentiment.polarity
|
| 33 |
+
|
| 34 |
+
# Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
|
| 35 |
+
subjectivity = blob.sentiment.subjectivity
|
| 36 |
+
|
| 37 |
+
# Convert polarity to sentiment categories
|
| 38 |
+
confidence_threshold = TEXT_MODEL_CONFIG["confidence_threshold"]
|
| 39 |
+
|
| 40 |
+
if polarity > confidence_threshold:
|
| 41 |
+
sentiment = "Positive"
|
| 42 |
+
confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
|
| 43 |
+
elif polarity < -confidence_threshold:
|
| 44 |
+
sentiment = "Negative"
|
| 45 |
+
confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
|
| 46 |
+
else:
|
| 47 |
+
sentiment = "Neutral"
|
| 48 |
+
confidence = 0.7 - abs(polarity) * 0.2
|
| 49 |
+
|
| 50 |
+
# Round confidence to 2 decimal places
|
| 51 |
+
confidence = round(confidence, 2)
|
| 52 |
+
|
| 53 |
+
logger.info(
|
| 54 |
+
f"Text sentiment analysis completed: {sentiment} (confidence: {confidence})"
|
| 55 |
+
)
|
| 56 |
+
return sentiment, confidence
|
| 57 |
+
|
| 58 |
+
except ImportError:
|
| 59 |
+
logger.error(
|
| 60 |
+
"TextBlob not installed. Please install it with: pip install textblob"
|
| 61 |
+
)
|
| 62 |
+
return "TextBlob not available", 0.0
|
| 63 |
+
except Exception as e:
|
| 64 |
+
logger.error(f"Error in text sentiment analysis: {str(e)}")
|
| 65 |
+
return "Error occurred", 0.0
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def get_text_model_info() -> dict:
|
| 69 |
+
"""Get information about the text sentiment model."""
|
| 70 |
+
return {
|
| 71 |
+
"model_name": TEXT_MODEL_CONFIG["model_name"],
|
| 72 |
+
"description": "Natural Language Processing based sentiment analysis using TextBlob",
|
| 73 |
+
"capabilities": [
|
| 74 |
+
"Text sentiment classification (Positive/Negative/Neutral)",
|
| 75 |
+
"Confidence scoring",
|
| 76 |
+
"Real-time analysis",
|
| 77 |
+
"No external API required",
|
| 78 |
+
],
|
| 79 |
+
"input_format": "Plain text",
|
| 80 |
+
"output_format": "Sentiment label + confidence score",
|
| 81 |
+
}
|
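A quick way to exercise this module from a Python shell (requires TextBlob and the project dependencies to be installed; the exact confidence values vary with the input):

from src.models.text_model import predict_text_sentiment

print(predict_text_sentiment("I absolutely love this product!"))  # ('Positive', <confidence>)
print(predict_text_sentiment("This is the worst day ever."))      # ('Negative', <confidence>)
print(predict_text_sentiment(""))                                 # ('No text provided', 0.0)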
src/models/vision_model.py
ADDED
|
@@ -0,0 +1,157 @@
|
| 1 |
+
"""
|
| 2 |
+
Vision sentiment analysis model using fine-tuned ResNet-50.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
from ..config.settings import VISION_MODEL_CONFIG
|
| 13 |
+
from ..utils.preprocessing import detect_and_preprocess_face, get_vision_transforms
|
| 14 |
+
from ..utils.sentiment_mapping import get_sentiment_mapping
|
| 15 |
+
from src.utils.simple_model_manager import SimpleModelManager
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@st.cache_resource
|
| 21 |
+
def get_model_manager():
|
| 22 |
+
"""Get the Google Drive model manager instance."""
|
| 23 |
+
try:
|
| 24 |
+
manager = SimpleModelManager()
|
| 25 |
+
return manager
|
| 26 |
+
except Exception as e:
|
| 27 |
+
logger.error(f"Failed to initialize model manager: {e}")
|
| 28 |
+
st.error(f"Failed to initialize model manager: {e}")
|
| 29 |
+
return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@st.cache_resource
|
| 33 |
+
def load_vision_model():
|
| 34 |
+
"""Load the pre-trained ResNet-50 vision sentiment model from Google Drive."""
|
| 35 |
+
try:
|
| 36 |
+
manager = get_model_manager()
|
| 37 |
+
if manager is None:
|
| 38 |
+
logger.error("Model manager not available")
|
| 39 |
+
st.error("Model manager not available")
|
| 40 |
+
return None, None, None
|
| 41 |
+
|
| 42 |
+
# Load the model using the Google Drive manager
|
| 43 |
+
model, device, num_classes = manager.load_vision_model()
|
| 44 |
+
|
| 45 |
+
if model is None:
|
| 46 |
+
logger.error("Failed to load vision model from Google Drive")
|
| 47 |
+
st.error("Failed to load vision model from Google Drive")
|
| 48 |
+
return None, None, None
|
| 49 |
+
|
| 50 |
+
logger.info(f"Vision model loaded successfully with {num_classes} classes!")
|
| 51 |
+
st.success(f"Vision model loaded successfully with {num_classes} classes!")
|
| 52 |
+
return model, device, num_classes
|
| 53 |
+
except Exception as e:
|
| 54 |
+
logger.error(f"Error loading vision model: {str(e)}")
|
| 55 |
+
st.error(f"Error loading vision model: {str(e)}")
|
| 56 |
+
return None, None, None
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def predict_vision_sentiment(
|
| 60 |
+
image: Image.Image, crop_tightness: float = None
|
| 61 |
+
) -> Tuple[str, float]:
|
| 62 |
+
"""
|
| 63 |
+
Load ResNet-50 and run inference for vision sentiment analysis.
|
| 64 |
+
|
| 65 |
+
Args:
|
| 66 |
+
image: Input image (PIL Image or numpy array)
|
| 67 |
+
crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
Tuple of (sentiment, confidence)
|
| 71 |
+
"""
|
| 72 |
+
if image is None:
|
| 73 |
+
return "No image provided", 0.0
|
| 74 |
+
|
| 75 |
+
try:
|
| 76 |
+
# Use default crop tightness if not specified
|
| 77 |
+
if crop_tightness is None:
|
| 78 |
+
crop_tightness = VISION_MODEL_CONFIG["crop_tightness"]
|
| 79 |
+
|
| 80 |
+
# Load model if not already loaded
|
| 81 |
+
model, device, num_classes = load_vision_model()
|
| 82 |
+
if model is None:
|
| 83 |
+
return "Model not loaded", 0.0
|
| 84 |
+
|
| 85 |
+
# Preprocess image to match FER2013 format
|
| 86 |
+
st.info(
|
| 87 |
+
"Detecting face and preprocessing image to match training data format..."
|
| 88 |
+
)
|
| 89 |
+
preprocessed_image = detect_and_preprocess_face(
|
| 90 |
+
image, crop_tightness=crop_tightness
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
if preprocessed_image is None:
|
| 94 |
+
return "Image preprocessing failed", 0.0
|
| 95 |
+
|
| 96 |
+
# Show preprocessed image
|
| 97 |
+
st.image(
|
| 98 |
+
preprocessed_image,
|
| 99 |
+
caption="Preprocessed Image (224x224 Grayscale → 3-channel RGB)",
|
| 100 |
+
width=200,
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
# Get transforms
|
| 104 |
+
transform = get_vision_transforms()
|
| 105 |
+
|
| 106 |
+
# Convert preprocessed image to tensor
|
| 107 |
+
image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
|
| 108 |
+
|
| 109 |
+
# Run inference
|
| 110 |
+
with torch.no_grad():
|
| 111 |
+
outputs = model(image_tensor)
|
| 112 |
+
|
| 113 |
+
# Debug: print output shape
|
| 114 |
+
st.info(f"Model output shape: {outputs.shape}")
|
| 115 |
+
|
| 116 |
+
probabilities = F.softmax(outputs, dim=1)
|
| 117 |
+
confidence, predicted = torch.max(probabilities, 1)
|
| 118 |
+
|
| 119 |
+
# Get sentiment mapping based on number of classes
|
| 120 |
+
sentiment_map = get_sentiment_mapping(num_classes)
|
| 121 |
+
sentiment = sentiment_map[predicted.item()]
|
| 122 |
+
confidence_score = confidence.item()
|
| 123 |
+
|
| 124 |
+
logger.info(
|
| 125 |
+
f"Vision sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
|
| 126 |
+
)
|
| 127 |
+
return sentiment, confidence_score
|
| 128 |
+
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error in vision sentiment prediction: {str(e)}")
|
| 131 |
+
st.error(f"Error in vision sentiment prediction: {str(e)}")
|
| 132 |
+
st.error(
|
| 133 |
+
"If this is a shape mismatch, check that the model's output size matches the configured sentiment mapping."
|
| 134 |
+
)
|
| 135 |
+
return "Error occurred", 0.0
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def get_vision_model_info() -> dict:
|
| 139 |
+
"""Get information about the vision sentiment model."""
|
| 140 |
+
return {
|
| 141 |
+
"model_name": VISION_MODEL_CONFIG["model_name"],
|
| 142 |
+
"description": "Fine-tuned ResNet-50 for facial expression sentiment analysis",
|
| 143 |
+
"capabilities": [
|
| 144 |
+
"Facial expression recognition",
|
| 145 |
+
"Automatic face detection and cropping",
|
| 146 |
+
"FER2013 dataset format compatibility",
|
| 147 |
+
"Real-time image analysis",
|
| 148 |
+
],
|
| 149 |
+
"input_format": "Images (PNG, JPG, JPEG, BMP, TIFF)",
|
| 150 |
+
"output_format": "Sentiment label + confidence score",
|
| 151 |
+
"preprocessing": {
|
| 152 |
+
"face_detection": "OpenCV Haar Cascade",
|
| 153 |
+
"image_size": f"{VISION_MODEL_CONFIG['input_size']}x{VISION_MODEL_CONFIG['input_size']}",
|
| 154 |
+
"color_format": "Grayscale → 3-channel RGB",
|
| 155 |
+
"normalization": "ImageNet standard",
|
| 156 |
+
},
|
| 157 |
+
}
|
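Note: get_vision_transforms is imported from src/utils/preprocessing.py, whose body is not shown in this diff. Judging from the IMAGE_TRANSFORMS values in src/config/settings.py, it presumably builds a torchvision pipeline along these lines (a sketch under that assumption, not the actual implementation):

from torchvision import transforms

def vision_transforms_sketch():
    # Resize/center-crop to 224x224 and normalize with ImageNet statistics,
    # mirroring IMAGE_TRANSFORMS in src/config/settings.py.
    return transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])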
src/ui/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
| 1 |
+
"""
|
| 2 |
+
UI package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .components import *
|
| 6 |
+
from .pages import *
|
| 7 |
+
from .styles import *
|
src/ui/styles.py
ADDED
|
@@ -0,0 +1,97 @@
|
| 1 |
+
"""
|
| 2 |
+
UI styles and CSS for the Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from ..config.settings import CUSTOM_CSS, UI_COLORS
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_custom_css() -> str:
|
| 9 |
+
"""Get the custom CSS styles for the application."""
|
| 10 |
+
return CUSTOM_CSS
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_ui_colors() -> dict:
|
| 14 |
+
"""Get the UI color scheme."""
|
| 15 |
+
return UI_COLORS
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_sentiment_color_style(sentiment: str) -> str:
|
| 19 |
+
"""Get color style for different sentiment types."""
|
| 20 |
+
colors = {
|
| 21 |
+
"Positive": "color: #28a745;",
|
| 22 |
+
"Negative": "color: #dc3545;",
|
| 23 |
+
"Neutral": "color: #ffc107;",
|
| 24 |
+
"Angry": "color: #dc3545;",
|
| 25 |
+
"Sad": "color: #17a2b8;",
|
| 26 |
+
"Happy": "color: #28a745;",
|
| 27 |
+
"Fear": "color: #6f42c1;",
|
| 28 |
+
"Disgust": "color: #fd7e14;",
|
| 29 |
+
"Surprise": "color: #ffc107;",
|
| 30 |
+
}
|
| 31 |
+
return colors.get(sentiment, "color: #6c757d;")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_metric_style(metric_type: str = "default") -> str:
|
| 35 |
+
"""Get styling for different metric types."""
|
| 36 |
+
styles = {
|
| 37 |
+
"default": "background-color: #f8f9fa; padding: 1rem; border-radius: 8px;",
|
| 38 |
+
"success": "background-color: #d4edda; padding: 1rem; border-radius: 8px; border: 1px solid #c3e6cb;",
|
| 39 |
+
"warning": "background-color: #fff3cd; padding: 1rem; border-radius: 8px; border: 1px solid #ffeaa7;",
|
| 40 |
+
"error": "background-color: #f8d7da; padding: 1rem; border-radius: 8px; border: 1px solid #f5c6cb;",
|
| 41 |
+
"info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb;",
|
| 42 |
+
}
|
| 43 |
+
return styles.get(metric_type, styles["default"])
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_card_style(card_type: str = "default") -> str:
|
| 47 |
+
"""Get styling for different card types."""
|
| 48 |
+
styles = {
|
| 49 |
+
"default": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
|
| 50 |
+
"model": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
|
| 51 |
+
"result": "background-color: #e8f4fd; padding: 1rem; border-radius: 8px; border: 1px solid #1f77b4; margin: 1rem 0;",
|
| 52 |
+
"upload": "background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; border: 2px dashed #dee2e6; text-align: center; margin: 1rem 0;",
|
| 53 |
+
"info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb; margin: 1rem 0;",
|
| 54 |
+
}
|
| 55 |
+
return styles.get(card_type, styles["default"])
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_button_style(button_type: str = "primary") -> str:
|
| 59 |
+
"""Get styling for different button types."""
|
| 60 |
+
styles = {
|
| 61 |
+
"primary": "background-color: #1f77b4; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 62 |
+
"secondary": "background-color: #6c757d; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 63 |
+
"success": "background-color: #28a745; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 64 |
+
"warning": "background-color: #ffc107; color: black; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 65 |
+
"danger": "background-color: #dc3545; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
|
| 66 |
+
}
|
| 67 |
+
return styles.get(button_type, styles["primary"])
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_sidebar_style() -> str:
|
| 71 |
+
"""Get styling for the sidebar."""
|
| 72 |
+
return """
|
| 73 |
+
<style>
|
| 74 |
+
.css-1d391kg {
|
| 75 |
+
background-color: #f8f9fa;
|
| 76 |
+
}
|
| 77 |
+
.css-1d391kg .sidebar-content {
|
| 78 |
+
padding: 1rem;
|
| 79 |
+
}
|
| 80 |
+
</style>
|
| 81 |
+
"""
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def get_header_style() -> str:
|
| 85 |
+
"""Get styling for the main header."""
|
| 86 |
+
return """
|
| 87 |
+
<style>
|
| 88 |
+
.main-header {
|
| 89 |
+
font-size: 2.5rem;
|
| 90 |
+
font-weight: bold;
|
| 91 |
+
color: #1f77b4;
|
| 92 |
+
text-align: center;
|
| 93 |
+
margin-bottom: 2rem;
|
| 94 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
|
| 95 |
+
}
|
| 96 |
+
</style>
|
| 97 |
+
"""
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
| 1 |
+
"""
|
| 2 |
+
Utility functions package for Sentiment Fused application.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .preprocessing import *
|
| 6 |
+
from .file_handling import *
|
| 7 |
+
from .sentiment_mapping import *
|
src/utils/file_handling.py
ADDED
|
@@ -0,0 +1,189 @@
|
| 1 |
+
"""
|
| 2 |
+
File handling utilities for different input types.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Optional, Union, Tuple
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from ..config.settings import (
|
| 12 |
+
SUPPORTED_IMAGE_FORMATS,
|
| 13 |
+
SUPPORTED_AUDIO_FORMATS,
|
| 14 |
+
SUPPORTED_VIDEO_FORMATS,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def validate_file_format(filename: str, supported_formats: list) -> bool:
|
| 21 |
+
"""
|
| 22 |
+
Validate if a file has a supported format.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
filename: Name of the file to validate
|
| 26 |
+
supported_formats: List of supported file extensions
|
| 27 |
+
|
| 28 |
+
Returns:
|
| 29 |
+
True if file format is supported, False otherwise
|
| 30 |
+
"""
|
| 31 |
+
if not filename:
|
| 32 |
+
return False
|
| 33 |
+
|
| 34 |
+
file_extension = Path(filename).suffix.lower().lstrip(".")
|
| 35 |
+
return file_extension in supported_formats
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def validate_image_file(filename: str) -> bool:
|
| 39 |
+
"""Validate if a file is a supported image format."""
|
| 40 |
+
return validate_file_format(filename, SUPPORTED_IMAGE_FORMATS)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def validate_audio_file(filename: str) -> bool:
|
| 44 |
+
"""Validate if a file is a supported audio format."""
|
| 45 |
+
return validate_file_format(filename, SUPPORTED_AUDIO_FORMATS)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def validate_video_file(filename: str) -> bool:
|
| 49 |
+
"""Validate if a file is a supported video format."""
|
| 50 |
+
return validate_file_format(filename, SUPPORTED_VIDEO_FORMATS)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def get_file_info(file_object) -> dict:
|
| 54 |
+
"""
|
| 55 |
+
Extract file information from a file object.
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
file_object: File object (e.g., StreamlitUploadedFile)
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
Dictionary containing file information
|
| 62 |
+
"""
|
| 63 |
+
try:
|
| 64 |
+
if hasattr(file_object, "getvalue"):
|
| 65 |
+
file_size = len(file_object.getvalue())
|
| 66 |
+
file_name = getattr(file_object, "name", "Unknown")
|
| 67 |
+
else:
|
| 68 |
+
file_size = len(file_object)
|
| 69 |
+
file_name = "Unknown"
|
| 70 |
+
|
| 71 |
+
file_extension = (
|
| 72 |
+
Path(file_name).suffix.lower().lstrip(".")
|
| 73 |
+
if file_name != "Unknown"
|
| 74 |
+
else "Unknown"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
return {
|
| 78 |
+
"name": file_name,
|
| 79 |
+
"size_bytes": file_size,
|
| 80 |
+
"size_kb": file_size / 1024,
|
| 81 |
+
"size_mb": file_size / (1024 * 1024),
|
| 82 |
+
"extension": file_extension,
|
| 83 |
+
"is_valid_image": (
|
| 84 |
+
validate_image_file(file_name) if file_extension != "Unknown" else False
|
| 85 |
+
),
|
| 86 |
+
"is_valid_audio": (
|
| 87 |
+
validate_audio_file(file_name) if file_extension != "Unknown" else False
|
| 88 |
+
),
|
| 89 |
+
"is_valid_video": (
|
| 90 |
+
validate_video_file(file_name) if file_extension != "Unknown" else False
|
| 91 |
+
),
|
| 92 |
+
}
|
| 93 |
+
except Exception as e:
|
| 94 |
+
logger.error(f"Error getting file info: {str(e)}")
|
| 95 |
+
return {
|
| 96 |
+
"name": "Unknown",
|
| 97 |
+
"size_bytes": 0,
|
| 98 |
+
"size_kb": 0,
|
| 99 |
+
"size_mb": 0,
|
| 100 |
+
"extension": "Unknown",
|
| 101 |
+
"is_valid_image": False,
|
| 102 |
+
"is_valid_audio": False,
|
| 103 |
+
"is_valid_video": False,
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def create_temp_file(
|
| 108 |
+
suffix: str = "", prefix: str = "temp_"
|
| 109 |
+
) -> Tuple[str, tempfile.NamedTemporaryFile]:
|
| 110 |
+
"""
|
| 111 |
+
Create a temporary file with proper cleanup handling.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
suffix: File extension suffix
|
| 115 |
+
prefix: File name prefix
|
| 116 |
+
|
| 117 |
+
Returns:
|
| 118 |
+
Tuple of (file_path, temp_file_object)
|
| 119 |
+
"""
|
| 120 |
+
temp_file = tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, delete=False)
|
| 121 |
+
return temp_file.name, temp_file
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def cleanup_temp_file(file_path: str) -> bool:
|
| 125 |
+
"""
|
| 126 |
+
Safely cleanup a temporary file.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
file_path: Path to the temporary file
|
| 130 |
+
|
| 131 |
+
Returns:
|
| 132 |
+
True if cleanup was successful, False otherwise
|
| 133 |
+
"""
|
| 134 |
+
try:
|
| 135 |
+
if os.path.exists(file_path):
|
| 136 |
+
os.unlink(file_path)
|
| 137 |
+
return True
|
| 138 |
+
return True
|
| 139 |
+
except (OSError, PermissionError) as e:
|
| 140 |
+
logger.warning(f"Could not delete temporary file {file_path}: {e}")
|
| 141 |
+
return False
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def format_file_size(size_bytes: int) -> str:
|
| 145 |
+
"""
|
| 146 |
+
Format file size in human-readable format.
|
| 147 |
+
|
| 148 |
+
Args:
|
| 149 |
+
size_bytes: File size in bytes
|
| 150 |
+
|
| 151 |
+
Returns:
|
| 152 |
+
Formatted file size string
|
| 153 |
+
"""
|
| 154 |
+
if size_bytes < 1024:
|
| 155 |
+
return f"{size_bytes} B"
|
| 156 |
+
elif size_bytes < 1024 * 1024:
|
| 157 |
+
return f"{size_bytes / 1024:.1f} KB"
|
| 158 |
+
elif size_bytes < 1024 * 1024 * 1024:
|
| 159 |
+
return f"{size_bytes / (1024 * 1024):.1f} MB"
|
| 160 |
+
else:
|
| 161 |
+
return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def safe_file_operation(operation_func, *args, **kwargs):
|
| 165 |
+
"""
|
| 166 |
+
Safely execute a file operation with proper error handling.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
operation_func: Function to execute
|
| 170 |
+
*args: Positional arguments for the function
|
| 171 |
+
**kwargs: Keyword arguments for the function
|
| 172 |
+
|
| 173 |
+
Returns:
|
| 174 |
+
Result of the operation or None if it fails
|
| 175 |
+
"""
|
| 176 |
+
try:
|
| 177 |
+
return operation_func(*args, **kwargs)
|
| 178 |
+
except FileNotFoundError as e:
|
| 179 |
+
logger.error(f"File not found: {e}")
|
| 180 |
+
return None
|
| 181 |
+
except PermissionError as e:
|
| 182 |
+
logger.error(f"Permission denied: {e}")
|
| 183 |
+
return None
|
| 184 |
+
except OSError as e:
|
| 185 |
+
logger.error(f"OS error: {e}")
|
| 186 |
+
return None
|
| 187 |
+
except Exception as e:
|
| 188 |
+
logger.error(f"Unexpected error in file operation: {e}")
|
| 189 |
+
return None
|
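Example usage of the validators and the size formatter above (illustrative only):

from src.utils.file_handling import validate_image_file, validate_video_file, format_file_size

print(validate_image_file("selfie.JPG"))  # True  - the extension check is case-insensitive
print(validate_video_file("clip.webm"))   # False - webm is not in SUPPORTED_VIDEO_FORMATS
print(format_file_size(3_500_000))        # '3.3 MB'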
src/utils/preprocessing.py
ADDED
|
@@ -0,0 +1,467 @@
|
"""
Preprocessing utilities for different input modalities.
"""

import os
import tempfile
import logging
from typing import List, Optional, Tuple, Union

try:
    from PIL import Image
    import numpy as np

    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False
    Image = None
    np = None

from ..config.settings import (
    IMAGE_TRANSFORMS,
    AUDIO_MODEL_CONFIG,
    VISION_MODEL_CONFIG,
    SUPPORTED_IMAGE_FORMATS,
    SUPPORTED_AUDIO_FORMATS,
)

# Add Any to typing imports
from typing import List, Optional, Tuple, Union, Any

# Add torch import for audio preprocessing
try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    torch = None

logger = logging.getLogger(__name__)


def detect_and_preprocess_face(
    image: Union[Image.Image, np.ndarray, Any], crop_tightness: float = 0.05
) -> Optional[Image.Image]:
    """
    Detect face in image, crop to face region, convert to grayscale, and resize.

    Args:
        image: Input image (PIL Image or numpy array)
        crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)

    Returns:
        Preprocessed PIL Image or None if preprocessing fails
    """
    if not PIL_AVAILABLE:
        logger.error("PIL (Pillow) not available. Cannot process images.")
        return None

    try:
        import cv2

        # Convert PIL image to OpenCV format
        if isinstance(image, Image.Image):
            img_array = np.array(image)
            # Convert RGB to BGR for OpenCV
            if len(img_array.shape) == 3:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        else:
            img_array = image

        # Load face detection cascade
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )

        # Convert to grayscale for face detection
        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)

        # Detect faces
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )

        if len(faces) == 0:
            logger.warning("No face detected in the image. Using center crop instead.")
            return _fallback_preprocessing(image)

        # Get the largest face (assuming it's the main subject)
        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])

        # Add padding around the face based on user preference
        padding_x = int(w * crop_tightness)
        padding_y = int(h * crop_tightness)

        # Ensure we don't go out of bounds
        x1 = max(0, x - padding_x)
        y1 = max(0, y - padding_y)
        x2 = min(img_array.shape[1], x + w + padding_x)
        y2 = min(img_array.shape[0], y + h + padding_y)

        # Crop to face region
        face_crop = img_array[y1:y2, x1:x2]

        # Convert BGR to RGB first
        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

        # Convert to grayscale
        face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)

        # Resize to target size
        target_size = IMAGE_TRANSFORMS["resize"]
        face_resized = cv2.resize(
            face_gray, (target_size, target_size), interpolation=cv2.INTER_AREA
        )

        # Convert grayscale to 3-channel RGB (replicate grayscale values)
        face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)

        # Convert back to PIL Image
        face_pil = Image.fromarray(face_rgb_3channel)
        return face_pil

    except ImportError:
        logger.error(
            "OpenCV not installed. Please install it with: pip install opencv-python"
        )
        return _fallback_preprocessing(image)
    except Exception as e:
        logger.error(f"Error in face detection: {str(e)}")
        return _fallback_preprocessing(image)


def _fallback_preprocessing(
    image: Union[Image.Image, np.ndarray, Any],
) -> Optional[Image.Image]:
    """Fallback preprocessing when face detection fails."""
    try:
        if isinstance(image, Image.Image):
            rgb_pil = image.convert("RGB")
            target_size = IMAGE_TRANSFORMS["resize"]
            resized = rgb_pil.resize(
                (target_size, target_size), Image.Resampling.LANCZOS
            )
            # Convert to grayscale and then to 3-channel RGB
            gray_pil = resized.convert("L")
            gray_rgb_pil = gray_pil.convert("RGB")
            return gray_rgb_pil
        return None
    except Exception as e:
        logger.error(f"Fallback preprocessing failed: {str(e)}")
        return None


def get_vision_transforms():
    """Get the image transforms used during training."""
    from torchvision import transforms

    return transforms.Compose(
        [
            transforms.Resize(IMAGE_TRANSFORMS["resize"]),
            transforms.CenterCrop(IMAGE_TRANSFORMS["center_crop"]),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=IMAGE_TRANSFORMS["normalize_mean"],
                std=IMAGE_TRANSFORMS["normalize_std"],
            ),
        ]
    )


def preprocess_audio_for_model(audio_bytes: bytes) -> Optional[torch.Tensor]:
    """
    Preprocess audio bytes for wav2vec2 model input using AutoFeatureExtractor.

    Args:
        audio_bytes: Raw audio bytes

    Returns:
        Preprocessed audio tensor ready for wav2vec2 model
    """
    if not TORCH_AVAILABLE:
        logger.error("PyTorch not available. Cannot process audio.")
        return None

    try:
        from transformers import AutoFeatureExtractor
        import librosa

        # Save audio bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file_path = tmp_file.name

        try:
            # Load and resample audio to target sampling rate
            audio, sr = librosa.load(
                tmp_file_path, sr=AUDIO_MODEL_CONFIG["target_sampling_rate"]
            )

            # Use AutoFeatureExtractor (same as training)
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                AUDIO_MODEL_CONFIG["model_name"]
            )

            # Calculate max length in samples (5 seconds * 16kHz = 80000 samples)
            max_length = int(
                AUDIO_MODEL_CONFIG["max_duration"]
                * AUDIO_MODEL_CONFIG["target_sampling_rate"]
            )

            logger.info(f"Audio length: {len(audio)} samples, max_length: {max_length}")

            inputs = feature_extractor(
                audio,
                sampling_rate=AUDIO_MODEL_CONFIG["target_sampling_rate"],
                max_length=max_length,
                truncation=True,
                padding="max_length",
                return_tensors="pt",
            )

            # Return tensor with correct shape for wav2vec2
            # The model expects: [batch_size, sequence_length]
            tensor = inputs.input_values

            # Log the tensor shape for debugging
            logger.info(f"Audio preprocessing output shape: {tensor.shape}")

            return tensor

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError as e:
        logger.error(f"Required library not installed: {str(e)}")
        raise ImportError("Please install: pip install transformers librosa torch")


def extract_frames_from_video(video_file, max_frames: int = 5) -> List[Any]:
    """
    Extract frames from video file for vision sentiment analysis.

    Args:
        video_file: Video file object
        max_frames: Maximum number of frames to extract

    Returns:
        List of PIL Image objects
    """
    try:
        import cv2

        # Save video bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            if hasattr(video_file, "getvalue"):
                tmp_file.write(video_file.getvalue())
            else:
                tmp_file.write(video_file)
            tmp_file_path = tmp_file.name

        try:
            # Open video with OpenCV
            cap = cv2.VideoCapture(tmp_file_path)

            if not cap.isOpened():
                logger.error("Could not open video file")
                return []

            frames = []
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            duration = total_frames / fps if fps > 0 else 0

            logger.info(
                f"Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
            )

            # Extract frames at strategic intervals
            if total_frames > 0:
                # Select frames: start, 25%, 50%, 75%, end
                frame_indices = [
                    0,
                    int(total_frames * 0.25),
                    int(total_frames * 0.5),
                    int(total_frames * 0.75),
                    total_frames - 1,
                ]
                frame_indices = list(set(frame_indices))  # Remove duplicates
                frame_indices.sort()

                for frame_idx in frame_indices:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                    ret, frame = cap.read()
                    if ret:
                        # Convert BGR to RGB
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # Convert to PIL Image
                        pil_image = Image.fromarray(frame_rgb)
                        frames.append(pil_image)

            cap.release()
            return frames

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "OpenCV not installed. Please install it with: pip install opencv-python"
        )
        return []
    except Exception as e:
        logger.error(f"Error extracting frames: {str(e)}")
        return []


def extract_audio_from_video(video_file) -> Optional[bytes]:
    """
    Extract audio from video file for audio sentiment analysis.

    Args:
        video_file: Video file object

    Returns:
        Audio bytes in WAV format or None if extraction fails
    """
    try:
        import tempfile

        try:
            from moviepy import VideoFileClip
        except ImportError as e:
            logger.error(f"MoviePy import failed: {e}")
            return None

        # Save video bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            if hasattr(video_file, "getvalue"):
                tmp_file.write(video_file.getvalue())
            else:
                tmp_file.write(video_file)
            tmp_file_path = tmp_file.name

        try:
            # Extract audio using moviepy
            video = VideoFileClip(tmp_file_path)
            audio = video.audio

            if audio is None:
                logger.warning("No audio track found in video")
                return None

            # Save audio to temporary WAV file
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
                audio_path = audio_file.name

            # Export audio as WAV
            audio.write_audiofile(audio_path, logger=None)

            # Read the audio file and return bytes
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()

            # Clean up temporary audio file
            try:
                os.unlink(audio_path)
            except (OSError, PermissionError):
                pass

            return audio_bytes

        finally:
            # Clean up temporary video file
            try:
                # Close video and audio objects first
                if "video" in locals():
                    video.close()
                if "audio" in locals() and audio:
                    audio.close()

                # Wait a bit before trying to delete
                import time

                time.sleep(0.1)

                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "MoviePy not installed. Please install it with: pip install moviepy"
        )
        return None
    except Exception as e:
        logger.error(f"Error extracting audio: {str(e)}")
        return None


def transcribe_audio(audio_bytes: bytes) -> str:
    """
    Transcribe audio to text for text sentiment analysis.

    Args:
        audio_bytes: Audio bytes in WAV format

    Returns:
        Transcribed text string
    """
    if audio_bytes is None:
        return ""

    try:
        import tempfile
        import speech_recognition as sr

        # Save audio bytes to temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_file.write(audio_bytes)
            tmp_file_path = tmp_file.name

        try:
            # Initialize recognizer
            recognizer = sr.Recognizer()

            # Load audio file
            with sr.AudioFile(tmp_file_path) as source:
                # Read audio data
                audio_data = recognizer.record(source)

            # Transcribe using Google Speech Recognition
            try:
                text = recognizer.recognize_google(audio_data)
                return text
            except sr.UnknownValueError:
                logger.warning("Speech could not be understood")
                return ""
            except sr.RequestError as e:
                logger.error(
                    f"Could not request results from speech recognition service: {e}"
                )
                return ""

        finally:
            # Clean up temporary file
            try:
                os.unlink(tmp_file_path)
            except (OSError, PermissionError):
                pass

    except ImportError:
        logger.error(
            "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
        )
        return ""
    except Exception as e:
        logger.error(f"Error transcribing audio: {str(e)}")
        return ""
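As a rough sketch (not part of the commit) of how these preprocessing helpers chain together for a video upload; "clip.mp4" is a hypothetical local file used only for illustration.

from src.utils.preprocessing import (
    extract_frames_from_video,
    extract_audio_from_video,
    detect_and_preprocess_face,
    transcribe_audio,
)

with open("clip.mp4", "rb") as f:
    video_bytes = f.read()

frames = extract_frames_from_video(video_bytes)             # up to 5 PIL frames sampled across the clip
faces = [detect_and_preprocess_face(fr) for fr in frames]   # cropped, grayscale, resized face images (or fallbacks)
audio = extract_audio_from_video(video_bytes)               # WAV bytes, or None if there is no audio track
text = transcribe_audio(audio) if audio else ""             # transcript to feed the text sentiment model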
src/utils/sentiment_mapping.py
ADDED
@@ -0,0 +1,71 @@
"""
Sentiment mapping utilities for different model outputs.
"""

from typing import Dict
from ..config.settings import SENTIMENT_MAPPINGS


def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
    """
    Get the sentiment mapping based on number of classes.

    Args:
        num_classes: Number of output classes from the model

    Returns:
        Dictionary mapping class indices to sentiment labels
    """
    return SENTIMENT_MAPPINGS.get(
        num_classes, {i: f"Class_{i}" for i in range(num_classes)}
    )


def get_sentiment_colors() -> Dict[str, str]:
    """
    Get color-coded sentiment display mapping.

    Returns:
        Dictionary mapping sentiment labels to emoji indicators
    """
    return {
        "Positive": "🟢",
        "Negative": "🔴",
        "Neutral": "🟡",
        "Angry": "🔴",
        "Sad": "🔵",
        "Happy": "🟢",
        "Fear": "🟣",
        "Disgust": "🟠",
        "Surprise": "🟡",
    }


def format_sentiment_result(
    sentiment: str, confidence: float, input_info: str = "", model_name: str = ""
) -> str:
    """
    Format sentiment analysis result for display.

    Args:
        sentiment: Predicted sentiment label
        confidence: Confidence score
        input_info: Information about the input
        model_name: Name of the model used

    Returns:
        Formatted result string
    """
    colors = get_sentiment_colors()
    emoji = colors.get(sentiment, "❓")

    result = f"{emoji} Sentiment: {sentiment}\n"
    result += f"Confidence: {confidence:.2f}\n"

    if input_info:
        result += f"Input: {input_info}\n"

    if model_name:
        result += f"Model: {model_name}\n"

    return result
simple_model_manager.py → src/utils/simple_model_manager.py
RENAMED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 class SimpleModelManager:
     """Simple model manager that downloads models from Google Drive using gdown"""

-    def __init__(self, model_dir: str = "
+    def __init__(self, model_dir: str = "model_weights", cache_models: bool = True):
         """
         Initialize simple model manager
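A minimal sketch (not part of the commit) of constructing the manager with the new defaults:

from src.utils.simple_model_manager import SimpleModelManager

# With the new defaults, downloaded weights land in ./model_weights and are cached for reuse.
manager = SimpleModelManager()  # same as SimpleModelManager(model_dir="model_weights", cache_models=True)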
uv.lock
CHANGED
|
@@ -475,6 +475,15 @@ wheels = [
|
|
| 475 |
{ url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
|
| 476 |
]
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
[[package]]
|
| 479 |
name = "gdown"
|
| 480 |
version = "5.2.0"
|
|
@@ -602,6 +611,18 @@ wheels = [
|
|
| 602 |
{ url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
|
| 603 |
]
|
| 604 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
[[package]]
|
| 606 |
name = "jinja2"
|
| 607 |
version = "3.1.6"
|
|
@@ -728,6 +749,15 @@ wheels = [
|
|
| 728 |
{ url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
|
| 729 |
]
|
| 730 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
[[package]]
|
| 732 |
name = "narwhals"
|
| 733 |
version = "2.2.0"
|
|
@@ -737,6 +767,44 @@ wheels = [
|
|
| 737 |
{ url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
|
| 738 |
]
|
| 739 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 740 |
[[package]]
|
| 741 |
name = "numpy"
|
| 742 |
version = "2.0.2"
|
|
@@ -860,6 +928,132 @@ wheels = [
|
|
| 860 |
{ url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
|
| 861 |
]
|
| 862 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
[[package]]
|
| 864 |
name = "opencv-python-headless"
|
| 865 |
version = "4.12.0.88"
|
|
@@ -1446,9 +1640,11 @@ dependencies = [
|
|
| 1446 |
{ name = "gdown" },
|
| 1447 |
{ name = "moviepy" },
|
| 1448 |
{ name = "opencv-python-headless" },
|
|
|
|
| 1449 |
{ name = "python-dotenv" },
|
| 1450 |
{ name = "speechrecognition" },
|
| 1451 |
{ name = "streamlit-webrtc" },
|
|
|
|
| 1452 |
]
|
| 1453 |
|
| 1454 |
[package.metadata]
|
|
@@ -1456,9 +1652,20 @@ requires-dist = [
|
|
| 1456 |
{ name = "gdown", specifier = ">=5.2.0" },
|
| 1457 |
{ name = "moviepy", specifier = ">=2.2.1" },
|
| 1458 |
{ name = "opencv-python-headless", specifier = ">=4.8.0" },
|
|
|
|
| 1459 |
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
| 1460 |
{ name = "speechrecognition", specifier = ">=3.10.0" },
|
| 1461 |
{ name = "streamlit-webrtc", specifier = ">=0.47.0" },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1462 |
]
|
| 1463 |
|
| 1464 |
[[package]]
|
|
@@ -1570,6 +1777,18 @@ wheels = [
|
|
| 1570 |
{ url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
|
| 1571 |
]
|
| 1572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1573 |
[[package]]
|
| 1574 |
name = "tenacity"
|
| 1575 |
version = "9.1.2"
|
|
@@ -1588,6 +1807,63 @@ wheels = [
|
|
| 1588 |
{ url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
|
| 1589 |
]
|
| 1590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1591 |
[[package]]
|
| 1592 |
name = "tornado"
|
| 1593 |
version = "6.5.2"
|
|
@@ -1619,6 +1895,23 @@ wheels = [
|
|
| 1619 |
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
|
| 1620 |
]
|
| 1621 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1622 |
[[package]]
|
| 1623 |
name = "typing-extensions"
|
| 1624 |
version = "4.14.1"
|
|
@@ -1663,3 +1956,12 @@ wheels = [
|
|
| 1663 |
{ url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
|
| 1664 |
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
|
| 1665 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
{ url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
|
| 476 |
]
|
| 477 |
|
| 478 |
+
[[package]]
|
| 479 |
+
name = "fsspec"
|
| 480 |
+
version = "2025.7.0"
|
| 481 |
+
source = { registry = "https://pypi.org/simple" }
|
| 482 |
+
sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432, upload_time = "2025-07-15T16:05:21.19Z" }
|
| 483 |
+
wheels = [
|
| 484 |
+
{ url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload_time = "2025-07-15T16:05:19.529Z" },
|
| 485 |
+
]
|
| 486 |
+
|
| 487 |
[[package]]
|
| 488 |
name = "gdown"
|
| 489 |
version = "5.2.0"
|
|
|
|
| 611 |
{ url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
|
| 612 |
]
|
| 613 |
|
| 614 |
+
[[package]]
|
| 615 |
+
name = "importlib-metadata"
|
| 616 |
+
version = "8.7.0"
|
| 617 |
+
source = { registry = "https://pypi.org/simple" }
|
| 618 |
+
dependencies = [
|
| 619 |
+
{ name = "zipp", marker = "python_full_version < '3.10'" },
|
| 620 |
+
]
|
| 621 |
+
sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload_time = "2025-04-27T15:29:01.736Z" }
|
| 622 |
+
wheels = [
|
| 623 |
+
{ url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload_time = "2025-04-27T15:29:00.214Z" },
|
| 624 |
+
]
|
| 625 |
+
|
| 626 |
[[package]]
|
| 627 |
name = "jinja2"
|
| 628 |
version = "3.1.6"
|
|
|
|
| 749 |
{ url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
|
| 750 |
]
|
| 751 |
|
| 752 |
+
[[package]]
|
| 753 |
+
name = "mpmath"
|
| 754 |
+
version = "1.3.0"
|
| 755 |
+
source = { registry = "https://pypi.org/simple" }
|
| 756 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload_time = "2023-03-07T16:47:11.061Z" }
|
| 757 |
+
wheels = [
|
| 758 |
+
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload_time = "2023-03-07T16:47:09.197Z" },
|
| 759 |
+
]
|
| 760 |
+
|
| 761 |
[[package]]
|
| 762 |
name = "narwhals"
|
| 763 |
version = "2.2.0"
|
|
|
|
| 767 |
{ url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
|
| 768 |
]
|
| 769 |
|
| 770 |
+
[[package]]
|
| 771 |
+
name = "networkx"
|
| 772 |
+
version = "3.2.1"
|
| 773 |
+
source = { registry = "https://pypi.org/simple" }
|
| 774 |
+
resolution-markers = [
|
| 775 |
+
"python_full_version < '3.10'",
|
| 776 |
+
]
|
| 777 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c4/80/a84676339aaae2f1cfdf9f418701dd634aef9cc76f708ef55c36ff39c3ca/networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", size = 2073928, upload_time = "2023-10-28T08:41:39.364Z" }
|
| 778 |
+
wheels = [
|
| 779 |
+
{ url = "https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2", size = 1647772, upload_time = "2023-10-28T08:41:36.945Z" },
|
| 780 |
+
]
|
| 781 |
+
|
| 782 |
+
[[package]]
|
| 783 |
+
name = "networkx"
|
| 784 |
+
version = "3.4.2"
|
| 785 |
+
source = { registry = "https://pypi.org/simple" }
|
| 786 |
+
resolution-markers = [
|
| 787 |
+
"python_full_version == '3.10.*'",
|
| 788 |
+
]
|
| 789 |
+
sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload_time = "2024-10-21T12:39:38.695Z" }
|
| 790 |
+
wheels = [
|
| 791 |
+
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload_time = "2024-10-21T12:39:36.247Z" },
|
| 792 |
+
]
|
| 793 |
+
|
| 794 |
+
[[package]]
|
| 795 |
+
name = "networkx"
|
| 796 |
+
version = "3.5"
|
| 797 |
+
source = { registry = "https://pypi.org/simple" }
|
| 798 |
+
resolution-markers = [
|
| 799 |
+
"python_full_version >= '3.13'",
|
| 800 |
+
"python_full_version == '3.12.*'",
|
| 801 |
+
"python_full_version == '3.11.*'",
|
| 802 |
+
]
|
| 803 |
+
sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload_time = "2025-05-29T11:35:07.804Z" }
|
| 804 |
+
wheels = [
|
| 805 |
+
{ url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload_time = "2025-05-29T11:35:04.961Z" },
|
| 806 |
+
]
|
| 807 |
+
|
| 808 |
[[package]]
|
| 809 |
name = "numpy"
|
| 810 |
version = "2.0.2"
|
|
|
|
| 928 |
{ url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
|
| 929 |
]
|
| 930 |
|
| 931 |
+
[[package]]
|
| 932 |
+
name = "nvidia-cublas-cu12"
|
| 933 |
+
version = "12.8.4.1"
|
| 934 |
+
source = { registry = "https://pypi.org/simple" }
|
| 935 |
+
wheels = [
|
| 936 |
+
{ url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload_time = "2025-03-07T01:44:31.254Z" },
|
| 937 |
+
]
|
| 938 |
+
|
| 939 |
+
[[package]]
|
| 940 |
+
name = "nvidia-cuda-cupti-cu12"
|
| 941 |
+
version = "12.8.90"
|
| 942 |
+
source = { registry = "https://pypi.org/simple" }
|
| 943 |
+
wheels = [
|
| 944 |
+
{ url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload_time = "2025-03-07T01:40:21.213Z" },
|
| 945 |
+
]
|
| 946 |
+
|
| 947 |
+
[[package]]
|
| 948 |
+
name = "nvidia-cuda-nvrtc-cu12"
|
| 949 |
+
version = "12.8.93"
|
| 950 |
+
source = { registry = "https://pypi.org/simple" }
|
| 951 |
+
wheels = [
|
| 952 |
+
{ url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload_time = "2025-03-07T01:42:13.562Z" },
|
| 953 |
+
]
|
| 954 |
+
|
| 955 |
+
[[package]]
|
| 956 |
+
name = "nvidia-cuda-runtime-cu12"
|
| 957 |
+
version = "12.8.90"
|
| 958 |
+
source = { registry = "https://pypi.org/simple" }
|
| 959 |
+
wheels = [
|
| 960 |
+
{ url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload_time = "2025-03-07T01:40:01.615Z" },
|
| 961 |
+
]
|
| 962 |
+
|
| 963 |
+
[[package]]
|
| 964 |
+
name = "nvidia-cudnn-cu12"
|
| 965 |
+
version = "9.10.2.21"
|
| 966 |
+
source = { registry = "https://pypi.org/simple" }
|
| 967 |
+
dependencies = [
|
| 968 |
+
{ name = "nvidia-cublas-cu12" },
|
| 969 |
+
]
|
| 970 |
+
wheels = [
|
| 971 |
+
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload_time = "2025-06-06T21:54:08.597Z" },
|
| 972 |
+
]
|
| 973 |
+
|
| 974 |
+
[[package]]
|
| 975 |
+
name = "nvidia-cufft-cu12"
|
| 976 |
+
version = "11.3.3.83"
|
| 977 |
+
source = { registry = "https://pypi.org/simple" }
|
| 978 |
+
dependencies = [
|
| 979 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 980 |
+
]
|
| 981 |
+
wheels = [
|
| 982 |
+
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload_time = "2025-03-07T01:45:27.821Z" },
|
| 983 |
+
]
|
| 984 |
+
|
| 985 |
+
[[package]]
|
| 986 |
+
name = "nvidia-cufile-cu12"
|
| 987 |
+
version = "1.13.1.3"
|
| 988 |
+
source = { registry = "https://pypi.org/simple" }
|
| 989 |
+
wheels = [
|
| 990 |
+
{ url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload_time = "2025-03-07T01:45:50.723Z" },
|
| 991 |
+
]
|
| 992 |
+
|
| 993 |
+
[[package]]
|
| 994 |
+
name = "nvidia-curand-cu12"
|
| 995 |
+
version = "10.3.9.90"
|
| 996 |
+
source = { registry = "https://pypi.org/simple" }
|
| 997 |
+
wheels = [
|
| 998 |
+
{ url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload_time = "2025-03-07T01:46:23.323Z" },
|
| 999 |
+
]
|
| 1000 |
+
|
| 1001 |
+
[[package]]
|
| 1002 |
+
name = "nvidia-cusolver-cu12"
|
| 1003 |
+
version = "11.7.3.90"
|
| 1004 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1005 |
+
dependencies = [
|
| 1006 |
+
{ name = "nvidia-cublas-cu12" },
|
| 1007 |
+
{ name = "nvidia-cusparse-cu12" },
|
| 1008 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 1009 |
+
]
|
| 1010 |
+
wheels = [
|
| 1011 |
+
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload_time = "2025-03-07T01:47:16.273Z" },
|
| 1012 |
+
]
|
| 1013 |
+
|
| 1014 |
+
[[package]]
|
| 1015 |
+
name = "nvidia-cusparse-cu12"
|
| 1016 |
+
version = "12.5.8.93"
|
| 1017 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1018 |
+
dependencies = [
|
| 1019 |
+
{ name = "nvidia-nvjitlink-cu12" },
|
| 1020 |
+
]
|
| 1021 |
+
wheels = [
|
| 1022 |
+
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload_time = "2025-03-07T01:48:13.779Z" },
|
| 1023 |
+
]
|
| 1024 |
+
|
| 1025 |
+
[[package]]
|
| 1026 |
+
name = "nvidia-cusparselt-cu12"
|
| 1027 |
+
version = "0.7.1"
|
| 1028 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1029 |
+
wheels = [
|
| 1030 |
+
{ url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload_time = "2025-02-26T00:15:44.104Z" },
|
| 1031 |
+
]
|
| 1032 |
+
|
| 1033 |
+
[[package]]
|
| 1034 |
+
name = "nvidia-nccl-cu12"
|
| 1035 |
+
version = "2.27.3"
|
| 1036 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1037 |
+
wheels = [
|
| 1038 |
+
{ url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload_time = "2025-06-03T21:58:04.013Z" },
|
| 1039 |
+
]
|
| 1040 |
+
|
| 1041 |
+
[[package]]
|
| 1042 |
+
name = "nvidia-nvjitlink-cu12"
|
| 1043 |
+
version = "12.8.93"
|
| 1044 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1045 |
+
wheels = [
|
| 1046 |
+
{ url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload_time = "2025-03-07T01:49:55.661Z" },
|
| 1047 |
+
]
|
| 1048 |
+
|
| 1049 |
+
[[package]]
|
| 1050 |
+
name = "nvidia-nvtx-cu12"
|
| 1051 |
+
version = "12.8.90"
|
| 1052 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1053 |
+
wheels = [
|
| 1054 |
+
{ url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload_time = "2025-03-07T01:42:44.131Z" },
|
| 1055 |
+
]
|
| 1056 |
+
|
| 1057 |
[[package]]
|
| 1058 |
name = "opencv-python-headless"
|
| 1059 |
version = "4.12.0.88"
|
|
|
|
| 1640 |
{ name = "gdown" },
|
| 1641 |
{ name = "moviepy" },
|
| 1642 |
{ name = "opencv-python-headless" },
|
| 1643 |
+
{ name = "pillow" },
|
| 1644 |
{ name = "python-dotenv" },
|
| 1645 |
{ name = "speechrecognition" },
|
| 1646 |
{ name = "streamlit-webrtc" },
|
| 1647 |
+
{ name = "torch" },
|
| 1648 |
]
|
| 1649 |
|
| 1650 |
[package.metadata]
|
|
|
|
| 1652 |
{ name = "gdown", specifier = ">=5.2.0" },
|
| 1653 |
{ name = "moviepy", specifier = ">=2.2.1" },
|
| 1654 |
{ name = "opencv-python-headless", specifier = ">=4.8.0" },
|
| 1655 |
+
{ name = "pillow", specifier = ">=11.3.0" },
|
| 1656 |
{ name = "python-dotenv", specifier = ">=1.1.1" },
|
| 1657 |
{ name = "speechrecognition", specifier = ">=3.10.0" },
|
| 1658 |
{ name = "streamlit-webrtc", specifier = ">=0.47.0" },
|
| 1659 |
+
{ name = "torch", specifier = ">=2.8.0" },
|
| 1660 |
+
]
|
| 1661 |
+
|
| 1662 |
+
[[package]]
|
| 1663 |
+
name = "setuptools"
|
| 1664 |
+
version = "80.9.0"
|
| 1665 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1666 |
+
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload_time = "2025-05-27T00:56:51.443Z" }
|
| 1667 |
+
wheels = [
|
| 1668 |
+
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload_time = "2025-05-27T00:56:49.664Z" },
|
| 1669 |
]
|
| 1670 |
|
| 1671 |
[[package]]
|
|
|
|
| 1777 |
{ url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
|
| 1778 |
]
|
| 1779 |
|
| 1780 |
+
[[package]]
|
| 1781 |
+
name = "sympy"
|
| 1782 |
+
version = "1.14.0"
|
| 1783 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1784 |
+
dependencies = [
|
| 1785 |
+
{ name = "mpmath" },
|
| 1786 |
+
]
|
| 1787 |
+
sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload_time = "2025-04-27T18:05:01.611Z" }
|
| 1788 |
+
wheels = [
|
| 1789 |
+
{ url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload_time = "2025-04-27T18:04:59.103Z" },
|
| 1790 |
+
]
|
| 1791 |
+
|
| 1792 |
[[package]]
|
| 1793 |
name = "tenacity"
|
| 1794 |
version = "9.1.2"
|
|
|
|
| 1807 |
{ url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
|
| 1808 |
]
|
| 1809 |
|
| 1810 |
+
[[package]]
|
| 1811 |
+
name = "torch"
|
| 1812 |
+
version = "2.8.0"
|
| 1813 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1814 |
+
dependencies = [
|
| 1815 |
+
{ name = "filelock" },
|
| 1816 |
+
{ name = "fsspec" },
|
| 1817 |
+
{ name = "jinja2" },
|
| 1818 |
+
{ name = "networkx", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
| 1819 |
+
{ name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
| 1820 |
+
{ name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
| 1821 |
+
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1822 |
+
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1823 |
+
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1824 |
+
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1825 |
+
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1826 |
+
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1827 |
+
{ name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1828 |
+
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1829 |
+
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1830 |
+
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1831 |
+
{ name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1832 |
+
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1833 |
+
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1834 |
+
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1835 |
+
{ name = "setuptools", marker = "python_full_version >= '3.12'" },
|
| 1836 |
+
{ name = "sympy" },
|
| 1837 |
+
{ name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
| 1838 |
+
{ name = "typing-extensions" },
|
| 1839 |
+
]
|
| 1840 |
+
wheels = [
|
| 1841 |
+
{ url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload_time = "2025-08-06T14:53:15.852Z" },
|
| 1842 |
+
{ url = "https://files.pythonhosted.org/packages/70/1c/58da560016f81c339ae14ab16c98153d51c941544ae568da3cb5b1ceb572/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011", size = 888025420, upload_time = "2025-08-06T14:54:18.014Z" },
|
| 1843 |
+
{ url = "https://files.pythonhosted.org/packages/70/87/f69752d0dd4ba8218c390f0438130c166fa264a33b7025adb5014b92192c/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46", size = 241363614, upload_time = "2025-08-06T14:53:31.496Z" },
|
| 1844 |
+
{ url = "https://files.pythonhosted.org/packages/ef/d6/e6d4c57e61c2b2175d3aafbfb779926a2cfd7c32eeda7c543925dceec923/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760", size = 73611154, upload_time = "2025-08-06T14:53:10.919Z" },
|
| 1845 |
+
{ url = "https://files.pythonhosted.org/packages/8f/c4/3e7a3887eba14e815e614db70b3b529112d1513d9dae6f4d43e373360b7f/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710", size = 102073391, upload_time = "2025-08-06T14:53:20.937Z" },
|
| 1846 |
+
{ url = "https://files.pythonhosted.org/packages/5a/63/4fdc45a0304536e75a5e1b1bbfb1b56dd0e2743c48ee83ca729f7ce44162/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b", size = 888063640, upload_time = "2025-08-06T14:55:05.325Z" },
|
| 1847 |
+
{ url = "https://files.pythonhosted.org/packages/84/57/2f64161769610cf6b1c5ed782bd8a780e18a3c9d48931319f2887fa9d0b1/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa", size = 241366752, upload_time = "2025-08-06T14:53:38.692Z" },
|
| 1848 |
+
{ url = "https://files.pythonhosted.org/packages/a4/5e/05a5c46085d9b97e928f3f037081d3d2b87fb4b4195030fc099aaec5effc/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916", size = 73621174, upload_time = "2025-08-06T14:53:25.44Z" },
|
| 1849 |
+
{ url = "https://files.pythonhosted.org/packages/49/0c/2fd4df0d83a495bb5e54dca4474c4ec5f9c62db185421563deeb5dabf609/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705", size = 101906089, upload_time = "2025-08-06T14:53:52.631Z" },
|
| 1850 |
+
{ url = "https://files.pythonhosted.org/packages/99/a8/6acf48d48838fb8fe480597d98a0668c2beb02ee4755cc136de92a0a956f/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c", size = 887913624, upload_time = "2025-08-06T14:56:44.33Z" },
|
| 1851 |
+
{ url = "https://files.pythonhosted.org/packages/af/8a/5c87f08e3abd825c7dfecef5a0f1d9aa5df5dd0e3fd1fa2f490a8e512402/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e", size = 241326087, upload_time = "2025-08-06T14:53:46.503Z" },
| 1852 | + { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload_time = "2025-08-06T14:53:57.144Z" },
| 1853 | + { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload_time = "2025-08-06T14:54:01.526Z" },
| 1854 | + { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload_time = "2025-08-06T14:55:50.78Z" },
| 1855 | + { url = "https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload_time = "2025-08-06T14:54:45.293Z" },
| 1856 | + { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload_time = "2025-08-06T14:54:34.769Z" },
| 1857 | + { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload_time = "2025-08-06T14:54:39.047Z" },
| 1858 | + { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload_time = "2025-08-06T14:56:18.286Z" },
| 1859 | + { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload_time = "2025-08-06T14:55:22.945Z" },
| 1860 | + { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload_time = "2025-08-06T14:55:28.645Z" },
| 1861 | + { url = "https://files.pythonhosted.org/packages/5b/b0/a321f27270049baa12f5c3fb0d6ceea005634787e3af9a8d75dce8306b0a/torch-2.8.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:da6afa31c13b669d4ba49d8a2169f0db2c3ec6bec4af898aa714f401d4c38904", size = 102059214, upload_time = "2025-08-06T14:55:33.433Z" },
| 1862 | + { url = "https://files.pythonhosted.org/packages/fd/dd/1630cb51b10d3d2e97db95e5a84c32def81fc26b005bce6fc880b0e6db81/torch-2.8.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:06fcee8000e5c62a9f3e52a688b9c5abb7c6228d0e56e3452983416025c41381", size = 888024302, upload_time = "2025-08-06T14:57:28.23Z" },
| 1863 | + { url = "https://files.pythonhosted.org/packages/b9/dc/1f1f621afe15e3c496e1e8f94f8903f75f87e7d642d5a985e92210cc208d/torch-2.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5128fe752a355d9308e56af1ad28b15266fe2da5948660fad44de9e3a9e36e8c", size = 241249338, upload_time = "2025-08-06T14:57:05.669Z" },
| 1864 | + { url = "https://files.pythonhosted.org/packages/ae/95/ae26263aceb3d57b821179f827d0e321373ed49423e603dd5906ab14a730/torch-2.8.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e9f071f5b52a9f6970dc8a919694b27a91ae9dc08898b2b988abbef5eddfd1ae", size = 73610795, upload_time = "2025-08-06T14:57:11.513Z" },
| 1865 | + ]
| 1866 | +
| 1867 |   [[package]]
| 1868 |   name = "tornado"
| 1869 |   version = "6.5.2"
| ... |
| 1895 |   { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
| 1896 |   ]
| 1897 |
| 1898 | + [[package]]
| 1899 | + name = "triton"
| 1900 | + version = "3.4.0"
| 1901 | + source = { registry = "https://pypi.org/simple" }
| 1902 | + dependencies = [
| 1903 | + { name = "importlib-metadata", marker = "python_full_version < '3.10'" },
| 1904 | + { name = "setuptools" },
| 1905 | + ]
| 1906 | + wheels = [
| 1907 | + { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload_time = "2025-07-30T19:58:21.715Z" },
| 1908 | + { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload_time = "2025-07-30T19:58:29.908Z" },
| 1909 | + { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload_time = "2025-07-30T19:58:37.081Z" },
| 1910 | + { url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload_time = "2025-07-30T19:58:44.017Z" },
| 1911 | + { url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload_time = "2025-07-30T19:58:51.171Z" },
| 1912 | + { url = "https://files.pythonhosted.org/packages/12/34/1251beb5a3cb93f3950ebe68732752014646003ef6eb11eb5f1a37ca78cd/triton-3.4.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98e5c1442eaeabae2e2452ae765801bd53cd4ce873cab0d1bdd59a32ab2d9397", size = 155430799, upload_time = "2025-07-30T19:58:57.664Z" },
| 1913 | + ]
| 1914 | +
| 1915 |   [[package]]
| 1916 |   name = "typing-extensions"
| 1917 |   version = "4.14.1"
| ... |
| 1956 |   { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
| 1957 |   { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
| 1958 |   ]
| 1959 | +
| 1960 | + [[package]]
| 1961 | + name = "zipp"
| 1962 | + version = "3.23.0"
| 1963 | + source = { registry = "https://pypi.org/simple" }
| 1964 | + sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload_time = "2025-06-08T17:06:39.4Z" }
| 1965 | + wheels = [
| 1966 | + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload_time = "2025-06-08T17:06:38.034Z" },
| 1967 | + ]
|