Spaces:
Sleeping
Sleeping
| """ | |
| Sentiment AI Analysis - Core sentiment/emotion detection engine | |
| Analyzes text and detects emotions using keyword matching | |
| Focuses on the LAST SENTENCE for real-time accuracy | |
| """ | |
| import re | |
| from typing import Dict, Any | |
| from .sentiment_keyword_map import KeywordMap | |
class SentimentAnalyzer:
    """
    Keyword-based emotion analyzer aimed at real-time (as-you-type) use.

    Emotion keywords are scored in the LAST sentence/phrase of the input so
    the reported label follows the current sentiment instead of a cumulative
    one. If the last sentence contains no emotion keyword, the whole text is
    scored as a fallback.

    Word classification (emotion keywords, negations, intensifiers and
    opposite emotions) is delegated to ``KeywordMap``.
    """

    # Any run of these characters ends a sentence/phrase.
    SENTENCE_SEPARATORS = re.compile(r'[.!?;:\n]+')

    # Tokenizer: runs of word characters.
    # NOTE(review): this splits contractions ("don't" -> "don", "t"); confirm
    # that KeywordMap's negation list is written with that in mind.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    # Scoring parameters.
    LAST_SENTENCE_WEIGHT = 2.0    # segment multiplier for the last sentence
    FULL_TEXT_WEIGHT = 1.0        # segment multiplier for the full-text fallback
    INTENSIFIER_MULTIPLIER = 1.5  # base score of an intensified keyword
    MIN_POSITION_WEIGHT = 0.5     # weight of the first word of a segment
    SCORE_NORMALIZER = 3.0        # raw score that maps to a normalized 1.0

    def __init__(self, custom_keywords: Dict[str, str] = None):
        """
        Initialize with keyword mappings.

        Args:
            custom_keywords: Optional mapping forwarded unchanged to
                ``KeywordMap``.
        """
        self.keyword_map = KeywordMap(custom_keywords)
        self._word_pattern = self._WORD_PATTERN

    def _get_last_sentence(self, text: str) -> str:
        """
        Extract the last non-empty sentence/phrase from text.

        Args:
            text: Text to split on ``SENTENCE_SEPARATORS``.

        Returns:
            The last non-blank part, stripped. If every part is blank
            (e.g. the text is only separators), the stripped input itself.
        """
        stripped_parts = (part.strip() for part in self.SENTENCE_SEPARATORS.split(text))
        non_empty = [part for part in stripped_parts if part]
        return non_empty[-1] if non_empty else text.strip()

    def _analyze_segment(self, text: str, position_weight: float = 1.0) -> Dict[str, float]:
        """
        Analyze a text segment and return accumulated emotion scores.

        Each emotion keyword contributes
        ``base * word_weight * position_weight`` where ``base`` is
        ``INTENSIFIER_MULTIPLIER`` when an intensifier is pending (else 1.0)
        and ``word_weight`` ramps linearly from ``MIN_POSITION_WEIGHT`` for
        the first word to 1.0 for the last word.

        Args:
            text: Text to analyze
            position_weight: Multiplier applied to every score in the segment

        Returns:
            Dict mapping emotions to scores; empty if no keyword matched
        """
        words = self._word_pattern.findall(text.lower())
        if not words:
            return {}

        emotion_scores: Dict[str, float] = {}
        negation_active = False
        intensifier_active = False

        last_index = len(words) - 1
        ramp = 1.0 - self.MIN_POSITION_WEIGHT

        for idx, word in enumerate(words):
            if last_index == 0:
                # A lone word is also the last word, so it gets full weight
                # (previously it was scored as a "first" word at 0.5).
                word_position_weight = 1.0
            else:
                word_position_weight = self.MIN_POSITION_WEIGHT + ramp * (idx / last_index)

            # Modifiers set a pending flag and produce no score themselves.
            if self.keyword_map.is_negation(word):
                negation_active = True
                continue
            if self.keyword_map.is_intensifier(word):
                intensifier_active = True
                continue

            emotion = self.keyword_map.get_emotion_for_word(word)
            if not emotion:
                # A non-keyword word cancels pending modifiers, so a modifier
                # only applies to an emotion keyword that follows it directly
                # (possibly via further modifiers).
                negation_active = False
                intensifier_active = False
                continue

            if negation_active:
                # NOTE(review): the result is used as-is; confirm what
                # get_opposite_emotion returns when no opposite exists.
                emotion = self.keyword_map.get_opposite_emotion(emotion)
                negation_active = False

            base_score = self.INTENSIFIER_MULTIPLIER if intensifier_active else 1.0
            intensifier_active = False

            final_score = base_score * word_position_weight * position_weight
            emotion_scores[emotion] = emotion_scores.get(emotion, 0) + final_score

        return emotion_scores

    def _build_result(self, scores: Dict[str, float], details: Dict[str, Any]) -> Dict[str, Any]:
        """Pick the top-scoring emotion and package it with a capped 0..1 score."""
        # On ties max() keeps the first key in dict (insertion) order.
        primary_emotion = max(scores, key=scores.get)
        normalized_score = min(scores[primary_emotion] / self.SCORE_NORMALIZER, 1.0)
        return {
            "label": primary_emotion,
            "score": normalized_score,
            "details": details,
        }

    def analyze(self, text: str) -> Dict[str, Any]:
        """
        Analyze text and return the detected emotion.

        The last sentence is scored first (with ``LAST_SENTENCE_WEIGHT``);
        only if it yields no emotion is the full text scored instead.

        Args:
            text: Text to analyze

        Returns:
            Dict with 'label' (emotion), 'score' (capped at 1.0) and
            'details'. Empty/blank input and text without any emotion keyword
            give label "neutral" with score 0.0.
        """
        if not text or not text.strip():
            return {"label": "neutral", "score": 0.0, "details": {}}

        # The last sentence is what the user is currently typing.
        last_sentence = self._get_last_sentence(text)
        last_sentence_scores = self._analyze_segment(
            last_sentence, position_weight=self.LAST_SENTENCE_WEIGHT
        )
        if last_sentence_scores:
            return self._build_result(
                last_sentence_scores,
                {"segment": "last_sentence", "text": last_sentence[:50]},
            )

        # Fallback: the last sentence had no emotion words.
        full_text_scores = self._analyze_segment(text, position_weight=self.FULL_TEXT_WEIGHT)
        if not full_text_scores:
            return {"label": "neutral", "score": 0.0, "details": {"segment": "none"}}

        return self._build_result(
            full_text_scores,
            {"segment": "full_text", "all_emotions": full_text_scores},
        )
if __name__ == "__main__":
    # Manual smoke test: runs two small suites against a default analyzer and
    # prints a pass/fail line per sample. A sample passes when the expected
    # emotion name occurs inside the returned label.
    banner = "=" * 60

    print(banner)
    print("Testing Sentiment Analyzer")
    print(banner)

    engine = SentimentAnalyzer()

    # Suite 1: one sentence per sample.
    single_sentence_samples = [
        ("I am so happy today!", "happiness"),
        ("I love this!", "love"),
        ("This is exciting!", "excitement"),
        ("Thank you so much!", "gratitude"),
        ("I am really sad", "sadness"),
        ("This makes me angry", "anger"),
        ("I am scared", "fear"),
        ("So frustrating!", "frustration"),
        ("I am curious about this", "curiosity"),
        ("Wow, that is amazing!", "amazement"),
        ("I am so confused", "confused"),
        ("I miss you", "longing"),
    ]

    print("\nBasic Emotion Detection:")
    hits = 0
    for sample, wanted in single_sentence_samples:
        label = engine.analyze(sample)["label"]
        ok = wanted in label
        hits += 1 if ok else 0
        mark = "✓" if ok else "✗"
        print(f"{mark} '{sample}' → {label}")
    print(f"\n{hits}/{len(single_sentence_samples)} tests passed")

    print("\n" + banner)
    print("Last Sentence Focus Tests (Real-Time Updates)")
    print(banner)

    # Suite 2: the emotion of the final sentence should win.
    last_sentence_samples = [
        ("I love this! But now I am angry", "anger"),
        ("Happy day! Wait, I'm confused", "confused"),
        ("Great work! This is frustrating", "frustration"),
        ("Sad news. But I'm grateful now", "gratitude"),
        ("I was scared. Now I'm excited!", "excitement"),
    ]

    print("\nLast Sentence Detection:")
    last_hits = 0
    for sample, wanted in last_sentence_samples:
        label = engine.analyze(sample)["label"]
        ok = wanted in label
        last_hits += 1 if ok else 0
        mark = "✓" if ok else "✗"
        print(f"{mark} '{sample[:50]}...' → {label}")
    print(f"\n{last_hits}/{len(last_sentence_samples)} tests passed")

    print("\n✅ Sentiment Analyzer ready!")