# -*- coding: utf-8 -*-
"""
Run Evaluation - Execute full sentiment analysis evaluation
Runs all tests and generates comprehensive reports
"""
import sys
import os
import time

# Add parent to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from avatar import SentimentAnalyzer, EmojiMapper
from evaluation.emotion_test_suite import EmotionTestSuite
from evaluation.accuracy_benchmark import AccuracyBenchmark
from evaluation.live_stream_test import LiveStreamTest
from evaluation.report_generator import ReportGenerator
def run_full_evaluation():
    """Run the full sentiment-analysis evaluation pipeline.

    Executes six stages: component initialization, the accuracy benchmark,
    live-stream transition tests, a console summary, a markdown report, and
    a JSON report. Prints progress for each stage and returns a shell-style
    exit code (0 = passed, 1 = failed) so CI/CD can gate on the result.
    """
    bar = "=" * 70

    print(bar)
    print("EMOJI AI AVATAR - SENTIMENT ANALYSIS EVALUATION")
    print(bar)
    print()

    # Stage 1: build every component up front so later stages measure only
    # their own work, not construction cost.
    print("[1/6] Initializing components...")
    t0 = time.time()
    analyzer = SentimentAnalyzer()
    mapper = EmojiMapper()
    suite = EmotionTestSuite()
    benchmark = AccuracyBenchmark(analyzer, mapper)
    stream_test = LiveStreamTest(analyzer, mapper)
    reporter = ReportGenerator()
    setup_secs = time.time() - t0
    print(f" Components initialized in {setup_secs:.2f}s")
    print(f" Emotions to test: {suite.get_emotion_count()}")
    print(f" Test cases: {suite.get_test_count()}")
    print()

    # Stage 2: per-emotion accuracy benchmark over the static test data.
    print("[2/6] Running accuracy benchmark...")
    t_bench = time.time()
    bench_results = benchmark.run_benchmark(suite.EMOTION_TEST_DATA)
    print(f" Benchmark completed in {time.time() - t_bench:.2f}s")
    print(f" Accuracy: {bench_results.accuracy:.1%}")
    print()

    # Stage 3: emotion-to-emotion transition tests on a simulated stream.
    print("[3/6] Running live stream transition tests...")
    t_stream = time.time()
    transitions = stream_test.run_all_transitions()
    stream_secs = time.time() - t_stream
    if transitions:
        hits = [t for t in transitions if t.transition_correct]
        trans_accuracy = len(hits) / len(transitions)
    else:
        # No transitions executed — report 0 rather than dividing by zero.
        trans_accuracy = 0
    print(f" Transitions completed in {stream_secs:.2f}s")
    print(f" Transition accuracy: {trans_accuracy:.1%}")
    print()

    # Stage 4: human-readable summary straight to the console.
    print("[4/6] Generating console summary...")
    print()
    print(reporter.generate_summary_report(bench_results))
    print()

    # Stage 5: persisted markdown report.
    print("[5/6] Generating markdown report...")
    md_path = reporter.generate_markdown_report(bench_results, transitions)
    print(f" Saved to: {md_path}")
    print()

    # Stage 6: persisted machine-readable JSON report.
    print("[6/6] Generating JSON report...")
    json_path = reporter.generate_json_report(bench_results, transitions)
    print(f" Saved to: {json_path}")
    print()

    print(stream_test.get_transition_report(transitions))
    print()

    # Final wrap-up: wall-clock total covers every stage since t0.
    print(bar)
    print("EVALUATION COMPLETE")
    print(bar)
    print(f"Total time: {time.time() - t0:.2f}s")
    print(f"Overall accuracy: {bench_results.accuracy:.1%}")
    print(f"Transition accuracy: {trans_accuracy:.1%}")
    print(f"Avg inference time: {bench_results.avg_inference_time_ms:.2f}ms")
    print()
    print("Reports saved to:")
    print(f" - {md_path}")
    print(f" - {json_path}")
    print(bar)

    # Gate for CI/CD: fail the run when overall accuracy drops below 50%.
    passing_threshold = 0.5  # 50% minimum accuracy
    if bench_results.accuracy < passing_threshold:
        print(f"\n❌ EVALUATION FAILED (accuracy below {passing_threshold:.0%})")
        return 1
    print("\n✅ EVALUATION PASSED")
    return 0
if __name__ == "__main__":
    # Propagate the evaluation's pass/fail code to the shell for CI/CD.
    sys.exit(run_full_evaluation())