Upload folder using huggingface_hub
Browse files- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-311.pyc +0 -0
- utils/__pycache__/metrics.cpython-311.pyc +0 -0
- utils/__pycache__/validators.cpython-311.pyc +0 -0
- utils/metrics.py +208 -0
- utils/validators.py +203 -0
utils/__init__.py
ADDED
|
File without changes
|
utils/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (161 Bytes). View file
|
|
|
utils/__pycache__/metrics.cpython-311.pyc
ADDED
|
Binary file (8.67 kB). View file
|
|
|
utils/__pycache__/validators.cpython-311.pyc
ADDED
|
Binary file (7.47 kB). View file
|
|
|
utils/metrics.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Forecast evaluation metrics
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Dict, Any
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def calculate_metrics(
    actual: pd.Series,
    forecast: pd.Series,
    include_percentage: bool = True
) -> Dict[str, float]:
    """
    Calculate forecast accuracy metrics.

    Accepts pandas Series or any 1-D array-like; inputs are truncated to a
    common length and positions where either side is NaN are dropped before
    computing metrics.

    Args:
        actual: Actual values
        forecast: Forecasted values
        include_percentage: Include percentage-based metrics (MAPE, sMAPE)

    Returns:
        Dictionary of metrics (MAE, RMSE, ME, and — where defined — MAPE,
        sMAPE, R2), or {'error': message} when no valid values remain or
        the computation fails
    """
    try:
        # Coerce to float arrays so both Series and plain sequences work
        actual = np.asarray(actual, dtype=float)
        forecast = np.asarray(forecast, dtype=float)

        # Align to the shorter input
        min_len = min(len(actual), len(forecast))
        actual = actual[:min_len]
        forecast = forecast[:min_len]

        # Drop positions where either side is NaN
        mask = ~(np.isnan(actual) | np.isnan(forecast))
        actual = actual[mask]
        forecast = forecast[mask]

        if len(actual) == 0:
            return {'error': 'No valid values for metric calculation'}

        metrics = {}

        # Mean Absolute Error
        metrics['MAE'] = float(np.mean(np.abs(actual - forecast)))

        # Root Mean Squared Error
        metrics['RMSE'] = float(np.sqrt(np.mean((actual - forecast) ** 2)))

        # Mean Error (bias): positive means the forecast over-predicts
        metrics['ME'] = float(np.mean(forecast - actual))

        if include_percentage:
            # Mean Absolute Percentage Error; zero actuals are excluded
            # to avoid division by zero
            mask_nonzero = actual != 0
            if mask_nonzero.any():
                mape = np.mean(np.abs((actual[mask_nonzero] - forecast[mask_nonzero]) / actual[mask_nonzero])) * 100
                metrics['MAPE'] = float(mape)

            # Symmetric MAPE; defined wherever actual and forecast are not
            # both zero
            denominator = (np.abs(actual) + np.abs(forecast)) / 2
            mask_nonzero = denominator != 0
            if mask_nonzero.any():
                smape = np.mean(np.abs(actual[mask_nonzero] - forecast[mask_nonzero]) / denominator[mask_nonzero]) * 100
                metrics['sMAPE'] = float(smape)

        # R-squared (coefficient of determination); skipped for a constant
        # actual series, where it is undefined
        ss_res = np.sum((actual - forecast) ** 2)
        ss_tot = np.sum((actual - np.mean(actual)) ** 2)
        if ss_tot != 0:
            metrics['R2'] = float(1 - (ss_res / ss_tot))

        return metrics

    except Exception as e:
        logger.error(f"Error calculating metrics: {str(e)}", exc_info=True)
        return {'error': str(e)}
def calculate_coverage(
    actual: pd.Series,
    lower_bound: pd.Series,
    upper_bound: pd.Series
) -> float:
    """
    Calculate coverage of prediction intervals.

    Args:
        actual: Actual values
        lower_bound: Lower bound of prediction interval
        upper_bound: Upper bound of prediction interval

    Returns:
        Coverage percentage (0-100); 0.0 if the calculation fails
    """
    try:
        # Truncate all three series to their common length
        n = min(len(actual), len(lower_bound), len(upper_bound))
        y = actual.iloc[:n].values
        lo = lower_bound.iloc[:n].values
        hi = upper_bound.iloc[:n].values

        # Fraction of actuals landing inside [lo, hi], as a percentage
        hits = (y >= lo) & (y <= hi)
        return float(np.mean(hits) * 100)

    except Exception as e:
        logger.error(f"Error calculating coverage: {str(e)}", exc_info=True)
        return 0.0
def calculate_interval_width(
    lower_bound: pd.Series,
    upper_bound: pd.Series
) -> Dict[str, float]:
    """
    Calculate statistics about prediction interval width.

    Args:
        lower_bound: Lower bound of prediction interval
        upper_bound: Upper bound of prediction interval

    Returns:
        Dictionary with width statistics (mean/median/min/max/std),
        or an empty dict if the calculation fails
    """
    try:
        widths = upper_bound - lower_bound

        # One pandas reduction per summary statistic
        stats = {
            'mean_width': widths.mean(),
            'median_width': widths.median(),
            'min_width': widths.min(),
            'max_width': widths.max(),
            'std_width': widths.std(),
        }
        return {name: float(value) for name, value in stats.items()}

    except Exception as e:
        logger.error(f"Error calculating interval width: {str(e)}", exc_info=True)
        return {}
def format_metric(value: float, metric_name: str) -> str:
    """
    Format metric value for display.

    Args:
        value: Metric value
        metric_name: Name of the metric

    Returns:
        Formatted string
    """
    if metric_name in ['MAPE', 'sMAPE']:
        # Percentage-based metrics get a percent sign
        return f"{value:.2f}%"
    elif metric_name in ['MAE', 'RMSE', 'ME']:
        # Scale-dependent metrics: thousands separator for large magnitudes
        if abs(value) >= 1000:
            return f"{value:,.2f}"
        else:
            return f"{value:.4f}"
    else:
        # R2 and any other dimensionless metric: plain fixed-point.
        # (R2 is a ratio in (-inf, 1], not a percentage — the previous code
        # wrongly appended '%' to it.)
        return f"{value:.4f}"
def summarize_forecast_quality(
    forecast_df: pd.DataFrame,
    confidence_levels: list
) -> Dict[str, Any]:
    """
    Summarize the quality of a forecast.

    Args:
        forecast_df: DataFrame with forecast results
        confidence_levels: List of confidence levels

    Returns:
        Summary dictionary (horizon, forecast range, interval widths),
        or an empty dict if summarization fails
    """
    try:
        point = forecast_df['forecast']
        summary = {
            'horizon': len(forecast_df),
            'forecast_range': {
                'min': float(point.min()),
                'max': float(point.max()),
                'mean': float(point.mean()),
            },
        }

        # Mean interval width for each confidence level whose bound
        # columns are both present in the frame
        widths = {}
        for level in confidence_levels:
            lower, upper = f'lower_{level}', f'upper_{level}'
            if lower in forecast_df.columns and upper in forecast_df.columns:
                widths[f'{level}%'] = float((forecast_df[upper] - forecast_df[lower]).mean())
        summary['interval_widths'] = widths

        return summary

    except Exception as e:
        logger.error(f"Error summarizing forecast: {str(e)}", exc_info=True)
        return {}
|
utils/validators.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Input validation utilities
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Dict, List, Optional, Any
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from config.constants import MAX_FILE_SIZE, ALLOWED_EXTENSIONS
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def validate_file_upload(filename: str, filesize: int) -> Dict[str, Any]:
    """
    Validate uploaded file.

    Args:
        filename: Name of the uploaded file
        filesize: Size of the file in bytes

    Returns:
        Validation result dictionary: {'valid': True} or
        {'valid': False, 'issues': [...]}
    """
    issues = []

    # Extension is everything after the final dot, lowercased; a name
    # with no dot yields '' and fails the whitelist check below
    extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
    if extension not in ALLOWED_EXTENSIONS:
        issues.append(f"Invalid file type '{extension}'. Allowed: {', '.join(ALLOWED_EXTENSIONS)}")

    # Enforce the configured size ceiling
    if filesize > MAX_FILE_SIZE:
        max_mb = MAX_FILE_SIZE / (1024 * 1024)
        actual_mb = filesize / (1024 * 1024)
        issues.append(f"File too large ({actual_mb:.1f}MB). Maximum: {max_mb:.0f}MB")

    if filesize == 0:
        issues.append("File is empty")

    return {'valid': False, 'issues': issues} if issues else {'valid': True}
def validate_column_selection(
    data: pd.DataFrame,
    date_column: Optional[str],
    target_column: Optional[str]
) -> Dict[str, Any]:
    """
    Validate column selection.

    Args:
        data: DataFrame to validate
        date_column: Selected date column
        target_column: Selected target column

    Returns:
        Validation result dictionary: {'valid': True} or
        {'valid': False, 'issues': [...]}
    """
    issues = []

    # Each selection must be made and must exist in the frame
    for column, label in ((date_column, 'Date'), (target_column, 'Target')):
        if column is None:
            issues.append(f"Please select a {label.lower()} column")
        elif column not in data.columns:
            issues.append(f"{label} column '{column}' not found in data")

    # The two selections must not point at the same column
    if date_column and target_column and date_column == target_column:
        issues.append("Date and target columns must be different")

    return {'valid': False, 'issues': issues} if issues else {'valid': True}
def validate_forecast_parameters(
    horizon: int,
    confidence_levels: List[int],
    data_length: int
) -> Dict[str, Any]:
    """
    Validate forecast parameters.

    Args:
        horizon: Forecast horizon
        confidence_levels: List of confidence levels
        data_length: Length of the input data

    Returns:
        Validation result dictionary with optional 'issues'/'warnings' lists
    """
    issues: List[str] = []
    warnings: List[str] = []

    # Horizon must be positive; a very long horizon only warns
    if horizon <= 0:
        issues.append("Forecast horizon must be positive")
    elif horizon > 365:
        warnings.append("Very long forecast horizon (>365 days) may be unreliable")

    # History should be at least twice the horizon
    if data_length < horizon * 2:
        warnings.append(
            f"Limited historical data ({data_length} points) for {horizon}-period forecast. "
            "Recommend at least 2x horizon length."
        )

    # Confidence levels: non-empty, each strictly inside (0, 100)
    if not confidence_levels:
        issues.append("Please select at least one confidence level")
    for cl in confidence_levels:
        if not 0 < cl < 100:
            issues.append(f"Invalid confidence level: {cl}%. Must be between 0 and 100.")

    if issues:
        return {'valid': False, 'issues': issues, 'warnings': warnings}
    if warnings:
        return {'valid': True, 'warnings': warnings}
    return {'valid': True}
def sanitize_input(text: str, max_length: int = 1000) -> str:
    """
    Sanitize text input.

    Args:
        text: Input text
        max_length: Maximum allowed length

    Returns:
        Sanitized text
    """
    if text is None:
        return ""

    # Drop control characters, keeping printable text plus tab/newline/CR
    kept = [ch for ch in text if ch in '\n\r\t' or ord(ch) >= 32]
    cleaned = ''.join(kept)

    # Truncate before stripping so the limit applies to the raw content
    return cleaned[:max_length].strip()
def validate_data_quality(data: pd.DataFrame, target_column: str) -> Dict[str, Any]:
    """
    Validate data quality for forecasting.

    Args:
        data: Input DataFrame
        target_column: Name of the target column

    Returns:
        Quality validation result with optional 'issues'/'warnings' lists
    """
    issues = []
    warnings = []

    target = data[target_column]

    # All-missing target (this also catches an empty frame) is unusable
    if target.isna().all():
        issues.append("Target column contains only missing values")
        return {'valid': False, 'issues': issues}

    # A constant series produces a trivial (flat) forecast
    if target.nunique() == 1:
        warnings.append("Target column has constant values - forecast may be trivial")

    # Infinite values break most models. Compared against float('inf') so
    # this works without numpy — the original relied on an `import numpy as np`
    # placed at the very bottom of the module, which breaks if the file is
    # ever reorganized.
    inf_count = int((target.abs() == float('inf')).sum())
    if inf_count > 0:
        issues.append(f"Target column contains {inf_count} infinite values")

    # Very large spread suggests the series should be scaled first
    if target.std() > 1e6:
        warnings.append("Target column has very high variance - consider scaling")

    # A mostly-zero series is hard to forecast meaningfully
    zero_pct = (target == 0).sum() / len(data) * 100
    if zero_pct > 50:
        warnings.append(f"{zero_pct:.1f}% of values are zero")

    if issues:
        return {'valid': False, 'issues': issues, 'warnings': warnings}
    if warnings:
        return {'valid': True, 'warnings': warnings}
    return {'valid': True}
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
import numpy as np
|