# FILE: ai-service/training/train_revenue_forecaster.py (NEW FILE)
"""Train a Holt exponential-smoothing model on monthly revenue and save it."""

import os
import sys
from pathlib import Path
from typing import Optional

import joblib
import pandas as pd
# Holt's (double) exponential smoothing: models a level plus a linear trend.
from statsmodels.tsa.api import Holt

# Minimum number of monthly observations required before a fit is attempted.
_MIN_HISTORY_MONTHS = 4

# Columns the training CSV must provide.
_REQUIRED_COLUMNS = ('month', 'total_revenue')


def _load_revenue_series(data_path: Path) -> pd.Series:
    """Read the training CSV and return 'total_revenue' indexed by month.

    Args:
        data_path: Location of the CSV file to read.

    Returns:
        The 'total_revenue' column, indexed by the parsed 'month' column and
        sorted chronologically.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
        pandas.errors.EmptyDataError: If the file has no content to parse.
        ValueError: If a required column is absent or 'total_revenue'
            contains missing values.
    """
    df = pd.read_csv(data_path)
    print(f"Loaded {len(df)} monthly records from {data_path}")

    # Fail with a readable message instead of a bare KeyError such as 'month'.
    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise ValueError(
            f"missing required column(s) in {data_path}: {', '.join(missing)}"
        )

    # The model needs a datetime index in chronological order.
    df['month'] = pd.to_datetime(df['month'])
    df = df.set_index('month').sort_index()

    revenue = df['total_revenue']
    # Gaps in the target would otherwise flow straight into the fit.
    n_missing = int(revenue.isna().sum())
    if n_missing:
        raise ValueError(
            f"'total_revenue' contains {n_missing} missing value(s)"
        )
    return revenue


def train_revenue_forecaster() -> Optional[Path]:
    """Train the revenue forecaster and persist it under ``<root>/models``.

    Loads ``<root>/data/revenue_training_data.csv`` (``<root>`` is the parent
    of this file's directory), fits Holt's exponential smoothing on the
    'total_revenue' column, and dumps the fitted model with joblib to
    ``<root>/models/revenue_forecaster_v1.joblib``.

    Errors are reported on stdout rather than raised.

    Returns:
        The path of the saved model on success, or ``None`` if loading,
        training, or saving failed.
    """
    print("--- Starting AI Revenue Forecaster Model Training ---")

    try:
        # --- Setup to find files from the root directory ---
        root_dir = Path(__file__).resolve().parents[1]
        # Avoid stacking duplicate entries when called more than once.
        if str(root_dir) not in sys.path:
            sys.path.append(str(root_dir))
        data_path = root_dir / 'data' / 'revenue_training_data.csv'

        # --- Load and prepare the data ---
        time_series = _load_revenue_series(data_path)

        if len(time_series) < _MIN_HISTORY_MONTHS:
            print(
                "🔴 ERROR: Not enough historical data "
                f"(at least {_MIN_HISTORY_MONTHS} months required). "
                "Aborting training."
            )
            return None
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return None
    except Exception as e:
        # Top-level boundary of a CLI script: report and stop.
        print(f"🔴 ERROR during data loading/preparation: {e}")
        return None

    # --- Train the Holt's Forecasting Model ---
    try:
        print("Training the Holt's Exponential Smoothing model...")
        # initialization_method="estimated": the initial level/trend are
        # estimated during fitting instead of being supplied by hand.
        # fit(optimized=True): the smoothing parameters are estimated from
        # the data rather than fixed.
        model = Holt(time_series, initialization_method="estimated").fit(
            optimized=True
        )
        print("--- Model training complete! ---")

        # --- Save the trained model ---
        models_dir = root_dir / 'models'
        models_dir.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
        model_path = models_dir / 'revenue_forecaster_v1.joblib'
        joblib.dump(model, model_path)
        print(f"🎉 Success! Revenue Forecaster model saved to: {model_path}")
        return model_path
    except Exception as e:
        print(f"🔴 ERROR during model training or saving: {e}")
        return None


if __name__ == '__main__':
    # Exit non-zero on failure so schedulers/CI can detect a failed run.
    if train_revenue_forecaster() is None:
        sys.exit(1)