"""Train the campaign performance model and save it to disk.

Reads ``../data/dummy_campaigns.csv`` (relative to this file), fits a
one-hot-encoding + gradient-boosting pipeline that predicts ``final_reach``
from ``category``, ``budget``, ``location`` and ``platform``, and writes the
fitted pipeline to ``../models/performance_predictor_v1.joblib``.
"""

import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

print("--- Starting Performance Model Training ---")

# --- Load the training data ---
# Paths are resolved relative to this file so the script works regardless of
# the current working directory.
data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'dummy_campaigns.csv')
df = pd.read_csv(data_path)
print(f"Loaded {len(df)} rows from {data_path}")

# --- Select model inputs and the prediction target ---
# The CSV must contain all of these columns; a missing one raises KeyError.
features = ['category', 'budget', 'location', 'platform']
target = 'final_reach'

X = df[features]
y = df[target]

# --- Preprocessing ---
# These columns are one-hot encoded. The remaining feature ('budget') is
# passed through unchanged -- assumes it is already numeric; TODO confirm
# against the CSV schema.
categorical_features = ['category', 'location', 'platform']

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': categories unseen during fit are encoded
        # as all-zeros at predict time instead of raising an error.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough',
)

# --- Full pipeline: preprocessing followed by the regressor ---
# Saving the whole pipeline means callers can feed it raw feature rows.
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor(n_estimators=100, random_state=42)),
])

# --- Train on the full dataset (no hold-out split is made here) ---
print("Training the Gradient Boosting Regressor model...")
model_pipeline.fit(X, y)
print("--- Model training complete! ---")

# --- Persist the fitted pipeline ---
model_path = os.path.join(os.path.dirname(__file__), '..', 'models', 'performance_predictor_v1.joblib')
# Create the output directory if needed; otherwise joblib.dump fails with
# FileNotFoundError on a fresh checkout, after the model has already trained.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model_pipeline, model_path)
print(f"--- Model saved successfully to {model_path} ---")