from pathlib import Path

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


# Train the budget-prediction pipeline on the campaign data and save it to disk.
print("--- Starting Budget Predictor Model Training ---")

# Load the training data; every column except 'budget' is used as a feature.
df = pd.read_csv('data/dummy_campaigns.csv')

X = df.drop('budget', axis=1)
y = df['budget']

# One-hot encode the categorical columns. With handle_unknown='ignore',
# a category not seen during fitting is encoded as all zeros instead of
# raising at prediction time. Every other column is passed through unchanged.
categorical_features = ['category', 'location', 'platform']
preprocessor = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    remainder='passthrough'
)

# Fixed random_state so repeated runs on the same data yield the same model.
model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Bundle preprocessing and the regressor so the saved artifact accepts raw rows.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', model)])

pipeline.fit(X, y)
print("--- Model training complete. ---")

model_path = 'models/budget_predictor_v1.joblib'
# Create the output directory first so the save does not fail (and discard the
# finished training run) when 'models/' does not exist yet.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)
print(f"--- Budget predictor model saved to {model_path} ---")