File size: 1,801 Bytes
0914e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
import os
# Training script: fits a gradient-boosting regressor on the campaign CSV
# and saves the fitted preprocessing + model pipeline with joblib.
print("--- Starting Performance Model Training ---")

# Load the data (path is resolved relative to this script's directory).
data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'dummy_campaigns.csv')
df = pd.read_csv(data_path)
print(f"Loaded {len(df)} rows from {data_path}")

# Define the features (X) and the target (y).
# We want to predict 'final_reach' from these features.
features = ['category', 'budget', 'location', 'platform']
target = 'final_reach'
X = df[features]
y = df[target]

# Identify the categorical features.
# 'budget' is the only feature not listed here; it is left out of the
# encoder and handled by the transformer's `remainder` setting below.
categorical_features = ['category', 'location', 'platform']

# Build the preprocessing step.
# OneHotEncoder converts the categorical columns into a numerical format;
# handle_unknown='ignore' keeps prediction from failing on categories that
# were not present during training.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough',  # pass the 'budget' column through unchanged
)

# Full ML pipeline: 1. preprocess, 2. fit the regressor.
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor(n_estimators=100, random_state=42)),
])

# Resolve the output location and make sure its directory exists *before*
# training: joblib.dump does not create missing parent directories, and
# failing after the fit would throw the training work away.
model_path = os.path.join(os.path.dirname(__file__), '..', 'models', 'performance_predictor_v1.joblib')
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Train the model on the full dataset (no hold-out split is made here).
print("Training the Gradient Boosting Regressor model...")
model_pipeline.fit(X, y)
print("--- Model training complete! ---")

# Save the trained pipeline.
joblib.dump(model_pipeline, model_path)
print(f"--- Model saved successfully to {model_path} ---")