# reachify-ai-service/training/train_performance_model.py
# amitbhatt6075's picture
# Complete fresh start - FINAL UPLOAD
# 0914e96
# raw
# history blame
# 1.8 kB
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
import os
# Training script: fits a gradient-boosting regression pipeline that predicts
# 'final_reach' from campaign attributes and saves the fitted pipeline to disk.
print("--- Starting Performance Model Training ---")

# Directory containing this script. The data and model paths below are built
# relative to it, so they do not depend on the current working directory.
script_dir = os.path.dirname(__file__)

# Load the data.
data_path = os.path.join(script_dir, '..', 'data', 'dummy_campaigns.csv')
df = pd.read_csv(data_path)
print(f"Loaded {len(df)} rows from {data_path}")

# Define the features (X) and the target (y).
# We want to predict 'final_reach' from these features.
features = ['category', 'budget', 'location', 'platform']
target = 'final_reach'
X = df[features]
y = df[target]

# Identify the categorical features.
# 'budget' is a numerical feature, so it is left out of this list.
categorical_features = ['category', 'location', 'platform']

# Build the preprocessing step.
# OneHotEncoder converts the categorical columns into a numerical format;
# handle_unknown='ignore' keeps prediction from failing on categories that
# were not present in the training data.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough',  # leave the 'budget' column as it is
)

# Full ML pipeline: 1. preprocess, 2. train the model.
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor(n_estimators=100, random_state=42)),
])

# Train the model.
# NOTE: the pipeline is fit on every row of the CSV; no hold-out split or
# evaluation is performed in this script.
print("Training the Gradient Boosting Regressor model...")
model_pipeline.fit(X, y)
print("--- Model training complete! ---")

# Save the trained model.
# Make sure the target directory exists first: joblib.dump does not create
# missing parent directories, and failing here would throw away the training
# work that has just completed.
model_dir = os.path.join(script_dir, '..', 'models')
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, 'performance_predictor_v1.joblib')
joblib.dump(model_pipeline, model_path)
print(f"--- Model saved successfully to {model_path} ---")