"""Train the budget predictor model and save it to disk.

Reads campaign records from ``data/dummy_campaigns.csv``, fits a
gradient-boosting regression pipeline that predicts the ``budget`` column
from every other column, and writes the fitted pipeline to
``models/budget_predictor_v1.joblib``.
"""

from pathlib import Path

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split  # currently unused; kept from the original imports
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

print("--- Starting Budget Predictor Model Training ---")

# 1. Load the data.
df = pd.read_csv('data/dummy_campaigns.csv')

# 2. Separate the features from the target.
# Every column except `budget` is used as a feature.
# NOTE(review): the original note says the model learns `budget` from
# `final_reach`, so that column is presumably among the features -- confirm
# against the CSV schema.
X = df.drop('budget', axis=1)
y = df['budget']

# 3. Preprocessing: turn the text columns into numbers.
# The listed categorical columns are one-hot encoded; all remaining columns
# are passed through untouched.
# NOTE(review): the passthrough columns are assumed to be numeric already --
# verify, since the regressor cannot consume raw strings.
categorical_features = ['category', 'location', 'platform']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    remainder='passthrough',
)

# 4. Build the model. The fixed random_state keeps training reproducible.
model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# 5. Build the full pipeline so preprocessing is saved together with the model.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', model),
    ]
)

# 6. Train the model on the full dataset.
pipeline.fit(X, y)
print("--- Model training complete. ---")

# 7. Save the trained model.
model_path = 'models/budget_predictor_v1.joblib'
# Create the output directory if needed so the dump does not fail on a fresh
# checkout after the (comparatively slow) training step has already run.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)
print(f"--- Budget predictor model saved to {model_path} ---")