"""Train the influencer match-score model and save it to disk.

Reads ``data/sample_data.csv``, fits a preprocessing + RandomForest pipeline
that predicts ``match_score``, and writes the fitted pipeline to
``models/influencer_matcher_v1.joblib``.
"""

import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

print("Training script started...")

# 1. Load the data.
df = pd.read_csv('data/sample_data.csv')

# 2. Separate the features (X) from the target (y).
X = df.drop('match_score', axis=1)
y = df['match_score']

# 3. Build the preprocessing step.
categorical_features = ['niche', 'country']
numeric_features = ['followers', 'engagement_rate']

preprocessor = ColumnTransformer(
    transformers=[
        # Numeric columns are handed to the model unchanged.
        ('num', 'passthrough', numeric_features),
        # handle_unknown='ignore' lets the encoder accept categories at
        # prediction time that were not present in the training data.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

# 4. Create the model; a fixed random_state keeps training runs repeatable.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# 5. Combine preprocessing and model into a single pipeline so the saved
#    artifact accepts the raw feature columns directly.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', model),
    ]
)

# 6. Train the model.
pipeline.fit(X, y)
print("Model training complete.")

# 7. Save the trained pipeline.
model_path = 'models/influencer_matcher_v1.joblib'
# Create the output directory if needed; exist_ok=True avoids the race in a
# separate "exists?" check followed by "create".
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(pipeline, model_path)
print(f"Model successfully saved to {model_path}")