# reachify-ai-service/training/train_matching_model.py
# Uploaded by amitbhatt6075 -- commit 0914e96: "Complete fresh start - FINAL UPLOAD" (1.35 kB)
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import joblib
import os
# Training script: fits a random-forest regressor that predicts `match_score`
# from the sample data and saves the fitted preprocessing+model pipeline to disk.
print("Training script started...")

# 1. Load the training data.
df = pd.read_csv('data/sample_data.csv')

# 2. Separate the features (X) from the target (y).
X = df.drop('match_score', axis=1)
y = df['match_score']

# 3. Build the preprocessing step.
#    Numeric columns are passed through unchanged; categorical columns are
#    one-hot encoded. handle_unknown='ignore' keeps the encoder from raising
#    on categories that were not present when the pipeline was fitted.
#    NOTE: with ColumnTransformer's default `remainder` setting, any column of
#    X that is not listed below is dropped before it reaches the model.
categorical_features = ['niche', 'country']
numeric_features = ['followers', 'engagement_rate']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ])

# 4. Create the model. random_state is fixed so repeated runs on the same data
#    build the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# 5. Chain preprocessing and the model into a single pipeline so both are
#    fitted and saved together.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', model)])

# 6. Train the pipeline.
pipeline.fit(X, y)
print("Model training complete.")

# 7. Save the trained pipeline.
model_path = 'models/influencer_matcher_v1.joblib'
# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(pipeline, model_path)
print(f"Model successfully saved to {model_path}")