# Source: reachify-ai-service/training/train_performance_scorer.py
# Uploaded by amitbhatt6075 - commit 0914e96 ("Complete fresh start - FINAL UPLOAD"), 1.68 kB
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import joblib
import os
# Training script: fits a RandomForestRegressor on the sample performance
# data, reports the R^2 score on a held-out split, and saves the fitted model
# with joblib. Exits with a non-zero status if the training data is unusable.
print("Starting model training...")

# RENAMED: the data file was renamed to avoid a naming conflict.
DATA_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'sample_performance_training_data.csv')
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'performance_scorer_v1.joblib')

# Create the model directory if it does not exist yet.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# 1. Load the data.
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"ERROR: Training data file not found at {DATA_PATH}")
    print("Please ensure 'sample_performance_training_data.csv' exists in the 'ai-service/data' directory.")
    # Use SystemExit(1) rather than the site-provided exit(): it is always
    # available and signals failure to the calling shell / CI job.
    raise SystemExit(1)

# 2. Define the features (X) and the target (y).
features = ['avg_engagement_rate', 'on_time_submission_rate', 'avg_brand_rating', 'monthly_earnings']
target = 'performance_score'

# Fail early with a readable message instead of a KeyError traceback when the
# CSV does not contain every column the model needs.
missing_columns = [col for col in features + [target] if col not in df.columns]
if missing_columns:
    print(f"ERROR: Training data is missing required column(s): {missing_columns}")
    raise SystemExit(1)

X = df[features]
y = df[target]

# 3. Split the data into training and testing sets (80% / 20%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Initialize and train the model.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate the model on the held-out split (optional but good practice).
#    This is the coefficient of determination (R^2), not a classification accuracy.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"Model trained successfully! R^2 Score: {r2:.2f}")

# 6. Save the trained model to disk.
joblib.dump(model, MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")