# reachify-ai-service / training / train_performance_predictor.py
# amitbhatt6075's picture
# Complete fresh start - FINAL UPLOAD
# 0914e96
# raw
# history blame
# 2.91 kB
# File: ai-service/training/train_performance_predictor.py
import pandas as pd
import xgboost as xgb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import joblib
import os
import sys
# Resolve the directory two levels above this file and put it on the import
# path so modules located there can be imported when this script runs.
ROOT_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
sys.path.append(ROOT_DIR)
def train_performance_models():
    """
    Train and save the 'likes' and 'comments' performance predictors.

    Loads ``data/performance_training_data.csv`` (relative to ``ROOT_DIR``),
    fits one XGBoost regression pipeline per target on the same feature
    columns, and dumps each fitted pipeline into ``ROOT_DIR/models`` with
    joblib (the directory is created if it does not exist).

    Outcomes are reported with ``print`` and the function always returns
    ``None``. Training is aborted, with a message, when the CSV is missing or
    empty or when a required column is absent.
    """
    print("--- Starting Performance Predictor Model Training ---")
    data_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')

    # Guard only the read itself so unrelated errors are never reported as
    # "file not found".
    try:
        df = pd.read_csv(data_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return

    print(f"Loaded {len(df)} rows from {data_path}")
    if df.empty:
        print("⚠️ CSV file is empty. Aborting model training.")
        return

    # Features (X): the inputs from which likes/comments are estimated.
    features = ['follower_count', 'caption_length', 'campaign_niche', 'content_format']
    # Only the categorical features are one-hot encoded; the rest pass through.
    categorical_features = ['campaign_niche', 'content_format']
    # Targets (y): one model is trained per column.
    targets = ['likes', 'comments']

    # Fail with a readable message instead of a bare KeyError further down.
    missing_columns = [col for col in features + targets if col not in df.columns]
    if missing_columns:
        print(f"🔴 ERROR: Required columns missing from {data_path}: {missing_columns}")
        return

    # joblib.dump does not create parent directories.
    models_dir = os.path.join(ROOT_DIR, 'models')
    os.makedirs(models_dir, exist_ok=True)

    # ---- Model #1: Likes Predictor ----
    likes_saved = _fit_and_save_model(
        df,
        features,
        'likes',
        _build_performance_pipeline(
            categorical_features,
            xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42),
        ),
        os.path.join(models_dir, 'likes_predictor_v1.joblib'),
        'Likes',
    )

    # ---- Model #2: Comments Predictor ----
    comments_saved = _fit_and_save_model(
        df,
        features,
        'comments',
        _build_performance_pipeline(
            categorical_features,
            xgb.XGBRegressor(
                objective='reg:squarederror',
                n_estimators=100,
                learning_rate=0.05,
                random_state=42,
            ),
        ),
        os.path.join(models_dir, 'comments_predictor_v1.joblib'),
        'Comments',
    )

    if likes_saved and comments_saved:
        print("\n🎉 All performance models trained and saved successfully!")


def _build_performance_pipeline(categorical_features, regressor):
    """Return an unfitted pipeline: one-hot preprocessing followed by ``regressor``.

    A new ColumnTransformer is created on every call. Pipeline.fit fits its
    steps in place, so sharing one preprocessor instance between pipelines
    would let a later fit overwrite the state held by an earlier pipeline.
    """
    preprocessor = ColumnTransformer(
        transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
        remainder='passthrough',
    )
    return Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', regressor),
    ])


def _fit_and_save_model(df, features, target, pipeline, model_path, label):
    """Fit ``pipeline`` on ``df[features]`` -> ``df[target]`` and dump it to ``model_path``.

    Rows whose target value is missing are excluded, because they cannot be
    used as regression labels. Returns True when a model was saved and False
    when no usable rows remained.
    """
    print(f"\n--- Training {label} Predictor Model ---")

    usable = df[df[target].notna()]
    skipped = len(df) - len(usable)
    if skipped:
        print(f"⚠️ Skipping {skipped} rows with a missing '{target}' value.")
    if usable.empty:
        print(f"🔴 ERROR: No rows with a '{target}' value. {label} Predictor not trained.")
        return False

    pipeline.fit(usable[features], usable[target])
    joblib.dump(pipeline, model_path)
    print(f"✅ {label} Predictor model saved to: {model_path}")
    return True
# Run the training only when this file is executed as a script.
if __name__ == "__main__":
    train_performance_models()