Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Files Community

reachify-ai-service / training /train_performance_predictor.py

amitbhatt6075

Complete fresh start - FINAL UPLOAD

0914e96 16 days ago

raw

history blame

2.91 kB

	# File: ai-service/training/train_performance_predictor.py

	import pandas as pd
	import xgboost as xgb
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import OneHotEncoder
	import joblib
	import os
	import sys

	# Make the service root (the parent of this script's directory) importable.
	_THIS_FILE = os.path.abspath(__file__)
	_SCRIPT_DIR = os.path.dirname(_THIS_FILE)
	ROOT_DIR = os.path.dirname(_SCRIPT_DIR)
	sys.path.append(ROOT_DIR)

	def _build_regressor_pipeline(categorical_features, **regressor_params):
	    """Return an unfitted one-hot-encoding + XGBoost regression pipeline.

	    A fresh ``ColumnTransformer`` is built on every call so that two
	    pipelines never share (and refit) the same preprocessor instance.

	    Args:
	        categorical_features: Column names to one-hot encode; all other
	            columns are passed through unchanged.
	        **regressor_params: Extra keyword arguments forwarded to
	            ``xgb.XGBRegressor`` (e.g. ``learning_rate``).
	    """
	    preprocessor = ColumnTransformer(
	        transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
	        remainder='passthrough'
	    )
	    regressor = xgb.XGBRegressor(
	        objective='reg:squarederror', n_estimators=100, random_state=42, **regressor_params
	    )
	    return Pipeline(steps=[('preprocessor', preprocessor), ('regressor', regressor)])


	def train_performance_models():
	    """
	    Train and save the 'likes' and 'comments' predictor models.

	    Reads ``data/performance_training_data.csv`` under ROOT_DIR, fits one
	    XGBoost pipeline per target on the same feature columns, and writes each
	    fitted pipeline into ``models/`` under ROOT_DIR with joblib (the
	    directory is created if it does not exist).

	    Returns:
	        None. Prints a message and returns early, without training, when the
	        CSV is missing, empty, or lacks a required column.
	    """
	    print("--- Starting Performance Predictor Model Training ---")

	    data_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')

	    # Only the read itself is guarded, so unrelated errors are not swallowed.
	    try:
	        df = pd.read_csv(data_path)
	    except (FileNotFoundError, pd.errors.EmptyDataError):
	        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
	        return

	    print(f"Loaded {len(df)} rows from {data_path}")
	    if df.empty:
	        print("⚠️ CSV file is empty. Aborting model training.")
	        return

	    # Features (X): the inputs used to estimate likes/comments.
	    features = ['follower_count', 'caption_length', 'campaign_niche', 'content_format']
	    # Only these feature columns need one-hot encoding.
	    categorical_features = ['campaign_niche', 'content_format']

	    # (label used in log messages, target column, output file, extra regressor params)
	    model_specs = [
	        ('Likes', 'likes', 'likes_predictor_v1.joblib', {}),
	        ('Comments', 'comments', 'comments_predictor_v1.joblib', {'learning_rate': 0.05}),
	    ]

	    # Fail with a readable message instead of a bare KeyError further down.
	    required_columns = features + [target for _, target, _, _ in model_specs]
	    missing_columns = [col for col in required_columns if col not in df.columns]
	    if missing_columns:
	        print(f"🔴 ERROR: Required columns missing from {data_path}: {missing_columns}")
	        return

	    X = df[features]

	    # Create the output directory before training so a missing folder
	    # cannot discard a finished model at save time.
	    models_dir = os.path.join(ROOT_DIR, 'models')
	    os.makedirs(models_dir, exist_ok=True)

	    for label, target, filename, regressor_params in model_specs:
	        print(f"\n--- Training {label} Predictor Model ---")
	        pipeline = _build_regressor_pipeline(categorical_features, **regressor_params)
	        pipeline.fit(X, df[target])

	        model_path = os.path.join(models_dir, filename)
	        joblib.dump(pipeline, model_path)
	        print(f"✅ {label} Predictor model saved to: {model_path}")

	    print("\n🎉 All performance models trained and saved successfully!")

	# Script entry point: train the models only when run directly, not on import.
	if __name__ == "__main__":
	    train_performance_models()