Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Files Community

reachify-ai-service / training /train_earning_optimizer.py

amitbhatt6075

Complete fresh start - FINAL UPLOAD

0914e96 16 days ago

raw

history blame

2.8 kB

	# File: ai-service/training/train_earning_optimizer.py (FINAL SIMPLIFIED VERSION)

	import pandas as pd
	import xgboost as xgb
	import joblib
	import os
	import sys
	from sklearn.preprocessing import OneHotEncoder

	# The service root is two levels above this file (<root>/training/<script>).
	# It is appended to sys.path so project-level packages can be imported when
	# the script is launched directly.
	_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
	ROOT_DIR = os.path.dirname(_SCRIPT_DIR)
	sys.path.append(ROOT_DIR)

	def _min_max_normalize(values):
	    """Scale *values* linearly into [0, 1]; a constant column maps to 0.5."""
	    if values.nunique() == 1:
	        # Min-max scaling would divide by zero when every value is identical.
	        return pd.Series(0.5, index=values.index)
	    lowest = values.min()
	    return (values - lowest) / (values.max() - lowest)


	def _add_performance_score(df):
	    """Return a copy of *df* with the target column added, minus unscorable rows.

	    Adds ``roi``, ``norm_engagement``, ``norm_roi`` and
	    ``smart_performance_score``. Rows whose ROI or engagement rate is missing
	    or non-finite are dropped, because a single inf/NaN would corrupt the
	    min-max scaling of every other row and yield NaN training labels.
	    """
	    scored = df.copy()
	    roi = scored['payment_amount'] / scored['follower_count']
	    # A zero follower count gives +/-inf (or NaN for 0/0); mark those as
	    # missing so they are removed together with genuinely missing values.
	    scored['roi'] = roi.replace([float('inf'), float('-inf')], float('nan'))
	    scored = scored.dropna(subset=['roi', 'engagement_rate']).reset_index(drop=True)

	    dropped = len(df) - len(scored)
	    if dropped:
	        print(f"⚠️ Dropped {dropped} row(s) with missing or non-finite ROI/engagement values.")
	    if scored.empty:
	        return scored

	    scored['norm_engagement'] = _min_max_normalize(scored['engagement_rate'])
	    scored['norm_roi'] = _min_max_normalize(scored['roi'])
	    scored['smart_performance_score'] = (
	        0.6 * scored['norm_engagement'] + 0.4 * scored['norm_roi']
	    )
	    return scored


	def train_earning_optimizer():
	    """Train the earning-optimizer regressor and save it with its encoder.

	    Reads ``data/earnings_training_data.csv`` under ``ROOT_DIR``, derives the
	    ``smart_performance_score`` target (0.6 * min-max-normalised engagement
	    rate + 0.4 * min-max-normalised payment-per-follower), one-hot encodes
	    ``campaign_niche`` and ``content_format``, fits an ``XGBRegressor`` on the
	    encoded columns plus ``follower_count``, and writes
	    ``earnings_encoder.joblib`` and ``earnings_model.joblib`` into ``models/``
	    under ``ROOT_DIR``.

	    Data problems (missing/empty file, missing columns, no usable rows) are
	    reported with ``print`` and the function returns early without writing
	    anything.

	    Returns:
	        None.
	    """
	    print("--- Starting Earning Optimizer Model Training (Simplified) ---")

	    data_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
	    try:
	        df = pd.read_csv(data_path)
	    except (FileNotFoundError, pd.errors.EmptyDataError):
	        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
	        return
	    if df.empty:
	        print("⚠️ CSV file is empty. Aborting.")
	        return

	    categorical_features = ['campaign_niche', 'content_format']
	    required_columns = categorical_features + [
	        'payment_amount', 'follower_count', 'engagement_rate',
	    ]
	    missing_columns = [col for col in required_columns if col not in df.columns]
	    if missing_columns:
	        # Report in the same style as the other data problems instead of
	        # surfacing a bare KeyError further down.
	        print(f"🔴 ERROR: Missing required column(s) in {data_path}: {missing_columns}")
	        return

	    print("Creating 'Smart Performance Score'...")
	    df = _add_performance_score(df)
	    if df.empty:
	        print("⚠️ No usable rows left after cleaning. Aborting.")
	        return

	    print("Preparing data MANUALLY without Pipeline...")

	    # 1. Manually encode categorical features
	    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
	    encoded_cats = encoder.fit_transform(df[categorical_features])
	    encoded_df = pd.DataFrame(
	        encoded_cats,
	        columns=encoder.get_feature_names_out(categorical_features),
	    )

	    # 2. Combine with numerical features (both frames share a fresh 0..n-1
	    #    index, so the column-wise concat lines rows up one-to-one)
	    numerical_features = df[['follower_count']].reset_index(drop=True)
	    X_final = pd.concat([encoded_df, numerical_features], axis=1)
	    y = df['smart_performance_score']

	    # Train the model DIRECTLY on the prepared data
	    print("Training the XGBoost model...")
	    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
	    model.fit(X_final, y)
	    print("--- Model training complete! ---")

	    # Persist both artifacts only after a successful fit, so a failed run
	    # never leaves a fresh encoder sitting next to a stale model.
	    models_dir = os.path.join(ROOT_DIR, 'models')
	    os.makedirs(models_dir, exist_ok=True)  # absent on a fresh checkout

	    joblib.dump(encoder, os.path.join(models_dir, 'earnings_encoder.joblib'))
	    print("--- Encoder saved successfully! ---")

	    # Save the simple model (not the pipeline)
	    model_path = os.path.join(models_dir, 'earnings_model.joblib')
	    joblib.dump(model, model_path)
	    print(f"--- SIMPLE Model saved successfully to {model_path} ---")

	# Script entry point: run a full training pass only when this file is
	# executed directly, not when it is imported.
	if __name__ == "__main__":
	    train_earning_optimizer()