reachify-ai-service / training /train_earning_optimizer.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
raw
history blame
2.8 kB
# File: ai-service/training/train_earning_optimizer.py (FINAL SIMPLIFIED VERSION)
import pandas as pd
import xgboost as xgb
import joblib
import os
import sys
from sklearn.preprocessing import OneHotEncoder
# ROOT_DIR is the parent of the directory that contains this script. It is
# appended to sys.path so modules located under that root become importable.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(_SCRIPT_DIR)
sys.path.append(ROOT_DIR)
def train_earning_optimizer():
    """Train the earning-optimizer regressor and persist it with its encoder.

    Reads ``data/earnings_training_data.csv`` under ``ROOT_DIR``, derives a
    "smart performance score" target (0.6 * min-max-normalized engagement
    rate + 0.4 * min-max-normalized payment-per-follower), one-hot encodes
    the categorical columns, fits an ``XGBRegressor`` on the encoded
    categories plus ``follower_count``, and writes ``earnings_encoder.joblib``
    and ``earnings_model.joblib`` into ``ROOT_DIR/models``.

    Input problems (missing file, empty file, missing columns, no usable
    rows) are reported on stdout and the function returns without writing
    any artifact.

    Returns:
        None.
    """
    print("--- Starting Earning Optimizer Model Training (Simplified) ---")
    data_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')

    # Keep the try body down to the one call that can raise these errors.
    try:
        df = pd.read_csv(data_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return
    if df.empty:
        print("⚠️ CSV file is empty. Aborting.")
        return

    numeric_columns = ['payment_amount', 'follower_count', 'engagement_rate']
    categorical_features = ['campaign_niche', 'content_format']
    missing_columns = [
        column
        for column in numeric_columns + categorical_features
        if column not in df.columns
    ]
    if missing_columns:
        print(f"🔴 ERROR: Missing required columns in {data_path}: {missing_columns}")
        return

    # Rows with a zero follower count would yield inf/NaN ROI; a single inf
    # collapses the min-max scaling for every other row and puts NaN in the
    # target, so such rows are removed before any feature engineering.
    total_rows = len(df)
    df = _drop_unusable_rows(df, numeric_columns)
    dropped_rows = total_rows - len(df)
    if dropped_rows:
        print(
            f"⚠️ Dropped {dropped_rows} row(s) with missing/non-numeric values "
            "or a non-positive follower_count."
        )
    if df.empty:
        print("⚠️ No usable training rows after cleaning. Aborting.")
        return

    print("Creating 'Smart Performance Score'...")
    df['roi'] = df['payment_amount'] / df['follower_count']
    df['norm_engagement'] = _min_max_or_midpoint(df['engagement_rate'])
    df['norm_roi'] = _min_max_or_midpoint(df['roi'])
    df['smart_performance_score'] = 0.6 * df['norm_engagement'] + 0.4 * df['norm_roi']

    print("Preparing data MANUALLY without Pipeline...")
    # 1. Manually encode categorical features.
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    encoded_cats = encoder.fit_transform(df[categorical_features])
    encoded_df = pd.DataFrame(
        encoded_cats,
        columns=encoder.get_feature_names_out(categorical_features),
    )

    # 2. Combine with numerical features. Both frames carry a fresh
    #    0..n-1 index, so the column-wise concat lines up row for row.
    numerical_features = df[['follower_count']].reset_index(drop=True)
    X_final = pd.concat([encoded_df, numerical_features], axis=1)
    y = df['smart_performance_score']

    # Train the model DIRECTLY on the prepared data.
    print("Training the XGBoost model...")
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
    model.fit(X_final, y)
    print("--- Model training complete! ---")

    # Persist only after training succeeded so the encoder on disk always
    # matches the model on disk; create the output directory if needed.
    models_dir = os.path.join(ROOT_DIR, 'models')
    os.makedirs(models_dir, exist_ok=True)

    joblib.dump(encoder, os.path.join(models_dir, 'earnings_encoder.joblib'))
    print("--- Encoder saved successfully! ---")

    # Save the simple model (not the pipeline).
    model_path = os.path.join(models_dir, 'earnings_model.joblib')
    joblib.dump(model, model_path)
    print(f"--- SIMPLE Model saved successfully to {model_path} ---")


def _drop_unusable_rows(df, numeric_columns):
    """Return a re-indexed copy of *df* keeping rows with finite numeric inputs and follower_count > 0."""
    cleaned = df.copy()
    for column in numeric_columns:
        # Non-numeric cells become NaN instead of raising.
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')
    cleaned[numeric_columns] = cleaned[numeric_columns].replace(
        [float('inf'), float('-inf')], float('nan')
    )
    usable = cleaned[numeric_columns].notna().all(axis=1) & (cleaned['follower_count'] > 0)
    return cleaned[usable].reset_index(drop=True)


def _min_max_or_midpoint(values):
    """Min-max scale *values* to [0, 1]; return 0.5 for every row when they are all equal."""
    low = values.min()
    high = values.max()
    if high == low:
        return pd.Series(0.5, index=values.index)
    return (values - low) / (high - low)
# Run the training job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train_earning_optimizer()