|
|
|
|
|
|
|
|
import pandas as pd |
|
|
import xgboost as xgb |
|
|
import joblib |
|
|
import os |
|
|
import sys |
|
|
from sklearn.preprocessing import OneHotEncoder |
|
|
|
|
|
# Directory that holds this script, as an absolute path.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Project root: the parent of the script's directory. The data and model
# paths used by the training routine are resolved against it.
ROOT_DIR = os.path.dirname(_SCRIPT_DIR)

# Put the project root on the module search path
# (presumably so project-local packages can be imported -- confirm).
sys.path.append(ROOT_DIR)
|
|
|
|
|
def _min_max_normalize(series):
    """Scale *series* linearly onto the range [0, 1].

    When the series has no spread (all values identical, or nothing but
    missing values) there is nothing to scale against, so every entry is
    mapped to the neutral midpoint 0.5 instead of dividing by zero.
    """
    lowest = series.min()
    spread = series.max() - lowest
    if pd.isna(spread) or spread == 0:
        return pd.Series(0.5, index=series.index)
    return (series - lowest) / spread


def train_earning_optimizer():
    """Train the earning-optimizer regressor and save it with its encoder.

    Steps:
      1. Load ``data/earnings_training_data.csv`` under ``ROOT_DIR``.
      2. Discard rows that cannot yield a finite score (missing numeric
         values or a non-positive ``follower_count``).
      3. Build the target ``smart_performance_score`` as
         ``0.6 * normalised engagement_rate + 0.4 * normalised ROI``, where
         ROI is ``payment_amount / follower_count``.
      4. One-hot encode ``campaign_niche`` and ``content_format`` and append
         ``follower_count`` to form the feature matrix.
      5. Fit an ``XGBRegressor`` and write the encoder and the model to
         ``models/`` under ``ROOT_DIR`` (the directory is created if absent).

    Problems with the input (missing or empty file, missing columns, no
    usable rows) are reported with a printed message and an early return
    rather than an exception.

    Returns:
        None. Progress is printed and artifacts are written to disk.
    """
    print("--- Starting Earning Optimizer Model Training (Simplified) ---")

    data_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
    # Only the read itself is guarded, so unrelated errors are not masked.
    try:
        df = pd.read_csv(data_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return
    if df.empty:
        print("⚠️ CSV file is empty. Aborting.")
        return

    categorical_features = ['campaign_niche', 'content_format']
    numeric_columns = ['payment_amount', 'follower_count', 'engagement_rate']

    # Report a schema mismatch in the same style as the other input errors
    # instead of letting a bare KeyError escape further down.
    missing_columns = [
        column
        for column in [*numeric_columns, *categorical_features]
        if column not in df.columns
    ]
    if missing_columns:
        print(f"🔴 ERROR: Data file is missing required columns: {missing_columns}")
        return

    # A zero follower_count makes payment_amount / follower_count infinite
    # (or NaN), and a missing numeric value propagates NaN into the target.
    # Either one would poison the min-max scaling below, so drop those rows.
    usable_rows = df[numeric_columns].notna().all(axis=1) & (df['follower_count'] > 0)
    dropped_count = int((~usable_rows).sum())
    if dropped_count:
        print(f"⚠️ Dropping {dropped_count} row(s) with missing values or a non-positive follower_count.")
    # Reset the index so every frame built below lines up row for row.
    df = df.loc[usable_rows].reset_index(drop=True)
    if df.empty:
        print("⚠️ No usable rows left after cleaning. Aborting.")
        return

    print("Creating 'Smart Performance Score'...")
    df['roi'] = df['payment_amount'] / df['follower_count']
    df['norm_engagement'] = _min_max_normalize(df['engagement_rate'])
    df['norm_roi'] = _min_max_normalize(df['roi'])
    df['smart_performance_score'] = 0.6 * df['norm_engagement'] + 0.4 * df['norm_roi']

    print("Preparing data MANUALLY without Pipeline...")

    # handle_unknown='ignore' makes the fitted encoder emit all-zero columns
    # for categories it did not see during fitting instead of raising.
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    encoded_cats = encoder.fit_transform(df[categorical_features])
    encoded_df = pd.DataFrame(
        encoded_cats,
        columns=encoder.get_feature_names_out(categorical_features),
    )

    numerical_features = df[['follower_count']].reset_index(drop=True)
    X_final = pd.concat([encoded_df, numerical_features], axis=1)
    y = df['smart_performance_score']

    # joblib.dump does not create parent directories, so make sure the
    # output directory exists before the first artifact is written.
    models_dir = os.path.join(ROOT_DIR, 'models')
    os.makedirs(models_dir, exist_ok=True)

    joblib.dump(encoder, os.path.join(models_dir, 'earnings_encoder.joblib'))
    print("--- Encoder saved successfully! ---")

    print("Training the XGBoost model...")
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
    model.fit(X_final, y)
    print("--- Model training complete! ---")

    model_path = os.path.join(models_dir, 'earnings_model.joblib')
    joblib.dump(model, model_path)
    print(f"--- SIMPLE Model saved successfully to {model_path} ---")
|
|
|
|
|
|
|
|
# Script entry point: run the training routine only when this file is
# executed directly, never as a side effect of importing it.
if __name__ == "__main__":
    train_earning_optimizer()