import os
import sys

import joblib
import pandas as pd
import xgboost as xgb
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Absolute path of the directory one level above the folder containing this file.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Expose ROOT_DIR on the import path (presumably so packages living under the
# root can be imported from this script -- confirm against the repo layout).
# Guarded so that re-executing this module (e.g. importlib.reload) does not
# keep piling duplicate entries onto sys.path.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)


# Input columns fed to both models, and the subset that is one-hot encoded.
_FEATURE_COLUMNS = ('follower_count', 'caption_length', 'campaign_niche', 'content_format')
_CATEGORICAL_COLUMNS = ('campaign_niche', 'content_format')

# One entry per model:
# (display label, target column, output file name, extra XGBRegressor kwargs).
_MODEL_SPECS = (
    ('Likes', 'likes', 'likes_predictor_v1.joblib', {}),
    ('Comments', 'comments', 'comments_predictor_v1.joblib', {'learning_rate': 0.05}),
)


def _load_training_data(data_path):
    """Read the training CSV; print the reason and return None if it is unusable."""
    try:
        df = pd.read_csv(data_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return None

    print(f"Loaded {len(df)} rows from {data_path}")
    if df.empty:
        print("⚠️ CSV file is empty. Aborting model training.")
        return None
    return df


def _build_pipeline(**regressor_overrides):
    """Return a new, unfitted one-hot-encoder + XGBRegressor pipeline."""
    # A fresh ColumnTransformer is created on every call so that each pipeline
    # owns its preprocessor instead of sharing one object across pipelines.
    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(handle_unknown='ignore'), list(_CATEGORICAL_COLUMNS)),
        ],
        remainder='passthrough',
    )
    regressor = xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=100,
        random_state=42,
        **regressor_overrides,
    )
    return Pipeline(steps=[('preprocessor', preprocessor), ('regressor', regressor)])


def train_performance_models(data_path=None, models_dir=None):
    """
    Train and save the 'likes' and 'comments' performance predictors.

    Loads the performance data CSV and fits two separate pipelines (one-hot
    encoding of the categorical columns followed by an XGBoost regressor):
    one predicting 'likes' and one predicting 'comments'. Each fitted pipeline
    is written to ``models_dir`` with joblib.

    Args:
        data_path: Path of the training CSV. Defaults to
            ``<ROOT_DIR>/data/performance_training_data.csv``.
        models_dir: Directory that receives the ``*.joblib`` files; created
            if it does not exist. Defaults to ``<ROOT_DIR>/models``.

    Returns:
        None. Progress and problems are reported on stdout. When the CSV is
        missing, empty, or lacks a required column, the function prints the
        reason and returns without training or writing anything.
    """
    print("--- Starting Performance Predictor Model Training ---")

    if data_path is None:
        data_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')

    df = _load_training_data(data_path)
    if df is None:
        return

    # Validate the schema up front so a malformed file produces a readable
    # message rather than a KeyError part-way through training.
    required_columns = list(_FEATURE_COLUMNS) + [spec[1] for spec in _MODEL_SPECS]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"🔴 ERROR: Data file at {data_path} is missing required columns: {missing_columns}")
        return

    if models_dir is None:
        models_dir = os.path.join(ROOT_DIR, 'models')
    # joblib.dump does not create parent directories, so make sure it exists.
    os.makedirs(models_dir, exist_ok=True)

    X = df[list(_FEATURE_COLUMNS)]

    for label, target_column, filename, regressor_overrides in _MODEL_SPECS:
        print(f"\n--- Training {label} Predictor Model ---")
        pipeline = _build_pipeline(**regressor_overrides)
        pipeline.fit(X, df[target_column])

        model_path = os.path.join(models_dir, filename)
        joblib.dump(pipeline, model_path)
        print(f"✅ {label} Predictor model saved to: {model_path}")

    print("\n🎉 All performance models trained and saved successfully!")


# Script entry point: train and save both predictors when this file is executed
# directly; importing the module does not start training.
if __name__ == "__main__":
    train_performance_models()