reachify-ai-service / core /predictor.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
raw
history blame
3.63 kB
# FILE: ai-service/core/predictor.py (REPLACE EVERYTHING IN YOUR FILE WITH THIS)
import joblib
import pandas as pd
from typing import List, Dict
print(">>> Loading ai-service/core/predictor.py (Version: FINAL, COMPLETE)")

# Load the influencer-matching pipeline once, at import time.
# When the file is missing the name is still bound (to None) so that
# rank_influencers_by_match can detect the situation and return an empty list.
try:
    influencer_pipeline = joblib.load('models/influencer_matcher_v1.joblib')
except FileNotFoundError as e:
    print(f"--- Predictor FATAL ERROR: Model file not found: {e}. Predictions will fail. ---")
    influencer_pipeline = None
else:
    print("--- Predictor: Influencer Matcher model loaded successfully. ---")
# Load the performance-prediction pipeline once, at import time.
try:
    performance_pipeline = joblib.load('models/performance_predictor_v1.joblib')
    print("--- Predictor: Performance Predictor model loaded successfully. ---")
except FileNotFoundError as e:
    # Report the failure so a missing model is visible in the logs:
    # predict_performance returns a constant fallback when this name is None,
    # which is otherwise indistinguishable from a real prediction.
    print(
        f"--- Predictor WARNING: Performance model file not found: {e}. "
        "predict_performance will return its fallback value. ---"
    )
    performance_pipeline = None
def rank_influencers_by_match(influencers: List[Dict], campaign_details: Dict, top_n: int = 5) -> List[Dict]:
    """Rank influencers against a campaign and return the best matches.

    One feature row is built per influencer from its ``category`` and ``bio``
    plus campaign-level values that are broadcast to every row. The rows are
    scored with the module-level ``influencer_pipeline`` and the ``top_n``
    highest-scoring influencers are returned.

    Args:
        influencers: Influencer records as dicts. ``category`` and ``bio``
            are fed to the model; missing values are treated as empty strings.
        campaign_details: Campaign attributes. The keys ``category``,
            ``location``, ``followers`` and ``engagement_rate`` are read, each
            with a default when absent. ``None`` is treated as an empty dict.
        top_n: Maximum number of influencers to return. A non-positive value
            yields an empty list.

    Returns:
        Up to ``top_n`` dicts ordered by descending match score, restricted to
        whichever of ``id``, ``name``, ``handle``, ``followers``, ``category``
        and ``bio`` are present in the input. An empty list is returned when
        there is nothing to rank, when the model is not loaded, or when any
        step of the prediction fails (the error is printed with a traceback).
    """
    # Guard the count so that a None argument is logged instead of raising.
    count = len(influencers) if influencers else 0
    print(f"--- Predictor Skill: Ranking {count} influencers...")

    # Nothing to rank, or nothing requested. A negative top_n would otherwise
    # make DataFrame.head return "all but the last n" rows.
    if not influencers or top_n <= 0:
        return []
    if influencer_pipeline is None:
        return []

    campaign = campaign_details or {}

    try:
        # Step 1: build a DataFrame from the list of influencer records.
        influencer_df = pd.DataFrame(influencers)

        # Influencer-side features. reindex (rather than [[...]]) tolerates a
        # column that no record provides; gaps become empty strings.
        features = influencer_df.reindex(columns=['category', 'bio']).fillna('')

        # Campaign-side features: identical for every influencer, with
        # defaults for anything the backend did not send.
        features['niche'] = campaign.get('category', '')  # assumes campaign category maps to niche
        features['country'] = campaign.get('location', 'USA')
        # NOTE(review): followers / engagement_rate are read from the campaign,
        # not from each influencer, so every row receives the same value.
        # Confirm this matches how the model was trained.
        features['followers'] = campaign.get('followers', 10000)
        features['engagement_rate'] = campaign.get('engagement_rate', 0.03)
        print(f"--- Predictor Skill: Preparing features for model: {features.columns.to_list()}")

        # Step 2: score every influencer with the loaded pipeline.
        influencer_df['match_score'] = influencer_pipeline.predict(features)

        # Step 3: keep the best top_n. A stable sort keeps input order among
        # equal scores, so the result is deterministic.
        top_influencers_df = influencer_df.sort_values(
            by='match_score', ascending=False, kind='mergesort'
        ).head(top_n)

        # Step 4: return only the public columns that actually exist.
        result_cols = ['id', 'name', 'handle', 'followers', 'category', 'bio']
        final_cols = [col for col in result_cols if col in top_influencers_df.columns]
        results = top_influencers_df[final_cols].to_dict(orient='records')
        print(f"--- Predictor Skill: Successfully ranked and returning top {len(results)} influencers.")
        return results
    except Exception as e:
        # Best-effort boundary: a failed ranking is reported and yields no
        # matches instead of propagating to the caller.
        print(f"--- Predictor Skill ERROR: Failed during prediction process. Error: {e}")
        import traceback
        traceback.print_exc()
        return []
def predict_performance(data: dict) -> int:
    """Predict a campaign's ``final_reach`` from its details.

    Args:
        data: Campaign fields; they are turned into a one-row DataFrame and
            passed to the module-level ``performance_pipeline``.

    Returns:
        The first predicted value converted to ``int``, or the constant
        ``800000`` when the performance model is not loaded.
    """
    model = performance_pipeline
    if model is not None:
        # index=[0] makes a single-row frame out of the mapping's values.
        single_row = pd.DataFrame(data, index=[0])
        return int(model.predict(single_row)[0])
    # Fallback value used when the model could not be loaded.
    return 800000