"""Prediction helpers for the AI service (ai-service/core/predictor.py).

Two pre-trained pipelines are loaded once, at import time, from paths that
are relative to the process working directory:

* ``influencer_pipeline``  -- scores how well an influencer matches a campaign.
* ``performance_pipeline`` -- predicts the final reach of a campaign.

If a model file is missing the corresponding global is set to ``None`` and the
public functions degrade gracefully (empty ranking / fixed fallback reach)
instead of raising.
"""
import traceback
from typing import Dict, List

import joblib
import pandas as pd

print(">>> Loading ai-service/core/predictor.py (Version: FINAL, COMPLETE)")

# Reach reported by predict_performance() when no model could be loaded.
FALLBACK_REACH = 800000

# Influencer-side columns fed to the matcher model.
_INFLUENCER_FEATURE_COLUMNS = ('category', 'bio')

# Columns returned to the caller for each ranked influencer (when present).
_RESULT_COLUMNS = ['id', 'name', 'handle', 'followers', 'category', 'bio']

# Load the influencer matcher model.
try:
    influencer_pipeline = joblib.load('models/influencer_matcher_v1.joblib')
except FileNotFoundError as e:
    print(f"--- Predictor FATAL ERROR: Model file not found: {e}. Predictions will fail. ---")
    influencer_pipeline = None
else:
    print("--- Predictor: Influencer Matcher model loaded successfully. ---")

# Load the performance predictor model the same way.
try:
    performance_pipeline = joblib.load('models/performance_predictor_v1.joblib')
except FileNotFoundError as e:
    # Not fatal: predict_performance() falls back to FALLBACK_REACH. Log it so
    # the degraded mode is visible instead of silent.
    print(
        f"--- Predictor WARNING: Performance model file not found: {e}. "
        f"Falling back to a fixed reach of {FALLBACK_REACH}. ---"
    )
    performance_pipeline = None
else:
    print("--- Predictor: Performance Predictor model loaded successfully. ---")


def rank_influencers_by_match(influencers: List[Dict], campaign_details: Dict, top_n: int = 5) -> List[Dict]:
    """Rank influencers for a campaign and return the best ``top_n``.

    Args:
        influencers: One dict per influencer. ``category`` and ``bio`` are used
            as model features; a missing key or value is treated as ``''``.
        campaign_details: Campaign attributes. ``category``, ``location``,
            ``followers`` and ``engagement_rate`` are read, each with a
            default. ``None`` is treated as an empty dict.
        top_n: Maximum number of influencers to return.

    Returns:
        Up to ``top_n`` dicts ordered by descending model score, restricted to
        the keys ``id``, ``name``, ``handle``, ``followers``, ``category`` and
        ``bio`` that are present in the input. An empty list is returned when
        there is nothing to rank, the model is not loaded, ``top_n`` is not
        positive, or prediction fails (the error is logged, not raised).
    """
    # Guard before len(): a None input must yield [], not a TypeError.
    influencer_count = len(influencers) if influencers else 0
    print(f"--- Predictor Skill: Ranking {influencer_count} influencers...")

    if not influencers or influencer_pipeline is None:
        return []

    campaign = campaign_details or {}

    try:
        # DataFrame.head() with a negative n means "all but the last n rows",
        # which is never what a caller asking for the top N wants.
        if top_n <= 0:
            return []

        # Step 1: build a DataFrame from the list of influencers.
        influencer_df = pd.DataFrame(influencers)

        # 1. Features that come from each influencer. Missing columns/values
        #    become '' so that one incomplete record does not abort the whole
        #    ranking.
        features = pd.DataFrame(index=influencer_df.index)
        for column in _INFLUENCER_FEATURE_COLUMNS:
            if column in influencer_df.columns:
                column_values = influencer_df[column]
                features[column] = column_values.where(column_values.notna(), '')
            else:
                features[column] = ''

        # 2. Features that come from the campaign (identical for every
        #    influencer), with defaults in case the backend omits them.
        # NOTE(review): 'followers' and 'engagement_rate' are taken from the
        # campaign, not from each influencer, so they are constant across
        # rows -- confirm this matches how the model was trained.
        features['niche'] = campaign.get('category', '')  # assumes campaign category maps to niche
        features['country'] = campaign.get('location', 'USA')
        features['followers'] = campaign.get('followers', 10000)
        features['engagement_rate'] = campaign.get('engagement_rate', 0.03)

        print(f"--- Predictor Skill: Preparing features for model: {features.columns.to_list()}")

        # Step 2: predict with the assembled features.
        influencer_df['match_score'] = influencer_pipeline.predict(features)

        # Step 3: sort by score and keep the top influencers.
        top_influencers_df = influencer_df.sort_values(by='match_score', ascending=False).head(top_n)

        # Step 4: return a clean payload; skip result columns that are absent.
        final_cols = [col for col in _RESULT_COLUMNS if col in top_influencers_df.columns]
        results = top_influencers_df[final_cols].to_dict(orient='records')

        print(f"--- Predictor Skill: Successfully ranked and returning top {len(results)} influencers.")
        return results

    except Exception as e:
        # Service boundary: log the full traceback and degrade to "no matches"
        # rather than propagating a model/feature error to the caller.
        print(f"--- Predictor Skill ERROR: Failed during prediction process. Error: {e}")
        traceback.print_exc()
        return []


def predict_performance(data: dict) -> int:
    """Predict the ``final_reach`` of a campaign from its details.

    Args:
        data: Mapping of feature name to value describing one campaign; it is
            turned into a single-row DataFrame for the model.

    Returns:
        The model's prediction truncated to ``int``, or ``FALLBACK_REACH``
        (800000) when the performance model is not loaded.
    """
    if performance_pipeline is None:
        return FALLBACK_REACH  # fallback value when the model is not loaded

    df = pd.DataFrame(data, index=[0])
    prediction = performance_pipeline.predict(df)
    return int(prediction[0])