# File: ai-service/scripts/export_performance_data.py import pandas as pd from sqlalchemy import create_engine, text import os from dotenv import load_dotenv import sys # Root directory ko path mein add karein taaki .env file mil sake ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(ROOT_DIR) load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env')) def export_performance_data(): """ Connects to the Supabase database, fetches data from approved submissions, and saves it to a CSV file for training the performance prediction model. """ print("--- Starting Performance Data Export Process ---") db_url = os.getenv("DATABASE_URL") if not db_url: print("🔴 ERROR: DATABASE_URL not found. Please check your .env file in the ai-service root.") return # Yeh SQL query hamare AI model ke liye 'khana' (training data) nikalegi. sql_query = """ SELECT cs.likes, cs.comments, cs.caption, ip.follower_count, CASE WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech' WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion' WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food' WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming' ELSE 'General' END AS campaign_niche, CASE WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel' WHEN c.content_guidelines ILIKE '%story%' THEN 'Story' ELSE 'Post' END AS content_format FROM public.campaign_submissions cs JOIN public.campaigns c ON cs.campaign_id = c.id JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id WHERE cs.status = 'approved' -- Sirf approved submissions se seekhein AND cs.likes IS NOT NULL -- Jin par likes ka data ho AND cs.comments IS NOT NULL -- Jin par comments ka data ho AND cs.caption IS NOT NULL -- Jin par caption ho AND ip.follower_count > 0; -- Jin influencers ke followers pata ho """ try: print("Connecting to Supabase to fetch performance data...") engine = create_engine(db_url) with engine.connect() as connection: df = pd.DataFrame(connection.execute(text(sql_query))) print(f"✅ Fetched {len(df)} approved submission records from the database.") except Exception as e: print(f"🔴 ERROR fetching data: {e}") return if df.empty: print("⚠️ No valid training data found. A blank CSV will be created.") else: # Feature Engineering: Caption ki lambai (length) ko ek feature banayein df['caption_length'] = df['caption'].str.len() # Sirf zaroori columns ko CSV me save karein columns_to_save = ['likes', 'comments', 'follower_count', 'caption_length', 'campaign_niche', 'content_format'] # Agar koi column na ho (khaali df ke case mein), toh use ignore karein df_to_save = df.reindex(columns=columns_to_save).fillna(0) # Data ko /data folder mein save karein output_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv') df_to_save.to_csv(output_path, index=False) print(f"🎉 Success! Performance data saved to {output_path}") if __name__ == '__main__': export_performance_data()