# File: ai-service/scripts/export_training_data.py (FINAL VERSION) import pandas as pd from sqlalchemy import create_engine, text import os from dotenv import load_dotenv import sys # Get the root directory of the ai-service ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(ROOT_DIR) load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env')) def export_data(): """Connects to the database and saves the training data to a CSV file.""" print("--- Starting Data Export Process ---") db_url = os.getenv("DATABASE_URL") if not db_url: print("🔴 ERROR: DATABASE_URL not found.") return # SQL Query waisi hi rahegi sql_query = """ SELECT cs.engagement_rate, ip.follower_count, p.amount AS payment_amount, CASE WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech' WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion' WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food' WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming' ELSE 'General' END AS campaign_niche, CASE WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel' WHEN c.content_guidelines ILIKE '%story%' THEN 'Story' ELSE 'Post' END AS content_format FROM public.campaign_submissions cs JOIN public.campaigns c ON cs.campaign_id = c.id JOIN public.payments p ON c.id = p.campaign_id AND cs.influencer_id = p.influencer_id JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id WHERE cs.engagement_rate IS NOT NULL AND ip.follower_count > 0 AND p.amount IS NOT NULL; """ try: print("Connecting to Supabase to fetch live data...") engine = create_engine(db_url) # === ✨ THE FIX IS HERE ✨ === # Hum ab seedhe connection ka istemal karke data nikalenge with engine.connect() as connection: # `text()` function zaroori hai SQLAlchemy ke naye versions ke liye result = connection.execute(text(sql_query)) rows = result.fetchall() # Saari rows ko ek list mein nikaalo # Agar koi data nahi mila if not rows: print("⚠️ WARNING: No training data found in the database. An empty CSV will be created.") df = pd.DataFrame() else: # Us list of rows se DataFrame banao df = pd.DataFrame(rows, columns=result.keys()) # === ✨ FIX ENDS HERE ✨ === print(f"✅ Fetched {len(df)} records from the database.") except Exception as e: print(f"🔴 ERROR fetching data: {e}") return # Data ko /data folder mein save karo output_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv') df.to_csv(output_path, index=False) print(f"🎉 Success! Data saved to {output_path}") if __name__ == '__main__': export_data()