# FILE: ai-service/scripts/export_revenue_data.py (NEW FILE) import pandas as pd from sqlalchemy import create_engine, text import os from dotenv import load_dotenv import sys from pathlib import Path def export_revenue_data(): """ Connects to the database, fetches all 'paid' invoices, aggregates the revenue by month, and saves it to a time-series CSV file. """ print("--- Starting Revenue Data Export Process ---") # --- Setup to find the .env file in the root directory --- try: # Assumes the script is in a 'scripts' folder, two levels down from root. # E.g. /ai-service/scripts/ -> /ai-service/ # If your script is elsewhere, adjust the Path().resolve().parents index. root_dir = Path(__file__).resolve().parents[1] sys.path.append(str(root_dir)) load_dotenv(dotenv_path=root_dir / '.env') db_url = os.getenv("DATABASE_URL") if not db_url: raise ValueError("DATABASE_URL not found in .env file.") except Exception as e: print(f"🔴 ERROR setting up environment: {e}") return # --- SQL Query to get monthly revenue from paid invoices --- # It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'. # We assume 'updated_at' is the payment date for this query. sql_query = """ SELECT date_trunc('month', updated_at)::date AS month, SUM(amount) AS total_revenue FROM public.invoices WHERE status = 'paid' GROUP BY month ORDER BY month ASC; """ try: print("Connecting to Supabase to fetch revenue data...") engine = create_engine(db_url) with engine.connect() as connection: df = pd.DataFrame(connection.execute(text(sql_query))) print(f"✅ Fetched {len(df)} months of revenue data from the database.") except Exception as e: print(f"🔴 ERROR fetching revenue data: {e}") return if df.empty: print("⚠️ No 'paid' invoices found. An empty CSV will be created.") df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers # --- Save the data to the /data folder --- output_path = root_dir / 'data' / 'revenue_training_data.csv' df.to_csv(output_path, index=False) print(f"🎉 Success! Revenue data saved to {output_path}") if __name__ == '__main__': export_revenue_data()