File size: 2,491 Bytes
0914e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)

import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
from pathlib import Path

def export_revenue_data():
    """
    Connects to the database, fetches all 'paid' invoices, aggregates
    the revenue by month, and saves it to a time-series CSV file.
    """
    print("--- Starting Revenue Data Export Process ---")

    # --- Setup to find the .env file in the root directory ---
    try:
        # Assumes the script is in a 'scripts' folder, two levels down from root.
        # E.g. /ai-service/scripts/ -> /ai-service/
        # If your script is elsewhere, adjust the Path().resolve().parents index.
        root_dir = Path(__file__).resolve().parents[1]
        sys.path.append(str(root_dir))
        load_dotenv(dotenv_path=root_dir / '.env')
        db_url = os.getenv("DATABASE_URL")
        if not db_url:
            raise ValueError("DATABASE_URL not found in .env file.")
    except Exception as e:
         print(f"πŸ”΄ ERROR setting up environment: {e}")
         return

    # --- SQL Query to get monthly revenue from paid invoices ---
    # It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
    # We assume 'updated_at' is the payment date for this query.
    sql_query = """
    SELECT
        date_trunc('month', updated_at)::date AS month,
        SUM(amount) AS total_revenue
    FROM
        public.invoices
    WHERE
        status = 'paid'
    GROUP BY
        month
    ORDER BY
        month ASC;
    """
    try:
        print("Connecting to Supabase to fetch revenue data...")
        engine = create_engine(db_url)
        
        with engine.connect() as connection:
            df = pd.DataFrame(connection.execute(text(sql_query)))

        print(f"βœ… Fetched {len(df)} months of revenue data from the database.")
    
    except Exception as e:
        print(f"πŸ”΄ ERROR fetching revenue data: {e}")
        return

    if df.empty:
        print("⚠️ No 'paid' invoices found. An empty CSV will be created.")
        df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers
    
    # --- Save the data to the /data folder ---
    output_path = root_dir / 'data' / 'revenue_training_data.csv'
    df.to_csv(output_path, index=False)
    print(f"πŸŽ‰ Success! Revenue data saved to {output_path}")

if __name__ == '__main__':
    export_revenue_data()