File size: 2,491 Bytes
0914e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)
import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
from pathlib import Path
def export_revenue_data():
"""
Connects to the database, fetches all 'paid' invoices, aggregates
the revenue by month, and saves it to a time-series CSV file.
"""
print("--- Starting Revenue Data Export Process ---")
# --- Setup to find the .env file in the root directory ---
try:
# Assumes the script is in a 'scripts' folder, two levels down from root.
# E.g. /ai-service/scripts/ -> /ai-service/
# If your script is elsewhere, adjust the Path().resolve().parents index.
root_dir = Path(__file__).resolve().parents[1]
sys.path.append(str(root_dir))
load_dotenv(dotenv_path=root_dir / '.env')
db_url = os.getenv("DATABASE_URL")
if not db_url:
raise ValueError("DATABASE_URL not found in .env file.")
except Exception as e:
print(f"π΄ ERROR setting up environment: {e}")
return
# --- SQL Query to get monthly revenue from paid invoices ---
# It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
# We assume 'updated_at' is the payment date for this query.
sql_query = """
SELECT
date_trunc('month', updated_at)::date AS month,
SUM(amount) AS total_revenue
FROM
public.invoices
WHERE
status = 'paid'
GROUP BY
month
ORDER BY
month ASC;
"""
try:
print("Connecting to Supabase to fetch revenue data...")
engine = create_engine(db_url)
with engine.connect() as connection:
df = pd.DataFrame(connection.execute(text(sql_query)))
print(f"β
Fetched {len(df)} months of revenue data from the database.")
except Exception as e:
print(f"π΄ ERROR fetching revenue data: {e}")
return
if df.empty:
print("β οΈ No 'paid' invoices found. An empty CSV will be created.")
df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers
# --- Save the data to the /data folder ---
output_path = root_dir / 'data' / 'revenue_training_data.csv'
df.to_csv(output_path, index=False)
print(f"π Success! Revenue data saved to {output_path}")
if __name__ == '__main__':
export_revenue_data() |