|
|
|
|
|
|
|
|
import pandas as pd |
|
|
from sqlalchemy import create_engine, text |
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
def export_revenue_data(): |
|
|
""" |
|
|
Connects to the database, fetches all 'paid' invoices, aggregates |
|
|
the revenue by month, and saves it to a time-series CSV file. |
|
|
""" |
|
|
print("--- Starting Revenue Data Export Process ---") |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
|
|
|
|
|
|
root_dir = Path(__file__).resolve().parents[1] |
|
|
sys.path.append(str(root_dir)) |
|
|
load_dotenv(dotenv_path=root_dir / '.env') |
|
|
db_url = os.getenv("DATABASE_URL") |
|
|
if not db_url: |
|
|
raise ValueError("DATABASE_URL not found in .env file.") |
|
|
except Exception as e: |
|
|
print(f"π΄ ERROR setting up environment: {e}") |
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sql_query = """ |
|
|
SELECT |
|
|
date_trunc('month', updated_at)::date AS month, |
|
|
SUM(amount) AS total_revenue |
|
|
FROM |
|
|
public.invoices |
|
|
WHERE |
|
|
status = 'paid' |
|
|
GROUP BY |
|
|
month |
|
|
ORDER BY |
|
|
month ASC; |
|
|
""" |
|
|
try: |
|
|
print("Connecting to Supabase to fetch revenue data...") |
|
|
engine = create_engine(db_url) |
|
|
|
|
|
with engine.connect() as connection: |
|
|
df = pd.DataFrame(connection.execute(text(sql_query))) |
|
|
|
|
|
print(f"β
Fetched {len(df)} months of revenue data from the database.") |
|
|
|
|
|
except Exception as e: |
|
|
print(f"π΄ ERROR fetching revenue data: {e}") |
|
|
return |
|
|
|
|
|
if df.empty: |
|
|
print("β οΈ No 'paid' invoices found. An empty CSV will be created.") |
|
|
df = pd.DataFrame(columns=['month', 'total_revenue']) |
|
|
|
|
|
|
|
|
output_path = root_dir / 'data' / 'revenue_training_data.csv' |
|
|
df.to_csv(output_path, index=False) |
|
|
print(f"π Success! Revenue data saved to {output_path}") |
|
|
|
|
|
if __name__ == '__main__': |
|
|
export_revenue_data() |