reachify-ai-service / scripts /export_revenue_data.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
raw
history blame
2.49 kB
# FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)
import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
from pathlib import Path
def export_revenue_data():
"""
Connects to the database, fetches all 'paid' invoices, aggregates
the revenue by month, and saves it to a time-series CSV file.
"""
print("--- Starting Revenue Data Export Process ---")
# --- Setup to find the .env file in the root directory ---
try:
# Assumes the script is in a 'scripts' folder, two levels down from root.
# E.g. /ai-service/scripts/ -> /ai-service/
# If your script is elsewhere, adjust the Path().resolve().parents index.
root_dir = Path(__file__).resolve().parents[1]
sys.path.append(str(root_dir))
load_dotenv(dotenv_path=root_dir / '.env')
db_url = os.getenv("DATABASE_URL")
if not db_url:
raise ValueError("DATABASE_URL not found in .env file.")
except Exception as e:
print(f"πŸ”΄ ERROR setting up environment: {e}")
return
# --- SQL Query to get monthly revenue from paid invoices ---
# It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
# We assume 'updated_at' is the payment date for this query.
sql_query = """
SELECT
date_trunc('month', updated_at)::date AS month,
SUM(amount) AS total_revenue
FROM
public.invoices
WHERE
status = 'paid'
GROUP BY
month
ORDER BY
month ASC;
"""
try:
print("Connecting to Supabase to fetch revenue data...")
engine = create_engine(db_url)
with engine.connect() as connection:
df = pd.DataFrame(connection.execute(text(sql_query)))
print(f"βœ… Fetched {len(df)} months of revenue data from the database.")
except Exception as e:
print(f"πŸ”΄ ERROR fetching revenue data: {e}")
return
if df.empty:
print("⚠️ No 'paid' invoices found. An empty CSV will be created.")
df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers
# --- Save the data to the /data folder ---
output_path = root_dir / 'data' / 'revenue_training_data.csv'
df.to_csv(output_path, index=False)
print(f"πŸŽ‰ Success! Revenue data saved to {output_path}")
if __name__ == '__main__':
export_revenue_data()