Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Files Community

reachify-ai-service / scripts /export_revenue_data.py

amitbhatt6075

Complete fresh start - FINAL UPLOAD

0914e96 17 days ago

raw

history blame

2.49 kB

	# FILE: ai-service/scripts/export_revenue_data.py (NEW FILE)

	import pandas as pd
	from sqlalchemy import create_engine, text
	import os
	from dotenv import load_dotenv
	import sys
	from pathlib import Path

	def export_revenue_data():
	"""
	Connects to the database, fetches all 'paid' invoices, aggregates
	the revenue by month, and saves it to a time-series CSV file.
	"""
	print("--- Starting Revenue Data Export Process ---")

	# --- Setup to find the .env file in the root directory ---
	try:
	# Assumes the script is in a 'scripts' folder, two levels down from root.
	# E.g. /ai-service/scripts/ -> /ai-service/
	# If your script is elsewhere, adjust the Path().resolve().parents index.
	root_dir = Path(__file__).resolve().parents[1]
	sys.path.append(str(root_dir))
	load_dotenv(dotenv_path=root_dir / '.env')
	db_url = os.getenv("DATABASE_URL")
	if not db_url:
	raise ValueError("DATABASE_URL not found in .env file.")
	except Exception as e:
	print(f"🔴 ERROR setting up environment: {e}")
	return

	# --- SQL Query to get monthly revenue from paid invoices ---
	# It's important to have an 'updated_at' field that is correctly set when status becomes 'paid'.
	# We assume 'updated_at' is the payment date for this query.
	sql_query = """
	SELECT
	date_trunc('month', updated_at)::date AS month,
	SUM(amount) AS total_revenue
	FROM
	public.invoices
	WHERE
	status = 'paid'
	GROUP BY
	month
	ORDER BY
	month ASC;
	"""
	try:
	print("Connecting to Supabase to fetch revenue data...")
	engine = create_engine(db_url)

	with engine.connect() as connection:
	df = pd.DataFrame(connection.execute(text(sql_query)))

	print(f"✅ Fetched {len(df)} months of revenue data from the database.")

	except Exception as e:
	print(f"🔴 ERROR fetching revenue data: {e}")
	return

	if df.empty:
	print("⚠️ No 'paid' invoices found. An empty CSV will be created.")
	df = pd.DataFrame(columns=['month', 'total_revenue']) # Ensure CSV has correct headers

	# --- Save the data to the /data folder ---
	output_path = root_dir / 'data' / 'revenue_training_data.csv'
	df.to_csv(output_path, index=False)
	print(f"🎉 Success! Revenue data saved to {output_path}")

	if __name__ == '__main__':
	export_revenue_data()