|
|
import os |
|
|
import pandas as pd |
|
|
from datetime import datetime, timedelta |
|
|
from dotenv import load_dotenv |
|
|
from supabase import create_client, Client |
|
|
from pytrends.request import TrendReq |
|
|
import time |
|
|
import random |
|
|
|
|
|
|
|
|
load_dotenv() |
|
|
SUPABASE_URL = os.getenv("SUPABASE_URL") |
|
|
SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_KEY") |
|
|
if not SUPABASE_URL or not SUPABASE_KEY: |
|
|
raise ValueError("Supabase URL and Service Key must be set.") |
|
|
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY) |
|
|
NICHES_TO_TRACK = ["fashion", "gaming", "fitness", "skincare", "finance"] |
|
|
MONTHS_TO_FETCH = 12 |
|
|
OUTPUT_FILE = os.path.join(os.path.dirname(__file__), '..', 'data', 'thunderbird_market_trends.csv') |
|
|
|
|
|
|
|
|
def get_successful_campaign_counts() -> pd.DataFrame: |
|
|
print("π Fetching successful campaign data from Supabase...") |
|
|
end_date = datetime.now() |
|
|
start_date = end_date - timedelta(days=MONTHS_TO_FETCH * 30) |
|
|
try: |
|
|
response = supabase.table('campaigns').select('id, title, description, created_at') \ |
|
|
.eq('status', 'completed') \ |
|
|
.gte('created_at', start_date.isoformat()) \ |
|
|
.lte('created_at', end_date.isoformat()) \ |
|
|
.execute() |
|
|
if not response.data: |
|
|
print("β οΈ No campaign data found in the specified date range.") |
|
|
return pd.DataFrame() |
|
|
df = pd.DataFrame(response.data) |
|
|
df['created_at'] = pd.to_datetime(df['created_at']) |
|
|
df['month'] = df['created_at'].dt.to_period('M') |
|
|
def assign_niche(row): |
|
|
text_to_search = f"{row.get('title', '')} {row.get('description', '')}".lower() |
|
|
for niche in NICHES_TO_TRACK: |
|
|
if niche in text_to_search: |
|
|
return niche |
|
|
return "general" |
|
|
df['niche'] = df.apply(assign_niche, axis=1) |
|
|
monthly_counts = df.groupby(['month', 'niche']).size().reset_index(name='successful_campaigns') |
|
|
print(f"β
Found and processed {len(df)} successful campaigns.") |
|
|
return monthly_counts |
|
|
except Exception as e: |
|
|
print(f"β Error fetching data from Supabase: {e}") |
|
|
return pd.DataFrame() |
|
|
|
|
|
|
|
|
def get_google_trends_data() -> pd.DataFrame: |
|
|
print("\nπ Fetching historical market interest from Google Trends (Robust Mode)...") |
|
|
|
|
|
|
|
|
pytrends = TrendReq(hl='en-US', tz=360, retries=5, backoff_factor=1) |
|
|
|
|
|
end_date = datetime.now() |
|
|
start_date = end_date - timedelta(days=MONTHS_TO_FETCH * 30) |
|
|
timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}" |
|
|
|
|
|
all_trends_df = pd.DataFrame() |
|
|
|
|
|
for niche in NICHES_TO_TRACK: |
|
|
print(f" - Fetching trend data for '{niche}'...") |
|
|
try: |
|
|
pytrends.build_payload([niche], cat=0, timeframe=timeframe, geo='', gprop='') |
|
|
interest_over_time_df = pytrends.interest_over_time() |
|
|
|
|
|
if not interest_over_time_df.empty and niche in interest_over_time_df: |
|
|
interest_over_time_df = interest_over_time_df.rename(columns={niche: 'trend_score'}) |
|
|
interest_over_time_df['niche'] = niche |
|
|
all_trends_df = pd.concat([all_trends_df, interest_over_time_df[['trend_score', 'niche']]]) |
|
|
else: |
|
|
print(f" - βΉοΈ No trend data returned for '{niche}'.") |
|
|
|
|
|
|
|
|
sleep_time = random.uniform(5, 12) |
|
|
print(f" - π΄ Sleeping for {sleep_time:.2f} seconds...") |
|
|
time.sleep(sleep_time) |
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
if "response with code 429" in str(e) or "too many 429 error responses" in str(e): |
|
|
print(f" - π Hit rate limit hard for '{niche}'. Taking a long 2-minute break...") |
|
|
time.sleep(120) |
|
|
else: |
|
|
print(f" - β οΈ A non-rate-limit error occurred for '{niche}'. Error: {e}") |
|
|
continue |
|
|
|
|
|
if all_trends_df.empty: |
|
|
print("β οΈ Warning: Could not fetch any data from Google Trends. Proceeding without trend scores.") |
|
|
return pd.DataFrame() |
|
|
|
|
|
all_trends_df['month'] = all_trends_df.index.to_period('M') |
|
|
monthly_trends = all_trends_df.groupby(['month', 'niche'])['trend_score'].mean().reset_index() |
|
|
|
|
|
print(f"β
Successfully fetched and processed Google Trends data.") |
|
|
return monthly_trends |
|
|
|
|
|
|
|
|
|
|
|
def main(): |
|
|
"""Main function to run the script.""" |
|
|
print("--- Starting Project Thunderbird Data Export ---") |
|
|
|
|
|
campaign_df = get_successful_campaign_counts() |
|
|
|
|
|
if campaign_df.empty: |
|
|
print("\nβ No campaign data found. Aborting training file creation.") |
|
|
return |
|
|
|
|
|
trends_df = get_google_trends_data() |
|
|
|
|
|
|
|
|
if not trends_df.empty: |
|
|
print("\nπ Merging campaign success data with market trend data...") |
|
|
training_df = pd.merge(campaign_df, trends_df, on=['month', 'niche'], how='left') |
|
|
|
|
|
training_df['trend_score'].fillna(0, inplace=True) |
|
|
else: |
|
|
print("\nβ οΈ No trends data was fetched. Creating training file with only campaign data.") |
|
|
training_df = campaign_df |
|
|
training_df['trend_score'] = 0 |
|
|
|
|
|
|
|
|
|
|
|
training_df['month'] = training_df['month'].astype(str) |
|
|
|
|
|
|
|
|
try: |
|
|
training_df.to_csv(OUTPUT_FILE, index=False) |
|
|
print(f"\nβ
Success! Training data has been saved to:") |
|
|
print(f" {OUTPUT_FILE}") |
|
|
except Exception as e: |
|
|
print(f"\nβ Error saving training data to CSV: {e}") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |