"""Collect Latest Weather Forecast (ECMWF IFS 0.25°) =================================================== Fetches the latest ECMWF IFS 0.25° weather forecast from OpenMeteo API for all 51 strategic grid points. Use Case: Run before inference to get fresh weather forecasts extending 15 days ahead. Model: ECMWF IFS 0.25° (Integrated Forecasting System) - Resolution: 0.25° (~25 km, high quality for Europe) - Forecast horizon: 15 days (360 hours) - Free tier: Enabled since ECMWF October 2025 open data release - Higher quality than GFS, especially for European weather systems Output: data/raw/weather_forecast_latest.parquet Size: ~850 KB (51 points × 7 vars × 360 hours) Runtime: ~1-2 minutes (51 API requests at 1 req/sec) This forecast extends the existing 375 weather features into future timestamps. During inference, concatenate historical weather + this forecast for continuous time series. Author: Claude Date: 2025-11-10 (Updated: 2025-11-11 - upgraded to ECMWF IFS 0.25° 15-day forecasts) """ import sys from pathlib import Path # Add src to path sys.path.append(str(Path(__file__).parent.parent)) from src.data_collection.collect_openmeteo_forecast import OpenMeteoForecastCollector # Output file OUTPUT_DIR = Path(__file__).parent.parent / 'data' / 'raw' OUTPUT_FILE = OUTPUT_DIR / 'weather_forecast_latest.parquet' print("="*80) print("LATEST WEATHER FORECAST COLLECTION (ECMWF IFS 0.25\u00b0)") print("="*80) print() print("Model: ECMWF IFS 0.25\u00b0 (Integrated Forecasting System)") print("Forecast horizon: 15 days (360 hours)") print("Temporal resolution: Hourly") print("Grid points: 51 strategic locations across FBMC") print("Variables: 7 weather parameters") print("Estimated runtime: ~1-2 minutes") print() print("Free tier: Enabled since ECMWF October 2025 open data release") print() # Initialize collector with conservative rate limiting (1 req/sec = 60/min) print("Initializing OpenMeteo forecast collector...") collector = OpenMeteoForecastCollector(requests_per_minute=60) print("[OK] Collector initialized") print() # Run collection try: forecast_df = collector.collect_all_forecasts(OUTPUT_FILE) if not forecast_df.is_empty(): print() print("="*80) print("COLLECTION SUCCESS") print("="*80) print() print(f"Output: {OUTPUT_FILE}") print(f"Shape: {forecast_df.shape[0]:,} rows x {forecast_df.shape[1]} columns") print(f"Date range: {forecast_df['timestamp'].min()} to {forecast_df['timestamp'].max()}") print(f"Grid points: {forecast_df['grid_point'].n_unique()}") print(f"Weather variables: {len([c for c in forecast_df.columns if c not in ['timestamp', 'grid_point', 'latitude', 'longitude']])}") print() # Data quality summary null_count_total = forecast_df.null_count().sum_horizontal()[0] null_pct = (null_count_total / (forecast_df.shape[0] * forecast_df.shape[1])) * 100 print(f"Data completeness: {100 - null_pct:.2f}%") if null_pct > 0: print() print("Missing data by column:") for col in forecast_df.columns: null_count = forecast_df[col].null_count() if null_count > 0: pct = (null_count / len(forecast_df)) * 100 print(f" - {col}: {null_count:,} ({pct:.2f}%)") print() print("="*80) print("NEXT STEPS") print("="*80) print() print("1. During inference, extend weather time series:") print(" weather_hist = pl.read_parquet('data/processed/features_weather_24month.parquet')") print(" weather_fcst = pl.read_parquet('data/raw/weather_forecast_latest.parquet')") print(" # Engineer forecast features (pivot to match historical structure)") print(" weather_fcst_features = engineer_forecast_features(weather_fcst)") print(" weather_full = pl.concat([weather_hist, weather_fcst_features])") print() print("2. Feed extended time series to Chronos 2:") print(" - Historical period: Actual observations") print(" - Forecast period: ECMWF IFS 0.25\u00b0 forecast (15 days)") print(" - Model sees continuous weather time series") print() print("[OK] Weather forecast collection COMPLETE!") else: print() print("[ERROR] No weather forecast data collected") print() print("Possible causes:") print(" - OpenMeteo API access issues") print(" - Network connectivity problems") print(" - ECMWF model unavailable") print() sys.exit(1) except KeyboardInterrupt: print() print() print("="*80) print("COLLECTION INTERRUPTED") print("="*80) print() print("Collection was stopped by user.") print() print("To restart: Run this script again") print() sys.exit(130) except Exception as e: print() print() print("="*80) print("COLLECTION FAILED") print("="*80) print() print(f"Error: {e}") print() import traceback traceback.print_exc() print() sys.exit(1)