Spaces:
Sleeping
Sleeping
| """OpenMeteo Weather Forecast Collection | |
| Collects weather forecasts from OpenMeteo API using ECMWF IFS 0.25° model. | |
| Used for inference to extend weather time series into the future. | |
| Model: ECMWF IFS 0.25° (Integrated Forecasting System) | |
| - Resolution: 0.25° (~25 km, high resolution) | |
| - Forecast horizon: 15 days (360 hours) | |
| - Temporal resolution: Hourly | |
| - Update frequency: Every 6 hours (00, 06, 12, 18 UTC) | |
| - Free tier: Fully accessible since ECMWF October 2025 open data release | |
| ECMWF provides higher quality forecasts than GFS, especially for Europe. | |
| The October 2025 open data initiative made ECMWF IFS freely accessible via OpenMeteo. | |
| This module fetches the LATEST 15-day forecast for all 51 grid points and saves to parquet. | |
| The forecast extends existing weather features (375) into future timestamps. | |
| Author: Claude | |
| Date: 2025-11-10 (Updated: 2025-11-11 - upgraded to ECMWF IFS 0.25° 15-day forecasts) | |
| """ | |
| import requests | |
| import polars as pl | |
| from pathlib import Path | |
| from datetime import datetime | |
| import time | |
| from typing import Dict, List | |
| from tqdm import tqdm | |
# The same 51 grid points as the historical collection, one row per point:
# (grid point id, latitude, longitude, descriptive name).
_GRID_POINT_ROWS = (
    # Germany (6 points)
    ("DE_North_Sea", 54.5, 7.0, "Offshore North Sea"),
    ("DE_Hamburg", 53.5, 10.0, "Hamburg/Schleswig-Holstein"),
    ("DE_Berlin", 52.5, 13.5, "Berlin/Brandenburg"),
    ("DE_Frankfurt", 50.1, 8.7, "Frankfurt"),
    ("DE_Munich", 48.1, 11.6, "Munich/Bavaria"),
    ("DE_Baltic", 54.5, 13.0, "Offshore Baltic"),
    # France (5 points)
    ("FR_Dunkirk", 51.0, 2.3, "Dunkirk/Lille"),
    ("FR_Paris", 48.9, 2.3, "Paris"),
    ("FR_Lyon", 45.8, 4.8, "Lyon"),
    ("FR_Marseille", 43.3, 5.4, "Marseille"),
    ("FR_Strasbourg", 48.6, 7.8, "Strasbourg"),
    # Netherlands (4 points)
    ("NL_Offshore", 53.5, 4.5, "Offshore North"),
    ("NL_Amsterdam", 52.4, 4.9, "Amsterdam"),
    ("NL_Rotterdam", 51.9, 4.5, "Rotterdam"),
    ("NL_Groningen", 53.2, 6.6, "Groningen"),
    # Austria (3 points)
    ("AT_Kaprun", 47.26, 12.74, "Kaprun"),
    ("AT_St_Peter", 48.26, 13.08, "St. Peter"),
    ("AT_Vienna", 48.15, 16.45, "Vienna"),
    # Belgium (3 points)
    ("BE_Offshore", 51.5, 2.8, "Belgian Offshore"),
    ("BE_Doel", 51.32, 4.26, "Doel"),
    ("BE_Avelgem", 50.78, 3.45, "Avelgem"),
    # Czech Republic (3 points)
    ("CZ_Hradec", 50.70, 13.80, "Hradec-RPST"),
    ("CZ_Bohemia", 50.50, 13.60, "Northwest Bohemia"),
    ("CZ_Temelin", 49.18, 14.37, "Temelin"),
    # Poland (4 points)
    ("PL_Baltic", 54.8, 17.5, "Baltic Offshore"),
    ("PL_SHVDC", 54.5, 17.0, "SwePol Link"),
    ("PL_Belchatow", 51.27, 19.32, "Belchatow"),
    ("PL_Mikulowa", 51.5, 15.2, "Mikulowa PST"),
    # Hungary (3 points)
    ("HU_Paks", 46.57, 18.86, "Paks Nuclear"),
    ("HU_Bekescsaba", 46.68, 21.09, "Bekescsaba"),
    ("HU_Gyor", 47.68, 17.63, "Gyor"),
    # Romania (3 points)
    ("RO_Fantanele", 44.59, 28.57, "Fantanele-Cogealac"),
    ("RO_Iron_Gates", 44.67, 22.53, "Iron Gates"),
    ("RO_Cernavoda", 44.32, 28.03, "Cernavoda"),
    # Slovakia (3 points)
    ("SK_Bohunice", 48.49, 17.68, "Bohunice/Mochovce"),
    ("SK_Gabcikovo", 47.88, 17.54, "Gabcikovo"),
    ("SK_Rimavska", 48.38, 20.00, "Rimavska Sobota"),
    # Slovenia (2 points)
    ("SI_Krsko", 45.94, 15.52, "Krsko Nuclear"),
    ("SI_Divaca", 45.68, 13.97, "Divaca"),
    # Croatia (3 points)
    ("HR_Ernestinovo", 45.47, 18.67, "Ernestinovo"),
    ("HR_Zerjavinec", 46.30, 16.20, "Zerjavinec"),
    ("HR_Melina", 45.43, 14.17, "Melina"),
    # Additional strategic points (9)
    ("DE_Ruhr", 51.5, 7.2, "Ruhr Valley"),
    ("FR_Brittany", 48.0, -3.0, "Brittany"),
    ("NL_IJmuiden", 52.5, 4.6, "IJmuiden"),
    ("PL_Krajnik", 52.85, 14.37, "Krajnik PST"),
    ("CZ_Kletne", 50.80, 14.50, "Kletne PST"),
    ("AT_Salzburg", 47.80, 13.04, "Salzburg"),
    ("SK_Velke", 48.85, 21.93, "Velke Kapusany"),
    ("HU_Sandorfalva", 46.3, 20.2, "Sandorfalva"),
    ("RO_Isaccea", 45.27, 28.45, "Isaccea"),
)

# Mapping of grid point id -> {"lat", "lon", "name"}; insertion order follows
# the row table above.
GRID_POINTS = {
    point_id: {"lat": latitude, "lon": longitude, "name": label}
    for point_id, latitude, longitude, label in _GRID_POINT_ROWS
}
class OpenMeteoForecastCollector:
    """Collects ECMWF IFS 0.25° weather forecasts from OpenMeteo API.

    One HTTP request is issued per grid point; requests are spaced out
    according to the configured rate limit. Failures for individual grid
    points are reported and skipped rather than aborting the whole run.
    """

    # Hourly variables requested from the API. Each one becomes a column of
    # the returned DataFrame, in this order.
    HOURLY_VARIABLES = (
        'temperature_2m',
        'windspeed_10m',
        'windspeed_100m',
        'winddirection_100m',
        'shortwave_radiation',
        'cloudcover',
        'surface_pressure',
    )

    def __init__(self, requests_per_minute: int = 60):
        """Initialize forecast collector.

        Args:
            requests_per_minute: Rate limit (default 60 = 1 req/sec, safe for free tier)

        Raises:
            ValueError: If ``requests_per_minute`` is not strictly positive.
        """
        # Reject non-positive rates up front: 0 would divide by zero below and
        # a negative rate would only fail later, inside time.sleep().
        if requests_per_minute <= 0:
            raise ValueError(
                f"requests_per_minute must be a positive number, got {requests_per_minute!r}"
            )
        self.api_url = "https://api.open-meteo.com/v1/ecmwf"  # ECMWF-specific endpoint
        self.requests_per_minute = requests_per_minute
        self.delay_between_requests = 60 / requests_per_minute

    def fetch_forecast_for_location(
        self,
        location_id: str,
        lat: float,
        lon: float
    ) -> "pl.DataFrame":
        """Fetch ECMWF IFS 0.25° forecast for a single location.

        Args:
            location_id: Grid point identifier
            lat: Latitude
            lon: Longitude

        Returns:
            DataFrame with one row per forecast timestamp and the columns
            ``timestamp``, ``grid_point``, ``latitude``, ``longitude`` plus one
            column per entry in ``HOURLY_VARIABLES``. An empty DataFrame is
            returned when the request fails, the response is not valid JSON,
            or the response contains no timestamps.
        """
        # ECMWF API parameters (15-day horizon)
        params = {
            'latitude': lat,
            'longitude': lon,
            'hourly': list(self.HOURLY_VARIABLES),
            'forecast_days': 15,  # 15-day horizon (360 hours)
            'timezone': 'UTC'
        }

        try:
            response = requests.get(self.api_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error is not a RequestException subclass.
            print(f"[ERROR] Failed to fetch forecast for {location_id}: {str(e)}")
            return pl.DataFrame()

        # Parse response defensively: tolerate a non-dict payload or a
        # missing/null 'hourly' section instead of raising AttributeError.
        hourly = data.get('hourly') if isinstance(data, dict) else None
        if not isinstance(hourly, dict):
            hourly = {}
        timestamps = hourly.get('time') or []

        if not timestamps:
            print(f"[WARNING] No forecast data for {location_id}")
            return pl.DataFrame()

        n_hours = len(timestamps)

        # Build DataFrame (scalar values are repeated for every row)
        forecast_data = {
            'timestamp': pl.Series(timestamps).str.to_datetime(),
            'grid_point': location_id,
            'latitude': lat,
            'longitude': lon,
        }
        for variable in self.HOURLY_VARIABLES:
            values = hourly.get(variable)
            if not isinstance(values, list) or len(values) != n_hours:
                # A missing variable is filled with nulls; a malformed one is
                # reported first so it cannot break DataFrame construction.
                if values is not None:
                    print(f"[WARNING] Unexpected '{variable}' data for {location_id}; filling with nulls")
                values = [None] * n_hours
            forecast_data[variable] = values

        return pl.DataFrame(forecast_data)

    def collect_all_forecasts(self, output_path: Path) -> "pl.DataFrame":
        """Collect forecasts for all 51 grid points.

        Args:
            output_path: Where to save combined forecast parquet

        Returns:
            Combined DataFrame with forecasts for all locations that could be
            fetched, sorted by timestamp and grid point. An empty DataFrame
            (and no output file) when no location succeeded.
        """
        output_path = Path(output_path)  # also accept a plain string path
        total = len(GRID_POINTS)

        print(f"Collecting ECMWF HRES forecasts for {total} locations...")
        print(f"Rate limit: {self.requests_per_minute} requests/minute")
        print()

        all_forecasts = []
        failed_locations = []

        for i, (location_id, coords) in enumerate(tqdm(GRID_POINTS.items(), desc="Fetching forecasts"), 1):
            # Fetch forecast
            forecast_df = self.fetch_forecast_for_location(
                location_id,
                coords['lat'],
                coords['lon']
            )

            if not forecast_df.is_empty():
                all_forecasts.append(forecast_df)
                print(f"  [{i}/{total}] {location_id}: {len(forecast_df)} forecast hours")
            else:
                failed_locations.append(location_id)
                print(f"  [{i}/{total}] {location_id}: [FAILED]")

            # Rate limiting (no need to wait after the final request)
            if i < total:
                time.sleep(self.delay_between_requests)

        if not all_forecasts:
            print()
            print("[ERROR] No forecasts collected")
            return pl.DataFrame()

        # Combine all forecasts. The relaxed strategy lets an all-null column
        # (variable missing for one location) or an integer-typed column stack
        # with the float columns of other locations.
        combined = pl.concat(all_forecasts, how="vertical_relaxed")
        combined = combined.sort(['timestamp', 'grid_point'])

        # Save to parquet
        output_path.parent.mkdir(parents=True, exist_ok=True)
        combined.write_parquet(output_path)

        print()
        if failed_locations:
            # Make partial results visible: the saved file lacks these points.
            print(
                f"[WARNING] Missing forecasts for {len(failed_locations)}/{total} grid points: "
                f"{', '.join(failed_locations)}"
            )
        print("[SUCCESS] Forecast collection complete")
        print(f"Total forecast hours: {len(combined):,}")
        print(f"Grid points: {combined['grid_point'].n_unique()}")
        print(f"Date range: {combined['timestamp'].min()} to {combined['timestamp'].max()}")
        print(f"Saved to: {output_path}")

        return combined
def main():
    """Run one forecast collection and print a short report.

    Forecasts are written to ``data/raw/weather_forecast_latest.parquet``
    under the current working directory. When the collection returns data,
    its shape, first rows and overall non-null percentage are printed;
    otherwise an error line is printed.
    """
    rule = "=" * 80

    # Output location, relative to where the script is started
    base_dir = Path.cwd()
    output_path = base_dir / 'data' / 'raw' / 'weather_forecast_latest.parquet'

    # Banner; empty strings produce the blank separator lines
    banner = (
        rule,
        "ECMWF IFS 0.25° WEATHER FORECAST COLLECTION",
        rule,
        "",
        "Model: ECMWF IFS 0.25° (Integrated Forecasting System)",
        "Forecast horizon: 15 days (360 hours)",
        "Temporal resolution: Hourly",
        "Grid points: 51 strategic locations",
        "Free tier: Enabled since ECMWF October 2025 open data release",
        "",
    )
    for line in banner:
        print(line)

    # Fetch every grid point at the default 60 requests/minute
    collector = OpenMeteoForecastCollector(requests_per_minute=60)
    forecast_df = collector.collect_all_forecasts(output_path)

    if forecast_df.is_empty():
        print("[ERROR] Forecast collection failed")
        return

    print()
    print(rule)
    print("FORECAST DATA SUMMARY")
    print(rule)
    print()

    print(f"Shape: {forecast_df.shape}")
    print()
    print("Sample (first 5 rows):")
    print(forecast_df.head(5))
    print()

    # Completeness = share of non-null cells over the whole table
    n_rows, n_cols = forecast_df.shape
    null_cells = forecast_df.null_count().sum_horizontal()[0]
    completeness = (1 - null_cells / (n_rows * n_cols)) * 100
    print(f"Data completeness: {completeness:.2f}%")
    print()

    print("[OK] Weather forecast collection complete!")


if __name__ == "__main__":
    main()