{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FBMC Chronos-2 Zero-Shot Inference - Smoke Test\n",
    "\n",
    "**Quick validation**: 1 border × 7 days (168 hours)\n",
    "\n",
    "This notebook tests the complete inference pipeline on HuggingFace Space with GPU acceleration."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Environment Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import sys\n",
    "import time\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "# Third-party\n",
    "import altair as alt\n",
    "import numpy as np\n",
    "import polars as pl\n",
    "import torch\n",
    "from chronos import ChronosPipeline\n",
    "from datasets import load_dataset\n",
    "\n",
    "# Add src to path for imports (HF Space path).\n",
    "# Guarded so that re-running this cell does not append duplicate entries.\n",
    "SRC_DIR = '/home/user/app/src'\n",
    "if SRC_DIR not in sys.path:\n",
    "    sys.path.append(SRC_DIR)\n",
    "\n",
    "# Project-local modules (resolved through SRC_DIR)\n",
    "from forecasting.dynamic_forecast import DynamicForecast\n",
    "from forecasting.feature_availability import FeatureAvailability\n",
    "\n",
    "print(\"Environment setup complete\")\n",
    "print(f\"PyTorch version: {torch.__version__}\")\n",
    "print(f\"GPU available: {torch.cuda.is_available()}\")\n",
    "if torch.cuda.is_available():\n",
    "    print(f\"GPU device: {torch.cuda.get_device_name(0)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load Extended Dataset from HuggingFace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Loading dataset from HuggingFace...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# Load dataset; the access token is read from the environment, never hard-coded\n",
    "hf_token = os.getenv(\"HF_TOKEN\")\n",
    "dataset = load_dataset(\n",
    "    \"evgueni-p/fbmc-features-24month\",\n",
    "    split=\"train\",\n",
    "    token=hf_token\n",
    ")\n",
    "\n",
    "# Convert the dataset's underlying Arrow table to Polars\n",
    "df = pl.from_arrow(dataset.data.table)\n",
    "\n",
    "print(f\"✓ Loaded: {df.shape}\")\n",
    "print(f\"  Date range: {df['timestamp'].min()} to {df['timestamp'].max()}\")\n",
    "print(f\"  Load time: {time.time() - start_time:.1f}s\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Configure Dynamic Forecast System"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categorize features by availability\n",
    "categories = FeatureAvailability.categorize_features(df.columns)\n",
    "\n",
    "print(\"Feature categorization:\")\n",
    "print(f\"  Full-horizon D+14: {len(categories['full_horizon_d14'])} features\")\n",
    "print(f\"  Partial D+1: {len(categories['partial_d1'])} features\")\n",
    "print(f\"  Historical only: {len(categories['historical'])} features\")\n",
    "print(f\"  Total: {sum(len(v) for v in categories.values())} features\")\n",
    "\n",
    "# Identify target borders: strip only the leading prefix from each target column\n",
    "TARGET_PREFIX = 'target_border_'\n",
    "target_cols = [col for col in df.columns if col.startswith(TARGET_PREFIX)]\n",
    "borders = [col[len(TARGET_PREFIX):] for col in target_cols]\n",
    "\n",
    "# Fail with a clear message instead of an IndexError on borders[0] below\n",
    "if not borders:\n",
    "    raise ValueError(f\"No columns starting with '{TARGET_PREFIX}' found in dataset\")\n",
    "\n",
    "print(f\"\\n✓ Found {len(borders)} borders\")\n",
    "print(f\"  Test border: {borders[0]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Prepare Test Data with Time-Aware Extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test configuration\n",
    "test_border = borders[0]\n",
    "prediction_hours = 168  # 7 days\n",
    "context_hours = 512  # Context window\n",
    "SEED = 42  # Seeds the sampling RNG so re-runs are comparable\n",
    "\n",
    "# Use Sept 30 as run date (requires Oct 1-7 future covariates)\n",
    "run_date = datetime(2025, 9, 30, 23, 0)\n",
    "\n",
    "# The forecast window is derived from the run date: it starts one hour after\n",
    "# run_date and covers prediction_hours hourly steps (end is inclusive).\n",
    "forecast_start = run_date + timedelta(hours=1)\n",
    "forecast_end = run_date + timedelta(hours=prediction_hours)\n",
    "\n",
    "print(f\"Test configuration:\")\n",
    "print(f\"  Run date: {run_date}\")\n",
    "print(f\"  Context: {context_hours} hours (historical)\")\n",
    "print(f\"  Forecast: {prediction_hours} hours ({prediction_hours // 24} days)\")\n",
    "print(f\"  Forecast range: {forecast_start:%b %d %H:%M} to {forecast_end:%b %d %H:%M}\")\n",
    "\n",
    "# Initialize dynamic forecast\n",
    "forecaster = DynamicForecast(\n",
    "    df=df,\n",
    "    feature_categories=categories\n",
    ")\n",
    "\n",
    "# Extract data with leakage prevention\n",
    "context_data, future_data = forecaster.prepare_forecast_data(\n",
    "    run_date=run_date,\n",
    "    border=test_border\n",
    ")\n",
    "\n",
    "print(f\"\\n✓ Data extracted:\")\n",
    "print(f\"  Context: {context_data.shape}\")\n",
    "print(f\"  Future: {future_data.shape}\")\n",
    "# NOTE(review): no check is performed in this cell; the line below presumes\n",
    "# prepare_forecast_data raises when leakage is detected - confirm.\n",
    "print(f\"  Leakage check: PASSED\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Load Chronos-2 Model on GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Loading Chronos-2 Large model...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# Fall back to CPU / float32 when no GPU is present so the smoke test still runs\n",
    "use_gpu = torch.cuda.is_available()\n",
    "\n",
    "# NOTE(review): this checkpoint is from the Chronos T5 family although the\n",
    "# notebook text refers to Chronos-2 - confirm which model is intended.\n",
    "pipeline = ChronosPipeline.from_pretrained(\n",
    "    \"amazon/chronos-t5-large\",\n",
    "    device_map=\"cuda\" if use_gpu else \"cpu\",\n",
    "    torch_dtype=torch.bfloat16 if use_gpu else torch.float32\n",
    ")\n",
    "\n",
    "print(f\"✓ Model loaded in {time.time() - start_time:.1f}s\")\n",
    "print(f\"  Device: {next(pipeline.model.parameters()).device}\")\n",
    "print(f\"  Dtype: {next(pipeline.model.parameters()).dtype}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Run Zero-Shot Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Running zero-shot inference...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# Take the last context_hours rows of the border series as a flat 1-D array.\n",
    "# NOTE(review): assumes context_data is a Polars frame with a column named\n",
    "# after the border - confirm against DynamicForecast.prepare_forecast_data.\n",
    "context_values = context_data.select([test_border]).to_numpy()[-context_hours:].flatten()\n",
    "if context_values.size == 0:\n",
    "    raise ValueError(f\"No context rows available for border {test_border!r}\")\n",
    "\n",
    "# ChronosPipeline.predict takes a torch tensor (or list of tensors), not a NumPy array\n",
    "context = torch.tensor(context_values, dtype=torch.float32)\n",
    "\n",
    "# Seed right before sampling so re-running this cell reproduces the same draws\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "# Run forecast. The context is passed positionally so the call does not depend\n",
    "# on the name of predict's first parameter. limit_prediction_length=False is\n",
    "# needed because the requested horizon is longer than the model's native one;\n",
    "# some chronos releases raise otherwise.\n",
    "forecast = pipeline.predict(\n",
    "    context,\n",
    "    prediction_length=prediction_hours,\n",
    "    num_samples=20,\n",
    "    limit_prediction_length=False\n",
    ")\n",
    "\n",
    "# forecast has shape (num_series, num_samples, prediction_length): pick the\n",
    "# single series, then take the median across the sample axis.\n",
    "forecast_median = np.median(forecast[0].numpy(), axis=0)\n",
    "\n",
    "inference_time = time.time() - start_time\n",
    "print(f\"✓ Inference complete in {inference_time:.1f}s\")\n",
    "print(f\"  Forecast shape: {forecast.shape}\")\n",
    "print(f\"  Median forecast range: [{forecast_median.min():.0f}, {forecast_median.max():.0f}] MW\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Visualize Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare data for visualization: hourly timestamps over the derived forecast window\n",
    "forecast_timestamps = pl.datetime_range(\n",
    "    forecast_start,\n",
    "    forecast_end,\n",
    "    interval='1h',\n",
    "    eager=True\n",
    ")\n",
    "\n",
    "# Guard against a silent misalignment between the time axis and the forecast\n",
    "assert len(forecast_timestamps) == len(forecast_median), (\n",
    "    f\"{len(forecast_timestamps)} timestamps vs {len(forecast_median)} forecast steps\"\n",
    ")\n",
    "\n",
    "viz_data = pl.DataFrame({\n",
    "    'timestamp': forecast_timestamps,\n",
    "    'forecast': forecast_median.tolist()\n",
    "})\n",
    "\n",
    "# Create chart\n",
    "chart = alt.Chart(viz_data.to_pandas()).mark_line().encode(\n",
    "    x=alt.X('timestamp:T', title='Date'),\n",
    "    y=alt.Y('forecast:Q', title='Flow (MW)'),\n",
    "    tooltip=['timestamp:T', alt.Tooltip('forecast:Q', format='.0f')]\n",
    ").properties(\n",
    "    width=800,\n",
    "    height=400,\n",
    "    title=f'Zero-Shot Forecast: {test_border} ({forecast_start:%b %d} - {forecast_end:%b %d, %Y})'\n",
    ")\n",
    "\n",
    "chart"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"=\" * 60)\n",
    "print(\"SMOKE TEST COMPLETE\")\n",
    "print(\"=\" * 60)\n",
    "print(f\"Border: {test_border}\")\n",
    "print(f\"Forecast period: {forecast_start:%b %d} - {forecast_end:%b %d, %Y} ({prediction_hours} hours)\")\n",
    "print(f\"Inference time: {inference_time:.1f}s\")\n",
    "print(f\"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A'}\")\n",
    "print(f\"\\n✓ Zero-shot forecasting working on HuggingFace Space!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}