Spaces:

evgueni-p
/

fbmc-chronos2

Sleeping

File size: 8,626 Bytes

3c8562f

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FBMC Chronos-2 Zero-Shot Inference - Smoke Test\n",
    "\n",
    "**Quick validation**: 1 border × 7 days (168 hours)\n",
    "\n",
    "This notebook tests the complete inference pipeline on a Hugging Face Space with GPU acceleration."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Environment Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import sys\n",
    "import time\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "# Third-party\n",
    "import altair as alt\n",
    "import polars as pl\n",
    "import torch\n",
    "from chronos import ChronosPipeline\n",
    "from datasets import load_dataset\n",
    "\n",
    "# Make the project's src directory importable. The membership guard keeps\n",
    "# a re-run of this cell from appending the same entry to sys.path again.\n",
    "SRC_DIR = '/home/user/app/src'  # HF Space path\n",
    "if SRC_DIR not in sys.path:\n",
    "    sys.path.append(SRC_DIR)\n",
    "\n",
    "# Project-local (must come after the sys.path update above)\n",
    "from forecasting.dynamic_forecast import DynamicForecast\n",
    "from forecasting.feature_availability import FeatureAvailability\n",
    "\n",
    "# Report the runtime so later timings can be read in context\n",
    "print(\"Environment setup complete\")\n",
    "print(f\"PyTorch version: {torch.__version__}\")\n",
    "print(f\"GPU available: {torch.cuda.is_available()}\")\n",
    "if torch.cuda.is_available():\n",
    "    print(f\"GPU device: {torch.cuda.get_device_name(0)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load Extended Dataset from HuggingFace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Loading dataset from HuggingFace...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# Read the access token from the environment (None when HF_TOKEN is unset)\n",
    "hf_token = os.environ.get(\"HF_TOKEN\")\n",
    "dataset = load_dataset(\"evgueni-p/fbmc-features-24month\", split=\"train\", token=hf_token)\n",
    "\n",
    "# Hand the dataset's underlying Arrow table to Polars\n",
    "arrow_table = dataset.data.table\n",
    "df = pl.from_arrow(arrow_table)\n",
    "\n",
    "# Summarize what was loaded and how long it took\n",
    "timestamp_col = df['timestamp']\n",
    "first_ts = timestamp_col.min()\n",
    "last_ts = timestamp_col.max()\n",
    "print(f\"✓ Loaded: {df.shape}\")\n",
    "print(f\"  Date range: {first_ts} to {last_ts}\")\n",
    "elapsed_s = time.time() - start_time\n",
    "print(f\"  Load time: {elapsed_s:.1f}s\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Configure Dynamic Forecast System"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categorize features by availability\n",
    "categories = FeatureAvailability.categorize_features(df.columns)\n",
    "\n",
    "print(\"Feature categorization:\")\n",
    "print(f\"  Full-horizon D+14: {len(categories['full_horizon_d14'])} features\")\n",
    "print(f\"  Partial D+1: {len(categories['partial_d1'])} features\")\n",
    "print(f\"  Historical only: {len(categories['historical'])} features\")\n",
    "print(f\"  Total: {sum(len(v) for v in categories.values())} features\")\n",
    "\n",
    "# Identify target borders. Slice off only the leading prefix: str.replace\n",
    "# would also delete any later occurrence of the marker inside a border name.\n",
    "TARGET_PREFIX = 'target_border_'\n",
    "target_cols = [col for col in df.columns if col.startswith(TARGET_PREFIX)]\n",
    "borders = [col[len(TARGET_PREFIX):] for col in target_cols]\n",
    "\n",
    "# Fail with a clear message rather than an IndexError on borders[0] below\n",
    "if not borders:\n",
    "    raise ValueError(f\"No columns starting with '{TARGET_PREFIX}' found in the dataset\")\n",
    "\n",
    "print(f\"\\n✓ Found {len(borders)} borders\")\n",
    "print(f\"  Test border: {borders[0]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Prepare Test Data with Time-Aware Extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test configuration\n",
    "test_border = borders[0]\n",
    "prediction_hours = 168  # 7 days\n",
    "context_hours = 512     # Context window\n",
    "\n",
    "# Use Sept 30 as run date (requires Oct 1-7 future covariates)\n",
    "run_date = datetime(2025, 9, 30, 23, 0)\n",
    "\n",
    "# The forecast window starts the hour after the run date and covers\n",
    "# prediction_hours hourly steps. Deriving it here keeps the printout in\n",
    "# sync with the configuration instead of repeating hard-coded dates.\n",
    "forecast_start = run_date + timedelta(hours=1)\n",
    "forecast_end = run_date + timedelta(hours=prediction_hours)\n",
    "\n",
    "print(\"Test configuration:\")\n",
    "print(f\"  Run date: {run_date}\")\n",
    "print(f\"  Context: {context_hours} hours (historical)\")\n",
    "print(f\"  Forecast: {prediction_hours} hours ({prediction_hours / 24:g} days)\")\n",
    "print(f\"  Forecast range: {forecast_start:%b %d %H:%M} to {forecast_end:%b %d %H:%M}\")\n",
    "\n",
    "# Initialize dynamic forecast\n",
    "forecaster = DynamicForecast(\n",
    "    df=df,\n",
    "    feature_categories=categories\n",
    ")\n",
    "\n",
    "# Extract data with leakage prevention\n",
    "context_data, future_data = forecaster.prepare_forecast_data(\n",
    "    run_date=run_date,\n",
    "    border=test_border\n",
    ")\n",
    "\n",
    "print(f\"\\n✓ Data extracted:\")\n",
    "print(f\"  Context: {context_data.shape}\")\n",
    "print(f\"  Future: {future_data.shape}\")\n",
    "# NOTE(review): no explicit check runs in this cell; this line presumably\n",
    "# relies on prepare_forecast_data raising when leakage is detected - confirm.\n",
    "print(f\"  Leakage check: PASSED\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Load Chronos-2 Model on GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Loading Chronos-2 Large model...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# NOTE(review): the message above says Chronos-2, but this checkpoint ID is\n",
    "# the Chronos T5 large model - confirm which one is intended.\n",
    "model_id = \"amazon/chronos-t5-large\"\n",
    "\n",
    "# Use the GPU when present and fall back to CPU otherwise, so the cell does\n",
    "# not fail on CPU-only hardware. bfloat16 is kept for the GPU path only;\n",
    "# the CPU path uses float32.\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "model_dtype = torch.bfloat16 if device == \"cuda\" else torch.float32\n",
    "\n",
    "pipeline = ChronosPipeline.from_pretrained(\n",
    "    model_id,\n",
    "    device_map=device,\n",
    "    torch_dtype=model_dtype\n",
    ")\n",
    "\n",
    "print(f\"✓ Model loaded in {time.time() - start_time:.1f}s\")\n",
    "print(f\"  Model: {model_id}\")\n",
    "print(f\"  Device: {next(pipeline.model.parameters()).device}\")\n",
    "print(f\"  Dtype: {next(pipeline.model.parameters()).dtype}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Run Zero-Shot Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Running zero-shot inference...\")\n",
    "start_time = time.time()\n",
    "\n",
    "# Sampling is stochastic; fix the seed so a re-run reproduces the forecast\n",
    "torch.manual_seed(42)\n",
    "\n",
    "# Take the last context_hours values of the target series.\n",
    "# NOTE(review): assumes context_data has a column named by the bare border\n",
    "# (without the 'target_border_' prefix) - confirm against DynamicForecast.\n",
    "context = context_data.select([test_border]).to_numpy()[-context_hours:].flatten()\n",
    "\n",
    "# ChronosPipeline.predict expects a torch tensor, not a NumPy array\n",
    "context_tensor = torch.tensor(context, dtype=torch.float32)\n",
    "\n",
    "# Run forecast. The series is passed positionally because the name of the\n",
    "# first parameter differs between chronos releases. The requested horizon is\n",
    "# longer than the model's native prediction length, so the limit is\n",
    "# disabled explicitly instead of relying on a version-dependent default.\n",
    "forecast = pipeline.predict(\n",
    "    context_tensor,\n",
    "    prediction_length=prediction_hours,\n",
    "    num_samples=20,\n",
    "    limit_prediction_length=False\n",
    ")\n",
    "\n",
    "# forecast has shape (num_series, num_samples, prediction_length). Select the\n",
    "# single series and take the 0.5 quantile across samples as the median.\n",
    "forecast_median = torch.quantile(forecast[0].float().cpu(), 0.5, dim=0).numpy()\n",
    "\n",
    "inference_time = time.time() - start_time\n",
    "print(f\"✓ Inference complete in {inference_time:.1f}s\")\n",
    "print(f\"  Forecast shape: {forecast.shape}\")\n",
    "print(f\"  Median forecast range: [{forecast_median.min():.0f}, {forecast_median.max():.0f}] MW\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Visualize Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the hourly forecast timestamps from the run configuration so the\n",
    "# chart stays correct if run_date or prediction_hours changes. The window\n",
    "# starts the hour after run_date and has prediction_hours points.\n",
    "forecast_start = run_date + timedelta(hours=1)\n",
    "forecast_end = run_date + timedelta(hours=prediction_hours)\n",
    "forecast_timestamps = pl.datetime_range(\n",
    "    forecast_start,\n",
    "    forecast_end,\n",
    "    interval='1h',\n",
    "    eager=True\n",
    ")\n",
    "\n",
    "# Surface a horizon mismatch with a readable message before building the frame\n",
    "assert len(forecast_timestamps) == len(forecast_median), (\n",
    "    f\"Expected {len(forecast_timestamps)} forecast values, got {len(forecast_median)}\"\n",
    ")\n",
    "\n",
    "viz_data = pl.DataFrame({\n",
    "    'timestamp': forecast_timestamps,\n",
    "    'forecast': forecast_median.tolist()\n",
    "})\n",
    "\n",
    "# Create chart\n",
    "chart = alt.Chart(viz_data.to_pandas()).mark_line().encode(\n",
    "    x=alt.X('timestamp:T', title='Date'),\n",
    "    y=alt.Y('forecast:Q', title='Flow (MW)'),\n",
    "    tooltip=['timestamp:T', alt.Tooltip('forecast:Q', format='.0f')]\n",
    ").properties(\n",
    "    width=800,\n",
    "    height=400,\n",
    "    title=f'Zero-Shot Forecast: {test_border} ({forecast_start:%b %d} - {forecast_end:%b %d, %Y})'\n",
    ")\n",
    "\n",
    "chart"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the reported period from the run configuration so the summary\n",
    "# cannot disagree with what was actually forecast.\n",
    "forecast_start = run_date + timedelta(hours=1)\n",
    "forecast_end = run_date + timedelta(hours=prediction_hours)\n",
    "gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A'\n",
    "banner = \"=\" * 60\n",
    "\n",
    "print(banner)\n",
    "print(\"SMOKE TEST COMPLETE\")\n",
    "print(banner)\n",
    "print(f\"Border: {test_border}\")\n",
    "print(f\"Forecast period: {forecast_start:%b %d} - {forecast_end:%b %d, %Y} ({prediction_hours} hours)\")\n",
    "print(f\"Inference time: {inference_time:.1f}s\")\n",
    "print(f\"GPU: {gpu_name}\")\n",
    "print(f\"\\n✓ Zero-shot forecasting working on HuggingFace Space!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}