Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| from datetime import date, timedelta, datetime as dt | |
| from typing import List, Optional, Tuple | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import json | |
| import joblib | |
| from ingest_data import load_epl_data | |
| from preprocess_data import prepare_features | |
| from inference_utils import ( | |
| clean_team, | |
| implied_from_odds, | |
| build_features_for_fixture, | |
| ) | |
| # --------- Load pipeline artifacts once --------- | |
def _next_saturday_str(today: Optional[date] = None) -> str:
    """Return the first Saturday strictly after ``today`` as ``YYYY-MM-DD``.

    When ``today`` is omitted, the current local date is used. If ``today``
    is itself a Saturday, the Saturday one week later is returned.
    """
    reference = date.today() if today is None else today
    # weekday(): Monday=0 ... Saturday=5. The modulo is 0 on a Saturday, and
    # ``or 7`` then pushes the result a full week ahead.
    offset = (5 - reference.weekday()) % 7 or 7
    return (reference + timedelta(days=offset)).strftime("%Y-%m-%d")
def _read_team_list(path: Path) -> List[str]:
    """Read one team name per line from ``path``.

    Surrounding whitespace is stripped and blank lines are dropped. A path
    that does not exist yields an empty list.
    """
    if path.exists():
        with open(path, "r", encoding="utf-8") as handle:
            stripped = (raw.strip() for raw in handle)
            return [entry for entry in stripped if entry]
    return []
def _load_feature_meta() -> Tuple[List[str], int]:
    """Load the saved feature-column list and rolling window size.

    Looks for ``feature_columns.json`` in the working directory first, then
    under ``data/``; the first file that exists wins. Missing keys fall back
    to an empty column list and a window of 7, which is also what is
    returned when neither file exists.
    """
    search_order = (
        Path("feature_columns.json"),
        Path("data") / "feature_columns.json",
    )
    for location in search_order:
        if not location.exists():
            continue
        with open(location, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        columns = payload.get("X_cols", [])
        window = int(payload.get("WINDOW", 7))
        return columns, window
    return [], 7
def init_pipeline():
    """Load everything the UI callbacks need and return it as one dict.

    Steps, in order: load the match data, build features, pick the feature
    column order/window (saved metadata wins over freshly generated values),
    load the model, and resolve the team list for the dropdowns.

    Returns:
        dict with keys ``feat_df``, ``df``, ``X_cols``, ``window``, ``model``
        and ``team_list``.

    Raises:
        FileNotFoundError: if no model file exists at either searched path.
    """
    # Data
    raw_matches = load_epl_data(start_y1=2010, end_y1=None, verbose=False)
    feat_df, generated_cols, generated_window, base_df = prepare_features(
        raw_matches, window=7, verbose=False
    )

    # Features meta: the saved training order is preferred when it is non-empty.
    saved_cols, saved_window = _load_feature_meta()
    if saved_cols:
        X_cols, window = saved_cols, saved_window
    else:
        X_cols, window = generated_cols, generated_window

    # Model: first existing candidate path is loaded.
    model_locations = (
        Path("model") / "model_xgb_isotonic.joblib",
        Path("model_xgb_isotonic.joblib"),
    )
    model_path = next((loc for loc in model_locations if loc.exists()), None)
    model = None if model_path is None else joblib.load(str(model_path))
    if model is None:
        raise FileNotFoundError("Model not found at ./model/model_xgb_isotonic.joblib")

    # Team list (for UI); falls back to the unique teams seen in the data.
    team_list = _read_team_list(Path("data") / "team name.txt") or sorted(
        set(base_df["home"]) | set(base_df["away"])
    )

    return {
        "feat_df": feat_df,
        "df": base_df,
        "X_cols": X_cols,
        "window": window,
        "model": model,
        "team_list": team_list,
    }
# Build the shared pipeline state once, at import time; the UI callbacks read
# the data, feature columns, window, model and team list from this dict.
PIPE = init_pipeline()
| # --------- Inference helpers for UI --------- | |
def manual_predict(home_team: str, away_team: str, match_date: str,
                   home_odds: str = "", draw_odds: str = "", away_odds: str = ""):
    """Predict outcome probabilities for one user-specified fixture.

    Args:
        home_team: Home team name; must be non-empty.
        away_team: Away team name; must be non-empty and differ from
            ``home_team``.
        match_date: Match date, passed through unchanged to
            ``build_features_for_fixture`` (the UI labels it YYYY-MM-DD).
        home_odds, draw_odds, away_odds: Optional decimal odds as text.
            Either all three are given or all three are blank; surrounding
            whitespace is ignored.

    Returns:
        ``(result, ctx)``. On success ``result`` is a DataFrame with
        ``Outcome`` and ``Probability`` columns and ``ctx`` is the context
        object returned by ``build_features_for_fixture``. On any validation
        or runtime failure ``result`` is a human-readable message string and
        ``ctx`` is ``None``.
    """
    invalid_odds_msg = "Invalid odds input. Leave blank or enter numeric decimals."

    if not home_team or not away_team or not match_date:
        return "Please select Home, Away and Date.", None
    if home_team == away_team:
        return "Home and Away teams must be different.", None

    # Normalise so that None and whitespace-only fields count as "blank".
    raw_odds = [
        "" if value is None else str(value).strip()
        for value in (home_odds, draw_odds, away_odds)
    ]
    odds_tuple: Optional[Tuple[float, float, float]] = None
    if any(raw_odds):
        # A partially filled set used to be ignored silently, which produced a
        # prediction that did not reflect what the user typed.
        if not all(raw_odds):
            return "Incomplete odds input. Enter all three odds or leave them all blank.", None
        try:
            parsed = tuple(float(text) for text in raw_odds)
        except ValueError:
            return invalid_odds_msg, None
        # float() accepts "nan"/"inf" and values below 1.0, none of which are
        # usable decimal odds. The chained comparison is False for NaN.
        if not all(1.0 <= value < float("inf") for value in parsed):
            return invalid_odds_msg, None
        odds_tuple = parsed

    try:
        X_new, ctx = build_features_for_fixture(
            home_team, away_team, match_date,
            df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
            odds_tuple=odds_tuple, feat_df_for_medians=PIPE["feat_df"],
        )
        proba = PIPE["model"].predict_proba(X_new)[0]
        # NOTE(review): assumes the model's class order is H, D, A - confirm
        # against the training code.
        labels = ["H (Home Win)", "D (Draw)", "A (Away Win)"]
        res = pd.DataFrame({"Outcome": labels, "Probability": [float(p) for p in proba]})
        return res, ctx
    except Exception as e:
        # UI boundary: surface the failure as a message instead of crashing
        # the Gradio callback.
        return f"Error: {e}", None
def fetch_next_week_fixtures_and_predict(api_key: Optional[str] = None):
    """Fetch the next 7 days of Premier League fixtures and predict each one.

    Fixtures come from the football-data.org v4 API, which needs an API key.

    Args:
        api_key: football-data.org token sent as ``X-Auth-Token``. When it is
            empty or ``None`` no request is made.

    Returns:
        ``(result, None)``. ``result`` is a DataFrame sorted by ``Date`` and
        ``Home`` with one row per fixture: either ``P(Home)``/``P(Draw)``/
        ``P(Away)`` or an ``Error`` column for fixtures that could not be
        scored. When nothing can be fetched, ``result`` is a message string.
    """
    # Use football-data.org if API key provided, else return message
    if not api_key:
        return "Set FOOTBALL_DATA_API_KEY env or provide API key in the textbox to auto-fetch fixtures.", None

    base_url = "https://api.football-data.org/v4/competitions/PL/matches"
    d_from = date.today()
    d_to = d_from + timedelta(days=7)
    # NOTE(review): football-data.org v4 also defines a TIMED status for
    # fixtures with a confirmed kick-off - confirm that filtering on
    # SCHEDULED alone still returns them.
    params = {
        "status": "SCHEDULED",
        "dateFrom": d_from.strftime("%Y-%m-%d"),
        "dateTo": d_to.strftime("%Y-%m-%d"),
    }
    headers = {"X-Auth-Token": api_key}

    # Timeouts and connection failures must come back as a message: this runs
    # inside a Gradio callback, where an uncaught exception breaks the UI.
    try:
        r = requests.get(base_url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        return f"Network error while contacting football-data.org: {exc}", None
    if r.status_code != 200:
        return f"API error {r.status_code}: {r.text}", None
    try:
        data = r.json()
    except ValueError as exc:
        return f"API returned a response that is not valid JSON: {exc}", None

    matches = data.get("matches") if isinstance(data, dict) else None
    if not matches:
        return "No scheduled PL matches in the next 7 days.", None

    rows = []
    for m in matches:
        # The team objects or their names may be null; ``or`` covers both the
        # missing-key and the explicit-null case.
        home = clean_team((m.get("homeTeam") or {}).get("name") or "")
        away = clean_team((m.get("awayTeam") or {}).get("name") or "")
        when = m.get("utcDate", "")
        try:
            match_date = dt.fromisoformat(when.replace("Z", "+00:00")).date().strftime("%Y-%m-%d")
        except (AttributeError, TypeError, ValueError):
            # Unparseable kick-off time: fall back to the upcoming Saturday.
            match_date = _next_saturday_str()

        row = {"Date": match_date, "Home": home, "Away": away}
        try:
            X_new, _ctx = build_features_for_fixture(
                home, away, match_date,
                df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
            )
            proba = PIPE["model"].predict_proba(X_new)[0]
            row.update({
                "P(Home)": float(proba[0]),
                "P(Draw)": float(proba[1]),
                "P(Away)": float(proba[2]),
            })
        except Exception as e:
            # One unscorable fixture must not hide the rest of the table.
            row["Error"] = str(e)
        rows.append(row)

    # ``matches`` was non-empty, so there is at least one row to sort.
    df_out = pd.DataFrame(rows).sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None
def _alias_team_name(name: str) -> str:
    """Translate a scraped team name into the canonical name used by the data.

    The name is first passed through ``clean_team``; if the cleaned value is a
    known alias its canonical form is returned, otherwise the cleaned value
    itself. Add further common aliases to the table below.
    """
    cleaned = clean_team(name)
    known_aliases = {
        "Man City": "Manchester City",
        "Man Utd": "Manchester United",
        "Nott'm Forest": "Nottingham Forest",
        "Newcastle Utd": "Newcastle",
        "Spurs": "Tottenham",
        "Brighton & Hove Albion": "Brighton",
        "Sheff Utd": "Sheffield United",
        "Sheff Wed": "Sheffield Wednesday",
        "West Bromwich Albion": "West Brom",
        "West Brom": "West Brom",
        "Wolverhampton Wanderers": "Wolves",
        "Queens Park Rangers": "QPR",
    }
    if cleaned in known_aliases:
        return known_aliases[cleaned]
    return cleaned
def fetch_next_week_fixtures_and_predict_free():
    """Scrape BBC Sport for the coming 7 days of fixtures and predict each one.

    No API key is needed. The parser is best-effort: BBC markup may change,
    and any failure while fetching or parsing a page is skipped silently.

    Returns:
        ``(result, None)``. ``result`` is a DataFrame sorted by ``Date`` and
        ``Home`` with probability columns (or an ``Error`` column per failed
        fixture), or a message string when no fixture was found at all.
    """
    def _scrape_bbc_for_date(day: date):
        """Return the de-duplicated (home, away) pairs found for ``day``."""
        candidate_urls = [
            f"https://www.bbc.com/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
            f"https://www.bbc.com/sport/football/scores-fixtures/{day:%Y-%m-%d}?competition=premier-league",
            f"https://www.bbc.co.uk/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
        ]
        request_headers = {"User-Agent": "Mozilla/5.0"}
        found = []

        def _keep(raw_home, raw_away):
            # Record the pair only when both names survive aliasing.
            home_name = _alias_team_name(raw_home)
            away_name = _alias_team_name(raw_away)
            if home_name and away_name:
                found.append((home_name, away_name))

        for url in candidate_urls:
            try:
                resp = requests.get(url, timeout=30, headers=request_headers)
                if resp.status_code != 200 or not resp.text:
                    continue
                soup = BeautifulSoup(resp.text, "html.parser")

                # Strategy 1: one fixture element holding (at least) two team names.
                for fixture in soup.select(".sp-c-fixture"):
                    name_nodes = fixture.select(".sp-c-fixture__team-name, .sp-c-fixture__team-name-trunc, [data-testid='team-name']")
                    if len(name_nodes) >= 2:
                        _keep(
                            name_nodes[0].get_text(strip=True),
                            name_nodes[1].get_text(strip=True),
                        )

                # Strategy 2: generic match blocks that may bundle many team
                # names; consume them two at a time, dropping an odd leftover.
                for block in soup.select('[data-component="match-block"], [data-testid="match-block"]'):
                    labels = [
                        node.get_text(strip=True)
                        for node in block.select('[itemprop="name"], .sp-c-fixture__team-name, [data-testid="team-name"]')
                    ]
                    for raw_home, raw_away in zip(labels[0::2], labels[1::2]):
                        _keep(raw_home, raw_away)

                # The first URL that yields anything is enough.
                if found:
                    break
            except Exception:
                continue

        # De-duplicate while keeping first-seen order.
        return list(dict.fromkeys(found))

    rows = []
    start = date.today()
    for offset in range(7):
        day = start + timedelta(days=offset)
        match_date = day.strftime("%Y-%m-%d")
        for home, away in _scrape_bbc_for_date(day):
            row = {"Date": match_date, "Home": home, "Away": away}
            try:
                X_new, _ctx = build_features_for_fixture(
                    home, away, match_date,
                    df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                    odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
                )
                proba = PIPE["model"].predict_proba(X_new)[0]
                row.update({
                    "P(Home)": float(proba[0]),
                    "P(Draw)": float(proba[1]),
                    "P(Away)": float(proba[2]),
                })
            except Exception as exc:
                row["Error"] = str(exc)
            rows.append(row)

    if not rows:
        return "Could not find PL fixtures from BBC for the next 7 days.", None

    df_out = pd.DataFrame(rows)
    df_out = df_out.sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None
| # --------- Build Gradio UI --------- | |
def make_app():
    """Build the Gradio Blocks app and return it without launching.

    The app has two tabs:

    * "Manual" - pick two teams and a date (optionally odds) and show the
      result of ``manual_predict``.
    * "Next Week Fixtures" - predict upcoming fixtures fetched either through
      the football-data.org API or by scraping.

    The API key box is deliberately left empty. A component's initial value
    is sent to every visitor's browser, so pre-filling it from the
    ``FOOTBALL_DATA_API_KEY`` environment variable would expose the server's
    secret. The environment variable is read on the server instead, and only
    when the box is left blank.
    """
    teams = PIPE["team_list"]
    intro_md = (
        "\n"
        "# EPL Match Prediction\n"
        "- Manual mode: pick teams and a date (optionally odds) and get predicted probabilities.\n"
        "- Auto mode: fetch next week's Premier League fixtures (with a football-data.org API key, "
        "or by scraping without one) and predict all.\n"
    )

    with gr.Blocks(title="EPL Match Prediction") as demo:
        gr.Markdown(intro_md)

        with gr.Tab("Manual"):
            with gr.Row():
                home_dd = gr.Dropdown(choices=teams, label="Home Team", value=teams[0] if teams else None)
                away_dd = gr.Dropdown(choices=teams, label="Away Team", value=teams[1] if len(teams) > 1 else None)
                date_tb = gr.Textbox(label="Match Date (YYYY-MM-DD)", value=_next_saturday_str())
            with gr.Accordion("Optional: Odds (decimal)", open=False):
                home_od = gr.Textbox(label="Home Odds")
                draw_od = gr.Textbox(label="Draw Odds")
                away_od = gr.Textbox(label="Away Odds")
            btn = gr.Button("Predict")
            out_tbl = gr.Dataframe(label="Probabilities", interactive=False)
            out_json = gr.JSON(label="Context")

            def _on_predict(h, a, d, ho, do, ao):
                res, ctx = manual_predict(h, a, d, ho, do, ao)
                # A string result is a message; wrap it so the table can show it.
                if isinstance(res, str):
                    return pd.DataFrame({"Message": [res]}), ctx
                return res, ctx

            btn.click(_on_predict, inputs=[home_dd, away_dd, date_tb, home_od, draw_od, away_od], outputs=[out_tbl, out_json])

        with gr.Tab("Next Week Fixtures"):
            gr.Markdown("Fetch next week's Premier League fixtures via API or scraping (no API key).")
            api_key_tb = gr.Textbox(
                label="FOOTBALL_DATA_API_KEY",
                value="",
                placeholder="Leave blank to use the server's FOOTBALL_DATA_API_KEY",
                type="password",
            )
            with gr.Row():
                btn2 = gr.Button("Fetch via API and Predict")
                btn3 = gr.Button("Fetch via Scraping (No API Key)")
            out_tbl2 = gr.Dataframe(label="Next 7 days fixtures predictions", interactive=False)
            msg = gr.Markdown(visible=True)

            def _on_fetch(k):
                # A typed key wins; otherwise fall back to the server-side env
                # var. ``k or ""`` guards against a None textbox value.
                key = (k or "").strip() or os.getenv("FOOTBALL_DATA_API_KEY", "").strip()
                res, _ = fetch_next_week_fixtures_and_predict(key or None)
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures."

            btn2.click(_on_fetch, inputs=[api_key_tb], outputs=[out_tbl2, msg])

            def _on_scrape():
                res, _ = fetch_next_week_fixtures_and_predict_free()
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures (scraped)."

            btn3.click(_on_scrape, inputs=[], outputs=[out_tbl2, msg])

    return demo
def main():
    """Build the Gradio app and start serving it."""
    make_app().launch()
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()