import gradio as gr
import pandas as pd
from datetime import datetime
import os
import threading
from datasets import load_dataset, Dataset

# Configuration
DATASET_ID = "siro1/amd-hackathon"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Optional: for write access

# Most recent in-memory snapshot of the Hub dataset. It is replaced by
# load_results() on every refresh and by update_dataset() after every push.
dataset = None

# Guards every read-modify-write of `dataset`. The submit endpoint runs with
# concurrency_limit=10, so without this two submissions could both append to
# the same snapshot and the later push would silently drop the earlier row.
_dataset_lock = threading.RLock()

# Raw dataset column -> leaderboard column header, in display order.
_COLUMN_LABELS = {
    "team": "Team",
    "timestamp": "Timestamp",
    "ttft": "TTFT (ms)",
    "tpot": "TPOT (ms)",
    "itl": "ITL (ms)",
    "e2e": "E2E Latency (ms)",
    "throughput": "Throughput (tokens/s)",
    "bits_per_byte": "Bits per Byte",
    "byte_perplexity": "Byte Perplexity",
    "word_perplexity": "Word Perplexity",
}


def load_results():
    """Load results from the Hugging Face dataset as a leaderboard table.

    Refreshes the module-level ``dataset`` snapshot from the Hub, then
    converts it to a DataFrame with human-readable column headers.

    Returns:
        pandas.DataFrame: one row per submission, restricted to the columns
        in ``_COLUMN_LABELS`` and sorted by "Throughput (tokens/s)" in
        descending order.
    """
    global dataset
    with _dataset_lock:
        # Holding the lock keeps a refresh from overwriting a snapshot that a
        # concurrent submission has just pushed with an older one.
        dataset = load_dataset(DATASET_ID, split="train")
        snapshot = dataset

    # Select and rename the columns in pandas; this gives the same table as a
    # per-row Dataset.map() but without the extra pass over the data.
    df = snapshot.to_pandas()
    df = df[list(_COLUMN_LABELS)].rename(columns=_COLUMN_LABELS)
    return df.sort_values("Throughput (tokens/s)", ascending=False)


def update_dataset(
    team_name,
    ttft,
    tpot,
    itl,
    e2e,
    throughput,
    bits_per_byte,
    byte_perplexity,
    word_perplexity,
):
    """Insert a new row into the Hugging Face dataset.

    The metrics are coerced to ``float`` and the row is stamped with the
    current local time. The full dataset (existing rows plus the new one) is
    then pushed to ``DATASET_ID`` using ``HF_TOKEN``.

    Args:
        team_name: Name shown in the "Team" column.
        ttft, tpot, itl, e2e, throughput, bits_per_byte, byte_perplexity,
        word_perplexity: Metric values; each must be convertible by ``float``.

    Returns:
        bool: ``True`` once the push has completed.

    Raises:
        TypeError, ValueError: If a metric cannot be converted to ``float``.
        Exception: Whatever ``load_dataset`` / ``push_to_hub`` raise on
            failure; errors are not handled here.
    """
    global dataset

    # Build (and thereby validate) the row before taking the lock so that a
    # malformed submission never blocks other writers.
    new_entry = {
        "team": team_name,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "ttft": float(ttft),
        "tpot": float(tpot),
        "itl": float(itl),
        "e2e": float(e2e),
        "throughput": float(throughput),
        "bits_per_byte": float(bits_per_byte),
        "byte_perplexity": float(byte_perplexity),
        "word_perplexity": float(word_perplexity),
    }

    with _dataset_lock:
        if dataset is None:
            # No leaderboard refresh has populated the snapshot yet.
            dataset = load_dataset(DATASET_ID, split="train")

        existing_data = dataset.to_list()
        existing_data.append(new_entry)
        updated_dataset = Dataset.from_list(existing_data)
        updated_dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)

        # Keep the snapshot in sync with what was just pushed; otherwise the
        # next submission would rebuild from stale rows and drop this one.
        dataset = updated_dataset

    return True


def api_submit_results(
    team_name: str,
    ttft: float,
    tpot: float,
    itl: float,
    e2e: float,
    throughput: float,
    bits_per_byte: float,
    byte_perplexity: float,
    word_perplexity: float,
) -> str:
    """Handle a submission and report the outcome as a status message.

    This is the handler behind the ``submit_results`` API endpoint. It never
    raises: any exception from ``update_dataset`` is converted into a failure
    message so that API clients always receive a string.

    Returns:
        str: An acceptance message on success, otherwise a failure message
        (including the exception text when one was raised).
    """
    try:
        # Update the dataset with new submission
        success = update_dataset(
            team_name=team_name,
            ttft=ttft,
            tpot=tpot,
            itl=itl,
            e2e=e2e,
            throughput=throughput,
            bits_per_byte=bits_per_byte,
            byte_perplexity=byte_perplexity,
            word_perplexity=word_perplexity,
        )
    except Exception as e:
        # Top-level API boundary: report the error instead of propagating it.
        return f"Failed to submit results for {team_name} 😢: {str(e)}"

    if success:
        return f"Your submission for {team_name} has been accepted 🤗"
    return f"Failed to submit results for {team_name} 😢"


# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks app.

    The app has a visible "Leaderboard" tab (table, manual refresh button and
    a 30-second auto-refresh timer) and a hidden set of inputs whose submit
    button exposes ``api_submit_results`` under the API name
    ``submit_results``.

    Returns:
        gr.Blocks: The assembled, not yet launched, application.
    """
    with gr.Blocks(title="AMD vLLM Benchmark Leaderboard") as demo:
        gr.Markdown("# AMD vLLM Benchmark Leaderboard")
        gr.Markdown(
            "Track and compare performance and accuracy metrics for vLLM benchmarks"
        )

        with gr.Tab("Leaderboard"):

            def refresh_leaderboard():
                return load_results()

            # Initial load
            leaderboard_table = gr.DataFrame(
                value=refresh_leaderboard(),
                label="Benchmark Results",
                interactive=False,
            )

            refresh_btn = gr.Button("Refresh Leaderboard")
            refresh_btn.click(
                fn=refresh_leaderboard,
                outputs=leaderboard_table,
            )

            # Auto-refresh every 30 seconds
            timer = gr.Timer(30)  # 30 seconds interval
            timer.tick(
                fn=refresh_leaderboard,
                outputs=leaderboard_table,
            )

        # Hidden components: they exist only so the submit handler is
        # registered as an API endpoint; nothing here is rendered.
        with gr.Column(visible=False):
            team_input = gr.Textbox()
            ttft_input = gr.Number()
            tpot_input = gr.Number()
            itl_input = gr.Number()
            e2e_input = gr.Number()
            throughput_input = gr.Number()
            bits_input = gr.Number()
            byte_perp_input = gr.Number()
            word_perp_input = gr.Number()
            submit_output = gr.Textbox()

            submit_btn = gr.Button("Submit")
            submit_btn.click(
                fn=api_submit_results,
                inputs=[
                    team_input,
                    ttft_input,
                    tpot_input,
                    itl_input,
                    e2e_input,
                    throughput_input,
                    bits_input,
                    byte_perp_input,
                    word_perp_input,
                ],
                outputs=submit_output,
                api_name="submit_results",
                concurrency_limit=10,
                show_progress="full",
            )

    return demo


# Create and launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.queue(max_size=100)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        max_threads=40,
    )