# Source: amd-leaderboard / app.py (Hugging Face file view; commit 41bed2b "More work"; 5.28 kB)
import os
import threading
from datetime import datetime

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset
# Configuration
# Hub repository that stores the leaderboard submissions.
DATASET_ID = "siro1/amd-hackathon"
# Optional access token taken from the environment (needed for write access).
HF_TOKEN = os.getenv("HF_TOKEN")
# Most recently loaded dataset; assigned by load_results().
dataset = None
def load_results() -> pd.DataFrame:
    """Load the leaderboard from the Hugging Face dataset.

    Fetches the ``train`` split of ``DATASET_ID``, stores it in the
    module-level ``dataset`` global, and converts it to a DataFrame whose
    columns carry the display names used by the leaderboard table.

    Returns:
        A DataFrame with one row per submission, sorted by
        "Throughput (tokens/s)" in descending order.

    Raises:
        KeyError: If the dataset lacks one of the expected source columns.
    """
    global dataset

    dataset = load_dataset(DATASET_ID, split="train")

    # Source column -> display column; dict order is the table column order.
    display_names = {
        "team": "Team",
        "timestamp": "Timestamp",
        "ttft": "TTFT (ms)",
        "tpot": "TPOT (ms)",
        "itl": "ITL (ms)",
        "e2e": "E2E Latency (ms)",
        "throughput": "Throughput (tokens/s)",
        "bits_per_byte": "Bits per Byte",
        "byte_perplexity": "Byte Perplexity",
        "word_perplexity": "Word Perplexity",
    }

    # Select and rename in pandas instead of a per-row Dataset.map callback:
    # same resulting columns, and the display columns exist even when the
    # split has no rows, so the sort below cannot fail on a missing column.
    frame = dataset.to_pandas()
    frame = frame[list(display_names)].rename(columns=display_names)
    return frame.sort_values("Throughput (tokens/s)", ascending=False)
# Serializes the read-modify-write cycle in update_dataset(). The submit
# endpoint is registered with concurrency_limit=10, so submissions can overlap
# and would otherwise overwrite each other's rows. Process-local only.
_SUBMIT_LOCK = threading.Lock()


def update_dataset(
    team_name,
    ttft,
    tpot,
    itl,
    e2e,
    throughput,
    bits_per_byte,
    byte_perplexity,
    word_perplexity,
):
    """Append one submission row to the Hugging Face dataset.

    Inputs are validated and converted before any network access. The current
    ``train`` split is then re-loaded from the Hub, so the row is appended to
    the latest data rather than to a possibly stale in-memory snapshot, and
    the result is pushed back to ``DATASET_ID``.

    Args:
        team_name: Name of the submitting team; must not be blank.
        ttft, tpot, itl, e2e, throughput, bits_per_byte, byte_perplexity,
            word_perplexity: Metric values; each must be convertible with
            ``float()``.

    Returns:
        True once the updated dataset has been pushed.

    Raises:
        ValueError: If ``team_name`` is blank or a metric is not numeric.
        TypeError: If a metric is ``None`` or another non-convertible type.
    """
    global dataset

    if not str(team_name or "").strip():
        raise ValueError("team_name must not be empty")

    # Build the row first so malformed metrics fail fast, before any I/O.
    new_entry = {
        "team": team_name,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "ttft": float(ttft),
        "tpot": float(tpot),
        "itl": float(itl),
        "e2e": float(e2e),
        "throughput": float(throughput),
        "bits_per_byte": float(bits_per_byte),
        "byte_perplexity": float(byte_perplexity),
        "word_perplexity": float(word_perplexity),
    }

    with _SUBMIT_LOCK:
        # Re-read the Hub copy: the module-level snapshot is only refreshed
        # when load_results() runs, so it may miss recent submissions.
        rows = load_dataset(DATASET_ID, split="train").to_list()
        rows.append(new_entry)
        updated_dataset = Dataset.from_list(rows)
        updated_dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
        # Keep the in-memory snapshot in step with what was just pushed.
        dataset = updated_dataset
    return True
def api_submit_results(
    team_name: str,
    ttft: float,
    tpot: float,
    itl: float,
    e2e: float,
    throughput: float,
    bits_per_byte: float,
    byte_perplexity: float,
    word_perplexity: float,
) -> str:
    """Handle one benchmark submission and report the outcome as text.

    Forwards all values to ``update_dataset`` and converts the result into a
    user-facing message. This is the API boundary, so any exception raised
    while storing the row is reported in the returned string rather than
    propagated.

    Returns:
        An acceptance message on success, otherwise a failure message
        (including the error text when an exception occurred).
    """
    try:
        # Update the dataset with the new submission.
        accepted = update_dataset(
            team_name=team_name,
            ttft=ttft,
            tpot=tpot,
            itl=itl,
            e2e=e2e,
            throughput=throughput,
            bits_per_byte=bits_per_byte,
            byte_perplexity=byte_perplexity,
            word_perplexity=word_perplexity,
        )
    except Exception as e:
        return f"Failed to submit results for {team_name} 😢: {str(e)}"

    if accepted:
        return f"Your submission for {team_name} has been accepted 🤗"
    return f"Failed to submit results for {team_name} 😢"
# Create Gradio interface
def create_interface():
    """Assemble the Gradio Blocks app and return it.

    The app has a "Leaderboard" tab showing the results table with a manual
    refresh button and a 30-second timer, plus a hidden column of inputs whose
    submit button exposes ``api_submit_results`` under the API name
    "submit_results".
    """
    with gr.Blocks(title="AMD vLLM Benchmark Leaderboard") as demo:
        gr.Markdown("# AMD vLLM Benchmark Leaderboard")
        gr.Markdown(
            "Track and compare performance and accuracy metrics for vLLM benchmarks"
        )

        with gr.Tab("Leaderboard"):

            def refresh_leaderboard():
                return load_results()

            # The table is filled once while the UI is being built.
            results_table = gr.DataFrame(
                value=refresh_leaderboard(),
                label="Benchmark Results",
                interactive=False,
            )

            reload_button = gr.Button("Refresh Leaderboard")
            reload_button.click(fn=refresh_leaderboard, outputs=results_table)

            # Re-run the same loader on a 30-second tick.
            auto_refresh = gr.Timer(30)
            auto_refresh.tick(fn=refresh_leaderboard, outputs=results_table)

        # Invisible widgets that back the submission endpoint.
        with gr.Column(visible=False):
            team_box = gr.Textbox()
            # Eight numeric inputs, in the order api_submit_results takes them:
            # ttft, tpot, itl, e2e, throughput, bits_per_byte,
            # byte_perplexity, word_perplexity.
            metric_boxes = [gr.Number() for _ in range(8)]
            result_box = gr.Textbox()

            submit_trigger = gr.Button("Submit")
            submit_trigger.click(
                fn=api_submit_results,
                inputs=[team_box, *metric_boxes],
                outputs=result_box,
                api_name="submit_results",
                concurrency_limit=10,
                show_progress="full",
            )

    return demo
# Create and launch the app
if __name__ == "__main__":
    demo = create_interface()
    # Cap the request queue at 100 pending events.
    demo.queue(max_size=100)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "max_threads": 40,
    }
    demo.launch(**launch_options)