Spaces:
Sleeping
Sleeping
# Template showing the layout of one vLLM benchmark result record.
# The outer key stands in for a run identifier; every value below is a
# placeholder that only illustrates the expected field and its type/format.
RESULT_SCHEMA = dict(
    run_id=dict(
        model="model_name",
        timestamp="YYYY-MM-DD HH:MM:SS",
        # Workload parameters the benchmark was run with.
        config=dict(
            input_length=128,
            output_length=128,
            concurrent_requests=16,
        ),
        # Median latency figures plus overall token throughput.
        performance=dict(
            median_ttft_ms=0.0,  # TTFT: Time To First Token
            median_tpot_ms=0.0,  # TPOT: Time Per Output Token
            median_itl_ms=0.0,  # ITL: Inter-Token Latency
            median_e2el_ms=0.0,  # E2EL: End-to-End Latency
            total_token_throughput=0.0,
        ),
        # Model-quality metric recorded alongside the performance numbers.
        accuracy=dict(
            wikitext_perplexity=0.0,
        ),
    ),
)
# Hand-written sample results for testing, keyed by run name.
# Both runs use the same model and workload config and differ only in
# timestamp and measured numbers. Each run builds its own nested dicts so
# that mutating one entry never affects the other.
MOCK_RESULTS = dict(
    run_2024_12_01_baseline=dict(
        model="amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV",
        timestamp="2024-12-01 10:30:00",
        config=dict(
            input_length=128,
            output_length=128,
            concurrent_requests=16,
        ),
        performance=dict(
            median_ttft_ms=45.23,
            median_tpot_ms=12.56,
            median_itl_ms=11.89,
            median_e2el_ms=1589.45,
            total_token_throughput=2048.67,
        ),
        accuracy=dict(
            wikitext_perplexity=7.89,
        ),
    ),
    run_2024_12_02_optimized=dict(
        model="amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV",
        timestamp="2024-12-02 14:15:00",
        config=dict(
            input_length=128,
            output_length=128,
            concurrent_requests=16,
        ),
        performance=dict(
            median_ttft_ms=42.11,
            median_tpot_ms=11.23,
            median_itl_ms=10.95,
            median_e2el_ms=1456.78,
            total_token_throughput=2234.89,
        ),
        accuracy=dict(
            wikitext_perplexity=7.91,
        ),
    ),
)