Spaces:

siro1
/

amd-leaderboard

Sleeping

amd-leaderboard / src /utils.py

Initial commit

48bb3eb 6 months ago

1.9 kB

	# Template showing the layout of one vLLM benchmark result entry.
	# The outer key ("run_id") is a placeholder for the run identifier; all
	# values below are placeholders that only illustrate the expected fields.
	RESULT_SCHEMA = {
	    "run_id": {
	        "model": "model_name",
	        "timestamp": "YYYY-MM-DD HH:MM:SS",
	        # Settings the benchmark was run with.
	        "config": {
	            "input_length": 128,
	            "output_length": 128,
	            "concurrent_requests": 16,
	        },
	        # Median latency figures (keys ending in "_ms") plus throughput.
	        "performance": {
	            "median_ttft_ms": 0.0,  # TTFT: time to first token
	            "median_tpot_ms": 0.0,  # TPOT: time per output token
	            "median_itl_ms": 0.0,  # ITL: inter-token latency
	            "median_e2el_ms": 0.0,  # E2EL: end-to-end latency
	            "total_token_throughput": 0.0,
	        },
	        # Model-quality metrics reported alongside the speed numbers.
	        "accuracy": {
	            "wikitext_perplexity": 0.0,
	        },
	    },
	}

	# Two hand-written sample runs, keyed by run identifier, for use in tests.
	# Both entries use the same model and benchmark settings; only the
	# timestamp and the measured numbers differ between them.
	MOCK_RESULTS = {
	    "run_2024_12_01_baseline": {
	        "model": "amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV",
	        "timestamp": "2024-12-01 10:30:00",
	        "config": {
	            "input_length": 128,
	            "output_length": 128,
	            "concurrent_requests": 16,
	        },
	        "performance": {
	            "median_ttft_ms": 45.23,
	            "median_tpot_ms": 12.56,
	            "median_itl_ms": 11.89,
	            "median_e2el_ms": 1589.45,
	            "total_token_throughput": 2048.67,
	        },
	        "accuracy": {
	            "wikitext_perplexity": 7.89,
	        },
	    },
	    "run_2024_12_02_optimized": {
	        "model": "amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV",
	        "timestamp": "2024-12-02 14:15:00",
	        # Kept as its own dict (not shared with the baseline entry) so that
	        # mutating one run's config never affects the other.
	        "config": {
	            "input_length": 128,
	            "output_length": 128,
	            "concurrent_requests": 16,
	        },
	        "performance": {
	            "median_ttft_ms": 42.11,
	            "median_tpot_ms": 11.23,
	            "median_itl_ms": 10.95,
	            "median_e2el_ms": 1456.78,
	            "total_token_throughput": 2234.89,
	        },
	        "accuracy": {
	            "wikitext_perplexity": 7.91,
	        },
	    },
	}