LLM_CONFIG = {
    "primary_provider": "huggingface",
    "models": {
        "reasoning_primary": {
            "model_id": "meta-llama/Llama-3.1-8B-Instruct:cerebras",
            "task": "general_reasoning",
            "max_tokens": 10000,
            "temperature": 0.7,
            "cost_per_token": 0.000015,
            "fallback": "Qwen/Qwen2.5-7B-Instruct",
            "is_chat_model": True,
            "use_4bit_quantization": True,
            "use_8bit_quantization": False
        },
        "embedding_specialist": {
            "model_id": "intfloat/e5-large-v2",
            "task": "embeddings",
            "vector_dimensions": 1024,
            "purpose": "semantic_similarity",
            "cost_advantage": "90%_cheaper_than_primary",
            "is_chat_model": False
        },
        "classification_specialist": {
            "model_id": "meta-llama/Llama-3.1-8B-Instruct:cerebras",
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True,
            "use_4bit_quantization": True
        },
        "safety_checker": {
            "model_id": "meta-llama/Llama-3.1-8B-Instruct:cerebras",
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True,
            "use_4bit_quantization": True
        }
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary", "fallback", "degraded_mode"],
        "load_balancing": "round_robin_with_health_check"
    },
    "quantization_settings": {
        "default_4bit": True,
        "default_8bit": False,
        "bnb_4bit_compute_dtype": "float16",
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_quant_type": "nf4"
    }
}
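
# A minimal sketch of how the quantization_settings block above could be mapped
# onto transformers' BitsAndBytesConfig when weights are loaded locally. This is
# an illustrative assumption, not part of the config itself: build_bnb_config and
# load_quantized are hypothetical helper names. Note that the ":cerebras" suffix
# denotes a hosted Inference Provider, so local 4-bit quantization would only
# apply to locally loaded weights such as the Qwen fallback model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig


def build_bnb_config(settings: dict):
    """Translate the quantization_settings block into a BitsAndBytesConfig."""
    if settings.get("default_4bit"):
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=getattr(torch, settings["bnb_4bit_compute_dtype"]),
            bnb_4bit_use_double_quant=settings["bnb_4bit_use_double_quant"],
            bnb_4bit_quant_type=settings["bnb_4bit_quant_type"],
        )
    if settings.get("default_8bit"):
        return BitsAndBytesConfig(load_in_8bit=True)
    return None


def load_quantized(model_id: str, settings: dict):
    """Load a local model with the configured quantization."""
    quant_config = build_bnb_config(settings)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quant_config,
        device_map="auto",
    )
    return model, tokenizer


# Example: quantize the fallback model (local weights), since the primary entry
# is served by a hosted provider.
# model, tok = load_quantized(
#     LLM_CONFIG["models"]["reasoning_primary"]["fallback"],
#     LLM_CONFIG["quantization_settings"],
# )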
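
# A minimal sketch of the task_based_routing strategy and fallback_chain declared
# in routing_logic, assuming requests are sent through huggingface_hub's
# InferenceClient chat API. route_model and call_with_fallback are hypothetical
# helper names; depending on the client version, the ":cerebras" provider suffix
# may instead need to be passed via the client's provider argument.
from huggingface_hub import InferenceClient


def route_model(config: dict, task: str) -> dict:
    """Return the model entry registered for the given task."""
    for entry in config["models"].values():
        if entry["task"] == task:
            return entry
    raise KeyError(f"No model configured for task: {task}")


def call_with_fallback(config: dict, task: str, messages: list) -> str:
    """Try the primary model for a task, then its fallback, per the routing chain."""
    entry = route_model(config, task)
    candidates = [entry["model_id"]]
    if "fallback" in entry:
        candidates.append(entry["fallback"])

    client = InferenceClient()
    last_error = None
    for model_id in candidates:
        try:
            response = client.chat_completion(
                messages,
                model=model_id,
                max_tokens=entry.get("max_tokens", 512),
                temperature=entry.get("temperature", 0.7),
            )
            return response.choices[0].message.content
        except Exception as err:  # degraded_mode: fall through to the next candidate
            last_error = err
    raise RuntimeError("All models in the fallback chain failed") from last_error


# Example:
# answer = call_with_fallback(
#     LLM_CONFIG, "general_reasoning",
#     [{"role": "user", "content": "Summarize the routing strategy."}],
# )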