/**
* Inference Router
* Routes inference requests to WebGPU
*/
import { getWebGPUInference, clearModelCache, getCacheInfo } from './webgpu-inference.js';
// Re-export model-cache utilities so UI code can import them from this router
// instead of reaching into webgpu-inference.js directly.
export { clearModelCache, getCacheInfo };
// Singleton engine instance; created lazily on the first loadModel()/generate() call.
let webgpuEngine = null;
/**
 * Load a model into the WebGPU engine, creating the engine on first use.
 * @param {string} modelId - Model ID from config
 * @param {object} options - Loading options
 * @returns {Promise<void>} Resolves once the engine has loaded the model
 */
export async function loadModel(modelId, options = {}) {
  // Lazily construct the shared engine the first time it is needed.
  webgpuEngine = webgpuEngine ?? getWebGPUInference();
  await webgpuEngine.loadModel(modelId, options);
}
/**
 * Check whether a model is currently loaded.
 * @returns {boolean} true only when the engine exists and reports a loaded model
 */
export function isModelLoaded() {
  // Coerce to a real boolean: the previous `engine && engine.isModelLoaded()`
  // leaked `null` (not false) to callers before the engine was created,
  // contradicting the documented boolean return type.
  return Boolean(webgpuEngine && webgpuEngine.isModelLoaded());
}
/**
 * Get the ID of the currently loaded model.
 * @returns {string|null} The model ID, or null when no engine has been created yet
 */
export function getCurrentModelId() {
  if (!webgpuEngine) {
    return null;
  }
  return webgpuEngine.getCurrentModelId();
}
/**
 * Generate a response for a chat conversation.
 * @param {Array<Object>} messages - Message objects with role and content
 * @param {object} options - Generation options
 * @param {function} options.onToken - Token callback for streaming
 * @returns {Promise<string>} Generated response
 * @throws {Error} If no model has been loaded yet
 */
export async function generate(messages, options = {}) {
  // Lazily construct the shared engine the first time it is needed.
  webgpuEngine = webgpuEngine ?? getWebGPUInference();
  // Guard clause: generation requires a loaded model.
  if (!webgpuEngine.isModelLoaded()) {
    throw new Error('Model not loaded. Please load a model first.');
  }
  return await webgpuEngine.generate(messages, options);
}
/**
 * Drop cached image embeddings (call when starting a new conversation).
 * No-op when the engine has not been created yet.
 */
export function clearImageCache() {
  webgpuEngine?.clearImageCache();
}
/**
 * Release engine resources and reset the router to its unloaded state.
 * Safe to call when no engine was ever created.
 */
export function dispose() {
  if (!webgpuEngine) {
    return;
  }
  webgpuEngine.dispose();
  webgpuEngine = null;
}