Update index.js
index.js (CHANGED)
@@ -28,35 +28,49 @@ let animationId = null;
 // Initialize the ATOM model
 async function initModel() {
     try {
-        status.textContent = 'Loading ATOM model... This may take a minute.';
+        status.textContent = 'Loading ATOM model with custom tokenizer... This may take a minute.';
         status.className = 'loading';
 
         const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';
 
         // Load your custom ATOM model
+        // IMPORTANT: This automatically loads the custom tokenizer from Chillarmo/ATOM
+        // The tokenizer is specific to your model's training and must be used
         transcriber = await pipeline(
             'automatic-speech-recognition',
             'Chillarmo/ATOM',
             {
                 device: device,
+                // Ensure we use the model's own tokenizer, not a default one
+                revision: 'main', // Use main branch which has your custom tokenizer
                 progress_callback: (progress) => {
                     if (progress.status === 'downloading') {
                         const percent = Math.round((progress.loaded / progress.total) * 100);
                         status.textContent = `Downloading ${progress.file}: ${percent}%`;
                     } else if (progress.status === 'loading') {
                         status.textContent = `Loading ${progress.file}...`;
+                    } else if (progress.status === 'progress') {
+                        const percent = Math.round(progress.progress);
+                        status.textContent = `Loading model: ${percent}%`;
                     }
                 }
             }
         );
 
-
+        console.log('ATOM model loaded successfully with custom Armenian tokenizer');
+        status.textContent = 'Model & custom tokenizer loaded! Ready to transcribe Armenian speech.';
         status.className = 'ready';
         startBtn.disabled = false;
     } catch (error) {
         console.error('Model loading error:', error);
         status.textContent = `Error loading model: ${error.message}`;
         status.className = 'error';
+
+        // Log more details for debugging tokenizer issues
+        console.error('Full error details:', error);
+        if (error.message.includes('tokenizer')) {
+            status.textContent = 'Error: Custom tokenizer failed to load. Check console.';
+        }
     }
 }
 
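Note on the loading options above: transformers.js accepts device: 'webgpu' or 'wasm' in the pipeline options, and WebGPU is only usable when the browser defines navigator.gpu. The sketch below is illustrative and not part of this commit; it assumes the library is imported as @huggingface/transformers (the Space may load it from a CDN instead), and pickDevice / loadTranscriber are hypothetical helper names.

import { pipeline } from '@huggingface/transformers';

// Prefer WebGPU when the browser exposes it, otherwise fall back to WASM.
async function pickDevice() {
    if (navigator.gpu) {
        try {
            const adapter = await navigator.gpu.requestAdapter();
            if (adapter) return 'webgpu';
        } catch {
            // Adapter request can fail even when navigator.gpu is defined.
        }
    }
    return 'wasm';
}

// Mirrors initModel() above: same task, model id, and options, but with a
// console-based progress callback instead of updating the status element.
async function loadTranscriber() {
    const device = await pickDevice();
    return pipeline('automatic-speech-recognition', 'Chillarmo/ATOM', {
        device,
        revision: 'main',
        progress_callback: (p) => {
            if (p.status === 'progress' && typeof p.progress === 'number') {
                console.log(`Loading model: ${Math.round(p.progress)}%`);
            }
        },
    });
}

If tokenizer loading needs to be isolated from weight loading, AutoTokenizer.from_pretrained('Chillarmo/ATOM') from the same package loads only the tokenizer files, which can help narrow down the error path handled in the catch block above.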
@@ -219,6 +233,8 @@ async function processAudioChunk(chunks) {
         const audioData = audioBuffer.getChannelData(0);
 
         // Transcribe with ATOM model
+        // This uses the custom Armenian tokenizer that comes with the model
+        // The tokenizer is critical - it was trained with the model
         const result = await transcriber(audioData, {
             sampling_rate: audioBuffer.sampleRate,
         });
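The call above forwards sampling_rate: audioBuffer.sampleRate to the transcriber. Whisper-style ASR checkpoints are generally trained on 16 kHz mono audio, and it is not guaranteed that the pipeline resamples a raw Float32Array on its own, so resampling explicitly before transcription is a common safeguard. A minimal sketch using only Web Audio APIs; blobTo16kMono, TARGET_RATE, and recordedBlob are hypothetical names, and the assumption that Chillarmo/ATOM expects 16 kHz input is not taken from this commit.

const TARGET_RATE = 16000; // Assumed expected sample rate for the model.

// Decode a recorded Blob and resample it to 16 kHz mono.
async function blobTo16kMono(blob) {
    const arrayBuffer = await blob.arrayBuffer();

    // Decode at the context's native rate first.
    const decodeCtx = new AudioContext();
    const decoded = await decodeCtx.decodeAudioData(arrayBuffer);
    await decodeCtx.close();

    // Render through a mono OfflineAudioContext to resample and downmix.
    const frameCount = Math.ceil(decoded.duration * TARGET_RATE);
    const offline = new OfflineAudioContext(1, frameCount, TARGET_RATE);
    const source = offline.createBufferSource();
    source.buffer = decoded;
    source.connect(offline.destination);
    source.start();
    const resampled = await offline.startRendering();

    return resampled.getChannelData(0); // Float32Array at 16 kHz
}

// Usage (names hypothetical):
//   const audioData = await blobTo16kMono(recordedBlob);
//   const result = await transcriber(audioData);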