Spaces:

Chillarmo
/

ATOM-WebGPU

Running

App Files Files Community

Chillarmo committed 21 days ago

Commit

0eb7e14

verified ·

1 Parent(s): 07ef252

Update index.js

Browse files

Files changed (1) hide show

index.js +18 -2

index.js CHANGED Viewed

@@ -28,35 +28,49 @@ let animationId = null;
 /**
  * Initialize the ATOM model.
  *
  * Builds an 'automatic-speech-recognition' pipeline for 'Chillarmo/ATOM',
  * stores it in the outer `transcriber` variable, mirrors progress into the
  * `status` element, and enables `startBtn` once loading succeeds.
  * Errors are logged and shown in `status`; they are not rethrown, so the
  * returned promise always resolves.
  *
  * `status`, `useWebGPUCheckbox`, `startBtn`, `transcriber` and `pipeline`
  * are defined outside this block.
  *
  * @returns {Promise<void>}
  */
 async function initModel() {
     try {
-        status.textContent = 'Loading ATOM model... This may take a minute.';
         status.className = 'loading';
         // 'webgpu' when the checkbox is ticked, otherwise 'wasm'.
         const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';
         // Load your custom ATOM model
         transcriber = await pipeline(
             'automatic-speech-recognition',
             'Chillarmo/ATOM',
             {
                 device: device,
                 // Called with progress events while the pipeline loads; only the two
                 // status values below update the UI, every other event is ignored.
                 // NOTE(review): confirm 'downloading' / 'loading' are status values the
                 // pipeline library actually emits.
                 progress_callback: (progress) => {
                     if (progress.status === 'downloading') {
                         // NOTE(review): yields NaN/Infinity if `total` is missing or 0 - confirm
                         // the library always supplies both fields for this event.
                         const percent = Math.round((progress.loaded / progress.total) * 100);
                         status.textContent = `Downloading ${progress.file}: ${percent}%`;
                     } else if (progress.status === 'loading') {
                         status.textContent = `Loading ${progress.file}...`;
                     }
                 }
             }
         );
-        status.textContent = 'Model loaded! Ready to transcribe Armenian speech.';
         status.className = 'ready';
         // The start button is only enabled on this success path.
         startBtn.disabled = false;
     } catch (error) {
         // Failure path: log, show the message in the UI, and leave startBtn untouched.
         console.error('Model loading error:', error);
         status.textContent = `Error loading model: ${error.message}`;
         status.className = 'error';
     }
 }
@@ -219,6 +233,8 @@ async function processAudioChunk(chunks) {
         const audioData = audioBuffer.getChannelData(0);
         // Transcribe with ATOM model
         const result = await transcriber(audioData, {
             sampling_rate: audioBuffer.sampleRate,
         });

 /**
  * Initialize the ATOM model.
  *
  * Builds an 'automatic-speech-recognition' pipeline for 'Chillarmo/ATOM',
  * stores it in the outer `transcriber` variable, mirrors progress into the
  * `status` element, and enables `startBtn` once loading succeeds.
  * Errors are logged and shown in `status`; they are not rethrown, so the
  * returned promise always resolves.
  *
  * `status`, `useWebGPUCheckbox`, `startBtn`, `transcriber` and `pipeline`
  * are defined outside this block.
  *
  * @returns {Promise<void>}
  */
 async function initModel() {
     try {
+        status.textContent = 'Loading ATOM model with custom tokenizer... This may take a minute.';
         status.className = 'loading';
         // 'webgpu' when the checkbox is ticked, otherwise 'wasm'.
         const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';
         // Load your custom ATOM model
+        // IMPORTANT: This automatically loads the custom tokenizer from Chillarmo/ATOM
+        // The tokenizer is specific to your model's training and must be used
         transcriber = await pipeline(
             'automatic-speech-recognition',
             'Chillarmo/ATOM',
             {
                 device: device,
+                // Ensure we use the model's own tokenizer, not a default one
                 // NOTE(review): `revision` presumably selects a repo branch/tag/commit rather
                 // than a tokenizer; verify whether 'main' is already the library default.
+                revision: 'main', // Use main branch which has your custom tokenizer
                 // Called with progress events while the pipeline loads; only the three
                 // status values below update the UI, every other event is ignored.
                 // NOTE(review): confirm which of 'downloading' / 'loading' / 'progress' the
                 // pipeline library actually emits.
                 progress_callback: (progress) => {
                     if (progress.status === 'downloading') {
                         // NOTE(review): yields NaN/Infinity if `total` is missing or 0 - confirm
                         // the library always supplies both fields for this event.
                         const percent = Math.round((progress.loaded / progress.total) * 100);
                         status.textContent = `Downloading ${progress.file}: ${percent}%`;
                     } else if (progress.status === 'loading') {
                         status.textContent = `Loading ${progress.file}...`;
+                    } else if (progress.status === 'progress') {
                         // Here `progress.progress` is rounded and shown directly as a percentage
                         // (presumably already on a 0-100 scale - TODO confirm).
+                        const percent = Math.round(progress.progress);
+                        status.textContent = `Loading model: ${percent}%`;
                     }
                 }
             }
         );
+        console.log('ATOM model loaded successfully with custom Armenian tokenizer');
+        status.textContent = 'Model & custom tokenizer loaded! Ready to transcribe Armenian speech.';
         status.className = 'ready';
         // The start button is only enabled on this success path.
         startBtn.disabled = false;
     } catch (error) {
         // Failure path: log, show the message in the UI, and leave startBtn untouched.
         console.error('Model loading error:', error);
         status.textContent = `Error loading model: ${error.message}`;
         status.className = 'error';
+        // Log more details for debugging tokenizer issues
         // NOTE(review): this logs the same `error` object a second time (see the
         // console.error at the top of this catch block).
+        console.error('Full error details:', error);
         // Overrides the generic message when the error text mentions 'tokenizer'.
         // NOTE(review): assumes `error.message` is a string; a thrown non-Error value
         // would make `.includes` throw inside this catch block.
+        if (error.message.includes('tokenizer')) {
+            status.textContent = 'Error: Custom tokenizer failed to load. Check console.';
+        }
     }
 }
         const audioData = audioBuffer.getChannelData(0);
         // Transcribe with ATOM model
+        // This uses the custom Armenian tokenizer that comes with the model
+        // The tokenizer is critical - it was trained with the model
         const result = await transcriber(audioData, {
             sampling_rate: audioBuffer.sampleRate,
         });