Chillarmo committed on
Commit
0eb7e14
·
verified ·
1 Parent(s): 07ef252

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +18 -2
index.js CHANGED
@@ -28,35 +28,49 @@ let animationId = null;
// Initialize the ATOM model
/**
 * Loads the 'Chillarmo/ATOM' automatic-speech-recognition pipeline and
 * reports progress through the `status` element.
 *
 * Reads `useWebGPUCheckbox.checked` to choose between the 'webgpu' and 'wasm'
 * devices, stores the loaded pipeline in the outer `transcriber` variable and
 * enables `startBtn` on success. On failure the error is logged and shown in
 * `status`; it is not rethrown.
 *
 * @returns {Promise<void>} Settles once loading has finished or failed.
 */
async function initModel() {
  try {
    status.textContent = 'Loading ATOM model... This may take a minute.';
    status.className = 'loading';

    const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';

    // Load your custom ATOM model
    transcriber = await pipeline(
      'automatic-speech-recognition',
      'Chillarmo/ATOM',
      {
        device,
        // NOTE(review): confirm these status names against the progress
        // events emitted by the pipeline library version in use.
        progress_callback: (progress) => {
          if (progress.status === 'downloading') {
            // `total` can be missing or zero; avoid rendering "NaN%" or
            // "Infinity%" in that case.
            const fraction = progress.loaded / progress.total;
            status.textContent = Number.isFinite(fraction)
              ? `Downloading ${progress.file}: ${Math.round(fraction * 100)}%`
              : `Downloading ${progress.file}...`;
          } else if (progress.status === 'loading') {
            status.textContent = `Loading ${progress.file}...`;
          }
        },
      }
    );

    status.textContent = 'Model loaded! Ready to transcribe Armenian speech.';
    status.className = 'ready';
    startBtn.disabled = false;
  } catch (error) {
    console.error('Model loading error:', error);
    // A rejection is not guaranteed to be an Error instance; reading
    // `.message` blindly would throw on null/undefined and print "undefined"
    // for plain strings.
    const message =
      error && typeof error.message === 'string' ? error.message : String(error);
    status.textContent = `Error loading model: ${message}`;
    status.className = 'error';
  }
}
62
 
@@ -219,6 +233,8 @@ async function processAudioChunk(chunks) {
219
  const audioData = audioBuffer.getChannelData(0);
220
 
221
  // Transcribe with ATOM model
 
 
222
  const result = await transcriber(audioData, {
223
  sampling_rate: audioBuffer.sampleRate,
224
  });
 
// Initialize the ATOM model
/**
 * Loads the 'Chillarmo/ATOM' automatic-speech-recognition pipeline and
 * reports progress through the `status` element.
 *
 * Reads `useWebGPUCheckbox.checked` to choose between the 'webgpu' and 'wasm'
 * devices, stores the loaded pipeline in the outer `transcriber` variable and
 * enables `startBtn` on success. On failure the error is logged and shown in
 * `status` (with a dedicated message when the error text mentions
 * "tokenizer"); it is not rethrown.
 *
 * @returns {Promise<void>} Settles once loading has finished or failed.
 */
async function initModel() {
  try {
    status.textContent = 'Loading ATOM model with custom tokenizer... This may take a minute.';
    status.className = 'loading';

    const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';

    // Load your custom ATOM model.
    // NOTE(review): presumably the pipeline also fetches the tokenizer files
    // from the same Chillarmo/ATOM repository - confirm against the library docs.
    transcriber = await pipeline(
      'automatic-speech-recognition',
      'Chillarmo/ATOM',
      {
        device,
        // Request the 'main' revision of the model repository.
        revision: 'main',
        // NOTE(review): confirm these status names against the progress
        // events emitted by the pipeline library version in use.
        progress_callback: (progress) => {
          if (progress.status === 'downloading') {
            // `total` can be missing or zero; avoid rendering "NaN%" or
            // "Infinity%" in that case.
            const fraction = progress.loaded / progress.total;
            status.textContent = Number.isFinite(fraction)
              ? `Downloading ${progress.file}: ${Math.round(fraction * 100)}%`
              : `Downloading ${progress.file}...`;
          } else if (progress.status === 'loading') {
            status.textContent = `Loading ${progress.file}...`;
          } else if (progress.status === 'progress') {
            // Fall back to a plain message when no numeric progress is given.
            const percent = Math.round(progress.progress);
            status.textContent = Number.isFinite(percent)
              ? `Loading model: ${percent}%`
              : 'Loading model...';
          }
        },
      }
    );

    console.log('ATOM model loaded successfully with custom Armenian tokenizer');
    status.textContent = 'Model & custom tokenizer loaded! Ready to transcribe Armenian speech.';
    status.className = 'ready';
    startBtn.disabled = false;
  } catch (error) {
    // Logged once with the full error object for debugging.
    console.error('Model loading error:', error);

    // A rejection is not guaranteed to be an Error instance; calling
    // `error.message.includes` blindly would throw inside this handler and
    // turn a handled failure into an unhandled rejection.
    const message =
      error && typeof error.message === 'string' ? error.message : String(error);

    status.textContent = message.includes('tokenizer')
      ? 'Error: Custom tokenizer failed to load. Check console.'
      : `Error loading model: ${message}`;
    status.className = 'error';
  }
}
76
 
 
233
  const audioData = audioBuffer.getChannelData(0);
234
 
235
  // Transcribe with ATOM model
236
+ // This uses the custom Armenian tokenizer that comes with the model
237
+ // The tokenizer is critical - it was trained with the model
238
  const result = await transcriber(audioData, {
239
  sampling_rate: audioBuffer.sampleRate,
240
  });