Spaces:

datbkpro
/

voicebot

Running

App Files Community

datbkpro committed on Nov 4

Commit

a0a209b

verified ·

1 Parent(s): abac351

Update core/silero_vad.py

Browse files

Files changed (1) hide show

core/silero_vad.py +19 -11

core/silero_vad.py CHANGED Viewed

@@ -1,5 +1,3 @@
 import io
 import numpy as np
 import soundfile as sf
@@ -114,7 +112,7 @@ class SileroVAD:
         # Logic state machine cải tiến
         if self.state == "silence":
             if speech_prob > self.speech_threshold:
-                print(" Bắt đầu phát hiện speech")
                 self.state = "speech"
                 self.speech_start_time = current_time
                 self.last_voice_time = current_time
@@ -135,24 +133,34 @@ class SileroVAD:
             if speech_prob > self.speech_threshold:
                 self.last_voice_time = current_time
-            # Kiểm tra kết thúc speech
             silence_duration = current_time - self.last_voice_time
             speech_duration = current_time - self.speech_start_time
-            # Điều kiện kết thúc: im lặng đủ lâu VÀ đã nói đủ dài
-            if (silence_duration >= self.min_silence_duration and
-                speech_duration >= self.min_speech_duration):
-                print(f" Kết thúc speech segment (duration: {speech_duration:.2f}s)")
                 self._finalize_speech()
-            # Hoặc speech quá dài (timeout)
             elif speech_duration > settings.MAX_AUDIO_DURATION:
-                print(f"Speech timeout ({speech_duration:.2f}s)")
                 self._finalize_speech()
         elif self.state == "processing":
             # Đang xử lý, không nhận thêm audio
             pass
     def _finalize_speech(self):
         """Hoàn thành xử lý speech segment"""
         if not self.speech_buffer or len(self.speech_buffer) == 0:

 import io
 import numpy as np
 import soundfile as sf
         # Logic state machine cải tiến
         if self.state == "silence":
             if speech_prob > self.speech_threshold:
+                print("🎤 Bắt đầu phát hiện speech")
                 self.state = "speech"
                 self.speech_start_time = current_time
                 self.last_voice_time = current_time
             if speech_prob > self.speech_threshold:
                 self.last_voice_time = current_time
+            # Tính toán thời gian
             silence_duration = current_time - self.last_voice_time
             speech_duration = current_time - self.speech_start_time
+            # 🎯 LOGIC KẾT THÚC THÔNG MINH - 3 TRƯỜNG HỢP:
+            # 1. User nói ngắn (dưới min_speech) nhưng đã im lặng đủ lâu -> XỬ LÝ NGAY
+            is_short_response = speech_duration < self.min_speech_duration
+            is_long_silence_after_short = silence_duration >= self.min_silence_duration
+            if is_short_response and is_long_silence_after_short:
+                print(f"🎯 Phát hiện phản hồi ngắn: {speech_duration:.2f}s, im lặng: {silence_duration:.2f}s")
                 self._finalize_speech()
+            # 2. User nói đủ dài VÀ im lặng đủ lâu -> XỬ LÝ BÌNH THƯỜNG
+            elif (speech_duration >= self.min_speech_duration and
+                  silence_duration >= self.min_silence_duration):
+                print(f"🎯 Kết thúc speech dài: {speech_duration:.2f}s")
+                self._finalize_speech()
+            # 3. Speech quá dài (timeout) -> XỬ LÝ DÙ ĐANG NÓI
             elif speech_duration > settings.MAX_AUDIO_DURATION:
+                print(f"⏰ Speech timeout ({speech_duration:.2f}s) - xử lý dù đang nói")
                 self._finalize_speech()
         elif self.state == "processing":
             # Đang xử lý, không nhận thêm audio
             pass
     def _finalize_speech(self):
         """Hoàn thành xử lý speech segment"""
         if not self.speech_buffer or len(self.speech_buffer) == 0: