Spaces:

datbkpro
/

voicebot

Running

App Files Community

datbkpro committed on Nov 12

Commit

da4c725

verified ·

1 Parent(s): ee7155e

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +30 -5

services/streaming_voice_service.py CHANGED Viewed

@@ -596,7 +596,7 @@ class VoskStreamingASR:
             return False
     def process_audio_chunk(self, audio_chunk: np.ndarray, sample_rate: int = None) -> Dict[str, Any]:
-        """Xử lý audio chunk và trả về kết quả - FIXED VERSION"""
         if self.recognizer is None or not self.is_streaming:
             return {"text": "", "partial": "", "is_final": False}
@@ -616,12 +616,16 @@ class VoskStreamingASR:
                 else:
                     audio_chunk = audio_chunk.astype(np.int16)
-            # Kiểm tra âm lượng
             audio_rms = np.sqrt(np.mean(audio_chunk.astype(np.float32)**2)) / 32767.0
-            print(f"📊 Audio RMS: {audio_rms:.4f}")
-            if audio_rms < 0.01:  # Âm lượng quá thấp
-                print("⚠️ Âm lượng quá thấp, bỏ qua")
                 return {"text": "", "partial": "", "is_final": False}
             # Chuyển đổi sang bytes
@@ -651,6 +655,27 @@ class VoskStreamingASR:
         return {"text": "", "partial": "", "is_final": False}
     def stop_stream(self) -> str:
         """Kết thúc stream và lấy kết quả cuối"""
         if self.recognizer:

             return False
     def process_audio_chunk(self, audio_chunk: np.ndarray, sample_rate: int = None) -> Dict[str, Any]:
+        """Xử lý audio chunk và trả về kết quả - FIXED VOLUME VERSION"""
         if self.recognizer is None or not self.is_streaming:
             return {"text": "", "partial": "", "is_final": False}
                 else:
                     audio_chunk = audio_chunk.astype(np.int16)
+            # FIXED: Tăng cường âm lượng trước khi kiểm tra
+            audio_chunk = self._boost_audio_volume(audio_chunk)
+            # Kiểm tra âm lượng - GIẢM ngưỡng xuống
             audio_rms = np.sqrt(np.mean(audio_chunk.astype(np.float32)**2)) / 32767.0
+            print(f"📊 Audio RMS: {audio_rms:.4f}, Max: {np.max(audio_chunk)}")
+            # FIXED: Giảm ngưỡng âm lượng từ 0.01 xuống 0.001
+            if audio_rms < 0.001:  # Giảm ngưỡng 10 lần
+                print(f"⚠️ Âm lượng quá thấp (RMS: {audio_rms:.6f}), bỏ qua")
                 return {"text": "", "partial": "", "is_final": False}
             # Chuyển đổi sang bytes
         return {"text": "", "partial": "", "is_final": False}
+    def _boost_audio_volume(self, audio_chunk: np.ndarray, boost_factor: float = 5.0) -> np.ndarray:
+        """Amplify an audio chunk by a fixed gain and return it as int16 samples.
+
+        The samples are scaled to float32, multiplied by ``boost_factor``,
+        limited to [-1.0, 1.0], and converted back to int16.
+
+        Args:
+            audio_chunk: Samples to amplify. Presumably int16 PCM, since the
+                visible caller casts to int16 before calling - TODO confirm
+                for every call path.
+            boost_factor: Linear gain multiplier applied to every sample.
+
+        Returns:
+            The amplified samples as an int16 array. If any exception is
+            raised while boosting, the error is printed and the original
+            ``audio_chunk`` is returned unchanged.
+        """
+        try:
+            # Convert to float32 and scale by 1/32768 so the gain is applied
+            # in floating point rather than in integer arithmetic.
+            audio_float = audio_chunk.astype(np.float32) / 32768.0
+            # Apply the gain.
+            boosted_audio = audio_float * boost_factor
+            # Limit to [-1.0, 1.0] so the int16 conversion below stays in
+            # range; samples pushed past full scale are saturated.
+            boosted_audio = np.clip(boosted_audio, -1.0, 1.0)
+            # Convert back to int16.
+            boosted_audio_int16 = (boosted_audio * 32767).astype(np.int16)
+            # Debug output, emitted on every call.
+            print(f"🔊 Volume boosted: {boost_factor}x, New max: {np.max(boosted_audio_int16)}")
+            return boosted_audio_int16
+        except Exception as e:
+            # Best-effort: report the failure and fall back to the
+            # unmodified input instead of propagating the error.
+            print(f"⚠️ Lỗi boost volume: {e}")
+            return audio_chunk
     def stop_stream(self) -> str:
         """Kết thúc stream và lấy kết quả cuối"""
         if self.recognizer: