Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 12

Commit

5928c48

verified ·

1 Parent(s): da4c725

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +39 -12

services/streaming_voice_service.py CHANGED Viewed

@@ -966,28 +966,35 @@ class StreamingVoiceService:
                 time.sleep(0.5)
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
-        """Xử lý audio streaming manual mode với VOSK - FIXED VERSION"""
         if not audio_data:
             return self._create_error_response("❌ Không có dữ liệu âm thanh")
         try:
             sample_rate, audio_array = audio_data
-            print(f"🎤 Manual audio: {len(audio_array)} samples, {sample_rate}Hz")
-            # Kiểm tra âm lượng
             if isinstance(audio_array, np.ndarray):
                 if audio_array.dtype in [np.float32, np.float64]:
                     audio_rms = np.sqrt(np.mean(audio_array**2))
-                    print(f"📊 Manual audio RMS: {audio_rms:.4f}")
-                    if audio_rms < 0.01:
-                        return {
-                            'transcription': "Âm thanh quá nhỏ, hãy nói to hơn",
-                            'response': "",
-                            'tts_audio': None,
-                            'status': 'listening'
-                        }
             # Khởi động VOSK stream tạm thời
             if not self.vosk_asr.start_stream():
@@ -1019,7 +1026,7 @@ class StreamingVoiceService:
                 }
             else:
                 return {
-                    'transcription': "Đang nghe... Hãy nói rõ hơn",
                     'response': "",
                     'tts_audio': None,
                     'status': 'listening'
@@ -1029,6 +1036,26 @@ class StreamingVoiceService:
             print(f"❌ Lỗi xử lý streaming audio: {e}")
             traceback.print_exc()
             return self._create_error_response(f"❌ Lỗi: {str(e)}")
     def _generate_ai_response_optimized(self, transcription: str) -> str:
         """Tạo phản hồi AI tối ưu hóa"""

                 time.sleep(0.5)
     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
+        """Xử lý audio streaming manual mode với VOSK - FIXED VOLUME VERSION"""
         if not audio_data:
             return self._create_error_response("❌ Không có dữ liệu âm thanh")
         try:
             sample_rate, audio_array = audio_data
+            print(f"🎤 Manual audio: {len(audio_array)} samples, {sample_rate}Hz, Max: {np.max(audio_array)}")
+            # FIXED: Tăng cường âm lượng trước khi xử lý
+            audio_array = self._boost_input_volume(audio_array)
+            # Kiểm tra âm lượng với ngưỡng thấp hơn
             if isinstance(audio_array, np.ndarray):
                 if audio_array.dtype in [np.float32, np.float64]:
                     audio_rms = np.sqrt(np.mean(audio_array**2))
+                else:
+                    audio_rms = np.sqrt(np.mean(audio_array.astype(np.float32)**2)) / 32768.0
+                print(f"📊 Manual audio RMS: {audio_rms:.6f}, Max: {np.max(audio_array)}")
+                # FIXED: Giảm ngưỡng âm lượng
+                if audio_rms < 0.001:  # Giảm từ 0.01 xuống 0.001
+                    return {
+                        'transcription': f"Âm thanh quá nhỏ (RMS: {audio_rms:.6f}), hãy nói to hơn hoặc điều chỉnh microphone",
+                        'response': "",
+                        'tts_audio': None,
+                        'status': 'listening'
+                    }
             # Khởi động VOSK stream tạm thời
             if not self.vosk_asr.start_stream():
                 }
             else:
                 return {
+                    'transcription': "Đang nghe... Hãy nói rõ hơn và gần microphone",
                     'response': "",
                     'tts_audio': None,
                     'status': 'listening'
             print(f"❌ Lỗi xử lý streaming audio: {e}")
             traceback.print_exc()
             return self._create_error_response(f"❌ Lỗi: {str(e)}")
+    def _boost_input_volume(self, audio_array: np.ndarray, boost_factor: float = 10.0) -> np.ndarray:
+        """Tăng cường âm lượng input audio"""
+        try:
+            if audio_array.dtype in [np.float32, np.float64]:
+                # Audio đã ở dạng float
+                boosted = audio_array * boost_factor
+                boosted = np.clip(boosted, -1.0, 1.0)
+            else:
+                # Audio ở dạng int
+                boosted = audio_array.astype(np.float32) * boost_factor
+                max_val = np.iinfo(audio_array.dtype).max
+                boosted = np.clip(boosted, -max_val, max_val).astype(audio_array.dtype)
+            print(f"🔊 Input volume boosted: {boost_factor}x")
+            return boosted
+        except Exception as e:
+            print(f"⚠️ Lỗi boost input volume: {e}")
+            return audio_array
     def _generate_ai_response_optimized(self, transcription: str) -> str:
         """Tạo phản hồi AI tối ưu hóa"""