Spaces:

datbkpro
/

voicebot

Running

App Files Community

datbkpro committed on Oct 23

Commit

348d3f2

verified ·

1 Parent(s): 2302a4d

Update services/streaming_voice_service.py

Browse files

Files changed (1) hide show

services/streaming_voice_service.py +2 -195

services/streaming_voice_service.py CHANGED Viewed

@@ -9,6 +9,7 @@ from config.settings import settings
 from core.rag_system import EnhancedRAGSystem
 from core.tts_service import EnhancedTTSService
 from core.speechbrain_vad import SpeechBrainVAD
 class StreamingVoiceService:
@@ -18,7 +19,7 @@ class StreamingVoiceService:
         self.tts_service = tts_service
         # Khởi tạo VAD
-        self.vad_processor = SpeechBrainVAD()
         self.is_listening = False
         self.speech_callback = None
@@ -364,197 +365,3 @@ Thông tin tham khảo:
             'current_transcription': self.current_transcription,
             'last_update': time.strftime("%H:%M:%S")
         }
-# import io
-# import numpy as np
-# import soundfile as sf
-# import time  # THÊM IMPORT NÀY
-# import traceback
-# from groq import Groq
-# from typing import Optional, Dict, Any
-# from config.settings import settings
-# from core.rag_system import EnhancedRAGSystem
-# from core.tts_service import EnhancedTTSService
-# class StreamingVoiceService:
-#     def __init__(self, groq_client: Groq, rag_system: EnhancedRAGSystem, tts_service: EnhancedTTSService):
-#         self.client = groq_client
-#         self.rag_system = rag_system
-#         self.tts_service = tts_service
-#         # Conversation context
-#         self.conversation_history = []
-#         self.current_transcription = ""
-#     def process_streaming_audio(self, audio_data: tuple) -> Dict[str, Any]:
-#         """Xử lý audio streaming từ Gradio microphone component"""
-#         if not audio_data:
-#             return {
-#                 'transcription': "❌ Không có dữ liệu âm thanh",
-#                 'response': "Vui lòng nói lại",
-#                 'tts_audio': None
-#             }
-#         try:
-#             # Lấy dữ liệu audio từ Gradio
-#             sample_rate, audio_array = audio_data
-#             print(f"🎯 Nhận audio: {len(audio_array)} samples, SR: {sample_rate}")
-#             # Kiểm tra audio có dữ liệu không
-#             if len(audio_array) == 0 or np.max(np.abs(audio_array)) < 0.01:
-#                 return {
-#                     'transcription': "❌ Âm thanh quá yếu",
-#                     'response': "Xin vui lòng nói to hơn và rõ hơn",
-#                     'tts_audio': None
-#                 }
-#             # Chuyển đổi thành văn bản
-#             transcription = self._transcribe_audio(audio_array, sample_rate)
-#             if not transcription or len(transcription.strip()) == 0:
-#                 return {
-#                     'transcription': "❌ Không nghe rõ",
-#                     'response': "Xin vui lòng nói lại rõ hơn",
-#                     'tts_audio': None
-#                 }
-#             print(f"📝 Đã chuyển đổi: {transcription}")
-#             # Cập nhật transcription hiện tại
-#             self.current_transcription = transcription
-#             # Tạo phản hồi AI
-#             response = self._generate_ai_response(transcription)
-#             # Tạo TTS
-#             tts_audio_path = self._text_to_speech(response)
-#             return {
-#                 'transcription': transcription,
-#                 'response': response,
-#                 'tts_audio': tts_audio_path
-#             }
-#         except Exception as e:
-#             print(f"❌ Lỗi xử lý streaming audio: {e}")
-#             print(f"Chi tiết lỗi: {traceback.format_exc()}")
-#             return {
-#                 'transcription': f"❌ Lỗi: {str(e)}",
-#                 'response': "Xin lỗi, có lỗi xảy ra trong quá trình xử lý",
-#                 'tts_audio': None
-#             }
-#     def _transcribe_audio(self, audio_data: np.ndarray, sample_rate: int) -> Optional[str]:
-#         """Chuyển audio -> text"""
-#         try:
-#             # Chuẩn hóa audio data
-#             if audio_data.ndim > 1:
-#                 audio_data = np.mean(audio_data, axis=1)  # Chuyển sang mono
-#             # Normalize âm lượng
-#             audio_max = np.max(np.abs(audio_data))
-#             if audio_max > 0:
-#                 audio_data = audio_data / audio_max
-#             # Giới hạn độ dài audio (tránh quá dài)
-#             max_duration = 10  # giây
-#             max_samples = sample_rate * max_duration
-#             if len(audio_data) > max_samples:
-#                 audio_data = audio_data[:max_samples]
-#                 print(f"⚠️ Cắt audio xuống còn {max_duration} giây")
-#             buffer = io.BytesIO()
-#             sf.write(buffer, audio_data, sample_rate, format='wav', subtype='PCM_16')
-#             buffer.seek(0)
-#             # Gọi API Whisper
-#             transcription = self.client.audio.transcriptions.create(
-#                 model=settings.WHISPER_MODEL,
-#                 file=("speech.wav", buffer.read(), "audio/wav"),
-#                 response_format="text",
-#                 language="vi"
-#             )
-#             # Xử lý response
-#             if hasattr(transcription, 'text'):
-#                 result = transcription.text.strip()
-#             elif isinstance(transcription, str):
-#                 result = transcription.strip()
-#             else:
-#                 result = str(transcription).strip()
-#             print(f"✅ Transcription thành công: {result}")
-#             return result
-#         except Exception as e:
-#             print(f"❌ Lỗi transcription: {e}")
-#             return None
-#     def _generate_ai_response(self, user_input: str) -> str:
-#         """Sinh phản hồi AI"""
-#         try:
-#             # Thêm vào lịch sử
-#             self.conversation_history.append({"role": "user", "content": user_input})
-#             # Tìm kiếm RAG
-#             rag_results = self.rag_system.semantic_search(user_input, top_k=2)
-#             context_text = "\n".join([f"- {result.get('text', str(result))}" for result in rag_results]) if rag_results else ""
-#             system_prompt = f"""Bạn là trợ lý AI thông minh chuyên về tiếng Việt.
-# Hãy trả lời ngắn gọn, tự nhiên và hữu ích (dưới 100 từ).
-# Thông tin tham khảo:
-# {context_text}
-# """
-#             messages = [{"role": "system", "content": system_prompt}]
-#             # Giữ lại 4 tin nhắn gần nhất
-#             messages.extend(self.conversation_history[-4:])
-#             completion = self.client.chat.completions.create(
-#                 model="llama-3.1-8b-instant",
-#                 messages=messages,
-#                 max_tokens=150,
-#                 temperature=0.7
-#             )
-#             response = completion.choices[0].message.content
-#             self.conversation_history.append({"role": "assistant", "content": response})
-#             # Giới hạn lịch sử
-#             if len(self.conversation_history) > 8:
-#                 self.conversation_history = self.conversation_history[-8:]
-#             return response
-#         except Exception as e:
-#             return f"Xin lỗi, tôi gặp lỗi khi tạo phản hồi: {str(e)}"
-#     def _text_to_speech(self, text: str) -> Optional[str]:
-#         """Chuyển văn bản thành giọng nói"""
-#         try:
-#             if not text or text.startswith("❌") or text.startswith("Xin lỗi"):
-#                 return None
-#             tts_bytes = self.tts_service.text_to_speech(text, 'vi')
-#             if tts_bytes:
-#                 audio_path = self.tts_service.save_audio_to_file(tts_bytes)
-#                 print(f"✅ Đã tạo TTS: {audio_path}")
-#                 return audio_path
-#         except Exception as e:
-#             print(f"❌ Lỗi TTS: {e}")
-#         return None
-#     def clear_conversation(self):
-#         """Xóa lịch sử hội thoại"""
-#         self.conversation_history = []
-#         self.current_transcription = ""
-#         print("🗑️ Đã xóa lịch sử hội thoại")
-#     def get_conversation_state(self) -> dict:
-#         """Lấy trạng thái hội thoại"""
-#         return {
-#             'history_length': len(self.conversation_history),
-#             'current_transcription': self.current_transcription,
-#             'last_update': time.strftime("%H:%M:%S")
-#         }

 from core.rag_system import EnhancedRAGSystem
 from core.tts_service import EnhancedTTSService
 from core.speechbrain_vad import SpeechBrainVAD
+from core.silero_vad import SileroVAD
 class StreamingVoiceService:
         self.tts_service = tts_service
         # Khởi tạo VAD
+        self.vad_processor = SileroVAD()
         self.is_listening = False
         self.speech_callback = None
             'current_transcription': self.current_transcription,
             'last_update': time.strftime("%H:%M:%S")
         }