Spaces:

datbkpro
/

voicebot

Running

App Files Community

datbkpro committed on Oct 23

Commit

b68989b

verified ·

1 Parent(s): 06ce176

Update ui/tabs.py

Browse files

Files changed (1) hide show

ui/tabs.py +121 -52

ui/tabs.py CHANGED Viewed

@@ -200,65 +200,134 @@ def create_audio_tab(audio_service: AudioService):
         outputs=[transcription_output, response_output, tts_audio_output, language_display]  # UPDATED
     )
 def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
-    """Tạo tab streaming voice sử dụng Gradio microphone"""
-    # Create components - CHỈ NHẬN 7 GIÁ TRỊ (đã bỏ conversation_state)
-    (microphone, clear_btn, status_display, state_display,
-     transcription, ai_response, tts_output) = create_streaming_voice_components()
-    def process_audio_stream(audio_data):
-        """Xử lý audio stream từ microphone"""
-        if audio_data is None:
-            return "❌ Không có âm thanh", "Vui lòng nói lại", None, "Đang chờ...", {}
-        try:
-            print("🎯 Đang xử lý audio stream...")
-            # Xử lý audio
-            result = streaming_service.process_streaming_audio(audio_data)
-            # Cập nhật state
             state = streaming_service.get_conversation_state()
-            status = f"✅ Đã xử lý - {len(result['transcription'])} ký tự"
-            return result['transcription'], result['response'], result['tts_audio'], status, state
-        except Exception as e:
-            error_msg = f"❌ Lỗi xử lý: {str(e)}"
-            print(f"Lỗi: {traceback.format_exc()}")
-            return error_msg, "Xin lỗi, có lỗi xảy ra", None, "❌ Lỗi", {}
-    def clear_conversation():
-        """Xóa hội thoại"""
-        streaming_service.clear_conversation()
-        state = streaming_service.get_conversation_state()
-        return "", "", None, "🗑️ Đã xóa hội thoại", state
-    # Event handlers - SỬA: số lượng outputs phải khớp
-    microphone.stream(
-        process_audio_stream,
-        inputs=[microphone],
-        outputs=[transcription, ai_response, tts_output, status_display, state_display]
-    )
-    clear_btn.click(
-        clear_conversation,
-        outputs=[transcription, ai_response, tts_output, status_display, state_display]
-    )
-    # Initial state
-    def get_initial_state():
-        state = streaming_service.get_conversation_state()
-        return state
-    # Load initial state khi tab được mở
-    def initialize_tab():
-        state = streaming_service.get_conversation_state()
-        return "Sẵn sàng - nhấn nút microphone để nói", state
-    # Khởi tạo giá trị ban đầu
-    status_display.value = "Sẵn sàng - nhấn nút microphone để nói"
-    state_display.value = streaming_service.get_conversation_state()
 def create_image_tab(image_service: ImageService):
     gr.Markdown("## Phân tích hình ảnh")
     with gr.Row():

         outputs=[transcription_output, response_output, tts_audio_output, language_display]  # UPDATED
     )
 def create_streaming_voice_tab(streaming_service: StreamingVoiceService):
+    """Tạo tab streaming voice với VAD"""
+    with gr.Blocks() as streaming_tab:
+        gr.Markdown("## 🎤 Trò chuyện giọng nói thời gian thực với VAD")
+        gr.Markdown("""
+        ### 🎯 Chế độ VAD (Voice Activity Detection)
+        - **Tự động phát hiện** khi bạn bắt đầu nói
+        - **Không cần giữ nút** - hệ thống tự nhận diện
+        - **Loại bỏ tiếng ồn** - chỉ xử lý giọng nói thật
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                # VAD Controls
+                with gr.Row():
+                    start_vad_btn = gr.Button("🎙️ Bắt đầu VAD", variant="primary")
+                    stop_vad_btn = gr.Button("🛑 Dừng VAD", variant="secondary")
+                # Microphone component (vẫn giữ cho manual mode)
+                microphone = gr.Microphone(
+                    label="🎤 Hoặc nhấn để nói thủ công",
+                    type="numpy",
+                    streaming=True,
+                    show_download_button=False
+                )
+                # Clear conversation button
+                clear_btn = gr.Button("🗑️ Xóa hội thoại", variant="secondary")
+                # Status display
+                status_display = gr.Textbox(
+                    label="Trạng thái",
+                    value="Chưa bắt đầu - nhấn 'Bắt đầu VAD'",
+                    interactive=False
+                )
+                # Conversation state
+                state_display = gr.JSON(
+                    label="Thông tin hệ thống",
+                    value={}
+                )
+            with gr.Column(scale=2):
+                # Real-time transcription
+                realtime_transcription = gr.Textbox(
+                    label="📝 Bạn vừa nói",
+                    lines=2,
+                    interactive=False,
+                    placeholder="Văn bản được chuyển đổi sẽ xuất hiện ở đây..."
+                )
+                # AI Response
+                ai_response = gr.Textbox(
+                    label="🤖 Phản hồi AI",
+                    lines=3,
+                    interactive=False,
+                    placeholder="Phản hồi của AI sẽ xuất hiện ở đây..."
+                )
+                # TTS Audio output
+                tts_output = gr.Audio(
+                    label="🔊 Phản hồi bằng giọng nói",
+                    interactive=False,
+                    autoplay=True
+                )
+        def start_vad():
+            """Bắt đầu VAD listening"""
+            def vad_callback(result):
+                """Callback khi VAD phát hiện speech"""
+                # Cập nhật UI với kết quả
+                # Cần sử dụng Gradio events để cập nhật real-time
+                print(f"VAD Result: {result}")
+            success = streaming_service.start_listening(vad_callback)
+            status = "✅ Đang lắng nghe với VAD..." if success else "❌ Lỗi khởi động VAD"
             state = streaming_service.get_conversation_state()
+            return status, state
+        def stop_vad():
+            """Dừng VAD listening"""
+            streaming_service.stop_listening()
+            state = streaming_service.get_conversation_state()
+            return "🛑 Đã dừng VAD", state
+        def process_manual_audio(audio_data):
+            """Xử lý audio manual (không dùng VAD)"""
+            if audio_data is None:
+                return "❌ Không có âm thanh", "Vui lòng nói lại", None, "Đang chờ...", {}
+            try:
+                result = streaming_service.process_streaming_audio(audio_data)
+                state = streaming_service.get_conversation_state()
+                status = "✅ Đã xử lý manual audio"
+                return result['transcription'], result['response'], result['tts_audio'], status, state
+            except Exception as e:
+                error_msg = f"❌ Lỗi: {str(e)}"
+                return error_msg, "Xin lỗi, có lỗi xảy ra", None, "❌ Lỗi", {}
+        def clear_conversation():
+            """Xóa hội thoại"""
+            streaming_service.clear_conversation()
+            state = streaming_service.get_conversation_state()
+            return "", "", None, "🗑️ Đã xóa hội thoại", state
+        # Event handlers
+        start_vad_btn.click(
+            start_vad,
+            outputs=[status_display, state_display]
+        )
+        stop_vad_btn.click(
+            stop_vad,
+            outputs=[status_display, state_display]
+        )
+        microphone.stream(
+            process_manual_audio,
+            inputs=[microphone],
+            outputs=[realtime_transcription, ai_response, tts_output, status_display, state_display]
+        )
+        clear_btn.click(
+            clear_conversation,
+            outputs=[realtime_transcription, ai_response, tts_output, status_display, state_display]
+        )
+    return streaming_tab
 def create_image_tab(image_service: ImageService):
     gr.Markdown("## Phân tích hình ảnh")
     with gr.Row():