Spaces:

datbkpro
/

voicebot

Sleeping

App Files Files Community

datbkpro committed on Nov 13

Commit

7d0b18d

verified ·

1 Parent(s): 6c27998

Create sambanova_voice_service.py

Browse files

Files changed (1) hide show

services/sambanova_voice_service.py +150 -0

services/sambanova_voice_service.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import base64
+import json
+import os
+from pathlib import Path
+import gradio as gr
+import numpy as np
+import openai
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse, StreamingResponse
+from fastrtc import (
+    AdditionalOutputs,
+    ReplyOnStopWords,
+    Stream,
+    get_stt_model,
+    get_twilio_turn_credentials,
+)
+from gradio.utils import get_space
+from pydantic import BaseModel
+class SambanovaVoiceService:
+    """Dịch vụ Voice AI với Sambanova API"""
+    def __init__(self):
+        self.curr_dir = Path(__file__).parent
+        # Khởi tạo client Sambanova
+        self.client = openai.OpenAI(
+            api_key=os.environ.get("SAMBANOVA_API_KEY"),
+            base_url="https://api.sambanova.ai/v1",
+        )
+        # STT model
+        self.model = get_stt_model()
+        # RTC configuration
+        self.rtc_configuration = get_twilio_turn_credentials() if get_space() else None
+        # FastAPI app
+        self.app = FastAPI()
+    def create_response_handler(self):
+        """Tạo response handler cho voice streaming"""
+        def response(
+            audio: tuple[int, np.ndarray],
+            gradio_chatbot: list[dict] | None = None,
+            conversation_state: list[dict] | None = None,
+        ):
+            gradio_chatbot = gradio_chatbot or []
+            conversation_state = conversation_state or []
+            # Speech to Text
+            text = self.model.stt(audio)
+            print("🎤 STT Result:", text)
+            # Thêm audio vào chatbot
+            sample_rate, array = audio
+            gradio_chatbot.append(
+                {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))}
+            )
+            yield AdditionalOutputs(gradio_chatbot, conversation_state)
+            # Thêm text vào conversation state
+            conversation_state.append({"role": "user", "content": text})
+            # Gọi Sambanova API
+            request = self.client.chat.completions.create(
+                model="Meta-Llama-3.2-3B-Instruct",
+                messages=conversation_state,
+                temperature=0.1,
+                top_p=0.1,
+            )
+            response_content = {"role": "assistant", "content": request.choices[0].message.content}
+            conversation_state.append(response_content)
+            gradio_chatbot.append(response_content)
+            yield AdditionalOutputs(gradio_chatbot, conversation_state)
+        return response
+    def create_stream(self):
+        """Tạo FastRTC stream"""
+        response_handler = self.create_response_handler()
+        return Stream(
+            ReplyOnStopWords(
+                response_handler,
+                stop_words=["computer", "hey", "hello", "xin chào"],
+                input_sample_rate=16000,
+            ),
+            mode="send",
+            modality="audio",
+            additional_inputs=[gr.Chatbot(type="messages", value=[]), gr.State(value=[])],
+            additional_outputs=[gr.Chatbot(type="messages", value=[]), gr.State(value=[])],
+            additional_outputs_handler=lambda *a: (a[2], a[3]),
+            concurrency_limit=5 if get_space() else None,
+            time_limit=90 if get_space() else None,
+            rtc_configuration=self.rtc_configuration,
+        )
+    def setup_fastapi_routes(self):
+        """Thiết lập FastAPI routes"""
+        class Message(BaseModel):
+            role: str
+            content: str
+        class InputData(BaseModel):
+            webrtc_id: str
+            chatbot: list[Message]
+            state: list[Message]
+        @self.app.get("/")
+        async def home():
+            rtc_config = get_twilio_turn_credentials() if get_space() else None
+            html_content = (self.curr_dir / "templates" / "sambanova_index.html").read_text()
+            html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+            return HTMLResponse(content=html_content)
+        @self.app.post("/input_hook")
+        async def input_hook(data: InputData):
+            body = data.model_dump()
+            # stream.set_input(data.webrtc_id, body["chatbot"], body["state"])
+            return {"status": "ok"}
+        def audio_to_base64(file_path):
+            audio_format = "wav"
+            with open(file_path, "rb") as audio_file:
+                encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
+            return f"data:audio/{audio_format};base64,{encoded_audio}"
+        @self.app.get("/outputs")
+        async def outputs(webrtc_id: str):
+            async def output_stream():
+                # async for output in stream.output_stream(webrtc_id):
+                #     chatbot = output.args[0]
+                #     state = output.args[1]
+                #     data = {
+                #         "message": state[-1],
+                #         "audio": audio_to_base64(chatbot[-1]["content"].value["path"])
+                #         if chatbot[-1]["role"] == "user"
+                #         else None,
+                #     }
+                #     yield f"event: output\ndata: {json.dumps(data)}\n\n"
+                yield f"event: output\ndata: {json.dumps({'message': 'Stream ready'})}\n\n"
+            return StreamingResponse(output_stream(), media_type="text/event-stream")
+        return self.app