engine / asr_client.py
VeuReu's picture
Create asr_client.py
7153ef5 verified
raw
history blame
3.54 kB
import os
# Expose only GPU index 1 to CUDA-aware libraries in this process. The variable
# is set before the third-party imports below so it is already in place when
# they load.
# NOTE(review): nothing visible in this module runs on a GPU (all work is sent
# to a remote Space) — confirm this setting is still required here.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from gradio_client import Client, handle_file
from typing import Any, Dict, List
from PIL import Image
import json
# Shared client for the remote "VeuReu/asr" Space, used by every wrapper
# function in this module. It is constructed at import time, so importing this
# module has a side effect (presumably a network connection to the Space —
# verify against the gradio_client documentation).
asr_client = Client("VeuReu/asr")
def extract_audio_from_video(video_path: str) -> str:
    """
    Extract the audio track of a video through the remote VeuReu/asr Space.

    The video is wrapped with ``handle_file`` and submitted to the Space's
    ``/extract_audio_ffmpeg`` endpoint as the ``video_file`` argument.

    Parameters
    ----------
    video_path : str
        Path to the input video file from which audio will be extracted.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged. It is expected to
        be a path or identifier of the extracted audio file.
    """
    send_request = asr_client.predict
    request_kwargs = {
        # The endpoint receives the uploaded file nested under a "video" key.
        "video_file": {"video": handle_file(video_path)},
        "api_name": "/extract_audio_ffmpeg",
    }
    return send_request(**request_kwargs)
def diarize_audio(audio_path: str) -> str:
    """
    Run speaker diarization on an audio file through the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and submitted to the
    Space's ``/diaritzar_audio`` endpoint as the ``wav_archivo`` argument.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to be diarized.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged. It is expected to
        be JSON-like diarization output with speaker segments and timings.
    """
    send_request = asr_client.predict
    uploaded_audio = handle_file(audio_path)
    return send_request(wav_archivo=uploaded_audio, api_name="/diaritzar_audio")
def transcribe_long_audio(audio_path: str) -> str:
    """
    Transcribe a long recording through the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and submitted to the
    Space's ``/transcribe_long_audio`` endpoint as the ``wav_path`` argument.
    Any handling specific to long recordings happens on the remote side.

    Parameters
    ----------
    audio_path : str
        Path to the long audio file to be transcribed.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged. It is expected to
        be the transcribed text.
    """
    send_request = asr_client.predict
    request_kwargs = {
        "wav_path": handle_file(audio_path),
        "api_name": "/transcribe_long_audio",
    }
    return send_request(**request_kwargs)
def transcribe_short_audio(audio_path: str) -> str:
    """
    Transcribe a short audio sample through the remote VeuReu/asr Space.

    The audio file is wrapped with ``handle_file`` and submitted to the
    Space's ``/transcribe_wav`` endpoint as the ``wav_path`` argument.

    Parameters
    ----------
    audio_path : str
        Path to the short audio file to be transcribed.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged. It is expected to
        be the transcribed text.
    """
    send_request = asr_client.predict
    uploaded_audio = handle_file(audio_path)
    return send_request(wav_path=uploaded_audio, api_name="/transcribe_wav")
def identificar_veu(clip_path: str, voice_col: List[Dict[str, Any]]) -> Any:
    """
    Identify the speaker of an audio clip through the remote VeuReu/asr Space.

    The clip is wrapped with ``handle_file`` and submitted to the Space's
    ``/identificar_veu`` endpoint as ``wav_archivo``, together with the known
    voice collection serialized to a JSON string as ``voice_col``.

    Parameters
    ----------
    clip_path : str
        Path to the audio clip whose speaker is to be identified.
    voice_col : List[Dict[str, Any]]
        Collection of known voices, one dictionary per voice. Every value
        must be JSON-serializable because the collection is sent as a JSON
        string.

    Returns
    -------
    Any
        The endpoint's response, passed through unchanged.

    Raises
    ------
    TypeError
        If ``voice_col`` contains a value that ``json.dumps`` cannot
        serialize.
    """
    # Serialize first so a bad collection fails before any file is uploaded.
    voice_col_str = json.dumps(voice_col)
    result = asr_client.predict(
        wav_archivo=handle_file(clip_path),
        voice_col=voice_col_str,
        api_name="/identificar_veu",
    )
    return result