File size: 3,536 Bytes
7153ef5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from gradio_client import Client, handle_file
from typing import Any, Dict, List
from PIL import Image
import json
# Shared gradio_client connection to the remote "VeuReu/asr" Space.
# It is created once, when this module is imported, and reused by every
# wrapper function in this file that calls `asr_client.predict(...)`.
asr_client = Client("VeuReu/asr")
def extract_audio_from_video(video_path: str) -> str:
    """
    Invoke the /extract_audio_ffmpeg endpoint of the remote VeuReu/asr Space.

    The video path is wrapped with ``handle_file`` and sent to the endpoint
    under the ``video`` key of the ``video_file`` argument.

    Parameters
    ----------
    video_path : str
        Location of the video whose audio track should be extracted.

    Returns
    -------
    str
        Whatever the endpoint returns, passed through unchanged (presumably a
        path or identifier of the extracted audio; the exact format is defined
        by the remote Space).
    """
    video_payload = {"video": handle_file(video_path)}
    return asr_client.predict(
        video_file=video_payload,
        api_name="/extract_audio_ffmpeg",
    )
def diarize_audio(audio_path: str) -> str:
    """
    Invoke the /diaritzar_audio endpoint of the remote VeuReu/asr Space.

    The audio path is wrapped with ``handle_file`` and passed to the endpoint
    as its ``wav_archivo`` argument so the service can run speaker diarization.

    Parameters
    ----------
    audio_path : str
        Location of the audio file to diarize.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged (presumably speaker
        segments with timings; confirm the exact schema against the Space).
    """
    wav_handle = handle_file(audio_path)
    return asr_client.predict(
        wav_archivo=wav_handle,
        api_name="/diaritzar_audio",
    )
def transcribe_long_audio(audio_path: str) -> str:
    """
    Invoke the /transcribe_long_audio endpoint of the remote VeuReu/asr Space.

    Intended for long recordings: the audio path is wrapped with
    ``handle_file`` and passed to the endpoint as its ``wav_path`` argument.

    Parameters
    ----------
    audio_path : str
        Location of the long audio file to transcribe.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged (the transcription
        produced by the remote service).
    """
    wav_handle = handle_file(audio_path)
    return asr_client.predict(
        wav_path=wav_handle,
        api_name="/transcribe_long_audio",
    )
def transcribe_short_audio(audio_path: str) -> str:
    """
    Invoke the /transcribe_wav endpoint of the remote VeuReu/asr Space.

    Intended for short clips: the audio path is wrapped with ``handle_file``
    and passed to the endpoint as its ``wav_path`` argument.

    Parameters
    ----------
    audio_path : str
        Location of the short audio file to transcribe.

    Returns
    -------
    str
        The endpoint's response, passed through unchanged (the transcription
        produced by the remote service).
    """
    wav_handle = handle_file(audio_path)
    return asr_client.predict(
        wav_path=wav_handle,
        api_name="/transcribe_wav",
    )
def identificar_veu(clip_path: str, voice_col: List[Dict[str, Any]]):
    """
    Invoke the /identificar_veu endpoint of the remote VeuReu/asr Space.

    The collection of known voices is serialized to a JSON string and sent
    together with the audio clip, so the service can decide which known
    speaker (if any) the clip belongs to.

    Parameters
    ----------
    clip_path : str
        Location of the audio clip whose speaker should be identified.
    voice_col : List[Dict[str, Any]]
        Known voice entries; must be JSON-serializable because the list is
        passed through ``json.dumps`` before being sent.

    Returns
    -------
    Any
        The endpoint's response, passed through unchanged.
    """
    # Serialize first, so a non-serializable collection fails before the
    # clip is handed to handle_file.
    serialized_voices = json.dumps(voice_col)
    request_kwargs = {
        "wav_archivo": handle_file(clip_path),
        "voice_col": serialized_voices,
        "api_name": "/identificar_veu",
    }
    return asr_client.predict(**request_kwargs)
|