import os

# Select GPU 1 for this process; must be set before any CUDA-aware library is initialized.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import json
from typing import Any, Dict, List

from PIL import Image
from gradio_client import Client, handle_file

# Client for the remote VeuReu/asr Hugging Face Space that hosts the ASR endpoints used below.
asr_client = Client("VeuReu/asr")


def extract_audio_from_video(video_path: str) -> str:
    """
    Call the /extract_audio_ffmpeg endpoint of the remote VeuReu/asr Space.

    This function uploads a video file to the remote ASR service and extracts its audio track.

    Parameters
    ----------
    video_path : str
        Path to the input video file from which audio will be extracted.

    Returns
    -------
    str
        Path or identifier of the extracted audio file returned by the remote service.
    """
    # The endpoint expects a Gradio video payload: a dict with the uploaded file under "video".
    result = asr_client.predict(
        video_file={"video": handle_file(video_path)},
        api_name="/extract_audio_ffmpeg",
    )
    return result
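
# Example usage (a sketch, not part of the original module; "lecture.mp4" is a hypothetical file
# and the exact shape of the returned value depends on the Space):
#
#     audio_path = extract_audio_from_video("lecture.mp4")
#     print(audio_path)  # expected: path/identifier of the extracted audio track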


def diarize_audio(audio_path: str) -> str:
    """
    Call the /diaritzar_audio endpoint of the remote VeuReu/asr Space.

    This function performs speaker diarization, identifying segments of speech
    belonging to different speakers in the audio file.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to be diarized.

    Returns
    -------
    str
        JSON-like diarization output containing speaker segments and timings.
    """
    result = asr_client.predict(
        wav_archivo=handle_file(audio_path),
        api_name="/diaritzar_audio",
    )
    return result
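
# Example usage (a sketch under the assumption that the endpoint returns a JSON string of
# speaker segments; "meeting.wav" is a hypothetical file):
#
#     segments = json.loads(diarize_audio("meeting.wav"))
#     for seg in segments:
#         print(seg)  # e.g. speaker label plus start/end times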


def transcribe_long_audio(audio_path: str) -> str:
    """
    Call the /transcribe_long_audio endpoint of the remote VeuReu/asr Space.

    Designed for long audio recordings, this function sends the audio to the ASR model
    optimized for processing extended durations.

    Parameters
    ----------
    audio_path : str
        Path to the long audio file to be transcribed.

    Returns
    -------
    str
        Transcribed text returned by the remote ASR service.
    """
    result = asr_client.predict(
        wav_path=handle_file(audio_path),
        api_name="/transcribe_long_audio",
    )
    return result


def transcribe_short_audio(audio_path: str) -> str:
    """
    Call the /transcribe_wav endpoint of the remote VeuReu/asr Space.

    This function is optimized for short-duration audio samples and produces fast transcriptions.

    Parameters
    ----------
    audio_path : str
        Path to the short audio file to be transcribed.

    Returns
    -------
    str
        Transcribed text returned by the remote service.
    """
    result = asr_client.predict(
        wav_path=handle_file(audio_path),
        api_name="/transcribe_wav",
    )
    return result
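
# Example usage (illustrative; file names are hypothetical). The two helpers share the same
# signature and differ only in which remote endpoint they call:
#
#     text_long = transcribe_long_audio("interview_full.wav")   # long recordings
#     text_short = transcribe_short_audio("clip_5s.wav")        # short clips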


def identificar_veu(clip_path: str, voice_col: List[Dict[str, Any]]) -> Any:
    """
    Call the /identificar_veu endpoint of the remote VeuReu/asr Space.

    This function attempts to identify which known speaker (from a provided
    collection of voice profiles) appears in the given audio clip.

    Parameters
    ----------
    clip_path : str
        Path to the audio clip whose speaker is to be identified.
    voice_col : List[Dict[str, Any]]
        List of dictionaries containing metadata or embeddings for known voices.

    Returns
    -------
    Any
        Output returned by the remote speaker identification model.
    """
    # The remote endpoint expects the voice collection serialized as a JSON string.
    voice_col_str = json.dumps(voice_col)
    result = asr_client.predict(
        wav_archivo=handle_file(clip_path),
        voice_col=voice_col_str,
        api_name="/identificar_veu",
    )
    return result
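

# Minimal end-to-end sketch (not part of the original module). It assumes a local
# "example_video.mp4", that extract_audio_from_video() returns a usable audio path, and that
# an empty voice collection is acceptable for identificar_veu(); adjust to your own data.
if __name__ == "__main__":
    video = "example_video.mp4"          # hypothetical input file
    audio = extract_audio_from_video(video)
    print("Extracted audio:", audio)

    print("Diarization:", diarize_audio(audio))
    print("Transcript:", transcribe_long_audio(audio))

    voice_profiles: List[Dict[str, Any]] = []   # hypothetical empty voice collection
    print("Speaker ID:", identificar_veu(audio, voice_profiles))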