|
|
import os

# Limit CUDA device visibility to index 1. This is assigned before the
# remaining imports, presumably so anything imported afterwards sees it.
# NOTE(review): confirm this client-only module actually needs it.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import json
from typing import Any, Dict, List, Optional, Tuple, Union

from gradio_client import Client, handle_file

# Single shared client for the remote Space VeuReu/svision, created once at
# import time and reused by every wrapper function in this module.
svision_client = Client("VeuReu/svision")
|
|
|
|
|
|
|
|
def extract_scenes(video_path: str, threshold: float = 30.0, offset_frames: int = 5, crop_ratio: float = 0.1):
    """
    Run scene extraction on a video via the remote Space VeuReu/svision.

    The video is wrapped with ``handle_file`` and sent, together with the
    detection settings, to the ``/scenes_extraction`` endpoint.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    threshold : float, optional
        Scene change detection threshold; higher values make detection
        less sensitive. Defaults to 30.0.
    offset_frames : int, optional
        Number of frames to include before and after a detected scene
        boundary. Defaults to 5.
    crop_ratio : float, optional
        Ratio for cropping borders before performing scene detection.
        Defaults to 0.1.

    Returns
    -------
    Any
        The endpoint's response, passed through unmodified.
    """
    # Keyword names mirror the parameter names the remote endpoint is called with.
    request = {
        "video_file": {"video": handle_file(video_path)},
        "threshold": threshold,
        "offset_frames": offset_frames,
        "crop_ratio": crop_ratio,
    }
    return svision_client.predict(**request, api_name="/scenes_extraction")
|
|
|
|
|
|
|
|
def keyframes_every_second_extraction(video_path: str):
    """
    Request keyframe extraction from the remote Space VeuReu/svision.

    The video is wrapped with ``handle_file`` and submitted to the
    ``/keyframes_every_second_extraction`` endpoint.

    Parameters
    ----------
    video_path : str
        Path to the input video file.

    Returns
    -------
    Any
        The endpoint's response, passed through unmodified.
    """
    # The endpoint is called with the video as a {"video": <file>} mapping.
    video_payload = {"video": handle_file(video_path)}
    return svision_client.predict(
        video_path=video_payload,
        api_name="/keyframes_every_second_extraction",
    )
|
|
|
|
|
|
|
|
def add_ocr_and_faces(imagen_path: str, informacion_image: Dict[str, Any], face_col: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Send an image plus metadata to /add_ocr_and_faces on VeuReu/svision.

    The image metadata and the face collection are serialised to JSON
    strings and submitted alongside the image file, so the remote Space can
    perform OCR, face detection, and annotation enhancement.

    Parameters
    ----------
    imagen_path : str
        Path to the input image file.
    informacion_image : Dict[str, Any]
        Dictionary containing image-related metadata; must be
        JSON-serialisable.
    face_col : List[Dict[str, Any]]
        List of dictionaries representing detected faces or face metadata;
        must be JSON-serialisable.

    Returns
    -------
    Dict[str, Any]
        The endpoint's response, passed through unmodified.
    """
    # Both structures are sent as JSON text; serialise them before touching
    # the image file.
    json_fields = {
        "informacion_image": json.dumps(informacion_image),
        "face_col": json.dumps(face_col),
    }
    return svision_client.predict(
        image=handle_file(imagen_path),
        **json_fields,
        api_name="/add_ocr_and_faces",
    )
|
|
|
|
|
|
|
|
def extract_descripcion_escena(imagen_path: str) -> str:
    """
    Ask the remote Space VeuReu/svision to describe an image.

    A progress message is printed, then the image is submitted to the
    ``/describe_images`` endpoint as a one-element list.

    Parameters
    ----------
    imagen_path : str
        Path to the input image file.

    Returns
    -------
    str
        The endpoint's response, passed through unmodified.
        NOTE(review): annotated as ``str``; confirm the endpoint does not
        return a list, given that a list of images is sent.
    """
    print("Calling svision to describe the scene...")
    # The endpoint is called with a list of {"image": <file>} entries, so the
    # single image is wrapped accordingly.
    image_batch = [{"image": handle_file(imagen_path)}]
    return svision_client.predict(images=image_batch, api_name="/describe_images")
|
|
|