Spaces:

VeuReu
/

engine

Running

App Files Community

VeuReu committed 14 days ago

Commit

391b4d9

verified ·

1 Parent(s): b5d35bf

Update svision_client.py

Browse files

Files changed (1) hide show

svision_client.py +249 -249

svision_client.py CHANGED Viewed

@@ -1,249 +1,249 @@
-import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "1"
-from gradio_client import Client, handle_file
-from typing import Any, Dict, List, Optional, Tuple, Union
-import requests
-import json
-# Lazy initialization to avoid crash if Space is down at import time
-_svision_client = None
-def _get_svision_client():
-    """Get or create the svision client (lazy initialization)."""
-    global _svision_client
-    if _svision_client is None:
-        _svision_client = Client("VeuReu/svision")
-    return _svision_client
-def extract_scenes(video_path: str, threshold: float = 30.0, offset_frames: int = 5, crop_ratio: float = 0.1):
-    """
-    Call the /scenes_extraction endpoint of the remote Space VeuReu/svision.
-    Parameters
-    ----------
-    video_path : str
-        Path to the input video file.
-    threshold : float, optional
-        Scene change detection threshold; higher values make detection less sensitive.
-    offset_frames : int, optional
-        Number of frames to include before and after a detected scene boundary.
-    crop_ratio : float, optional
-        Ratio for cropping borders before performing scene detection.
-    Returns
-    -------
-    Any
-        Response returned by the remote /scenes_extraction endpoint.
-    """
-    result = _get_svision_client().predict(
-        video_file={"video": handle_file(video_path)},
-        threshold=threshold,
-        offset_frames=offset_frames,
-        crop_ratio=crop_ratio,
-        api_name="/scenes_extraction"
-    )
-    return result
-def keyframes_every_second_extraction(video_path: str):
-    """
-    Call the /keyframes_every_second_extraction endpoint of the remote Space VeuReu/svision.
-    Parameters
-    ----------
-    video_path : str
-        Path to the input video file.
-    Returns
-    -------
-    Any
-        Response returned by the remote /keyframes_every_second_extraction endpoint.
-    """
-    result = _get_svision_client().predict(
-        video_path={"video": handle_file(video_path)},
-        api_name="/keyframes_every_second_extraction"
-    )
-    return result
-def add_ocr_and_faces(imagen_path: str, informacion_image: Dict[str, Any], face_col: List[Dict[str, Any]]) -> Dict[str, Any]:
-    """
-    Call the /add_ocr_and_faces endpoint of the remote Space VeuReu/svision.
-    This function sends an image together with metadata and face collection data
-    to perform OCR, face detection, and annotation enhancement.
-    Parameters
-    ----------
-    imagen_path : str
-        Path to the input image file.
-    informacion_image : Dict[str, Any]
-        Dictionary containing image-related metadata.
-    face_col : List[Dict[str, Any]]
-        List of dictionaries representing detected faces or face metadata.
-    Returns
-    -------
-    Dict[str, Any]
-        Processed output containing OCR results, face detection data, and annotations.
-    """
-    informacion_image_str = json.dumps(informacion_image)
-    face_col_str = json.dumps(face_col)
-    result = _get_svision_client().predict(
-            image=handle_file(imagen_path),
-            informacion_image=informacion_image_str,
-            face_col=face_col_str,
-            api_name="/add_ocr_and_faces"
-    )
-    return result
-def extract_descripcion_escena(imagen_path: str) -> str:
-    """
-    Call the /describe_images endpoint of the remote Space VeuReu/svision.
-    This function sends an image to receive a textual description of its visual content.
-    Parameters
-    ----------
-    imagen_path : str
-        Path to the input image file.
-    Returns
-    -------
-    str
-        Description generated for the given image.
-    """
-    result = _get_svision_client().predict(
-        images=[{"image": handle_file(imagen_path)}],
-        api_name="/describe_images"
-    )
-    return result
-def _extract_path_from_gradio_file(file_obj) -> Optional[str]:
-    """Extract file path from Gradio file object (can be dict, str, tuple, or other).
-    Gradio Gallery returns different formats depending on version:
-    - List of tuples: [(path, caption), ...]
-    - List of dicts: [{"name": path, "data": None, "is_file": True}, ...]
-    - List of FileData: [FileData(path=..., url=...), ...]
-    - List of paths: [path, ...]
-    """
-    if file_obj is None:
-        return None
-    # Handle tuple format: (path, caption)
-    if isinstance(file_obj, tuple) and len(file_obj) >= 1:
-        return _extract_path_from_gradio_file(file_obj[0])
-    # Handle string path/URL
-    if isinstance(file_obj, str):
-        return file_obj
-    # Handle dict format: {"path": "...", "url": "...", "name": "..."}
-    if isinstance(file_obj, dict):
-        return file_obj.get("path") or file_obj.get("url") or file_obj.get("name") or file_obj.get("image")
-    # Handle FileData or similar object with attributes
-    if hasattr(file_obj, "path") and file_obj.path:
-        return file_obj.path
-    if hasattr(file_obj, "url") and file_obj.url:
-        return file_obj.url
-    if hasattr(file_obj, "name") and file_obj.name:
-        return file_obj.name
-    # Last resort: convert to string
-    return str(file_obj) if file_obj else None
-def get_face_embeddings_from_image(image_path: str) -> List[Dict[str, Any]]:
-    """
-    Call the /face_image_embedding_casting endpoint to detect faces and get embeddings.
-    This replaces local DeepFace/face_recognition processing by delegating to svision Space.
-    Parameters
-    ----------
-    image_path : str
-        Path to the input image file (a video frame).
-    Returns
-    -------
-    List[Dict[str, Any]]
-        List of dicts with 'embedding' (list of floats) and 'face_crop_path' (image path string).
-        Returns empty list if no faces detected or on error.
-    """
-    try:
-        # Returns: (face_crops: list of images/dicts, face_embeddings: list of dicts)
-        result = _get_svision_client().predict(
-            image=handle_file(image_path),
-            api_name="/face_image_embedding_casting"
-        )
-        print(f"[svision_client] Raw result type: {type(result)}, len: {len(result) if result else 0}")
-        # result is a tuple: (list of image paths/dicts, list of embedding dicts)
-        if result and len(result) >= 2:
-            face_crops_raw = result[0] if result[0] else []
-            face_embeddings = result[1] if result[1] else []
-            print(f"[svision_client] face_crops_raw type: {type(face_crops_raw)}, len: {len(face_crops_raw) if isinstance(face_crops_raw, list) else 'N/A'}")
-            if face_crops_raw and len(face_crops_raw) > 0:
-                print(f"[svision_client] First crop type: {type(face_crops_raw[0])}, value: {str(face_crops_raw[0])[:200]}")
-            # Combine into unified structure, extracting paths correctly
-            faces = []
-            for i, emb_dict in enumerate(face_embeddings):
-                # Extract path from Gradio file object (might be dict or string)
-                crop_path = None
-                if i < len(face_crops_raw):
-                    raw_crop = face_crops_raw[i]
-                    crop_path = _extract_path_from_gradio_file(raw_crop)
-                    if not crop_path:
-                        print(f"[svision_client] Could not extract path from crop {i}: {type(raw_crop)} = {str(raw_crop)[:100]}")
-                embedding = emb_dict.get("embedding", []) if isinstance(emb_dict, dict) else []
-                faces.append({
-                    "embedding": embedding,
-                    "face_crop_path": crop_path,
-                    "index": emb_dict.get("index", i) if isinstance(emb_dict, dict) else i,
-                })
-            print(f"[svision_client] Detected {len(faces)} faces from image")
-            return faces
-        return []
-    except Exception as e:
-        print(f"[svision_client] get_face_embeddings_from_image error: {e}")
-        import traceback
-        traceback.print_exc()
-        return []
-def get_face_embeddings_simple(image_path: str) -> List[List[float]]:
-    """
-    Call the /face_image_embedding endpoint to get face embeddings only.
-    Parameters
-    ----------
-    image_path : str
-        Path to the input image file.
-    Returns
-    -------
-    List[List[float]]
-        List of embedding vectors (one per detected face).
-    """
-    try:
-        result = _get_svision_client().predict(
-            image=handle_file(image_path),
-            api_name="/face_image_embedding"
-        )
-        return result if result else []
-    except Exception as e:
-        print(f"[svision_client] get_face_embeddings_simple error: {e}")
-        return []

+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+from gradio_client import Client, handle_file
+from typing import Any, Dict, List, Optional, Tuple, Union
+import requests
+import json
+# Lazy initialization to avoid crash if Space is down at import time
+_svision_client = None
+def _get_svision_client():
+    """Get or create the svision client (lazy initialization)."""
+    global _svision_client
+    if _svision_client is None:
+        _svision_client = Client("VeuReu/svision")
+    return _svision_client
+def extract_scenes(video_path: str, threshold: float = 240, offset_frames: int = 5, crop_ratio: float = 0.1):
+    """
+    Call the /scenes_extraction endpoint of the remote Space VeuReu/svision.
+    Parameters
+    ----------
+    video_path : str
+        Path to the input video file.
+    threshold : float, optional
+        Scene change detection threshold; higher values make detection less sensitive.
+    offset_frames : int, optional
+        Number of frames to include before and after a detected scene boundary.
+    crop_ratio : float, optional
+        Ratio for cropping borders before performing scene detection.
+    Returns
+    -------
+    Any
+        Response returned by the remote /scenes_extraction endpoint.
+    """
+    result = _get_svision_client().predict(
+        video_file={"video": handle_file(video_path)},
+        threshold=threshold,
+        offset_frames=offset_frames,
+        crop_ratio=crop_ratio,
+        api_name="/scenes_extraction"
+    )
+    return result
+def keyframes_every_second_extraction(video_path: str):
+    """
+    Call the /keyframes_every_second_extraction endpoint of the remote Space VeuReu/svision.
+    Parameters
+    ----------
+    video_path : str
+        Path to the input video file.
+    Returns
+    -------
+    Any
+        Response returned by the remote /keyframes_every_second_extraction endpoint.
+    """
+    result = _get_svision_client().predict(
+        video_path={"video": handle_file(video_path)},
+        api_name="/keyframes_every_second_extraction"
+    )
+    return result
+def add_ocr_and_faces(imagen_path: str, informacion_image: Dict[str, Any], face_col: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Call the /add_ocr_and_faces endpoint of the remote Space VeuReu/svision.
+    This function sends an image together with metadata and face collection data
+    to perform OCR, face detection, and annotation enhancement.
+    Parameters
+    ----------
+    imagen_path : str
+        Path to the input image file.
+    informacion_image : Dict[str, Any]
+        Dictionary containing image-related metadata.
+    face_col : List[Dict[str, Any]]
+        List of dictionaries representing detected faces or face metadata.
+    Returns
+    -------
+    Dict[str, Any]
+        Processed output containing OCR results, face detection data, and annotations.
+    """
+    informacion_image_str = json.dumps(informacion_image)
+    face_col_str = json.dumps(face_col)
+    result = _get_svision_client().predict(
+            image=handle_file(imagen_path),
+            informacion_image=informacion_image_str,
+            face_col=face_col_str,
+            api_name="/add_ocr_and_faces"
+    )
+    return result
+def extract_descripcion_escena(imagen_path: str) -> str:
+    """
+    Call the /describe_images endpoint of the remote Space VeuReu/svision.
+    This function sends an image to receive a textual description of its visual content.
+    Parameters
+    ----------
+    imagen_path : str
+        Path to the input image file.
+    Returns
+    -------
+    str
+        Description generated for the given image.
+    """
+    result = _get_svision_client().predict(
+        images=[{"image": handle_file(imagen_path)}],
+        api_name="/describe_images"
+    )
+    return result
+def _extract_path_from_gradio_file(file_obj) -> Optional[str]:
+    """Extract file path from Gradio file object (can be dict, str, tuple, or other).
+    Gradio Gallery returns different formats depending on version:
+    - List of tuples: [(path, caption), ...]
+    - List of dicts: [{"name": path, "data": None, "is_file": True}, ...]
+    - List of FileData: [FileData(path=..., url=...), ...]
+    - List of paths: [path, ...]
+    """
+    if file_obj is None:
+        return None
+    # Handle tuple format: (path, caption)
+    if isinstance(file_obj, tuple) and len(file_obj) >= 1:
+        return _extract_path_from_gradio_file(file_obj[0])
+    # Handle string path/URL
+    if isinstance(file_obj, str):
+        return file_obj
+    # Handle dict format: {"path": "...", "url": "...", "name": "..."}
+    if isinstance(file_obj, dict):
+        return file_obj.get("path") or file_obj.get("url") or file_obj.get("name") or file_obj.get("image")
+    # Handle FileData or similar object with attributes
+    if hasattr(file_obj, "path") and file_obj.path:
+        return file_obj.path
+    if hasattr(file_obj, "url") and file_obj.url:
+        return file_obj.url
+    if hasattr(file_obj, "name") and file_obj.name:
+        return file_obj.name
+    # Last resort: convert to string
+    return str(file_obj) if file_obj else None
+def get_face_embeddings_from_image(image_path: str) -> List[Dict[str, Any]]:
+    """
+    Call the /face_image_embedding_casting endpoint to detect faces and get embeddings.
+    This replaces local DeepFace/face_recognition processing by delegating to svision Space.
+    Parameters
+    ----------
+    image_path : str
+        Path to the input image file (a video frame).
+    Returns
+    -------
+    List[Dict[str, Any]]
+        List of dicts with 'embedding' (list of floats) and 'face_crop_path' (image path string).
+        Returns empty list if no faces detected or on error.
+    """
+    try:
+        # Returns: (face_crops: list of images/dicts, face_embeddings: list of dicts)
+        result = _get_svision_client().predict(
+            image=handle_file(image_path),
+            api_name="/face_image_embedding_casting"
+        )
+        print(f"[svision_client] Raw result type: {type(result)}, len: {len(result) if result else 0}")
+        # result is a tuple: (list of image paths/dicts, list of embedding dicts)
+        if result and len(result) >= 2:
+            face_crops_raw = result[0] if result[0] else []
+            face_embeddings = result[1] if result[1] else []
+            print(f"[svision_client] face_crops_raw type: {type(face_crops_raw)}, len: {len(face_crops_raw) if isinstance(face_crops_raw, list) else 'N/A'}")
+            if face_crops_raw and len(face_crops_raw) > 0:
+                print(f"[svision_client] First crop type: {type(face_crops_raw[0])}, value: {str(face_crops_raw[0])[:200]}")
+            # Combine into unified structure, extracting paths correctly
+            faces = []
+            for i, emb_dict in enumerate(face_embeddings):
+                # Extract path from Gradio file object (might be dict or string)
+                crop_path = None
+                if i < len(face_crops_raw):
+                    raw_crop = face_crops_raw[i]
+                    crop_path = _extract_path_from_gradio_file(raw_crop)
+                    if not crop_path:
+                        print(f"[svision_client] Could not extract path from crop {i}: {type(raw_crop)} = {str(raw_crop)[:100]}")
+                embedding = emb_dict.get("embedding", []) if isinstance(emb_dict, dict) else []
+                faces.append({
+                    "embedding": embedding,
+                    "face_crop_path": crop_path,
+                    "index": emb_dict.get("index", i) if isinstance(emb_dict, dict) else i,
+                })
+            print(f"[svision_client] Detected {len(faces)} faces from image")
+            return faces
+        return []
+    except Exception as e:
+        print(f"[svision_client] get_face_embeddings_from_image error: {e}")
+        import traceback
+        traceback.print_exc()
+        return []
+def get_face_embeddings_simple(image_path: str) -> List[List[float]]:
+    """
+    Call the /face_image_embedding endpoint to get face embeddings only.
+    Parameters
+    ----------
+    image_path : str
+        Path to the input image file.
+    Returns
+    -------
+    List[List[float]]
+        List of embedding vectors (one per detected face).
+    """
+    try:
+        result = _get_svision_client().predict(
+            image=handle_file(image_path),
+            api_name="/face_image_embedding"
+        )
+        return result if result else []
+    except Exception as e:
+        print(f"[svision_client] get_face_embeddings_simple error: {e}")
+        return []