--- a/depth_anything_3/app/modules/model_inference.py
+++ b/depth_anything_3/app/modules/model_inference.py
@@ -31,47 +31,67 @@
 from depth_anything_3.utils.export.glb import export_to_glb
 from depth_anything_3.utils.export.gs import export_to_gs_video
 
+# Global cache for model (used in GPU subprocess)
+# This is safe because @spaces.GPU runs in isolated subprocess
+_MODEL_CACHE = None
+
+
 class ModelInference:
     """
     Handles model inference and data processing for Depth Anything 3.
     """
 
     def __init__(self):
-        """Initialize the model inference handler."""
-        self.model = None
-
-    def initialize_model(self, device: str = "cuda") -> None:
+        """Initialize the model inference handler.
+
+        Note: Do NOT store model in instance variable to avoid
+        state sharing issues with @spaces.GPU decorator.
+        """
+        pass  # No instance variables
+
+    def initialize_model(self, device: str = "cuda"):
         """
         Initialize the DepthAnything3 model.
+
+        Uses global cache to store model safely in GPU subprocess.
+        This avoids CUDA initialization in main process.
 
         Args:
             device: Device to load the model on
+
+        Returns:
+            Model instance
         """
-        if self.model is None:
+        global _MODEL_CACHE
+
+        if _MODEL_CACHE is None:
             # Get model directory from environment variable or use default
             model_dir = os.environ.get(
                 "DA3_MODEL_DIR", "/dev/shm/da3_models/DA3HF-VITG-METRIC_VITL"
            )
-            self.model = DepthAnything3.from_pretrained(model_dir)
-            self.model = self.model.to(device)
+            print(f"Loading model from {model_dir}...")
+            _MODEL_CACHE = DepthAnything3.from_pretrained(model_dir)
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+            _MODEL_CACHE.eval()
+            print("Model loaded and moved to GPU")
         else:
-            self.model = self.model.to(device)
-
-        self.model.eval()
+            print("Using cached model")
+            # Ensure model is on correct device
+            _MODEL_CACHE = _MODEL_CACHE.to(device)
+
+        return _MODEL_CACHE
 
     def run_inference(
         self,
...
         # Initialize model if needed
-        self.initialize_model(device)
+        model = self.initialize_model(device)
...
         # Run model inference
         print(f"Running inference with method: {actual_method}")
         with torch.no_grad():
-            prediction = self.model.inference(
+            prediction = model.inference(
                 image_paths, export_dir=None, process_res_method=actual_method, infer_gs=infer_gs
             )
@@ -192,6 +212,10 @@ class ModelInference:
         # Process results
         processed_data = self._process_results(target_dir, prediction, image_paths)
 
+        # CRITICAL: Move all CUDA tensors to CPU before returning
+        # This prevents CUDA initialization in main process during unpickling
+        prediction = self._move_prediction_to_cpu(prediction)
+
         # Clean up
         torch.cuda.empty_cache()
@@ -282,6 +306,45 @@ class ModelInference:
 
         return processed_data
 
+    def _move_prediction_to_cpu(self, prediction: Any) -> Any:
+        """
+        Move all CUDA tensors in prediction to CPU for safe pickling.
+
+        This is REQUIRED for HF Spaces with @spaces.GPU decorator to avoid
+        CUDA initialization in the main process during unpickling.
+
+        Args:
+            prediction: Prediction object that may contain CUDA tensors
+
+        Returns:
+            Prediction object with all tensors moved to CPU
+        """
+        # Move gaussians tensors to CPU
+        if hasattr(prediction, 'gaussians') and prediction.gaussians is not None:
+            gaussians = prediction.gaussians
+
+            # Move each tensor attribute to CPU
+            tensor_attrs = ['means', 'scales', 'rotations', 'harmonics', 'opacities']
+            for attr in tensor_attrs:
+                if hasattr(gaussians, attr):
+                    tensor = getattr(gaussians, attr)
+                    if isinstance(tensor, torch.Tensor) and tensor.is_cuda:
+                        setattr(gaussians, attr, tensor.cpu())
+                        print(f"Moved gaussians.{attr} to CPU")
+
+        # Move any tensors in aux dict to CPU
+        if hasattr(prediction, 'aux') and prediction.aux is not None:
+            for key, value in list(prediction.aux.items()):
+                if isinstance(value, torch.Tensor) and value.is_cuda:
+                    prediction.aux[key] = value.cpu()
+                    print(f"Moved aux['{key}'] to CPU")
+                elif isinstance(value, dict):
+                    # Recursively handle nested dicts
+                    for k, v in list(value.items()):
+                        if isinstance(v, torch.Tensor) and v.is_cuda:
+                            value[k] = v.cpu()
+                            print(f"Moved aux['{key}']['{k}'] to CPU")
+
+        return prediction
+
     def cleanup(self) -> None:
         """Clean up GPU memory."""
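
For context, a minimal usage sketch of how a Spaces GPU handler would be expected to consume this class after the change: the model is created inside the `@spaces.GPU` subprocess via the module-level cache, and the returned prediction is already CPU-only, so unpickling it back in the main Gradio process never initializes CUDA. The handler name, its arguments, and the `run_inference` call signature below are illustrative assumptions; they are not part of this diff.

```python
# Hypothetical wiring sketch, assuming HF Spaces ZeroGPU (the `spaces` package)
# and a Gradio app; names and arguments are illustrative only.
import spaces

from depth_anything_3.app.modules.model_inference import ModelInference

# Holds no state, so it is safe to construct in the main process.
inference = ModelInference()


@spaces.GPU  # body runs in an isolated GPU subprocess
def run(target_dir, image_paths):
    # Model loading and all CUDA work happen here, inside the subprocess.
    # The return value is pickled back to the main process, which is why
    # _move_prediction_to_cpu() must strip CUDA tensors before returning.
    return inference.run_inference(target_dir, image_paths)
```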