Spaces:

rizavelioglu
/

vae-comparison

Running on Zero

App Files Files Community

rizavelioglu committed 3 days ago

Commit

331d5ce

1 Parent(s): 9175dc1

- remove remote-VAE support

Browse files

Files changed (1) hide show

app.py +16 -49

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import spaces
 import gradio as gr
 import torch
-from diffusers import AutoencoderKL, AutoencoderDC
-from diffusers.utils.remote_utils import remote_decode
 import torchvision.transforms.v2 as transforms
 from torchvision.io import read_image
 from typing import Dict
@@ -42,18 +41,9 @@ class VAETester:
         self.output_transform = transforms.Normalize(mean=[-1], std=[2])
         self.vae_models = self._load_all_vaes()
-    def _get_endpoint(self, base_name: str) -> str:
-        """Helper method to get the endpoint for a given base model name"""
-        endpoints = {
-            "sd-vae-ft-mse": "https://q1bj3bpq6kzilnsu.us-east-1.aws.endpoints.huggingface.cloud",
-            "sdxl-vae": "https://x2dmsqunjd6k9prw.us-east-1.aws.endpoints.huggingface.cloud",
-            "FLUX.1": "https://whhx50ex1aryqvw6.us-east-1.aws.endpoints.huggingface.cloud",
-        }
-        return endpoints[base_name]
     def _load_all_vaes(self) -> Dict[str, Dict]:
-        """Load configurations for local and remote VAE models"""
-        local_vaes = {
             "stable-diffusion-v1-4": AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae").to(self.device),
             "eq-vae-ema": AutoencoderKL.from_pretrained("zelaki/eq-vae-ema").to(self.device),
             "eq-sdxl-vae": AutoencoderKL.from_pretrained("KBlueLeaf/EQ-SDXL-VAE").to(self.device),
@@ -66,6 +56,7 @@ class VAETester:
 			# "dc-ae-f32c32-sana-1.0": AutoencoderDC.from_pretrained("mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers").to(self.device),
             "FLUX.1-Kontext": AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", subfolder="vae").to(self.device),
             "FLUX.2": AutoencoderKL.from_pretrained("black-forest-labs/FLUX.2-dev", subfolder="vae").to(self.device),
         }
         # Define the desired order of models
         order = [
@@ -73,66 +64,42 @@ class VAETester:
             "eq-vae-ema",
             "eq-sdxl-vae",
             "sd-vae-ft-mse",
-            #"sd-vae-ft-mse (remote)",
             "sdxl-vae",
-            #"sdxl-vae (remote)",
             "playground-v2.5",
             "stable-diffusion-3-medium",
             "FLUX.1",
-            #"FLUX.1 (remote)",
             "CogView4-6B",
 			# "dc-ae-f32c32-sana-1.0",
             "FLUX.1-Kontext",
             "FLUX.2",
         ]
         # Construct the vae_models dictionary in the specified order
-        vae_models = {}
-        for name in order:
-            if "(remote)" not in name:
-                # Local model
-                vae_models[name] = {"type": "local", "vae": local_vaes[name]}
-            else:
-                # Remote model
-                base_name = name.replace(" (remote)", "")
-                vae_models[name] = {
-                    "type": "remote",
-                    "local_vae_key": base_name,
-                    "endpoint": self._get_endpoint(base_name),
-                }
-        return vae_models
     def process_image(self, img: torch.Tensor, model_config: Dict, tolerance: float):
-        """Process image through a single VAE (local or remote)"""
-        img_transformed = self.input_transform(img).to(self.device).unsqueeze(0)
         original_base = self.base_transform(img).cpu()
         # Start timer
         start_time = time.time()
-        if model_config["type"] == "local":
-            vae = model_config["vae"]
-            with torch.no_grad():
                 encoded = vae.encode(img_transformed).latent_dist.sample()
                 decoded = vae.decode(encoded).sample
-        elif model_config["type"] == "remote":
-            local_vae = self.vae_models[model_config["local_vae_key"]]["vae"]
-            with torch.no_grad():
-                encoded = local_vae.encode(img_transformed).latent_dist.sample()
-            decoded = remote_decode(
-                endpoint=model_config["endpoint"],
-                tensor=encoded,
-                do_scaling=False,
-                output_type="pt",
-                return_type="pt",
-                partial_postprocess=False,
-            )
         # End timer
         processing_time = time.time() - start_time
-        decoded_transformed = self.output_transform(decoded.squeeze(0)).cpu()
         reconstructed = decoded_transformed.clip(0, 1)
         diff = (original_base - reconstructed).abs()
         bw_diff = (diff > tolerance).any(dim=0).float()

 import spaces
 import gradio as gr
 import torch
+from diffusers import AutoencoderKL, AutoencoderDC, AutoModel
 import torchvision.transforms.v2 as transforms
 from torchvision.io import read_image
 from typing import Dict
         self.output_transform = transforms.Normalize(mean=[-1], std=[2])
         self.vae_models = self._load_all_vaes()
     def _load_all_vaes(self) -> Dict[str, Dict]:
+        """Load configurations for all VAE models"""
+        vaes = {
             "stable-diffusion-v1-4": AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae").to(self.device),
             "eq-vae-ema": AutoencoderKL.from_pretrained("zelaki/eq-vae-ema").to(self.device),
             "eq-sdxl-vae": AutoencoderKL.from_pretrained("KBlueLeaf/EQ-SDXL-VAE").to(self.device),
 			# "dc-ae-f32c32-sana-1.0": AutoencoderDC.from_pretrained("mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers").to(self.device),
             "FLUX.1-Kontext": AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", subfolder="vae").to(self.device),
             "FLUX.2": AutoencoderKL.from_pretrained("black-forest-labs/FLUX.2-dev", subfolder="vae").to(self.device),
+            "FLUX.2-TinyAutoEncoder": AutoModel.from_pretrained("fal/FLUX.2-Tiny-AutoEncoder", trust_remote_code=True, torch_dtype=torch.bfloat16).to(self.device),
         }
         # Define the desired order of models
         order = [
             "eq-vae-ema",
             "eq-sdxl-vae",
             "sd-vae-ft-mse",
             "sdxl-vae",
             "playground-v2.5",
             "stable-diffusion-3-medium",
             "FLUX.1",
             "CogView4-6B",
 			# "dc-ae-f32c32-sana-1.0",
             "FLUX.1-Kontext",
             "FLUX.2",
+            "FLUX.2-TinyAutoEncoder",
         ]
         # Construct the vae_models dictionary in the specified order
+        return {name: {"vae": vaes[name], "dtype": torch.bfloat16 if name == "FLUX.2-TinyAutoEncoder" else torch.float32} for name in order}
     def process_image(self, img: torch.Tensor, model_config: Dict, tolerance: float):
+        """Process image through a single VAE model"""
+        dtype = model_config["dtype"]
+        img_transformed = self.input_transform(img).to(dtype).to(self.device).unsqueeze(0)
         original_base = self.base_transform(img).cpu()
         # Start timer
         start_time = time.time()
+        vae = model_config["vae"]
+        with torch.no_grad():
+            if isinstance(vae, AutoModel):
+                encoded = vae.encode(img_transformed, return_dict=False)
+                decoded = vae.decode(encoded, return_dict=False)
+            else:
                 encoded = vae.encode(img_transformed).latent_dist.sample()
                 decoded = vae.decode(encoded).sample
         # End timer
         processing_time = time.time() - start_time
+        decoded_transformed = self.output_transform(decoded.squeeze(0).to(torch.float32)).cpu()
         reconstructed = decoded_transformed.clip(0, 1)
         diff = (original_base - reconstructed).abs()
         bw_diff = (diff > tolerance).any(dim=0).float()