YiYiXu committed · verified · Commit 1abb14f · 1 Parent(s): dc5f3d7

Update README.md

Files changed (1): README.md (+27 −23)
README.md CHANGED
@@ -4,12 +4,19 @@ tags:
 - text-to-video
 ---
 
+HunyuanVideo 1.5 uses attention masks with variable-length sequences. For best performance, we recommend using an attention backend that handles padding efficiently.
+
+We recommend installing [kernels](https://github.com/huggingface/kernels) (`pip install kernels`) to access prebuilt attention kernels.
+
+You can check our [documentation](https://huggingface.co/docs/diffusers/main/en/optimization/attention_backends) to learn more about all the different attention backends we support.
+
+
 ```py
 import torch
 
 dtype = torch.bfloat16
 device = "cuda:0"
-from diffusers import HunyuanVideo15Pipeline
+from diffusers import HunyuanVideo15Pipeline, attention_backend
 from diffusers.utils import export_to_video
 
 pipe = HunyuanVideo15Pipeline.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled", torch_dtype=dtype)
@@ -17,27 +24,24 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 
 generator = torch.Generator(device=device).manual_seed(seed)
-
-video = pipe(
-    prompt=prompt,
-    generator=generator,
-    num_frames=121,
-    num_inference_steps=50,
-).frames[0]
-export_to_video(video, "output.mp4", fps=24)
+with attention_backend("_flash_3_hub"):  # or `"flash_hub"` if you are not using H100/H800
+    video = pipe(
+        prompt=prompt,
+        generator=generator,
+        num_frames=121,
+        num_inference_steps=50,
+    ).frames[0]
+    export_to_video(video, "output.mp4", fps=24)
 ```
 
-Hunyuan1.5 use attention masks with variable-length sequences. For best performance, we recommend using an attention backend that handles padding efficiently.
-
-We recommend installing [kernels](https://github.com/huggingface/kernels) (`pip install kernels`) to access prebuilt attention kernels.
-
+To use the default attention backend:
 
 ```py
 import torch
 
 dtype = torch.bfloat16
 device = "cuda:0"
-from diffusers import HunyuanVideo15Pipeline, attention_backend
+from diffusers import HunyuanVideo15Pipeline
 from diffusers.utils import export_to_video
 
 pipe = HunyuanVideo15Pipeline.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled", torch_dtype=dtype)
@@ -45,12 +49,12 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 
 generator = torch.Generator(device=device).manual_seed(seed)
-with attention_backend("_flash_3_hub"):  # or `"flash_hub"` if you are not using H100/H800
-    video = pipe(
-        prompt=prompt,
-        generator=generator,
-        num_frames=121,
-        num_inference_steps=50,
-    ).frames[0]
-    export_to_video(video, "output.mp4", fps=24)
-```
+
+video = pipe(
+    prompt=prompt,
+    generator=generator,
+    num_frames=121,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
+```
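
Note that both README snippets reference `prompt` and `seed` without defining them. Below is a minimal end-to-end version of the recommended (attention-backend) example; the `prompt` and `seed` values are placeholder assumptions, not part of the commit:

```py
import torch

from diffusers import HunyuanVideo15Pipeline, attention_backend
from diffusers.utils import export_to_video

dtype = torch.bfloat16
device = "cuda:0"

# Placeholders for the names the README leaves undefined; substitute your own.
prompt = "A cat walks on the grass, realistic style."
seed = 42

pipe = HunyuanVideo15Pipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled",
    torch_dtype=dtype,
)
pipe.enable_model_cpu_offload()  # keep VRAM usage manageable
pipe.vae.enable_tiling()         # decode the video latents in tiles

generator = torch.Generator(device=device).manual_seed(seed)

# FlashAttention 3 prebuilt kernel; use "flash_hub" on non-H100/H800 GPUs.
with attention_backend("_flash_3_hub"):
    video = pipe(
        prompt=prompt,
        generator=generator,
        num_frames=121,
        num_inference_steps=50,
    ).frames[0]
export_to_video(video, "output.mp4", fps=24)
```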
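
The linked attention-backends documentation also describes pinning a backend on a model rather than wrapping calls in the context manager. A sketch, continuing from the script above and assuming your diffusers version exposes `set_attention_backend`/`reset_attention_backend` on the transformer (check the linked docs for your release):

```py
# Pin the backend for all subsequent calls instead of using the
# `attention_backend(...)` context manager. Method availability is an
# assumption tied to recent diffusers releases; see the linked docs.
pipe.transformer.set_attention_backend("flash_hub")

video = pipe(
    prompt=prompt,
    generator=generator,
    num_frames=121,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)

# Restore the default (native PyTorch SDPA) backend afterwards.
pipe.transformer.reset_attention_backend()
```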