YiYiXu committed · verified · Commit 1abb14f · 1 Parent(s): dc5f3d7

Update README.md

Files changed (1): README.md (+27 −23)
README.md CHANGED
@@ -4,12 +4,19 @@ tags:
 - text-to-video
 ---
 
+HunyuanVideo 1.5 uses attention masks with variable-length sequences. For best performance, we recommend using an attention backend that handles padding efficiently.
+
+We recommend installing [kernels](https://github.com/huggingface/kernels) (`pip install kernels`) to access prebuilt attention kernels.
+
+You can check our [documentation](https://huggingface.co/docs/diffusers/main/en/optimization/attention_backends) to learn more about all the different attention backends we support.
+
+
 ```py
 import torch
 
 dtype = torch.bfloat16
 device = "cuda:0"
-from diffusers import HunyuanVideo15Pipeline
+from diffusers import HunyuanVideo15Pipeline, attention_backend
 from diffusers.utils import export_to_video
 
 pipe = HunyuanVideo15Pipeline.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled", torch_dtype=dtype)
@@ -17,27 +24,24 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 
 generator = torch.Generator(device=device).manual_seed(seed)
-
-video = pipe(
-    prompt=prompt,
-    generator=generator,
-    num_frames=121,
-    num_inference_steps=50,
-).frames[0]
-export_to_video(video, "output.mp4", fps=24)
+with attention_backend("_flash_3_hub"):  # or `"flash_hub"` if you are not using H100/H800
+    video = pipe(
+        prompt=prompt,
+        generator=generator,
+        num_frames=121,
+        num_inference_steps=50,
+    ).frames[0]
+    export_to_video(video, "output.mp4", fps=24)
 ```
 
-Hunyuan1.5 use attention masks with variable-length sequences. For best performance, we recommend using an attention backend that handles padding efficiently.
-
-We recommend installing [kernels](https://github.com/huggingface/kernels) (`pip install kernels`) to access prebuilt attention kernels.
-
+To use the default attention backend:
 
 ```py
 import torch
 
 dtype = torch.bfloat16
 device = "cuda:0"
-from diffusers import HunyuanVideo15Pipeline, attention_backend
+from diffusers import HunyuanVideo15Pipeline
 from diffusers.utils import export_to_video
 
 pipe = HunyuanVideo15Pipeline.from_pretrained("hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled", torch_dtype=dtype)
@@ -45,12 +49,12 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 
 generator = torch.Generator(device=device).manual_seed(seed)
-with attention_backend("_flash_3_hub"):  # or `"flash_hub"` if you are not using H100/H800
-    video = pipe(
-        prompt=prompt,
-        generator=generator,
-        num_frames=121,
-        num_inference_steps=50,
-    ).frames[0]
-    export_to_video(video, "output.mp4", fps=24)
-```
+
+video = pipe(
+    prompt=prompt,
+    generator=generator,
+    num_frames=121,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
+```
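
Note that both README snippets reference `prompt` and `seed` without defining them. Below is a minimal end-to-end version of the recommended (attention-backend) example; the `prompt` and `seed` values are placeholder assumptions, not part of the commit:

```py
import torch

from diffusers import HunyuanVideo15Pipeline, attention_backend
from diffusers.utils import export_to_video

dtype = torch.bfloat16
device = "cuda:0"

# Placeholders for the names the README leaves undefined; substitute your own.
prompt = "A cat walks on the grass, realistic style."
seed = 42

pipe = HunyuanVideo15Pipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v_distilled",
    torch_dtype=dtype,
)
pipe.enable_model_cpu_offload()  # keep VRAM usage manageable
pipe.vae.enable_tiling()         # decode the video latents in tiles

generator = torch.Generator(device=device).manual_seed(seed)

# FlashAttention 3 prebuilt kernel; use "flash_hub" on non-H100/H800 GPUs.
with attention_backend("_flash_3_hub"):
    video = pipe(
        prompt=prompt,
        generator=generator,
        num_frames=121,
        num_inference_steps=50,
    ).frames[0]
export_to_video(video, "output.mp4", fps=24)
```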
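
The linked attention-backends documentation also describes pinning a backend on a model rather than wrapping calls in the context manager. A sketch, continuing from the script above and assuming your diffusers version exposes `set_attention_backend`/`reset_attention_backend` on the transformer (check the linked docs for your release):

```py
# Pin the backend for all subsequent calls instead of using the
# `attention_backend(...)` context manager. Method availability is an
# assumption tied to recent diffusers releases; see the linked docs.
pipe.transformer.set_attention_backend("flash_hub")

video = pipe(
    prompt=prompt,
    generator=generator,
    num_frames=121,
    num_inference_steps=50,
).frames[0]
export_to_video(video, "output.mp4", fps=24)

# Restore the default (native PyTorch SDPA) backend afterwards.
pipe.transformer.reset_attention_backend()
```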