Spaces:

rahul7star
/

Hunyuan-Avatar

Paused

rahul7star committed on Jun 13

Commit

e7b392b

verified ·

1 Parent(s): 477e759

Update hymm_sp/modules/models_audio.py

Files changed (1) hide show

hymm_sp/modules/models_audio.py CHANGED Viewed

@@ -7,7 +7,7 @@ import torch.nn.functional as F
 from diffusers.models import ModelMixin
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from flash_attn.flash_attn_interface import flash_attn_varlen_func
@@ -173,29 +173,30 @@ class DoubleStreamBlock(nn.Module):
                 x.view(x.shape[0] * x.shape[1], *x.shape[2:])
                 for x in [q, k, v]
             ]
-            attn = flash_attn_varlen_func(
-                q,
-                k,
-                v,
-                cu_seqlens_q,
-                cu_seqlens_kv,
-                max_seqlen_q,
-                max_seqlen_kv,
-            )
             attn = attn.view(img_k.shape[0], max_seqlen_q, -1).contiguous()
         else:
-                attn, _ = parallel_attention(
-                (img_q, txt_q),
-                (img_k, txt_k),
-                (img_v, txt_v),
-                img_q_len=img_q.shape[1],
-                img_kv_len=img_k.shape[1],
-                cu_seqlens_q=cu_seqlens_q,
-                cu_seqlens_kv=cu_seqlens_kv,
-                max_seqlen_q=max_seqlen_q,
-                max_seqlen_kv=max_seqlen_kv,
-            )
         img_attn, txt_attn = attn[:, :img.shape[1]], attn[:, img.shape[1]:]
         if CPU_OFFLOAD: torch.cuda.empty_cache()

 from diffusers.models import ModelMixin
 from diffusers.configuration_utils import ConfigMixin, register_to_config
+#from flash_attn.flash_attn_interface import flash_attn_varlen_func
                 x.view(x.shape[0] * x.shape[1], *x.shape[2:])
                 for x in [q, k, v]
             ]
+            attn = None
+            # attn = flash_attn_varlen_func(
+            #     q,
+            #     k,
+            #     v,
+            #     cu_seqlens_q,
+            #     cu_seqlens_kv,
+            #     max_seqlen_q,
+            #     max_seqlen_kv,
+            # )
             attn = attn.view(img_k.shape[0], max_seqlen_q, -1).contiguous()
         else:
+            #     attn, _ = parallel_attention(
+            #     (img_q, txt_q),
+            #     (img_k, txt_k),
+            #     (img_v, txt_v),
+            #     img_q_len=img_q.shape[1],
+            #     img_kv_len=img_k.shape[1],
+            #     cu_seqlens_q=cu_seqlens_q,
+            #     cu_seqlens_kv=cu_seqlens_kv,
+            #     max_seqlen_q=max_seqlen_q,
+            #     max_seqlen_kv=max_seqlen_kv,
+            # )
         img_attn, txt_attn = attn[:, :img.shape[1]], attn[:, img.shape[1]:]
         if CPU_OFFLOAD: torch.cuda.empty_cache()