Spaces:

jiuface
/

wan-fast

Running on Zero

App Files Community

jiuface committed on Aug 31

Commit

5ecfc10

verified ·

1 Parent(s): 2e72c82

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -62

app.py CHANGED Viewed

@@ -1,5 +1,10 @@
 import torch
-from diffusers import AutoencoderKLWan, WanImageToVideoPipeline, UniPCMultistepScheduler
 from diffusers.utils import export_to_video
 from transformers import CLIPVisionModel
 import gradio as gr
@@ -19,24 +24,22 @@ import boto3
 from io import BytesIO
 from diffusers.utils import load_image
-MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
-LORA_REPO_ID = "Kijai/WanVideo_comfy"
-LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
-image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
-vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanImageToVideoPipeline.from_pretrained(
-    MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
-)
-pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
-pipe.to("cuda")
-causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
-pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
-pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
-pipe.fuse_lora()
 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE = 512
@@ -117,46 +120,6 @@ def upload_video_to_r2(video_file, account_id, access_key, secret_key, bucket_na
     return video_remote_path
-def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_w_val):
-    if uploaded_pil_image is None:
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-    try:
-        new_h, new_w = _calculate_new_dimensions_wan(
-            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
-            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
-            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
-        )
-        return gr.update(value=new_h), gr.update(value=new_w)
-    except Exception as e:
-        gr.Warning("Error attempting to calculate new dimensions")
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-def get_duration(
-    image_url,
-    prompt,
-    height,
-    width,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    steps,
-    seed,
-    randomize_seed,
-    upload_to_r2,
-    account_id,
-    access_key,
-    secret_key,
-    bucket,
-    progress=gr.Progress(track_tqdm=True)
-):
-    # 保持逻辑不变
-    if steps > 4 and duration_seconds > 2:
-        return 90
-    elif steps > 4 or duration_seconds > 2:
-        return 75
-    else:
-        return 60
 @spaces.GPU(duration=120)
 def generate_video(image_url,
@@ -191,9 +154,14 @@ def generate_video(image_url,
     with torch.inference_mode():
         output_frames_list = pipe(
-            image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
-            height=target_h, width=target_w, num_frames=num_frames,
-            guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
             generator=torch.Generator(device="cuda").manual_seed(current_seed)
         ).frames[0]
@@ -209,8 +177,7 @@ def generate_video(image_url,
 with gr.Blocks() as demo:
-    gr.Markdown("# Fast 4 steps Wan 2.1 I2V (14B) with CausVid LoRA")
-    gr.Markdown("[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan 2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors) and is compatible with 🧨 diffusers")
     with gr.Row():
         with gr.Column():
             image_url_input =  gr.Textbox(

+import os
 import torch
+from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
+from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
+from diffusers.utils.export_utils import export_to_video
 from diffusers.utils import export_to_video
 from transformers import CLIPVisionModel
 import gradio as gr
 from io import BytesIO
 from diffusers.utils import load_image
+MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
+pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
+    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer',
+        torch_dtype=torch.bfloat16,
+        device_map='cuda',
+    ),
+    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer_2',
+        torch_dtype=torch.bfloat16,
+        device_map='cuda',
+    ),
+    torch_dtype=torch.bfloat16,
+).to('cuda')
 MOD_VALUE = 32
 DEFAULT_H_SLIDER_VALUE = 512
     return video_remote_path
 @spaces.GPU(duration=120)
 def generate_video(image_url,
     with torch.inference_mode():
         output_frames_list = pipe(
+            image=resized_image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=target_h,
+            width=target_w,
+            num_frames=num_frames,
+            guidance_scale=float(guidance_scale),
+            num_inference_steps=int(steps),
             generator=torch.Generator(device="cuda").manual_seed(current_seed)
         ).frames[0]
 with gr.Blocks() as demo:
+    gr.Markdown("# Wan2.2-I2V-A14B-Diffusers")
     with gr.Row():
         with gr.Column():
             image_url_input =  gr.Textbox(