Update app.py
app.py CHANGED
@@ -1,10 +1,16 @@
 #!/usr/bin/env python
 """
 Gradio demo for Wan2.1 FLF2V – First & Last Frame → Video
-
-
+• Single global load (no repeated downloads)
+• Balanced device_map to avoid OOM on 24 GB A10
+• Fast CLIP processor via use_fast=True
+• High-level streaming progress
+• Auto-download via gr.File
 """
 import os
+# persist Hugging Face cache so safetensors only download once
+os.environ["HF_HOME"] = "/mnt/data/huggingface"
+
 import numpy as np
 import torch
 import gradio as gr
@@ -22,29 +28,26 @@ DTYPE = torch.float16
 MAX_AREA = 1280 * 720
 DEFAULT_FRAMES = 81
 
-# keep Hugging Face cache on disk so we don't re-download
-os.environ["HF_HOME"] = "/mnt/data/huggingface"
-
 # -----------------------------------------------------------------------------
-# PIPELINE
+# LOAD PIPELINE ONCE
 # -----------------------------------------------------------------------------
 def load_pipeline():
-    # 1) image encoder
+    # 1) CLIP image encoder (fp32)
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
-    # 2) VAE
+    # 2) VAE (fp16)
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
-    # 3)
+    # 3) Balanced device placement + fast processor
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         image_encoder=image_encoder,
         vae=vae,
         torch_dtype=DTYPE,
-        device_map="balanced",
-        use_fast=True,
+        device_map="balanced",  # spread weights CPU↔GPU
+        use_fast=True,          # internal fast CLIPImageProcessor
     )
     return pipe
 
@@ -52,58 +55,58 @@ PIPE = load_pipeline()
 
 
 # -----------------------------------------------------------------------------
-#
+# HELPERS
 # -----------------------------------------------------------------------------
 def aspect_resize(img: Image.Image, max_area=MAX_AREA):
-    ar
-    mod
-    h
-    w
+    ar = img.height / img.width
+    mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
+    h = int(np.sqrt(max_area * ar)) // mod * mod
+    w = int(np.sqrt(max_area / ar)) // mod * mod
     return img.resize((w, h), Image.LANCZOS), h, w
 
 def center_crop_resize(img: Image.Image, h, w):
     ratio = max(w / img.width, h / img.height)
-
+    img2 = img.resize(
         (round(img.width * ratio), round(img.height * ratio)),
         Image.LANCZOS
     )
-    return TF.center_crop(
+    return TF.center_crop(img2, [h, w])
 
 
 # -----------------------------------------------------------------------------
-#
+# GENERATION + STREAMING
 # -----------------------------------------------------------------------------
 def generate(
-    first_frame:
-    last_frame:
-    prompt:
-    negative:
-    steps:
-    guidance:
-    num_frames:
-    seed:
-    fps:
-    progress=
+    first_frame: Image.Image,
+    last_frame: Image.Image,
+    prompt: str,
+    negative: str,
+    steps: int,
+    guidance: float,
+    num_frames: int,
+    seed: int,
+    fps: int,
+    progress=gr.Progress(),
 ):
-    # seed
+    # choose seed
    if seed == -1:
         seed = torch.seed()
     gen = torch.Generator(device=PIPE.device).manual_seed(seed)
 
     # 0–15%: resize
     progress(0.0, desc="Resizing first frame…")
-
-    if last_frame.size !=
+    f_resized, h, w = aspect_resize(first_frame)
+    if last_frame.size != f_resized.size:
         progress(0.15, desc="Resizing last frame…")
-
+        l_resized = center_crop_resize(last_frame, h, w)
     else:
-
+        l_resized = f_resized
 
-    # 15–25%:
-    progress(0.25, desc="
+    # 15–25%: spin up pipeline
+    progress(0.25, desc="Launching inference…")
     out = PIPE(
-        image=
-        last_image=
+        image=f_resized,
+        last_image=l_resized,
         prompt=prompt,
         negative_prompt=negative or None,
         height=h,
@@ -114,12 +117,11 @@ def generate(
         generator=gen,
     )
 
-    #
-    progress(0.90, desc="
+    # 90–100%: export
+    progress(0.90, desc="Building video file…")
     video_path = export_to_video(out.frames[0], fps=fps)
-
-    # done
     progress(1.0, desc="Done!")
+
     return video_path, seed
 
 
@@ -134,14 +136,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     last_img = gr.Image(label="Last frame", type="pil")
 
     prompt = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
-    negative = gr.Textbox(label="Negative prompt (
+    negative = gr.Textbox(label="Negative prompt (opt)", placeholder="blurry, lowres")
 
     with gr.Accordion("Advanced parameters", open=False):
         steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
         guidance = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance")
         num_frames = gr.Slider(16, 129, value=DEFAULT_FRAMES, step=1, label="Frames")
         fps = gr.Slider(4, 30, value=16, step=1, label="FPS")
-        seed_input = gr.Number(value=-1, precision=0, label="Seed (-1=
+        seed_input = gr.Number(value=-1, precision=0, label="Seed (-1=rand)")
 
     run_btn = gr.Button("Generate")
     download = gr.File(label="Download .mp4", interactive=False)
@@ -154,5 +156,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         outputs=[ download, seed_used ],
     )
 
-# serialize tasks with a mini progress badge
 demo.queue().launch(server_name="0.0.0.0", server_port=7860)
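Why the cache line moved above the imports: huggingface_hub resolves its cache paths once, at import time, so HF_HOME has to be in the environment before the first import that pulls the library in; the old placement, after the imports, was too late. A minimal sketch of the ordering, assuming a current huggingface_hub release:

import os
os.environ["HF_HOME"] = "/mnt/data/huggingface"  # must be set before any HF import

import huggingface_hub  # cache paths are resolved during this import
print(huggingface_hub.constants.HF_HOME)  # /mnt/data/huggingface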
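For intuition on the rewritten aspect_resize: it keeps the input aspect ratio, caps the pixel count at MAX_AREA, and floors both sides to a multiple of mod (the VAE spatial scale factor times the transformer patch size), since the latent grid needs dimensions divisible by that stride. A standalone sketch of the same arithmetic, with mod=16 assumed purely for illustration (the app derives the real value from the loaded pipeline):

import numpy as np

MAX_AREA = 1280 * 720

def rounded_dims(height, width, max_area=MAX_AREA, mod=16):
    # mod=16 is an assumption for this sketch; the app computes
    # PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
    ar = height / width
    h = int(np.sqrt(max_area * ar)) // mod * mod  # floor to a multiple of mod
    w = int(np.sqrt(max_area / ar)) // mod * mod
    return h, w

print(rounded_dims(1920, 1080))  # (1280, 720): portrait input, exact fit
print(rounded_dims(1000, 800))   # (1072, 848): area stays under MAX_AREA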
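On the progress plumbing: a parameter whose default is gr.Progress() gets a live tracker injected by Gradio at call time, each progress(fraction, desc=...) call updates the bar in the UI, and queue() on the last line is what lets those updates stream to the browser. A self-contained toy example of the same pattern, separate from this app:

import time
import gradio as gr

def slow_task(n, progress=gr.Progress()):
    # Gradio injects a live tracker for the gr.Progress() default;
    # progress.tqdm wraps an iterable and advances the bar each step
    for _ in progress.tqdm(range(int(n)), desc="Working…"):
        time.sleep(0.1)
    return f"did {int(n)} steps"

demo = gr.Interface(slow_task, gr.Number(value=10, precision=0), "text")
demo.queue().launch()  # queue() is required for progress updates to stream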