zhangjiewu committed on
Commit
0ea273a
·
1 Parent(s): 0fb42d9

offload rewriter

Browse files
Files changed (3) hide show
  1. app.py +13 -10
  2. examples/1.png +1 -1
  3. examples/2.png +3 -0
app.py CHANGED
@@ -67,16 +67,15 @@ pipe.scheduler = UniPCMultistepScheduler.from_config(
67
  print(f"✓ Configured scheduler (flow_shift=2.0)")
68
 
69
  # Move to device
70
- pipe.to("cuda")
71
  print(f"✓ Models loaded and moved to cuda")
72
 
73
  end = time.time()
74
  print(f"Model loaded in {end - start:.2f}s.")
75
 
76
  start = time.time()
77
- prompt_enhancer_model = "Qwen/Qwen2.5-VL-7B-Instruct"
78
  prompt_model, processor = load_model(prompt_enhancer_model)
79
- prompt_model.to("cuda")
80
  end = time.time()
81
  print(f"Prompt enhancer loaded in {end - start:.2f}s.")
82
 
@@ -114,6 +113,7 @@ def run_inference(
114
  num_temporal_reasoning_steps: int = 8,
115
  ):
116
  # Rewriter
 
117
  final_prompt = prompt
118
 
119
  with th.no_grad():
@@ -132,8 +132,10 @@ def run_inference(
132
  print(cot_prompt)
133
  print("=" * 80 + "\n")
134
  final_prompt = cot_prompt
 
135
 
136
  # Inference
 
137
  print(f"Loading input image: {image_path}")
138
  image = load_image(image_path)
139
  mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
@@ -159,6 +161,7 @@ def run_inference(
159
  ).frames[0]
160
 
161
  end = time.time()
 
162
 
163
  video_tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
164
  output_path_video = video_tmp.name
@@ -227,16 +230,16 @@ def build_ui() -> gr.Blocks:
227
  examples=[
228
  [
229
  "examples/1.png",
230
- "The user wants to change the scene so that the girl in the traditional-style painting, wearing her ornate floral robe and headdress, is now playing a guitar. ",
231
- False,
232
  ],
233
  [
234
- "examples/1.png",
235
- "The user wants to change the scene so that the girl in the traditional-style painting, wearing her ornate floral robe and headdress, is now playing a guitar.",
236
- True,
237
  ],
238
  ],
239
- inputs=[image, prompt, enable_temporal_reasoning], outputs=[output_image, output_video], fn=_on_run, cache_examples=True
240
  )
241
 
242
  return demo
@@ -245,4 +248,4 @@ def build_ui() -> gr.Blocks:
245
  if __name__ == "__main__":
246
  demo = build_ui()
247
  # demo.launch(server_name="0.0.0.0", server_port=7869)
248
- demo.queue().launch(share=True)
 
67
  print(f"✓ Configured scheduler (flow_shift=2.0)")
68
 
69
  # Move to device
70
+ # pipe.to("cuda")
71
  print(f"✓ Models loaded and moved to cuda")
72
 
73
  end = time.time()
74
  print(f"Model loaded in {end - start:.2f}s.")
75
 
76
  start = time.time()
77
+ prompt_enhancer_model = "Qwen/Qwen3-VL-30B-A3B-Instruct"
78
  prompt_model, processor = load_model(prompt_enhancer_model)
 
79
  end = time.time()
80
  print(f"Prompt enhancer loaded in {end - start:.2f}s.")
81
 
 
113
  num_temporal_reasoning_steps: int = 8,
114
  ):
115
  # Rewriter
116
+ prompt_model.to("cuda")
117
  final_prompt = prompt
118
 
119
  with th.no_grad():
 
132
  print(cot_prompt)
133
  print("=" * 80 + "\n")
134
  final_prompt = cot_prompt
135
+ prompt_model.to("cpu")
136
 
137
  # Inference
138
+ pipe.to("cuda")
139
  print(f"Loading input image: {image_path}")
140
  image = load_image(image_path)
141
  mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
 
161
  ).frames[0]
162
 
163
  end = time.time()
164
+ pipe.to("cpu")
165
 
166
  video_tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
167
  output_path_video = video_tmp.name
 
230
  examples=[
231
  [
232
  "examples/1.png",
233
+ "The user wants to change the provided illustration of an elegant woman in a flowing red kimono into a high-end Japanese anime PVC scale figure, rendered photorealistically as a pre-painted collectible. Preserve her long black hair styled with golden hair ornaments and delicate floral accessories, her slightly tilted head and confident gaze, and the detailed red kimono with golden and floral embroidery tied with a wide gold obi. Cherry blossom petals drift around. Maintain the pose and camera view point unchanged. The scene should look like a premium finished PVC figure on display, with realistic textures, fine paint detailing, and a polished collectible presentation. Place the figure on a simple round base on a computer desk, with blurred keyboard and monitor glow in the background. Emphasize a strong 3D sense of volume and depth, realistic shadows and lighting, and painted PVC figure textures. Professional studio photography style, shallow depth of field, focus on the figure as a physical collectible. The lighting on the figure is uniform and highlighted, emphasizing every sculpted detail and painted accent.",
234
+ True,
235
  ],
236
  [
237
+ "examples/2.png",
238
+ "The user wants to change the scene so that the girl in the traditional-style painting, wearing her ornate floral robe and headdress, is now playing a guitar. ",
239
+ False,
240
  ],
241
  ],
242
+ inputs=[image, prompt, enable_temporal_reasoning], outputs=[output_image, output_video], fn=_on_run, cache_examples="lazy"
243
  )
244
 
245
  return demo
 
248
  if __name__ == "__main__":
249
  demo = build_ui()
250
  # demo.launch(server_name="0.0.0.0", server_port=7869)
251
+ demo.queue().launch()
examples/1.png CHANGED

Git LFS Details

  • SHA256: 90a09c2a17989bf25a5c3aa3dcda5361bf76cc97e507f376d6a8d3c87be84c3a
  • Pointer size: 132 Bytes
  • Size of remote file: 2.77 MB

Git LFS Details

  • SHA256: c70c66385f2871474f3100084ff2ea4a319685c3fdfc7fff94452d078893e27e
  • Pointer size: 132 Bytes
  • Size of remote file: 2.77 MB
examples/2.png ADDED

Git LFS Details

  • SHA256: 90a09c2a17989bf25a5c3aa3dcda5361bf76cc97e507f376d6a8d3c87be84c3a
  • Pointer size: 132 Bytes
  • Size of remote file: 2.77 MB