Spaces · Running on Zero

Commit · 0ea273a
1 Parent(s): 0fb42d9

offload rewriter

Files changed:
- app.py +13 -10
- examples/1.png +1 -1
- examples/2.png +3 -0
app.py CHANGED

@@ -67,16 +67,15 @@ pipe.scheduler = UniPCMultistepScheduler.from_config(
 print(f"✓ Configured scheduler (flow_shift=2.0)")
 
 # Move to device
-pipe.to("cuda")
+# pipe.to("cuda")
 print(f"✓ Models loaded and moved to cuda")
 
 end = time.time()
 print(f"Model loaded in {end - start:.2f}s.")
 
 start = time.time()
-prompt_enhancer_model = "Qwen/
+prompt_enhancer_model = "Qwen/Qwen3-VL-30B-A3B-Instruct"
 prompt_model, processor = load_model(prompt_enhancer_model)
-prompt_model.to("cuda")
 end = time.time()
 print(f"Prompt enhancer loaded in {end - start:.2f}s.")

@@ -114,6 +113,7 @@ def run_inference(
     num_temporal_reasoning_steps: int = 8,
 ):
     # Rewriter
+    prompt_model.to("cuda")
     final_prompt = prompt
 
     with th.no_grad():

@@ -132,8 +132,10 @@ def run_inference(
         print(cot_prompt)
         print("=" * 80 + "\n")
         final_prompt = cot_prompt
+        prompt_model.to("cpu")
 
     # Inference
+    pipe.to("cuda")
     print(f"Loading input image: {image_path}")
     image = load_image(image_path)
     mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]

@@ -159,6 +161,7 @@ def run_inference(
     ).frames[0]
 
     end = time.time()
+    pipe.to("cpu")
 
     video_tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
     output_path_video = video_tmp.name

@@ -227,16 +230,16 @@ def build_ui() -> gr.Blocks:
        examples=[
            [
                "examples/1.png",
-                "The user wants to change the
-
+                "The user wants to change the provided illustration of an elegant woman in a flowing red kimono into a high-end Japanese anime PVC scale figure, rendered photorealistically as a pre-painted collectible. Preserve her long black hair styled with golden hair ornaments and delicate floral accessories, her slightly tilted head and confident gaze, and the detailed red kimono with golden and floral embroidery tied with a wide gold obi. Cherry blossom petals drift around. Maintain the pose and camera view point unchanged. The scene should look like a premium finished PVC figure on display, with realistic textures, fine paint detailing, and a polished collectible presentation. Place the figure on a simple round base on a computer desk, with blurred keyboard and monitor glow in the background. Emphasize a strong 3D sense of volume and depth, realistic shadows and lighting, and painted PVC figure textures. Professional studio photography style, shallow depth of field, focus on the figure as a physical collectible. The lighting on the figure is uniform and highlighted, emphasizing every sculpted detail and painted accent.",
+                True,
            ],
            [
-                "examples/
-                "The user wants to change the scene so that the girl in the traditional-style painting, wearing her ornate floral robe and headdress, is now playing a guitar.",
-
+                "examples/2.png",
+                "The user wants to change the scene so that the girl in the traditional-style painting, wearing her ornate floral robe and headdress, is now playing a guitar. ",
+                False,
            ],
        ],
-        inputs=[image, prompt, enable_temporal_reasoning], outputs=[output_image, output_video], fn=_on_run, cache_examples=
+        inputs=[image, prompt, enable_temporal_reasoning], outputs=[output_image, output_video], fn=_on_run, cache_examples="lazy"
    )
 
    return demo

@@ -245,4 +248,4 @@ def build_ui() -> gr.Blocks:
 if __name__ == "__main__":
     demo = build_ui()
     # demo.launch(server_name="0.0.0.0", server_port=7869)
-    demo.queue().launch(
+    demo.queue().launch()
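Taken together, the app.py changes implement a simple sequential offload: nothing is pinned to the GPU at startup any more, the prompt rewriter is moved to CUDA only while it rewrites the prompt and is returned to CPU right after, and the video pipeline is moved to CUDA only for generation and back to CPU once the frames are produced. A minimal sketch of that pattern is below; the on_gpu helper and the commented usage lines are illustrative stand-ins, not functions from app.py.

from contextlib import contextmanager

import torch

@contextmanager
def on_gpu(model, device="cuda"):
    # Keep the model on the GPU only for the duration of the block,
    # then park it back on the CPU and release the freed VRAM.
    model.to(device)
    try:
        yield model
    finally:
        model.to("cpu")
        torch.cuda.empty_cache()

# Usage mirroring run_inference in this commit (prompt_model and pipe as in app.py,
# rewrite() is a hypothetical stand-in for the chain-of-thought rewriting step):
# with on_gpu(prompt_model):
#     final_prompt = rewrite(prompt)             # rewriter stage on GPU
# with on_gpu(pipe):
#     frames = pipe(final_prompt, image=image)   # generation stage on GPU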
examples/1.png CHANGED (Git LFS)

examples/2.png ADDED (Git LFS)