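# pi-Qwen / app.py
# Hugging Face file-viewer header captured together with the source
# (page metadata, not program code):
#   uploader avatar: "lichorosario's picture"
#   commit: "Update app.py" (f4527b0, verified)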
import torch
# Enable faster matrix math on CUDA by allowing TensorFloat-32 (TF32) in
# CUDA matmuls and in cuDNN operations. These are process-wide PyTorch
# backend flags, set once here before any model is created.
# NOTE(review): the pipeline below is loaded with torch_dtype=torch.bfloat16,
# so these flags presumably only affect ops that still run in float32 --
# confirm whether they make a measurable difference for this app.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
import os
import gradio as gr
import spaces
from diffusers import FlowMatchEulerDiscreteScheduler
from lakonlab.ui.gradio.create_text_to_img import create_interface_text_to_img
from lakonlab.pipelines.piqwen_pipeline import PiQwenImagePipeline
from huggingface_hub import login
#############3
import math
# Authenticate with Hugging Face. The token is supplied via the `HF_TOKEN`
# environment variable to avoid hard-coding credentials.
#
# Only log in when a token is actually present: `login(token=None)` falls
# back to prompting for a token interactively, which cannot be answered when
# the app runs headless. Without a token the app continues anonymously and
# any repository that requires authentication will fail at download time
# with its own error.
_hf_token = os.getenv('HF_TOKEN')
if _hf_token:
    login(token=_hf_token)
# Prompt pre-filled in the UI when the demo starts. It is the same example
# prompt used by the original pi-Qwen demo. The text is kept as two sentences
# joined by a single space.
DEFAULT_PROMPT = ' '.join((
    'Photo of a coffee shop entrance featuring a chalkboard sign reading '
    '"π‑Qwen Coffee $2 per cup," with a neon light beside it displaying '
    '"π‑通义千问".',
    'Next to it hangs a poster showing a beautiful Chinese woman, and '
    'beneath the poster is written '
    '"e≈2.71828-18284-59045-23536-02874-71352".',
))
# -----------------------------------------------------------------------------
# Model and adapter loading
#
# Build order:
#   1. Instantiate PiQwenImagePipeline from the Qwen/Qwen-Image checkpoint in
#      bfloat16.
#   2. Attach the pi-Flow adapter published at Lakonik/pi-Qwen-Image
#      (subfolder gmqwen_k8_piid_4step) to the transformer.
#   3. Load the extra "dott" LoRA into the transformer and enable it together
#      with the pi-Flow adapter, both at weight 1.0.
#   4. Rebuild the scheduler from the pipeline's own config with the
#      overrides listed below.
#   5. Move the whole pipeline to CUDA.
# -----------------------------------------------------------------------------
pipe = PiQwenImagePipeline.from_pretrained('Qwen/Qwen-Image', torch_dtype=torch.bfloat16)

adapter_name = pipe.load_piflow_adapter(
    'Lakonik/pi-Qwen-Image',
    subfolder='gmqwen_k8_piid_4step',
    target_module_name='transformer',
)

# Startup diagnostics: which adapter name was returned, and whether the
# pipeline exposes a `transformer_piflow` attribute.
print("Adapter name:", adapter_name)
print("Tiene atributo transformer_piflow?", hasattr(pipe, "transformer_piflow"))

pipe.transformer.load_lora_adapter(
    'lichorosario/dott-qwen-image-lora',
    weight_name='dott-3000-qwen-image.safetensors',
    adapter_name='dott',
    prefix="diffusion_model",
)
pipe.set_adapters([adapter_name, 'dott'], adapter_weights=[1.0, 1.0])

# Scheduler overrides applied on top of the pipeline's existing scheduler
# config. Dynamic shifting is enabled with base_shift == max_shift == ln(3)
# and an exponential time-shift; the static `shift` is left at 1.0.
# NOTE(review): with base and max shift equal, the effective shift presumably
# does not vary with image size -- confirm against how the pipeline computes
# the shift it hands to the scheduler.
_LOG_3 = math.log(3)
_SCHEDULER_OVERRIDES = dict(
    base_image_seq_len=256,
    base_shift=_LOG_3,
    invert_sigmas=False,
    max_image_seq_len=8192,
    max_shift=_LOG_3,
    num_train_timesteps=1000,
    shift=1.0,
    shift_terminal=None,
    stochastic_sampling=False,
    time_shift_type="exponential",
    use_beta_sigmas=False,
    use_dynamic_shifting=True,
    use_exponential_sigmas=False,
    use_karras_sigmas=False,
)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    **_SCHEDULER_OVERRIDES,
)

pipe = pipe.to('cuda')
# -----------------------------------------------------------------------------
# Gradio interface
#
# The `generate` function takes the usual generation parameters (seed, prompt,
# width, height and number of steps) and returns a single PIL image. The
# `@spaces.GPU` decorator indicates to the Spaces runtime that this function
# should execute on GPU. The progress bar from `gr.Progress` is optional but
# improves user experience during longer runs.
# -----------------------------------------------------------------------------
@spaces.GPU
def generate(seed, prompt, width, height, steps, progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the module-level pipeline.

    The pipeline has both the pi-Flow adapter and the custom "dott" LoRA
    active (see the model-loading section of this file).

    Args:
        seed: Seed for the random generator. Coerced to ``int`` because
            ``torch.Generator.manual_seed`` requires an integer and UI
            number fields may deliver floats.
        prompt: Text prompt forwarded to the pipeline unchanged.
        width: Output width, coerced to ``int``.
        height: Output height, coerced to ``int``.
        steps: Number of inference steps, coerced to ``int``.
        progress: Not used in the body. The ``gr.Progress(track_tqdm=True)``
            default is the Gradio convention for showing tqdm progress in
            the UI.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    # A fresh default-device generator per call, seeded explicitly.
    generator = torch.Generator().manual_seed(int(seed))
    output = pipe(
        prompt=prompt,
        width=int(width),
        height=int(height),
        num_inference_steps=int(steps),
        generator=generator,
    )
    return output.images[0]
with gr.Blocks(
    analytics_enabled=False,
    title='pi‑Qwen Demo with Custom LoRA',
    css='lakonlab/ui/gradio/style.css',
) as demo:
    # Markdown header describing the demo, including the custom LoRA. The
    # LoRA repository id shown here must match the one passed to
    # `load_lora_adapter` in the model-loading section, and is written with
    # plain ASCII hyphens so it can be copied as-is.
    md_txt = (
        '# pi‑Qwen Demo with Custom LoRA\n\n'
        'This demo showcases the [pi‑Flow: Policy‑Based Few‑Step Generation via Imitation '
        'Distillation](https://arxiv.org/abs/2510.14974) technique applied to '
        'the Qwen‑Image base model. In addition to the fast π‑Flow policy, this '
        'version also loads a user‑provided LoRA from '
        '`lichorosario/dott-qwen-image-lora` to stylize the outputs.\n\n'
        '**Base model:** [Qwen‑Image](https://huggingface.co/Qwen/Qwen-Image). '
        '**Fast policy:** GMFlow. **Code:** '
        '[https://github.com/Lakonik/piFlow](https://github.com/Lakonik/piFlow).'
    )
    gr.Markdown(md_txt)

    # Build the text-to-image controls with the LakonLab helper. `args`
    # lists the inputs handed to `generate`, in the order of its positional
    # parameters (seed, prompt, width, height, steps). The UI defaults to
    # 4 steps, and guidance_scale is passed as None.
    create_interface_text_to_img(
        generate,
        prompt=DEFAULT_PROMPT,
        steps=4,
        guidance_scale=None,
        args=['last_seed', 'prompt', 'width', 'height', 'steps'],
    )

# Launch the app with request queuing enabled. In a Gradio Space this call is
# optional, but including it allows the script to be run locally as well.
demo.queue().launch()