"""Gradio demo: send up to three images plus an optional prompt to Qwen2-VL.

NOTE(review): the UI title and the original comments talk about "Qwen Image
Edit", but the checkpoint loaded below is ``Qwen/Qwen2-VL-2B-Instruct`` and
``generate_images`` returns decoded text strings, not images. The results are
therefore shown in a text box; an image gallery cannot render plain text.
Confirm whether an image-editing checkpoint was intended.
"""

from typing import List, Optional

import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Upper bound on generations per request; mirrors the slider maximum in the UI.
MAX_OUTPUTS = 5
# Token budget for each generated answer.
MAX_NEW_TOKENS = 128

# Load the model and its processor once at import time.
model_name = "Qwen/Qwen2-VL-2B-Instruct"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)


def generate_images(
    img1: Optional[Image.Image],
    img2: Optional[Image.Image],
    img3: Optional[Image.Image],
    prompt: str = "",
    n_outputs: int = 3,
) -> List[str]:
    """Run the model on the supplied images and return its decoded answers.

    Args:
        img1, img2, img3: Optional PIL images; ``None`` entries are skipped.
        prompt: Optional text instruction appended after the images.
        n_outputs: Number of generations to run, capped at ``MAX_OUTPUTS``.

    Returns:
        One decoded string per generation, containing only the newly
        generated tokens (the echoed prompt is stripped).

    Raises:
        gr.Error: If no image was provided.
    """
    images = [img for img in (img1, img2, img3) if img is not None]
    if not images:
        # Surface the problem as a UI error instead of returning a fake result.
        raise gr.Error("min 1 img")

    content = [{"type": "image", "image": img} for img in images]
    if prompt:
        content.append({"type": "text", "text": prompt})

    # add_generation_prompt=True opens the assistant turn so the model answers
    # instead of continuing the user message.
    text_input = processor.apply_chat_template(
        [{"role": "user", "content": content}],
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = processor(
        text=[text_input],
        images=images,
        padding=True,
        return_tensors="pt",
    )
    # The model is placed by device_map="auto"; inputs must live on its device.
    model_inputs = model_inputs.to(model.device)

    # The slider value may arrive as a float, which range() does not accept.
    count = int(min(n_outputs, MAX_OUTPUTS))
    prompt_len = model_inputs["input_ids"].shape[1]

    outputs = []
    with torch.no_grad():
        # NOTE(review): whether repeated calls produce different answers
        # depends on the sampling settings in the checkpoint's generation
        # config -- confirm if distinct variations are expected.
        for _ in range(count):
            generated = model.generate(
                **model_inputs, max_new_tokens=MAX_NEW_TOKENS
            )
            # generate() returns prompt + continuation; keep the continuation.
            new_tokens = generated[0][prompt_len:]
            # If the model ever returns base64-encoded image data, decode it
            # to a PIL.Image here.
            outputs.append(
                processor.decode(new_tokens, skip_special_tokens=True)
            )
    return outputs


def _format_outputs(img1, img2, img3, prompt, n_outputs):
    """Call ``generate_images`` and join its answers into one numbered text."""
    results = generate_images(img1, img2, img3, prompt, n_outputs)
    return "\n\n".join(
        f"[{index}] {text}" for index, text in enumerate(results, start=1)
    )


# Gradio interface
with gr.Blocks(title="Qwen Image Multi-Edit By Chalenger") as demo:
    gr.Markdown(
        "## Multi-Image Editor for Qwen Image Edit -- "
        "Envie até 3 imagens e descreva o que quer gerar. / "
        "Send at max 3 images and describe what you want to do"
    )
    with gr.Row():
        img1 = gr.Image(label="Image Upload 1", type="pil")
        img2 = gr.Image(label="Image Upload 2", type="pil")
        img3 = gr.Image(label="Image Upload 3", type="pil")
    prompt = gr.Textbox(
        label="Prompt (optional/opcional)",
        placeholder=(
            "Ex: garota da imagem 1 junto com o garoto da imagem 2 "
            "dentro do carro da imagem 3 / "
            "girl of image 1 at side of girl of image 2, "
            "inside the car of image 3"
        ),
        lines=2,
    )
    nout = gr.Slider(
        label="Saida/Output", minimum=1, maximum=MAX_OUTPUTS, step=1, value=3
    )
    btn = gr.Button("Gerar/Generate")
    # Text output: generate_images yields strings, which a Gallery would try
    # to interpret as image paths.
    output_box = gr.Textbox(
        label="Saidas/Outputs", lines=12, interactive=False
    )
    btn.click(
        _format_outputs,
        inputs=[img1, img2, img3, prompt, nout],
        outputs=output_box,
    )


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)