Spaces:
Runtime error
Runtime error
| import cv2 | |
| from insightface.app import FaceAnalysis | |
| from insightface.utils import face_align | |
| import torch | |
| import os | |
| from datetime import datetime | |
| import torch | |
| import gradio as gr | |
| from diffusers import ( | |
| StableDiffusionPipeline, | |
| DDIMScheduler, | |
| AutoencoderKL, | |
| StableDiffusionControlNetPipeline, | |
| ControlNetModel, | |
| ) | |
| from PIL import Image | |
| from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus | |
| from diffusers.utils import load_image | |
| import numpy as np | |
| # date_time = now.strftime("%Y-%m-%d_%H-%M-%S") | |
def _prompt_to_filename_stem(prompt, max_chars=64):
    """Turn a free-text prompt into a fragment that is safe inside a file name.

    Whitespace becomes ``_`` (the historical naming scheme replaced spaces the
    same way). Path separators and NUL are replaced as well so the prompt
    cannot point outside the output directory, and the result is truncated so
    that a long prompt does not exceed the file-system name limit.
    """
    stem = "".join(
        "_" if ch.isspace() or ch in "/\\\0" else ch for ch in prompt
    )
    return stem[:max_chars]


def _list_depth_maps(depth_map_dir):
    """Return the sorted image file names (.jpg/.jpeg/.png) in *depth_map_dir*.

    Raises:
        FileNotFoundError: if *depth_map_dir* is empty or not a directory.
    """
    if not depth_map_dir or not os.path.isdir(depth_map_dir):
        raise FileNotFoundError(
            f"Depth map directory not found: {depth_map_dir!r}"
        )
    # Sorted so the index embedded in the output file names is reproducible;
    # os.listdir() returns entries in arbitrary order.
    return sorted(
        name
        for name in os.listdir(depth_map_dir)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    )


def _extract_face(face_reference_image):
    """Detect a face and return ``(faceid_embeds, face_image)``.

    ``faceid_embeds`` is the normalised embedding of the first detected face
    with a leading batch dimension; ``face_image`` is the 224x224 aligned crop
    of that face.

    Raises:
        ValueError: if no face is detected in the image.
    """
    app = FaceAnalysis(
        name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

    if isinstance(face_reference_image, Image.Image):
        # Drop alpha / palette modes so the array is always H x W x 3.
        face_reference_image = face_reference_image.convert("RGB")
    rgb = np.array(face_reference_image)
    # InsightFace follows the OpenCV convention and expects BGR input, while
    # PIL delivers RGB, so flip the channel axis before detection.
    bgr = np.ascontiguousarray(rgb[:, :, ::-1])

    faces = app.get(bgr)
    if not faces:
        raise ValueError("No face detected in the face reference image.")

    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    # The crop is still taken from the RGB array, exactly as before; only the
    # detector/embedder input changed.
    # NOTE(review): confirm which channel order the IP-Adapter image encoder
    # path expects for face_image.
    face_image = face_align.norm_crop(
        rgb, landmark=faces[0].kps, image_size=224
    )  # you can also segment the face
    return faceid_embeds, face_image


def _load_ip_model(v2):
    """Build the ControlNet (depth) pipeline and wrap it with IP-Adapter FaceID Plus."""
    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
    vae_model_path = "stabilityai/sd-vae-ft-mse"
    image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
    # NOTE(review): the v1 checkpoint path is absolute while the v2 path is
    # relative to the working directory - confirm this is intentional.
    ip_ckpt = (
        "/content/ip-adapter-faceid-plus_sd15.bin"
        if not v2
        else "ip-adapter-faceid-plusv2_sd15.bin"
    )
    device = "cuda"

    controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
    controlnet = ControlNetModel.from_pretrained(
        controlnet_model_path, torch_dtype=torch.float16
    )
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        controlnet=controlnet,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None,
    )
    return IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)


def generate_image(
    prompt,
    negative_prompt,
    depth_map_dir,
    face_reference_image,
    s_scale,
    num_inference_steps,
    v2,
):
    """Generate one face-conditioned image per depth map found in a directory.

    Args:
        prompt: Text prompt; also used (sanitised) in the output file names.
        negative_prompt: Negative text prompt.
        depth_map_dir: Directory containing .jpg/.jpeg/.png depth maps.
        face_reference_image: Reference face (PIL image from the UI).
        s_scale: Face-structure strength passed to the IP-Adapter.
        num_inference_steps: Number of diffusion steps.
        v2: Use the "plus v2" adapter checkpoint and enable ``shortcut``.

    Returns:
        The list of generated images, in sorted depth-map file-name order.
        Each image is also saved as a PNG under ``/content/output``. The list
        is empty when the directory holds no depth maps.

    Raises:
        ValueError: if no face reference image is given or no face is found.
        FileNotFoundError: if ``depth_map_dir`` does not exist.
    """
    # Validate the cheap inputs first so a bad request fails before any model
    # is loaded.
    if face_reference_image is None:
        raise ValueError("A face reference image is required.")
    depth_map_files = _list_depth_maps(depth_map_dir)
    if not depth_map_files:
        return []

    date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_dir = "/content/output"
    os.makedirs(output_dir, exist_ok=True)

    faceid_embeds, face_image = _extract_face(face_reference_image)
    ip_model = _load_ip_model(v2)

    file_stem = _prompt_to_filename_stem(prompt)
    # The UI slider may hand over a float; the step count must be an int.
    steps = int(num_inference_steps)

    images = []
    for idx, filename in enumerate(depth_map_files):
        depth_map = load_image(os.path.join(depth_map_dir, filename))
        image = ip_model.generate(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=depth_map,
            face_image=face_image,
            faceid_embeds=faceid_embeds,
            shortcut=v2,
            s_scale=s_scale,
            num_samples=1,  # Generate one image per depth map
            width=512,
            height=512,
            num_inference_steps=steps,
            seed=2023,
        )[0]
        # Name: sanitised prompt, timestamp, depth-map index.
        image_name = f"{file_stem}_{date_time}_{idx}_0.png"
        image.save(os.path.join(output_dir, image_name))
        images.append(image)

    torch.cuda.empty_cache()
    return images
# Gradio front end: collects the generation settings, runs generate_image on
# click and shows the returned images in a gallery.
# NOTE(review): the original indentation was lost, so the container nesting
# below is reconstructed - confirm where the gallery and the button belong.
with gr.Blocks() as demo:
    with gr.Row():
        # First column: prompts, depth-map folder, face reference, adapter toggle.
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            depth_map_dir = gr.Textbox(label="Depth Map Directory")
            face_reference_image = gr.Image(
                label="Face Reference Image",
                type="pil",
            )
            v2 = gr.Checkbox(label="Use v2 Adapter", value=False)

        # Second column: numeric generation settings.
        with gr.Column():
            s_scale = gr.Slider(
                minimum=0,
                maximum=3,
                step=0.1,
                value=0.6,
                label="Face Structure strength",
            )
            num_inference_steps = gr.Slider(
                minimum=1,
                maximum=50,
                step=1,
                value=10,
                label="steps",
            )

    gallery = gr.Gallery(label="Generated Images")
    generate_btn = gr.Button("Generate Images")

    # The input order must match generate_image's parameter order.
    generate_btn.click(
        generate_image,
        inputs=[
            prompt,
            negative_prompt,
            depth_map_dir,
            face_reference_image,
            s_scale,
            num_inference_steps,
            v2,
        ],
        outputs=gallery,
    )

demo.launch(share=True, debug=True)