当前位置: 首页 > news >正文

AIGC笔记--基于Stable Diffusion实现图片的inpainting

1--完整代码

SD_Inpainting

2--简单代码

import PIL
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
import torchvision
from diffusers import AutoencoderKL, UNet2DConditionModel, DDIMScheduler
from transformers import CLIPTextModel, CLIPTokenizer


# Preprocess the inpainting mask.
def preprocess_mask(mask):
    """Convert a PIL mask image into a (1, 4, H/8, W/8) float tensor.

    White (to-be-repainted) pixels become 0 and black (to-be-kept) pixels
    become 1, so multiplying latents by this tensor keeps the unmasked area.
    """
    gray = mask.convert("L")  # grayscale: L = R*299/1000 + G*587/1000 + B*114/1000
    w, h = (d - d % 32 for d in gray.size)  # round down to a multiple of 32
    # Latent resolution is 1/8 of pixel resolution (e.g. 512 -> 64).
    small = gray.resize((w // 8, h // 8), resample=PIL.Image.NEAREST)
    arr = np.array(small).astype(np.float32) / 255.0  # normalize to [0, 1]
    # Replicate across the 4 latent channels and add a batch axis: (1, 4, h/8, w/8).
    arr = np.tile(arr, (4, 1, 1))[None]
    # Repaint white, keep black: masked region becomes 0.
    arr = 1 - arr
    return torch.from_numpy(arr)
# Preprocess the init image.
def preprocess(image):
    """Convert a PIL RGB image into a (1, 3, H, W) tensor in [-1, 1]."""
    w, h = (d - d % 32 for d in image.size)  # round down to a multiple of 32
    resized = image.resize((w, h), resample=PIL.Image.LANCZOS)
    arr = np.array(resized).astype(np.float32) / 255.0  # HWC in [0, 1]
    arr = arr[None].transpose(0, 3, 1, 2)  # NHWC -> NCHW
    tensor = torch.from_numpy(arr)
    return 2.0 * tensor - 1.0  # rescale to [-1, 1]


if __name__ == "__main__":
    # Hugging Face model id (downloads on first use); a local checkout path works too.
    model_id = "runwayml/stable-diffusion-v1-5"

    # Load the input image and the inpainting mask.
    input_image = Image.open("./images/overture-creations-5sI6fQgYIuo.png").resize((512, 512))
    input_mask = Image.open("./images/overture-creations-5sI6fQgYIuo_mask.png").resize((512, 512))

    # 1. Autoencoder (pixel <-> latent).
    vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae")
    # 2. Tokenizer and text encoder.
    tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder")
    # 3. Diffusion UNet.
    unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")
    # 4. Noise scheduler.
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        # Don't clip samples: SD latents are not constrained to [-1, 1].
        clip_sample=False,
        set_alpha_to_one=False,
    )

    # Move the models to GPU in half precision.
    device = "cuda"
    vae.to(device, dtype=torch.float16)
    text_encoder.to(device, dtype=torch.float16)
    unet = unet.to(device, dtype=torch.float16)

    # Prompt and sampling hyper-parameters.
    prompt = "a mecha robot sitting on a bench"
    negative_prompt = ""
    strength = 0.75
    guidance_scale = 7.5
    batch_size = 1
    num_inference_steps = 50
    generator = torch.Generator(device).manual_seed(0)

    with torch.no_grad():
        # Conditional text embeddings.
        text_input = tokenizer(
            prompt,
            padding="max_length",
            max_length=tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_embeddings = text_encoder(text_input.input_ids.to(device))[0]

        # Unconditional (negative-prompt) embeddings.
        max_length = text_input.input_ids.shape[-1]
        uncond_input = tokenizer(
            [negative_prompt] * batch_size,
            padding="max_length",
            max_length=max_length,
            return_tensors="pt",
        )
        uncond_embeddings = text_encoder(uncond_input.input_ids.to(device))[0]

        # Batch [uncond, cond] for classifier-free guidance.
        text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

        # Configure sampling steps, then trim them according to `strength`.
        noise_scheduler.set_timesteps(num_inference_steps, device=device)
        init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
        t_start = max(num_inference_steps - init_timestep, 0)
        timesteps = noise_scheduler.timesteps[t_start:]

        # Encode the init image into scaled latents.
        init_input = preprocess(input_image)
        init_latents = vae.encode(init_input.to(device, dtype=torch.float16)).latent_dist.sample(generator)
        init_latents = 0.18215 * init_latents
        init_latents = torch.cat([init_latents] * batch_size, dim=0)
        init_latents_orig = init_latents

        # Prepare the latent-space mask (masked region == 0).
        mask_image = preprocess_mask(input_mask)
        mask_image = mask_image.to(device=device, dtype=init_latents.dtype)
        mask = torch.cat([mask_image] * batch_size)

        # Noise the init latents up to the first trimmed timestep.
        noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=init_latents.dtype)
        init_latents = noise_scheduler.add_noise(init_latents, noise, timesteps[:1])
        latents = init_latents  # starting latents for denoising

        # Denoising loop.
        for t in tqdm(timesteps):
            # Duplicate latents so unconditional and conditional predictions run in one pass.
            latent_model_input = torch.cat([latents] * 2)
            latent_model_input = noise_scheduler.scale_model_input(latent_model_input, t)  # no-op for DDIM

            # Predict the noise residual.
            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample

            # Classifier-free guidance.
            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
            noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

            # x_t -> x_{t-1}
            latents = noise_scheduler.step(noise_pred, t, latents).prev_sample

            # Re-impose the original image on the unmasked region:
            # mask == 0 inside the repaint area, so `init_latents_proper * mask`
            # keeps the original noisy latents and `latents * (1 - mask)` keeps
            # the freshly generated ones.
            init_latents_proper = noise_scheduler.add_noise(init_latents_orig, noise, torch.tensor([t]))
            latents = (init_latents_proper * mask) + (latents * (1 - mask))

        # Undo the latent scaling before decoding.
        latents = 1 / 0.18215 * latents
        image = vae.decode(latents).sample

        # Convert to a PIL image and save.
        img = (image / 2 + 0.5).clamp(0, 1).detach().cpu()
        img = torchvision.transforms.ToPILImage()(img.squeeze())
        img.save("./outputs/output.png")

    print("All Done!")

运行结果:

3--基于Diffuser进行调用

import torch
import torchvision
from PIL import Image
from diffusers import StableDiffusionInpaintPipelineLegacy

if __name__ == "__main__":
    # Load the inpainting pipeline (downloads on first use; a local
    # checkout path also works as `model_id`).
    model_id = "runwayml/stable-diffusion-v1-5"
    pipe = StableDiffusionInpaintPipelineLegacy.from_pretrained(
        model_id, torch_dtype=torch.float16
    ).to("cuda")

    # Load the input image and the inpainting mask.
    input_image = Image.open("./images/overture-creations-5sI6fQgYIuo.png").resize((512, 512))
    input_mask = Image.open("./images/overture-creations-5sI6fQgYIuo_mask.png").resize((512, 512))

    # Run inference: one output per prompt.
    prompt = ["a mecha robot sitting on a bench", "a cat sitting on a bench"]
    generator = torch.Generator("cuda").manual_seed(0)
    with torch.autocast("cuda"):
        images = pipe(
            prompt=prompt,
            image=input_image,
            mask_image=input_mask,
            num_inference_steps=50,
            strength=0.75,
            guidance_scale=7.5,
            num_images_per_prompt=1,
            generator=generator,
        ).images

    # Save the PIL outputs.
    for idx, image in enumerate(images):
        image.save("./outputs/output_{:d}.png".format(idx))

    print("All Done!")

运行结果:

相关文章:

  • 北京网站建设多少钱?
  • 辽宁网页制作哪家好_网站建设
  • 高端品牌网站建设_汉中网站制作
  • 9.5 栅格图层符号化多波段彩色渲染
  • 网页数据抓取:融合BeautifulSoup和Scrapy的高级爬虫技术
  • Node学习-第六章-express中间件与RESful API接口规范(下)
  • live555 rtsp服务器实战之createNewStreamSource
  • 目标检测--X-anylabeling使用自己的模型自动标注
  • [C++]多态
  • C语言课程回顾:十、C语言之 指针
  • 推荐一款uniapp拖动验证码插件
  • 从LeetCode215看排序算法
  • Nginx集群部署指南:实现高性能和高可用性
  • qt 创建一个可以拖拽的矩形,简单实践
  • 网站架构核心要素
  • [C/C++入门][字符与ASCII码]6、用代码来转换字符与它的ASCII码
  • 【游戏客户端】大话slg玩法架构(三)建筑控件
  • 线性代数|机器学习-P23梯度下降
  • 【译】React性能工程(下) -- 深入研究React性能调试
  • ➹使用webpack配置多页面应用(MPA)
  • ES6--对象的扩展
  • git 常用命令
  • Go 语言编译器的 //go: 详解
  • JavaScript设计模式系列一:工厂模式
  • npx命令介绍
  • NSTimer学习笔记
  • QQ浏览器x5内核的兼容性问题
  • Sass 快速入门教程
  • ViewService——一种保证客户端与服务端同步的方法
  • 阿里云爬虫风险管理产品商业化,为云端流量保驾护航
  • 创建一种深思熟虑的文化
  • 从PHP迁移至Golang - 基础篇
  • 从setTimeout-setInterval看JS线程
  • 基于遗传算法的优化问题求解
  • 力扣(LeetCode)965
  • 每个JavaScript开发人员应阅读的书【1】 - JavaScript: The Good Parts
  • 如何合理的规划jvm性能调优
  • 使用 @font-face
  • 我的zsh配置, 2019最新方案
  • 一些基于React、Vue、Node.js、MongoDB技术栈的实践项目
  • 走向全栈之MongoDB的使用
  • Python 之网络式编程
  • Redis4.x新特性 -- 萌萌的MEMORY DOCTOR
  • 策略 : 一文教你成为人工智能(AI)领域专家
  • 如何正确理解,内页权重高于首页?
  • ​ArcGIS Pro 如何批量删除字段
  • ​Z时代时尚SUV新宠:起亚赛图斯值不值得年轻人买?
  • ​一些不规范的GTID使用场景
  • # 飞书APP集成平台-数字化落地
  • #include
  • #nginx配置案例
  • #NOIP 2014# day.2 T2 寻找道路
  • #VERDI# 关于如何查看FSM状态机的方法
  • (C语言)编写程序将一个4×4的数组进行顺时针旋转90度后输出。
  • (力扣记录)235. 二叉搜索树的最近公共祖先
  • (十七)Flink 容错机制
  • (一)C语言之入门:使用Visual Studio Community 2022运行hello world
  • (轉貼) 寄發紅帖基本原則(教育部禮儀司頒布) (雜項)