[Community Pipeline] FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation (#8239)
* code and doc
* update paper link
* remove redundant codes
* add example video

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
@@ -69,6 +69,7 @@ Please also check out our [Community Scripts](https://github.com/huggingface/dif
| UFOGen Scheduler | Scheduler for UFOGen Model (compatible with Stable Diffusion pipelines) | [UFOGen Scheduler](#ufogen-scheduler) | - | [dg845](https://github.com/dg845) |
| Stable Diffusion XL IPEX Pipeline | Accelerate Stable Diffusion XL inference pipeline with BF16/FP32 precision on Intel Xeon CPUs with [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [Stable Diffusion XL on IPEX](#stable-diffusion-xl-on-ipex) | - | [Dan Li](https://github.com/ustcuna/) |
| Stable Diffusion BoxDiff Pipeline | Training-free controlled generation with bounding boxes using [BoxDiff](https://github.com/showlab/BoxDiff) | [Stable Diffusion BoxDiff Pipeline](#stable-diffusion-boxdiff) | - | [Jingyang Zhang](https://github.com/zjysteven/) |
| FRESCO V2V Pipeline | Implementation of [[CVPR 2024] FRESCO: Spatial-Temporal Correspondence for Zero-Shot Video Translation](https://arxiv.org/abs/2403.12962) | [FRESCO V2V Pipeline](#fresco) | - | [Yifan Zhou](https://github.com/SingleZombie) |
To load a custom pipeline, pass the `custom_pipeline` argument to `DiffusionPipeline` with the name of one of the files in `diffusers/examples/community`. Feel free to send a PR with your own pipelines; we will merge them quickly.
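For example, a minimal sketch of loading a community pipeline (the base checkpoint and pipeline name are placeholders; some pipelines, such as `fresco_v2v` below, also expect extra components like a ControlNet):

```py
import torch

from diffusers import DiffusionPipeline

# `custom_pipeline` is the filename (without ".py") of a pipeline in diffusers/examples/community.
# The checkpoint and pipeline name below are placeholders; swap in the pipeline you want to use.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",              # any compatible base checkpoint
    custom_pipeline="name_of_community_pipeline",  # e.g. "fresco_v2v" (see the FRESCO section)
    torch_dtype=torch.float16,
).to("cuda")
```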
@@ -4035,6 +4036,93 @@ onestep_image = pipe(prompt, num_inference_steps=1).images[0]
multistep_image = pipe(prompt, num_inference_steps=4).images[0]
```
### FRESCO
This is the Diffusers implementation of the zero-shot video-to-video translation pipeline [FRESCO](https://github.com/williamyang1991/FRESCO) (without the Ebsynth postprocessing and background smoothing). To run the code, first install gmflow, then set `gmflow_dir` to the path of your gmflow checkout. After that, you can run the pipeline with:
```py
import sys

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import ControlNetModel, DDIMScheduler, DiffusionPipeline

# Path to your local gmflow checkout, so the pipeline can import it
gmflow_dir = "/path/to/gmflow"
sys.path.insert(0, gmflow_dir)


def video_to_frame(video_path: str, interval: int):
    # Sample every `interval`-th frame (up to 8 frames), converted from BGR to RGB
    vidcap = cv2.VideoCapture(video_path)
    success = True

    count = 0
    res = []
    while success:
        count += 1
        success, image = vidcap.read()
        if count % interval != 1:
            continue
        if image is not None:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            res.append(image)
        if len(res) >= 8:
            break

    vidcap.release()
    return res


input_video_path = 'https://github.com/williamyang1991/FRESCO/raw/main/data/car-turn.mp4'
output_video_path = 'car.gif'

# You can use any fine-tuned SD model here
model_path = 'SG161222/Realistic_Vision_V2.0'

prompt = 'a red car turns in the winter'
a_prompt = ', RAW photo, subject, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3, '
n_prompt = '(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation'

input_interval = 5
frames = video_to_frame(input_video_path, input_interval)

# Compute Canny edge maps to use as ControlNet conditioning
control_frames = []
for frame in frames:
    image = cv2.Canny(frame, 50, 100)
    np_image = np.array(image)
    np_image = np_image[:, :, None]
    np_image = np.concatenate([np_image, np_image, np_image], axis=2)
    canny_image = Image.fromarray(np_image)
    control_frames.append(canny_image)

# You can use any ControlNet here
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny").to('cuda')

pipe = DiffusionPipeline.from_pretrained(
    model_path, controlnet=controlnet, custom_pipeline='fresco_v2v').to('cuda')
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

generator = torch.manual_seed(0)
frames = [Image.fromarray(frame) for frame in frames]

output_frames = pipe(
    prompt + a_prompt,
    frames,
    control_frames,
    num_inference_steps=20,
    strength=0.75,
    controlnet_conditioning_scale=0.7,
    generator=generator,
    negative_prompt=n_prompt,
).images

# Save the translated frames as an animated GIF
output_frames[0].save(output_video_path, save_all=True,
                      append_images=output_frames[1:], duration=100, loop=0)
```
# Perturbed-Attention Guidance
[Project](https://ku-cvlab.github.io/Perturbed-Attention-Guidance/) / [arXiv](https://arxiv.org/abs/2403.17377) / [GitHub](https://github.com/KU-CVLAB/Perturbed-Attention-Guidance)
examples/community/fresco_v2v.py (new file, 2511 lines; diff suppressed because it is too large)