Commit: v0.7

volotat committed May 7, 2023
1 parent a553de3 commit 4adcaf0
Showing 9 changed files with 1,001 additions and 566 deletions.
143 changes: 143 additions & 0 deletions FloweR/model.py
@@ -0,0 +1,143 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the model
class FloweR(nn.Module):
def __init__(self, input_size = (384, 384), window_size = 4):
super(FloweR, self).__init__()

self.input_size = input_size
self.window_size = window_size

        # INPUT: 384 x 384 x (window_size * 3)

### DOWNSCALE ###
self.conv_block_1 = nn.Sequential(
nn.Conv2d(3 * self.window_size, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 384 x 384 x 128

self.conv_block_2 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 192 x 192 x 128

self.conv_block_3 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 96 x 96 x 128

self.conv_block_4 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 48 x 48 x 128

self.conv_block_5 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 24 x 24 x 128

self.conv_block_6 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 12 x 12 x 128

self.conv_block_7 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 6 x 6 x 128

self.conv_block_8 = nn.Sequential(
nn.AvgPool2d(2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 3 x 3 x 128

### UPSCALE ###
self.conv_block_9 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 6 x 6 x 128

self.conv_block_10 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 12 x 12 x 128

self.conv_block_11 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 24 x 24 x 128

self.conv_block_12 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 48 x 48 x 128

self.conv_block_13 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 96 x 96 x 128

self.conv_block_14 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 192 x 192 x 128

self.conv_block_15 = nn.Sequential(
nn.Upsample(scale_factor=2),
nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
nn.ReLU(),
) # 384 x 384 x 128

self.conv_block_16 = nn.Conv2d(128, 3, kernel_size=3, stride=1, padding='same')

def forward(self, x):
if x.size(1) != self.window_size:
            raise Exception(f'Input shape is not compatible: there should be exactly {self.window_size} frames in the input video window.')

# batch, frames, height, width, colors
in_x = x.permute((0, 1, 4, 2, 3))
# batch, frames, colors, height, width

in_x = in_x.reshape(-1, self.window_size * 3, self.input_size[0], self.input_size[1])

### DOWNSCALE ###
block_1_out = self.conv_block_1(in_x) # 384 x 384 x 128
block_2_out = self.conv_block_2(block_1_out) # 192 x 192 x 128
block_3_out = self.conv_block_3(block_2_out) # 96 x 96 x 128
block_4_out = self.conv_block_4(block_3_out) # 48 x 48 x 128
block_5_out = self.conv_block_5(block_4_out) # 24 x 24 x 128
block_6_out = self.conv_block_6(block_5_out) # 12 x 12 x 128
block_7_out = self.conv_block_7(block_6_out) # 6 x 6 x 128
block_8_out = self.conv_block_8(block_7_out) # 3 x 3 x 128

### UPSCALE ###
block_9_out = block_7_out + self.conv_block_9(block_8_out) # 6 x 6 x 128
block_10_out = block_6_out + self.conv_block_10(block_9_out) # 12 x 12 x 128
block_11_out = block_5_out + self.conv_block_11(block_10_out) # 24 x 24 x 128
block_12_out = block_4_out + self.conv_block_12(block_11_out) # 48 x 48 x 128
block_13_out = block_3_out + self.conv_block_13(block_12_out) # 96 x 96 x 128
block_14_out = block_2_out + self.conv_block_14(block_13_out) # 192 x 192 x 128
block_15_out = block_1_out + self.conv_block_15(block_14_out) # 384 x 384 x 128

block_16_out = self.conv_block_16(block_15_out) # 384 x 384 x (2 + 1)
out = block_16_out.reshape(-1, 3, self.input_size[0], self.input_size[1])

# batch, colors, height, width
out = out.permute((0, 2, 3, 1))
# batch, height, width, colors
return out
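
For reference, a minimal sketch of how the model above might be exercised. This is a hedged example: random tensors stand in for real video frames, which are assumed to be resized to input_size and normalized elsewhere, and the import assumes the repository root is on the Python path.

import torch
from FloweR.model import FloweR  # assumes FloweR/ is importable as a package

model = FloweR(input_size=(384, 384), window_size=4)
model.eval()

# batch, frames, height, width, colors
frames = torch.randn(1, 4, 384, 384, 3)
with torch.no_grad():
    pred = model(frames)

print(pred.shape)  # torch.Size([1, 384, 384, 3]) -- batch, height, width, colors

The output keeps the channels-last layout of the input; the three output channels presumably correspond to the flow and occlusion information suggested by the '(2 + 1)' comment in conv_block_16's output shape.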
Binary file added examples/ui_preview.png
17 changes: 10 additions & 7 deletions readme.md
@@ -1,6 +1,10 @@
# SD-CN-Animation
This project allows you to automate video stylization tasks using StableDiffusion and ControlNet. It also lets you generate completely new videos from text, at any resolution and length and with any Stable Diffusion model as a backbone (including custom ones), in contrast to other current text2video methods. It uses the '[RAFT](https://github.com/princeton-vl/RAFT)' optical flow estimation algorithm to keep the animation stable and to create an occlusion mask that is used to generate the next frame. In text-to-video mode it relies on the 'FloweR' method (work in progress), which predicts optical flow from the previous frames.
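
As a rough illustration of this idea (a hedged sketch, not the extension's actual code): the previous stylized frame can be warped with the estimated flow, and a simple forward-backward consistency check can mark the occluded pixels that need to be re-generated. The function names and threshold below are made up for the example.

import cv2
import numpy as np

def warp_with_flow(image, flow):
    # Backward warping: sample `image` at (x + dx, y + dy) for every pixel,
    # where flow[..., 0] = dx and flow[..., 1] = dy (flow mapping the new
    # frame's positions back to the previous frame).
    h, w = flow.shape[:2]
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[..., 0]).astype(np.float32)
    map_y = (grid_y + flow[..., 1]).astype(np.float32)
    return cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR)

def occlusion_mask(flow_fwd, flow_bwd, threshold=1.0):
    # Forward-backward consistency check: pixels whose forward and backward
    # flows do not roughly cancel out are treated as occluded and have to be
    # re-generated by the diffusion model.
    flow_bwd_warped = warp_with_flow(flow_bwd, flow_fwd)
    err = np.linalg.norm(flow_fwd + flow_bwd_warped, axis=-1)
    return (err > threshold).astype(np.uint8) * 255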


![sd-cn-animation ui preview](examples/ui_preview.png)

### Video to Video Examples:
</table>
<table class="center">
@@ -46,11 +50,10 @@ Examples presented are generated at 1024x576 resolution using the 'realisticVisi
All examples shown here were originally generated at 512x512 resolution using the 'sd-v1-5-inpainting' model as a base. They were downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder. The actual prompts used follow the format "RAW photo, {subject}, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"; only the 'subject' part is described in the table above.

## Installing the extension
To install the extension, go to the 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then to the 'Install from URL' tab. In the 'URL for extension's git repository' field enter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave the 'Local directory name' field empty, then press the 'Install' button. Download the RAFT 'raft-things.pth' model from here: [Google Drive](https://drive.google.com/drive/folders/1sWDsfuZ3Up38EUQt7-JDTT1HcGHuJgvT) and place it into the 'stable-diffusion-webui/models/RAFT/' folder. Restart the web-ui; a new 'SD-CN-Animation' tab should appear. All generated videos will be saved into the 'stable-diffusion-webui/outputs/sd-cn-animation' folder.

## Last version changes: v0.6
* Complete rewrite of the project to make it installable as an Automatic1111/web-ui extension.
* Added flow normalization before resizing, so the magnitude of the flow is computed correctly at different resolutions.
* Less ghosting and color drift in vid2vid mode.
* Added a "warped styled frame fix" in vid2vid mode that removes duplication artifacts from parts of the image that cannot be recovered from the optical flow.
To install the extension, go to the 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then to the 'Install from URL' tab. In the 'URL for extension's git repository' field enter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave the 'Local directory name' field empty, then press the 'Install' button. Restart the web-ui; a new 'SD-CN-Animation' tab should appear. All generated videos will be saved into the 'stable-diffusion-webui/outputs/sd-cn-animation' folder.

## Last version changes: v0.7
* Text-to-video mode added to the extension.
* The 'Generate' button is now automatically disabled while a video is being generated.
* Added an 'Interrupt' button that allows stopping the video generation process.
* All necessary models are now downloaded automatically; no manual preparation is needed.
150 changes: 67 additions & 83 deletions scripts/base_ui.py
@@ -13,6 +13,7 @@
sys.path.extend([scripts_path_fix])

import gradio as gr
import modules
from types import SimpleNamespace

from modules import script_callbacks, shared
@@ -26,8 +27,7 @@
from modules.sd_samplers import samplers_for_img2img
from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel


from vid2vid import *
from core import vid2vid, txt2vid, utils

def V2VArgs():
seed = -1
@@ -75,41 +75,59 @@ def setup_common_values(mode, d):
def inputs_ui():
v2v_args = SimpleNamespace(**V2VArgs())
t2v_args = SimpleNamespace(**T2VArgs())
with gr.Tab('vid2vid') as tab_vid2vid:
with gr.Row():
gr.HTML('Put your video here')
with gr.Row():
vid2vid_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")
#init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", image_mode="RGBA")
#with gr.Row():
# gr.HTML('Alternative: enter the relative (to the webui) path to the file')
#with gr.Row():
# vid2vid_frames_path = gr.Textbox(label="Input video path", interactive=True, elem_id="vid_to_vid_chosen_path", placeholder='Enter your video path here, or upload in the box above ^')

width, height, prompt, n_prompt, cfg_scale, seed, processing_strength, fix_frame_strength = setup_common_values('vid2vid', v2v_args)

with FormRow(elem_id=f"sampler_selection_v2v"):
sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"v2v_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"v2v_steps", label="Sampling steps", value=15)

with FormRow(elem_id="vid2vid_override_settings_row") as row:
override_settings = create_override_settings_dropdown("vid2vid", row)

with FormGroup(elem_id=f"script_container"):
custom_inputs = scripts.scripts_img2img.setup_ui()
#with gr.Row():
# strength = gr.Slider(label="denoising strength", value=d.strength, minimum=0, maximum=1, step=0.05, interactive=True)
# vid2vid_startFrame=gr.Number(label='vid2vid start frame',value=d.vid2vid_startFrame)
with gr.Tabs():
sdcn_process_mode = gr.State(value='vid2vid')

with gr.Tab('vid2vid') as tab_vid2vid:
with gr.Row():
gr.HTML('Put your video here')
with gr.Row():
v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")
#init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", image_mode="RGBA")
#with gr.Row():
# gr.HTML('Alternative: enter the relative (to the webui) path to the file')
#with gr.Row():
# vid2vid_frames_path = gr.Textbox(label="Input video path", interactive=True, elem_id="vid_to_vid_chosen_path", placeholder='Enter your video path here, or upload in the box above ^')

v2v_width, v2v_height, v2v_prompt, v2v_n_prompt, v2v_cfg_scale, v2v_seed, v2v_processing_strength, v2v_fix_frame_strength = setup_common_values('vid2vid', v2v_args)

with FormRow(elem_id=f"sampler_selection_v2v"):
v2v_sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"v2v_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
v2v_steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"v2v_steps", label="Sampling steps", value=15)

with FormRow(elem_id="vid2vid_override_settings_row") as row:
v2v_override_settings = create_override_settings_dropdown("vid2vid", row)

with FormGroup(elem_id=f"script_container"):
v2v_custom_inputs = scripts.scripts_img2img.setup_ui()
#with gr.Row():
# strength = gr.Slider(label="denoising strength", value=d.strength, minimum=0, maximum=1, step=0.05, interactive=True)
# vid2vid_startFrame=gr.Number(label='vid2vid start frame',value=d.vid2vid_startFrame)

with gr.Tab('txt2vid') as tab_txt2vid:
t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength = setup_common_values('txt2vid', t2v_args)
with gr.Row():
t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)

with gr.Tab('txt2vid') as tab_txt2vid:
gr.Markdown('Work in progress...')
# width, height, prompt, n_prompt, steps, cfg_scale, seed, processing_strength, fix_frame_strength = setup_common_values('txt2vid', t2v_args)

#with gr.Tab('settings') as tab_setts:
# gr.Markdown('Work in progress...')

tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[sdcn_process_mode])
tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[sdcn_process_mode])

return locals()

def process(*args):
if args[0] == 'vid2vid':
yield from vid2vid.start_process(*args)
elif args[0] == 'txt2vid':
yield from txt2vid.start_process(*args)
else:
raise Exception(f"Unsupported processing mode: '{args[0]}'")

def stop_process(*args):
utils.shared.is_interrupted = True
return gr.Button.update(interactive=False)

def on_ui_tabs():
modules.scripts.scripts_current = modules.scripts.scripts_img2img
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
@@ -118,11 +136,8 @@ def on_ui_tabs():
components = {}

#dv = SimpleNamespace(**T2VOutputArgs())
with gr.Row(elem_id='v2v-core').style(equal_height=False, variant='compact'):
with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'):
with gr.Column(scale=1, variant='panel'):
with gr.Row(variant='compact'):
run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary')

with gr.Tabs():
components = inputs_ui()

@@ -139,6 +154,10 @@ def on_ui_tabs():
# custom_inputs = scripts.scripts_img2img.setup_ui()

with gr.Column(scale=1, variant='compact'):
with gr.Row(variant='compact'):
run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary')
stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False)

with gr.Column(variant="panel"):
sp_progress = gr.HTML(elem_id="sp_progress", value="")
sp_progress.update()
@@ -157,52 +176,9 @@

with gr.Row(variant='compact'):
dummy_component = gr.Label(visible=False)


# Define parameters for the action methods. Not all of them are included yet
method_inputs = [
dummy_component, # send None for task_id
dummy_component, # mode
components['prompt'], # prompt
components['n_prompt'], # negative_prompt
dummy_component, # prompt_styles
components['vid2vid_file'], # input_video
dummy_component, # sketch
dummy_component, # init_img_with_mask
dummy_component, # inpaint_color_sketch
dummy_component, # inpaint_color_sketch_orig
dummy_component, # init_img_inpaint
dummy_component, # init_mask_inpaint
components['steps'], # steps
components['sampler_index'], # sampler_index
dummy_component, # mask_blur
dummy_component, # mask_alpha
dummy_component, # inpainting_fill
dummy_component, # restore_faces
dummy_component, # tiling
dummy_component, # n_iter
dummy_component, # batch_size
components['cfg_scale'], # cfg_scale
dummy_component, # image_cfg_scale
components['processing_strength'], # denoising_strength
components['fix_frame_strength'], # fix_frame_strength
components['seed'], # seed
dummy_component, # subseed
dummy_component, # subseed_strength
dummy_component, # seed_resize_from_h
dummy_component, # seed_resize_from_w
dummy_component, # seed_enable_extras
components['height'], # height
components['width'], # width
dummy_component, # resize_mode
dummy_component, # inpaint_full_res
dummy_component, # inpaint_full_res_padding
dummy_component, # inpainting_mask_invert
dummy_component, # img2img_batch_input_dir
dummy_component, # img2img_batch_output_dir
dummy_component, # img2img_batch_inpaint_mask_dir
components['override_settings'], # override_settings_texts
] + components['custom_inputs']
# Define parameters for the action methods.
method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs']

method_outputs = [
sp_progress,
@@ -211,15 +187,23 @@
img_preview_prev_warp,
img_preview_processed,
html_log,
run_button,
stop_button
]

run_button.click(
fn=start_process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
inputs=method_inputs,
outputs=method_outputs,
show_progress=True,
)

stop_button.click(
fn=stop_process,
outputs=[stop_button],
show_progress=False
)

modules.scripts.scripts_current = None

# define queue - required for generators
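
The base_ui.py changes above follow a common Gradio pattern: a generator handler streams progress while yielding button-state updates, and a second button sets an interrupt flag that the loop checks between steps. A minimal, self-contained sketch of that pattern (hypothetical names and a dummy loop, not the extension's actual code):

import time
import gradio as gr

is_interrupted = False  # stand-in for utils.shared.is_interrupted

def generate(steps):
    global is_interrupted
    is_interrupted = False
    # Disable "Generate" and enable "Interrupt" while work is in progress.
    yield "starting...", gr.Button.update(interactive=False), gr.Button.update(interactive=True)
    for i in range(int(steps)):
        if is_interrupted:
            break
        time.sleep(0.1)  # stand-in for generating one frame
        yield f"frame {i + 1}/{int(steps)}", gr.Button.update(interactive=False), gr.Button.update(interactive=True)
    # Restore the buttons when finished or interrupted.
    yield "done", gr.Button.update(interactive=True), gr.Button.update(interactive=False)

def interrupt():
    global is_interrupted
    is_interrupted = True
    return gr.Button.update(interactive=False)

with gr.Blocks() as demo:
    steps = gr.Number(value=10, label="Frames")
    status = gr.Textbox(label="Progress")
    run_button = gr.Button("Generate", variant="primary")
    stop_button = gr.Button("Interrupt", interactive=False)
    run_button.click(fn=generate, inputs=[steps], outputs=[status, run_button, stop_button])
    stop_button.click(fn=interrupt, outputs=[stop_button], show_progress=False)

demo.queue(concurrency_count=2)  # queue is required for generators; >1 lets the interrupt click run during generation
demo.launch()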