Skip to content

Commit

Permalink
Initial commit for version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
dan64 committed Jun 1, 2024
1 parent 8db5ea2 commit 0324cb0
Show file tree
Hide file tree
Showing 31 changed files with 5,325 additions and 0 deletions.
59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# ProPainter
Improving Propagation and Transformer for Video Inpainting using Vapoursynth, based on [ProPainter](https://github.com/sczhou/ProPainter).

The Vapoursynth filter version has the advantage of transforming the images directly in memory, without the need to use the filesystem to store the video frames. Using Vapoursynth, the filter is faster and doesn't have any limit on the number of frames that can be processed.

## Dependencies
- [PyTorch](https://pytorch.org/get-started) 2.4.0 or later
- [VapourSynth](http://www.vapoursynth.com/) R68 or later


## Installation
```
pip install vspropainter-x.x.x-py3-none-any.whl
```
## Models Download
The models are not installed with the package; they must be downloaded from the ProPainter GitHub site.

The models to download are:

- [ProPainter.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/ProPainter.pth)
- [raft-things.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/raft-things.pth)
- [recurrent_flow_completion.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/recurrent_flow_completion.pth)

The _model files_ have to be copied into the **weights** directory, usually located in:

.\Lib\site-packages\vspropainter\weights

## Usage
```python
# adjusting color space to RGB24 (full range) for vsProPainter
clip = core.resize.Bicubic(clip=clip, format=vs.RGB24, matrix_in_s="709", range_s="full")
from vspropainter import propainter

# ProPainter using a mask image
clip = propainter(clip, img_mask="sample.png")

# ProPainter using a clip mask
clipMask = core.lsmas.LWLibavSource(source="sample_mask.mp4", format="RGB24", cache=0)
clip = propainter(clip, clip_mask=clipMask)
```
See `__init__.py` for the description of the parameters.

## Memory optimization and inference speed-up

Video inpainting typically requires a significant amount of GPU memory. The filter offers various features that facilitate memory-efficient inference, effectively avoiding the Out-Of-Memory error. You can use the following options to reduce memory usage further:

- Reduce the number of local neighbors by decreasing the parameter *neighbor_length* (default 10).
- Reduce the number of global references by increasing the parameter *ref_stride* (default 10).
- Set the parameter *enable_fp16* to **True** to use fp16 (half precision) during inference.
- Reduce the length of the frame sequence that the model processes by decreasing the parameter *length* (default 100).
- Set a smaller mask region via the parameter *mask_region*. The mask region can be specified using a tuple with the following format: (width, height, left, top). Reducing the mask region can significantly speed up the inference, especially on HD movies, but the region must be big enough to allow the inference. If the output is bad, it will be necessary to increase its size.

With the sole exception of the parameter *length*, the options that reduce memory usage also speed up the inference.

If the mask is not able to completely remove the masked object, it is possible to increase the parameter *mask_dilation* to extend the mask's size.




35 changes: 35 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
exclude = [
]

[project]
name = "vspropainter"
version = "1.0.0"
description = "ProPainter function for VapourSynth"
readme = "README.md"
requires-python = ">=3.10"
license = {file = "LICENSE"}
authors = [{name = "Dan64", email = "[email protected]"}]
keywords = ["ProPainter", "VapourSynth"]
classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3 :: Only",
"Topic :: Multimedia :: Video"
]
dependencies = [
"numpy>=1.26.4",
"nvidia-cuda-runtime-cu12>=12.5.39",
"torchvision>=0.19.0",
"torch>=2.4.0",
"Pillow>=10.1.0",
"VapourSynth>=68",
]

[project.urls]
"Homepage" = "https://github.com/dan64/vs-propainter"
"Bug Tracker" = "https://github.com/dan64/vs-propainter/issues"
2 changes: 2 additions & 0 deletions vspropainter/RAFT/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# from .demo import RAFT_infer
from .raft import RAFT
111 changes: 111 additions & 0 deletions vspropainter/RAFT/corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import torch
import torch.nn.functional as F
from .utils.utils import bilinear_sampler, coords_grid

try:
    import alt_cuda_corr
except ImportError:
    # alt_cuda_corr is not compiled. CorrBlock does not use it, so the
    # import stays optional; only ImportError is swallowed so that
    # unrelated failures (e.g. KeyboardInterrupt) are not hidden.
    pass


class CorrBlock:
    """All-pairs correlation volume stored as an average-pooled pyramid.

    The constructor correlates every position of ``fmap1`` with every
    position of ``fmap2`` and keeps ``num_levels`` progressively pooled
    copies in ``corr_pyramid``. Calling the instance samples a
    ``(2 * radius + 1)`` x ``(2 * radius + 1)`` window from each level
    around the given coordinates.
    """

    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        """Build the correlation pyramid.

        Args:
            fmap1: Feature map whose shape unpacks as
                ``(batch, dim, ht, wd)``.
            fmap2: Feature map that is viewed with the same dimensions
                as ``fmap1``.
            num_levels: Number of pyramid levels to store.
            radius: Half-size of the lookup window used by ``__call__``.
        """
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        corr = CorrBlock.corr(fmap1, fmap2)

        # Fold batch and source position into one leading axis so that
        # avg_pool2d downsamples only the trailing (h2, w2) target axes.
        batch, h1, w1, dim, h2, w2 = corr.shape
        corr = corr.reshape(batch * h1 * w1, dim, h2, w2)

        self.corr_pyramid.append(corr)
        for _ in range(self.num_levels - 1):
            corr = F.avg_pool2d(corr, 2, stride=2)
            self.corr_pyramid.append(corr)

    def __call__(self, coords):
        """Sample every pyramid level in a window around ``coords``.

        Args:
            coords: Tensor that is permuted with ``(0, 2, 3, 1)`` and then
                unpacked as ``(batch, h1, w1, 2)``.

        Returns:
            A contiguous float tensor of shape ``(batch, C, h1, w1)`` where
            ``C`` is the concatenation of the per-level sampled windows.
        """
        r = self.radius
        coords = coords.permute(0, 2, 3, 1)
        batch, h1, w1, _ = coords.shape

        # The window offsets do not depend on the pyramid level, so they are
        # built and moved to the target device once instead of per level.
        # indexing="ij" is the behaviour the original implicit call relied
        # on; passing it explicitly avoids torch's deprecation warning.
        offsets = torch.linspace(-r, r, 2 * r + 1)
        delta = torch.stack(
            torch.meshgrid(offsets, offsets, indexing="ij"), dim=-1
        ).to(coords.device)
        delta_lvl = delta.view(1, 2 * r + 1, 2 * r + 1, 2)
        centroid = coords.reshape(batch * h1 * w1, 1, 1, 2)

        out_pyramid = []
        for i in range(self.num_levels):
            corr = self.corr_pyramid[i]

            # Level i was pooled i times, so coordinates shrink by 2**i.
            coords_lvl = centroid / 2**i + delta_lvl

            # bilinear_sampler is imported from .utils.utils; presumably it
            # interpolates `corr` at `coords_lvl` — confirm against helper.
            corr = bilinear_sampler(corr, coords_lvl)
            corr = corr.view(batch, h1, w1, -1)
            out_pyramid.append(corr)

        out = torch.cat(out_pyramid, dim=-1)
        return out.permute(0, 3, 1, 2).contiguous().float()

    @staticmethod
    def corr(fmap1, fmap2):
        """Return the scaled dot-product correlation of two feature maps.

        Args:
            fmap1: Tensor whose shape unpacks as ``(batch, dim, ht, wd)``.
            fmap2: Tensor viewed as ``(batch, dim, ht * wd)`` using the
                dimensions of ``fmap1``.

        Returns:
            Tensor of shape ``(batch, ht, wd, 1, ht, wd)`` holding the dot
            product over ``dim`` for every pair of positions, divided by
            ``sqrt(dim)``.
        """
        batch, dim, ht, wd = fmap1.shape
        fmap1 = fmap1.view(batch, dim, ht * wd)
        fmap2 = fmap2.view(batch, dim, ht * wd)

        corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
        corr = corr.view(batch, ht, wd, 1, ht, wd)
        return corr / torch.sqrt(torch.tensor(dim).float())


class CorrLayer(torch.autograd.Function):
    """Autograd wrapper around the compiled correlation kernels.

    NOTE(review): the previous body called ``correlation_cudaz``, a name that
    is never imported or defined in this module, so any use raised
    ``NameError``. It now calls ``alt_cuda_corr``, the only extension this
    module imports. Confirm that the extension exposes ``forward`` and
    ``backward`` with the signatures used below.
    """

    @staticmethod
    def forward(ctx, fmap1, fmap2, coords, r):
        """Run the extension's forward kernel and stash inputs for backward.

        Args:
            ctx: Autograd context.
            fmap1: First feature tensor (made contiguous before the call).
            fmap2: Second feature tensor (made contiguous before the call).
            coords: Lookup coordinates (made contiguous before the call).
            r: Lookup radius forwarded to the kernel.

        Returns:
            The single tensor returned by the extension's ``forward``.
        """
        fmap1 = fmap1.contiguous()
        fmap2 = fmap2.contiguous()
        coords = coords.contiguous()
        ctx.save_for_backward(fmap1, fmap2, coords)
        ctx.r = r
        # The kernel result is unpacked as a one-element sequence.
        corr, = alt_cuda_corr.forward(fmap1, fmap2, coords, ctx.r)
        return corr

    @staticmethod
    def backward(ctx, grad_corr):
        """Run the extension's backward kernel.

        Returns:
            Gradients for ``fmap1``, ``fmap2`` and ``coords``; ``None`` for
            the radius argument, which is not differentiated.
        """
        fmap1, fmap2, coords = ctx.saved_tensors
        grad_corr = grad_corr.contiguous()
        fmap1_grad, fmap2_grad, coords_grad = alt_cuda_corr.backward(
            fmap1, fmap2, coords, grad_corr, ctx.r
        )
        return fmap1_grad, fmap2_grad, coords_grad, None


class AlternateCorrBlock:
    """Correlation lookup that delegates to the ``alt_cuda_corr`` extension.

    Instead of materialising the all-pairs volume, it keeps a pyramid of
    average-pooled feature maps and asks the extension for the correlation
    values around the requested coordinates at each level.
    """

    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        """Store the feature pyramid.

        Args:
            fmap1: First feature map (4-D, pooled with ``avg_pool2d``).
            fmap2: Second feature map (4-D, pooled with ``avg_pool2d``).
            num_levels: Number of levels read by ``__call__``.
            radius: Lookup radius forwarded to the extension.
        """
        self.num_levels = num_levels
        self.radius = radius

        # The pyramid ends up with num_levels + 1 entries; __call__ only
        # reads the first num_levels. Kept as-is so `pyramid` is unchanged
        # for any external reader.
        self.pyramid = [(fmap1, fmap2)]
        for _ in range(self.num_levels):
            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
            self.pyramid.append((fmap1, fmap2))

    def __call__(self, coords):
        """Look up correlation features around ``coords`` at every level.

        Args:
            coords: Tensor that is permuted with ``(0, 2, 3, 1)`` and then
                unpacked as ``(B, H, W, 2)``.

        Returns:
            Tensor of shape ``(B, C, H, W)`` scaled by ``1 / sqrt(dim)``,
            where ``dim`` is the channel count of the full-resolution
            ``fmap1``.
        """
        coords = coords.permute(0, 2, 3, 1)
        B, H, W, _ = coords.shape
        dim = self.pyramid[0][0].shape[1]
        r = self.radius

        # The full-resolution fmap1 is used at every level, so permute it
        # once. permute() yields a non-contiguous view; it is made
        # contiguous before being handed to the compiled kernel.
        fmap1_full = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()

        corr_list = []
        for i in range(self.num_levels):
            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()

            # Level i was pooled i times, so coordinates shrink by 2**i.
            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()

            # NOTE(review): the module object itself was called before,
            # which raises TypeError. `forward` returning a one-element
            # sequence is assumed from the extension's binding — confirm.
            corr, = alt_cuda_corr.forward(fmap1_full, fmap2_i, coords_i, r)
            corr_list.append(corr.squeeze(1))

        corr = torch.stack(corr_list, dim=1)
        corr = corr.reshape(B, -1, H, W)
        # Same sqrt(dim) normalisation as CorrBlock.corr; equals the former
        # hard-coded 16.0 when dim == 256.
        return corr / (dim ** 0.5)
Loading

0 comments on commit 0324cb0

Please sign in to comment.