-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
31 changed files
with
5,325 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# ProPainter | ||
Improving Propagation and Transformer for Video Inpainting using Vapoursynth, based on [ProPainter](https://github.com/sczhou/ProPainter). | ||
|
||
The VapourSynth filter version has the advantage of transforming the images directly in memory, without needing to use the filesystem to store the video frames. With VapourSynth the filter is faster and doesn't have any limitation on the number of frames that can be processed. | ||
|
||
## Dependencies | ||
- [PyTorch](https://pytorch.org/get-started) 2.4.0 or later | ||
- [VapourSynth](http://www.vapoursynth.com/) R68 or later | ||
|
||
|
||
## Installation | ||
``` | ||
pip install vspropainter-x.x.x-py3-none-any.whl | ||
``` | ||
## Models Download | ||
The models are not installed with the package; they must be downloaded from the ProPainter GitHub site. | ||
|
||
The models to download are: | ||
|
||
- [ProPainter.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/ProPainter.pth) | ||
- [raft-things.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/raft-things.pth) | ||
- [recurrent_flow_completion.pth](https://github.com/sczhou/ProPainter/releases/download/v0.1.0/recurrent_flow_completion.pth) | ||
|
||
The _model files_ must be copied into the **weights** directory, usually located at: | ||
|
||
.\Lib\site-packages\vspropainter\weights | ||
|
||
## Usage | ||
```python | ||
# adjusting color space to RGB24 (full range) for vsProPainter | ||
clip = core.resize.Bicubic(clip=clip, format=vs.RGB24, matrix_in_s="709", range_s="full") | ||
from vspropainter import propainter | ||
|
||
# ProPainter using a mask image | ||
clip = propainter(clip, img_mask="sample.png") | ||
|
||
# ProPainter using a clip mask | ||
clipMask = core.lsmas.LWLibavSource(source="sample_mask.mp4", format="RGB24", cache=0) | ||
clip = propainter(clip, clip_mask=clipMask) | ||
``` | ||
See `__init__.py` for the description of the parameters. | ||
|
||
## Memory optimization and inference speed-up | ||
|
||
Video inpainting typically requires a significant amount of GPU memory. The filter offers various features that facilitate memory-efficient inference, effectively avoiding the Out-Of-Memory error. You can use the following options to reduce memory usage further: | ||
|
||
- Reduce the number of local neighbors through decreasing the parameter *neighbor_length* (default 10). | ||
- Reduce the number of global references by increasing the parameter *ref_stride* (default 10). | ||
- Set the parameter *enable_fp16* to **True** to use fp16 (half precision) during inference. | ||
- Reduce the number of frames that the model processes in one sequence by decreasing the parameter *length* (default 100). | ||
- Set a smaller mask region via the parameter *mask_region*. The mask region can be specified using a tuple with the following format: (width, height, left, top). Reducing the mask region significantly speeds up the inference, especially on HD movies, but the region must be big enough to allow the inference. If the output is bad, it will be necessary to increase its size. | ||
|
||
With the sole exception of the parameter *length*, the options that reduce memory usage also speed up the inference. | ||
|
||
If the mask is not able to completely remove the masked object, it is possible to increase the parameter *mask_dilation* to extend the mask's size. | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
[build-system] | ||
requires = ["hatchling"] | ||
build-backend = "hatchling.build" | ||
|
||
[tool.hatch.build] | ||
exclude = [ | ||
] | ||
|
||
[project] | ||
name = "vspropainter" | ||
version = "1.0.0" | ||
description = "ProPainter function for VapourSynth" | ||
readme = "README.md" | ||
requires-python = ">=3.10" | ||
license = {file = "LICENSE"} | ||
authors = [{name = "Dan64", email = "[email protected]"}] | ||
keywords = ["ProPainter", "VapourSynth"] | ||
classifiers = [ | ||
"License :: OSI Approved :: MIT License", | ||
"Operating System :: OS Independent", | ||
"Programming Language :: Python :: 3 :: Only", | ||
"Topic :: Multimedia :: Video" | ||
] | ||
dependencies = [ | ||
"numpy>=1.26.4", | ||
"nvidia-cuda-runtime-cu12>=12.5.39", | ||
"torchvision>=0.19.0", | ||
"torch>=2.4.0", | ||
"Pillow>=10.1.0", | ||
"VapourSynth>=68", | ||
] | ||
|
||
[project.urls] | ||
"Homepage" = "https://github.com/dan64/vs-propainter" | ||
"Bug Tracker" = "https://github.com/dan64/vs-propainter/issues" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# from .demo import RAFT_infer | ||
from .raft import RAFT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import torch | ||
import torch.nn.functional as F | ||
from .utils.utils import bilinear_sampler, coords_grid | ||
|
||
try:
    import alt_cuda_corr
except ImportError:
    # The optional alt_cuda_corr extension is not compiled/installed.
    # Only ImportError is swallowed, so unrelated failures (and Ctrl-C)
    # still surface. The name stays undefined in that case, which means
    # code relying on it (AlternateCorrBlock) cannot run.
    pass
|
||
|
||
class CorrBlock:
    """All-pairs correlation volume with an average-pooled pyramid.

    The constructor correlates every position of ``fmap1`` with every
    position of ``fmap2`` and stores ``num_levels`` versions of that volume,
    each level obtained by 2x2 average pooling of the previous one. Calling
    the instance samples a ``(2 * radius + 1)`` square window from every
    level around the given coordinates.
    """

    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        """Build the correlation pyramid.

        Args:
            fmap1: 4-D feature map, unpacked as (batch, dim, ht, wd).
            fmap2: feature map with the same number of elements per sample.
            num_levels: number of pyramid levels to keep.
            radius: half-size of the lookup window used by ``__call__``.
        """
        self.num_levels = num_levels
        self.radius = radius
        self.corr_pyramid = []

        # all pairs correlation
        corr = CorrBlock.corr(fmap1, fmap2)

        # Fold the source positions into the batch axis so that pooling only
        # acts on the target (h2, w2) plane.
        batch, h1, w1, dim, h2, w2 = corr.shape
        corr = corr.reshape(batch * h1 * w1, dim, h2, w2)

        self.corr_pyramid.append(corr)
        for _ in range(self.num_levels - 1):
            corr = F.avg_pool2d(corr, 2, stride=2)
            self.corr_pyramid.append(corr)

    def __call__(self, coords):
        """Sample every pyramid level around ``coords``.

        Args:
            coords: 4-D tensor whose axis 1 holds the two coordinate
                components (it is permuted to channels-last here).

        Returns:
            Contiguous float tensor with the per-level windows concatenated
            along axis 1.
        """
        r = self.radius
        coords = coords.permute(0, 2, 3, 1)
        batch, h1, w1, _ = coords.shape

        # The window offsets do not depend on the level: build them and move
        # them to the target device once instead of once per iteration.
        # indexing="ij" is the historical default, spelled out to avoid the
        # deprecation warning.
        dx = torch.linspace(-r, r, 2 * r + 1)
        dy = torch.linspace(-r, r, 2 * r + 1)
        delta = torch.stack(torch.meshgrid(dy, dx, indexing="ij"), dim=-1)
        delta_lvl = delta.to(coords.device).view(1, 2 * r + 1, 2 * r + 1, 2)
        centroid = coords.reshape(batch * h1 * w1, 1, 1, 2)

        out_pyramid = []
        for i in range(self.num_levels):
            corr = self.corr_pyramid[i]

            # Coordinates shrink by 2 per level, matching the pooled volume.
            coords_lvl = centroid / 2 ** i + delta_lvl

            corr = bilinear_sampler(corr, coords_lvl)
            corr = corr.view(batch, h1, w1, -1)
            out_pyramid.append(corr)

        out = torch.cat(out_pyramid, dim=-1)
        return out.permute(0, 3, 1, 2).contiguous().float()

    @staticmethod
    def corr(fmap1, fmap2):
        """Return the all-pairs dot-product correlation of two feature maps.

        The result has shape (batch, ht, wd, 1, ht, wd) and is divided by
        sqrt(dim), where (batch, dim, ht, wd) is the shape of ``fmap1``.
        """
        batch, dim, ht, wd = fmap1.shape
        # reshape (rather than view) also accepts non-contiguous inputs.
        fmap1 = fmap1.reshape(batch, dim, ht * wd)
        fmap2 = fmap2.reshape(batch, dim, ht * wd)

        corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
        corr = corr.view(batch, ht, wd, 1, ht, wd)
        return corr / torch.sqrt(torch.tensor(dim).float())
|
||
|
||
class CorrLayer(torch.autograd.Function):
    """Autograd wrapper around the ``correlation_cudaz`` extension.

    ``forward`` and ``backward`` delegate to the extension's functions of the
    same name. The extension is not imported in the visible part of this
    module, so it is looked up at call time and a descriptive ImportError is
    raised when it is missing, instead of an opaque NameError.
    NOTE(review): confirm which compiled extension is meant to provide
    ``correlation_cudaz``.
    """

    @staticmethod
    def _extension():
        """Return the ``correlation_cudaz`` module or raise ImportError."""
        ext = globals().get("correlation_cudaz")
        if ext is None:
            raise ImportError(
                "CorrLayer requires the 'correlation_cudaz' extension, "
                "which is not available in this module"
            )
        return ext

    @staticmethod
    def forward(ctx, fmap1, fmap2, coords, r):
        """Compute the correlation for ``coords`` with lookup radius ``r``."""
        ext = CorrLayer._extension()
        fmap1 = fmap1.contiguous()
        fmap2 = fmap2.contiguous()
        coords = coords.contiguous()
        # Keep the inputs and the radius around for the backward pass.
        ctx.save_for_backward(fmap1, fmap2, coords)
        ctx.r = r
        # The extension returns a one-element sequence.
        corr, = ext.forward(fmap1, fmap2, coords, ctx.r)
        return corr

    @staticmethod
    def backward(ctx, grad_corr):
        """Return gradients for (fmap1, fmap2, coords); ``r`` gets None."""
        ext = CorrLayer._extension()
        fmap1, fmap2, coords = ctx.saved_tensors
        grad_corr = grad_corr.contiguous()
        fmap1_grad, fmap2_grad, coords_grad = ext.backward(
            fmap1, fmap2, coords, grad_corr, ctx.r
        )
        return fmap1_grad, fmap2_grad, coords_grad, None
|
||
|
||
class AlternateCorrBlock:
    """Correlation lookup backed by the ``alt_cuda_corr`` extension.

    Instead of storing a full correlation volume, this keeps a pyramid of
    pooled feature maps and asks the extension for the correlation window
    around the requested coordinates at each level.
    """

    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
        """Build the feature pyramid.

        Args:
            fmap1: 4-D feature map (channels on axis 1).
            fmap2: 4-D feature map pooled alongside ``fmap1``.
            num_levels: number of pyramid levels used by ``__call__``.
            radius: lookup radius forwarded to the extension.
        """
        self.num_levels = num_levels
        self.radius = radius

        self.pyramid = [(fmap1, fmap2)]
        # Only levels 0 .. num_levels-1 are ever read, so pool
        # num_levels - 1 times (as CorrBlock does). One extra pooling step
        # is wasted work and fails once the map has shrunk to 1x1.
        for _ in range(self.num_levels - 1):
            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
            self.pyramid.append((fmap1, fmap2))

    def __call__(self, coords):
        """Look up correlation windows around ``coords`` on every level.

        Args:
            coords: 4-D tensor whose axis 1 holds the two coordinate
                components (it is permuted to channels-last here).

        Returns:
            Tensor reshaped to (B, -1, H, W), divided by sqrt(channels).

        Raises:
            ImportError: if the ``alt_cuda_corr`` extension is unavailable.
        """
        # The import at module level is optional, so resolve it here and
        # fail with an explicit message rather than a NameError.
        ext = globals().get("alt_cuda_corr")
        if ext is None:
            raise ImportError(
                "AlternateCorrBlock requires the compiled 'alt_cuda_corr' "
                "extension, which is not available"
            )

        coords = coords.permute(0, 2, 3, 1)
        B, H, W, _ = coords.shape
        dim = self.pyramid[0][0].shape[1]
        r = self.radius

        # The full-resolution fmap1 is used at every level; prepare it once.
        fmap1_0 = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()

        corr_list = []
        for i in range(self.num_levels):
            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()

            coords_i = (coords / 2 ** i).reshape(B, 1, H, W, 2).contiguous()
            # alt_cuda_corr is a module, not a callable: use its ``forward``
            # binding, which returns a one-element sequence.
            corr, = ext.forward(fmap1_0, fmap2_i, coords_i, r)
            corr_list.append(corr.squeeze(1))

        corr = torch.stack(corr_list, dim=1)
        corr = corr.reshape(B, -1, H, W)
        # Same normalisation as CorrBlock.corr; equals the former constant
        # 16.0 when the feature maps have 256 channels.
        return corr / torch.sqrt(torch.tensor(dim).float())
Oops, something went wrong.