Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CodeCamp2023-554 #2402

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
_base_ = ['./super-resolution_dynamic.py', '../../_base_/backends/tensorrt.py']
codebase_config = dict(type='mmagic', task='VideoSuperResolution')
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
model_inputs=[
dict(
input_shapes=dict(
input=dict(
min_shape=[1, 3, 32, 32],
opt_shape=[1, 3, 256, 256],
max_shape=[1, 3, 512, 512])))
])
3 changes: 1 addition & 2 deletions mmdeploy/backend/sdk/export_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def get_preprocess(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
transform['to_float'] = False
transform['mean'] = [0, 0, 0]
transform['std'] = [1, 1, 1]
if transforms[0]['type'] != 'Lift':
if transforms[0]['type'] not in ['Lift', 'GenerateSegmentIndices'] :
assert transforms[0]['type'] == 'LoadImageFromFile', \
'The first item type of pipeline should be LoadImageFromFile'
return dict(
Expand Down Expand Up @@ -352,7 +352,6 @@ def export2SDK(deploy_cfg: Union[str, mmengine.Config],
deploy_info = get_deploy(deploy_cfg, model_cfg, work_dir, device)
pipeline_info = get_pipeline(deploy_cfg, model_cfg, work_dir, device)
detail_info = get_detail(deploy_cfg, model_cfg, pth=pth)

mmengine.dump(
deploy_info,
'{}/deploy.json'.format(work_dir),
Expand Down
3 changes: 2 additions & 1 deletion mmdeploy/codebase/mmagic/deploy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdeploy.codebase.mmagic.deploy.mmediting import MMEditing
from mmdeploy.codebase.mmagic.deploy.super_resolution import SuperResolution
from mmdeploy.codebase.mmagic.deploy.video_super_resolution import VideoSuperResolution

__all__ = ['MMEditing', 'SuperResolution']
__all__ = ['MMEditing', 'SuperResolution', 'VideoSuperResolution']
244 changes: 244 additions & 0 deletions mmdeploy/codebase/mmagic/deploy/video_super_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
# Copyright (c) OpenMMLab. All rights reserved.
from copy import deepcopy
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union

import mmengine
import numpy as np
import torch
from mmengine import Config
from mmengine.dataset import pseudo_collate
from mmengine.model import BaseDataPreprocessor
import os.path as osp
import mmcv
from typing import List
from mmengine.dataset import Compose
from mmengine.structures import BaseDataElement
from mmagic.structures import DataSample


from mmdeploy.codebase.base import BaseTask
from mmdeploy.codebase.mmagic.deploy.super_resolution import SuperResolution
from mmdeploy.codebase.mmagic.deploy.mmediting import MMAGIC_TASK
from mmdeploy.utils import Task, get_input_shape, get_root_logger
from mmagic.apis.inferencers.base_mmagic_inferencer import (
InputsType,
PredType,
ResType,
)
from mmagic.apis.inferencers.inference_functions import VIDEO_EXTENSIONS, pad_sequence
import glob
import os
from mmagic.utils import tensor2img
import cv2
from mmengine.utils import ProgressBar
from mmengine.logging import MMLogger
from .super_resolution import process_model_config


@MMAGIC_TASK.register_module(Task.VIDEO_SUPER_RESOLUTION.value)
class VideoSuperResolution(SuperResolution):
"""BaseTask class of video super resolution task.

Args:
model_cfg (mmengine.Config): Model config file.
deploy_cfg (mmengine.Config): Deployment config file.
device (str): A string specifying device type.
"""

extra_parameters = dict(
start_idx=0, filename_tmpl="{:08d}.png", window_size=0, max_seq_len=None
)

def __init__(
self, model_cfg: mmengine.Config, deploy_cfg: mmengine.Config, device: str
):
super(VideoSuperResolution, self).__init__(model_cfg, deploy_cfg, device)

def preprocess(self, video: InputsType) -> Dict:
"""Process the inputs into a model-feedable format.

Args:
video(InputsType): Video to be restored by models.

Returns:
results(InputsType): Results of preprocess.
"""
# build the data pipeline
if self.model_cfg.get("demo_pipeline", None):
test_pipeline = self.model_cfg.demo_pipeline
elif self.model_cfg.get("test_pipeline", None):
test_pipeline = self.model_cfg.test_pipeline
else:
test_pipeline = self.model_cfg.val_pipeline

# check if the input is a video
file_extension = osp.splitext(video)[1]
if file_extension in VIDEO_EXTENSIONS:
video_reader = mmcv.VideoReader(video)
# load the images
data = dict(img=[], img_path=None, key=video)
for frame in video_reader:
data["img"].append(np.flip(frame, axis=2))

# remove the data loading pipeline
tmp_pipeline = []
for pipeline in test_pipeline:
if pipeline["type"] not in [
"GenerateSegmentIndices",
"LoadImageFromFile",
]:
tmp_pipeline.append(pipeline)
test_pipeline = tmp_pipeline
else:
# the first element in the pipeline must be
# 'GenerateSegmentIndices'
if test_pipeline[0]["type"] != "GenerateSegmentIndices":
raise TypeError(
"The first element in the pipeline must be "
f'"GenerateSegmentIndices", but got '
f'"{test_pipeline[0]["type"]}".'
)

# specify start_idx and filename_tmpl
test_pipeline[0]["start_idx"] = self.extra_parameters["start_idx"]
test_pipeline[0]["filename_tmpl"] = self.extra_parameters["filename_tmpl"]

# prepare data
sequence_length = len(glob.glob(osp.join(video, "*")))
lq_folder = osp.dirname(video)
key = osp.basename(video)
data = dict(
img_path=lq_folder, gt_path="", key=key, sequence_length=sequence_length
)

# compose the pipeline
test_pipeline = Compose(test_pipeline)
data = test_pipeline(data)
results = data["inputs"].unsqueeze(0) / 255.0 # in cpu
data["inputs"] = results
return data

def create_input(
self,
video: InputsType,
input_shape: Sequence[int] = None,
data_preprocessor: Optional[BaseDataPreprocessor] = None,
) -> Tuple[Dict, torch.Tensor]:
"""Create input for editing processor.

Args:
imgs (str | np.ndarray): Input image(s).
input_shape (Sequence[int] | None): A list of two integer in
(width, height) format specifying input shape. Defaults to `None`.
data_preprocessor (BaseDataPreprocessor): The data preprocessor
of the model. Default to `None`.

Returns:
tuple: (data, img), meta information for the input image and input.
"""
data = self.preprocess(video)
return data, BaseTask.get_tensor_from_input(data)

def forward(
Copy link
Collaborator

@RunningLeon RunningLeon Sep 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this task class should not have forward function. You could add a video_super_resolution_model.py and put the end2endmodel there

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I will change based on your recommendatation

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the end2endmodel in video_super_resolution_model.py. and move the foward function there. Unfortunately, I have met same problem

self,
inputs: torch.Tensor,
data_samples: Optional[List[BaseDataElement]] = None,
mode: str = "predict",
*args,
**kwargs,
) -> list:
"""Run test inference for restorer.

We want forward() to output an image or a evaluation result.
When test_mode is set, the output is evaluation result. Otherwise
it is an image.

Args:
inputs (torch.Tensor): A list contains input image(s)
in [C x H x W] format.
data_samples (List[BaseDataElement], optional): The data samples.
Defaults to None.
mode (str, optional): forward mode, only support `predict`.
*args: Other arguments.
**kwargs: Other key-pair arguments.

Returns:
list | dict: High resolution image or a evaluation results.
"""
outputs = []

if self.extra_parameters["window_size"] > 0: # sliding window framework
data = pad_sequence(inputs, self.extra_parameters["window_size"])
# yapf: disable
for i in range(0, data.size(1) - 2 * (self.extra_parameters['window_size'] // 2)): # noqa
# yapf: enable
data_i = data[:, i:i +
self.extra_parameters['window_size']].to(
self.device)
outputs.append(
self.wrapper.invoke(
data_i.permute(1, 2, 0).contiguous().detach().cpu().numpy()))
else: # recurrent framework
if self.extra_parameters["max_seq_len"] is None:
outputs = self.model(inputs=inputs.to(self.device), mode="tensor").cpu()
else:
for i in range(0, inputs.size(1), self.extra_parameters["max_seq_len"]):
data_i = inputs[:, i : i + self.extra_parameters["max_seq_len"]].to(
self.device
)
outputs.append(
self.wrapper.invoke(
data_i.permute(1, 2, 0).contiguous().detach().cpu().numpy()
)
)

outputs = torch.stack(outputs, 0)
outputs = DataSample(pred_img=outputs.cpu()).split()

for data_sample, pred in zip(data_samples, outputs):
data_sample.output = pred
return data_samples

def visualize(self, preds: PredType, result_out_dir: str = "") -> List[np.ndarray]:
"""Visualize result of a model. mmagic does not have visualizer, so
write visualize function directly.

Args:
model (nn.Module): Input model.
image (str | np.ndarray): Input image to draw predictions on.
result (list | np.ndarray): A list of result.
output_file (str): Output file to save drawn image.
window_name (str): The name of visualization window. Defaults to
an empty string.
show_result (bool): Whether to show result in windows, defaults
to `False`.
"""

file_extension = os.path.splitext(result_out_dir)[1]
mmengine.utils.mkdir_or_exist(osp.dirname(result_out_dir))
prog_bar = ProgressBar(preds.size(1))
if file_extension in VIDEO_EXTENSIONS: # save as video
h, w = preds.shape[-2:]
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
video_writer = cv2.VideoWriter(result_out_dir, fourcc, 25, (w, h))
for i in range(0, preds.size(1)):
img = tensor2img(preds[:, i, :, :, :])
video_writer.write(img.astype(np.uint8))
prog_bar.update()
cv2.destroyAllWindows()
video_writer.release()
else:
for i in range(
self.extra_parameters["start_idx"],
self.extra_parameters["start_idx"] + preds.size(1),
):
output_i = preds[:, i - self.extra_parameters["start_idx"], :, :, :]
output_i = tensor2img(output_i)
filename_tmpl = self.extra_parameters["filename_tmpl"]
save_path_i = f"{result_out_dir}/{filename_tmpl.format(i)}"
mmcv.imwrite(output_i, save_path_i)
prog_bar.update()

logger: MMLogger = MMLogger.get_current_instance()
logger.info(f"Output video is save at {result_out_dir}.")
return []
5 changes: 4 additions & 1 deletion mmdeploy/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Task(AdvancedEnum):
TEXT_RECOGNITION = 'TextRecognition'
SEGMENTATION = 'Segmentation'
SUPER_RESOLUTION = 'SuperResolution'
VIDEO_SUPER_RESOLUTION = 'VideoSuperResolution'
CLASSIFICATION = 'Classification'
OBJECT_DETECTION = 'ObjectDetection'
INSTANCE_SEGMENTATION = 'InstanceSegmentation'
Expand Down Expand Up @@ -83,7 +84,9 @@ class Backend(AdvancedEnum):
Task.SEGMENTATION:
dict(component='ResizeMask', cls_name='Segmentor'),
Task.SUPER_RESOLUTION:
dict(component='TensorToImg', cls_name='Restorer'),
dict(component='TensorToImg', cls_name='Restorer'),
Task.VIDEO_SUPER_RESOLUTION:
dict(component='TensorToImg', cls_name='Restorer'),
Task.TEXT_DETECTION:
dict(component='TextDetHead', cls_name='TextDetector'),
Task.TEXT_RECOGNITION:
Expand Down