Skip to content

Commit 0667926

Browse files
committed
1、优化图片&视频检索函数. 2、增加视频剪辑功能
1 parent 5524b72 commit 0667926

37 files changed

+135922
-261
lines changed

.gitignore

+8-1
Original file line numberDiff line numberDiff line change
@@ -166,19 +166,26 @@ cython_debug/
166166
# VSCODE
167167
.vscode/
168168

169+
# 模型目录
170+
/bert
169171

170-
# 视频输出文件
172+
# 项目缓存目录&输出目录
171173
/tmp
174+
/cache
175+
/output
172176

173177
# vs数据库目录
174178
/data
175179

176180
# 日志文件
177181
*.log
182+
info.log.*
183+
error.log.*
178184

179185
*.ipynb
180186

181187
test.py
188+
voc_req.txt
182189

183190
# 配置文件
184191
conf/config.json

README.md

+34-5
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,48 @@
11
# TEV
2-
基于给定文本,搜索给定目录下的图片、视频文件,找到相近的图片或视频片段。
2+
基于给定文本,搜索给定目录下的图片、视频文件,找到相近的图片或视频片段,进而将检索到的视频片段或图片剪辑成视频。
3+
- 视频片段&图片检索: 当指定目录时,加载指定目录视频|图片, 并在此目录下进行素材检索;如果未指定视频|图片目录,则在向量库中执行全文检索。
4+
- 视频剪辑:
5+
- 文本(脚本)与视频相关度较低时(threshold<0.5),检索不到视频。可以尝试降低相关度阈值,或者提供更多相关性视频素材。
6+
- 文本(脚本)与图片之间未做相关度阈值检测,直接返回 top-N。
7+
- 当与文本匹配的视频片段或者图片不足时,系统使用空白(黑色)背景作为填充帧,补足时长。
38

4-
## install
5-
建议python> 3.8 版本。
6-
- 安装torch,详情参考[pytorch](https://pytorch.org/)官方
9+
## install(安装)
10+
### 安装miniconda 或者其他python管理工具
11+
12+
13+
14+
### 创建python(建议3.9版本)环境
15+
16+
```shell
17+
conda create -n tev python=3.9
718
```
19+
20+
### 安装torch
21+
22+
详情参考[pytorch](https://pytorch.org/)官方。
23+
24+
```shell
825
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
926
```
1027

11-
- 安装依赖库
28+
### 安装依赖库
1229

1330
```shell
1431
pip install -r requirements.txt
1532
```
1633

34+
## configuration(配置)
35+
36+
### bert 模型
37+
38+
### cn_clip 模型
39+
40+
### voc 模型
41+
42+
### config.json
43+
44+
- openi: 配置token
45+
1746
## using
1847

1948
![主界面](./images/main.png)

common/conf.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
"clip_model_name": "ViT-H-14", # chinese clip model: 选择的模型
1010
"download_root": "./", # 模型默认下载目录,如果目录没有模型文件,则下载,有的话直接加载
11-
"device":"cuda", #
1211

1312
# 文本
1413
"sentence_size": 256, # 文本分段最大长度
@@ -30,6 +29,17 @@
3029
"image_vs":"./data/image_vs/", # 图片存储地址
3130
"video_vs":"./data/video_vs/", # 视频存储地址
3231

32+
# voc 模块配置文件
33+
"voc_conf":"./conf/voc.conf",
34+
35+
"cache_path":"./cache", # 中间视频、音频缓存目录,每次工作前应清空缓存
36+
"output_path": "./output",
37+
"bkg": "D:\\ai\\video\\tev\\src\\image\\bkg.jpg", # 黑色背景图片,用于生成空白视频
38+
39+
# 模型镜像源,默认huggingface,使用openi镜像源需指定openi_token
40+
"mirror": "openi",
41+
"openi_token": "e6ea9886c4b70aaf4b62d6efe444fa574744b5ac", # openi token
42+
3343
"tmp_dir":'./tmp/' #
3444
}
3545

@@ -84,6 +94,12 @@ def read_file(path):
8494
@functools.lru_cache
def get_conf():
    """Load the project configuration and attach the voc hyper-parameters.

    The result is memoized by ``functools.lru_cache``, so repeated calls
    return the same object without calling ``load_config()`` again.

    Returns:
        The object produced by ``load_config()``. When it does not already
        contain an ``hps`` entry, ``hps`` is set to the value returned by
        ``get_hparams_from_file(conf.voc_conf)``.
    """
    settings = load_config()

    # Hyper-parameters already present: hand the config back untouched.
    if 'hps' in settings:
        return settings

    # Imported at call time (not at module import time), as the original did.
    from voc.utils import get_hparams_from_file

    settings.hps = get_hparams_from_file(settings.voc_conf)
    return settings
88104

89105
if __name__ == '__main__':

conf/voc.json

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
{
2+
"train": {
3+
"log_interval": 200,
4+
"eval_interval": 1000,
5+
"seed": 42,
6+
"epochs": 1000,
7+
"learning_rate": 0.0002,
8+
"betas": [
9+
0.8,
10+
0.99
11+
],
12+
"eps": 1e-09,
13+
"batch_size": 4,
14+
"fp16_run": false,
15+
"lr_decay": 0.99995,
16+
"segment_size": 16384,
17+
"init_lr_ratio": 1,
18+
"warmup_epochs": 0,
19+
"c_mel": 45,
20+
"c_kl": 1.0,
21+
"skip_optimizer": true
22+
},
23+
"data": {
24+
"training_files": "filelists/train.list",
25+
"validation_files": "filelists/val.list",
26+
"max_wav_value": 32768.0,
27+
"sampling_rate": 44100,
28+
"filter_length": 2048,
29+
"hop_length": 512,
30+
"win_length": 2048,
31+
"n_mel_channels": 128,
32+
"mel_fmin": 0.0,
33+
"mel_fmax": null,
34+
"add_blank": true,
35+
"n_speakers": 700,
36+
"cleaned_text": true,
37+
"spk2id": {
38+
"fangqi": 0,
39+
"zhongxiang": 1
40+
}
41+
},
42+
"model": {
43+
"use_spk_conditioned_encoder": true,
44+
"use_noise_scaled_mas": true,
45+
"use_mel_posterior_encoder": false,
46+
"use_duration_discriminator": true,
47+
"inter_channels": 192,
48+
"hidden_channels": 192,
49+
"filter_channels": 768,
50+
"n_heads": 2,
51+
"n_layers": 6,
52+
"kernel_size": 3,
53+
"p_dropout": 0.1,
54+
"resblock": "1",
55+
"resblock_kernel_sizes": [
56+
3,
57+
7,
58+
11
59+
],
60+
"resblock_dilation_sizes": [
61+
[
62+
1,
63+
3,
64+
5
65+
],
66+
[
67+
1,
68+
3,
69+
5
70+
],
71+
[
72+
1,
73+
3,
74+
5
75+
]
76+
],
77+
"upsample_rates": [
78+
8,
79+
8,
80+
2,
81+
2,
82+
2
83+
],
84+
"upsample_initial_channel": 512,
85+
"upsample_kernel_sizes": [
86+
16,
87+
16,
88+
8,
89+
2,
90+
2
91+
],
92+
"n_layers_q": 3,
93+
"use_spectral_norm": false,
94+
"gin_channels": 256
95+
},
96+
"version": "2.0"
97+
}

db/dao.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,19 @@
11
import functools
2-
3-
from docarray.index import HnswDocumentIndex
42

5-
# 日志
63
from common.log import get_logger
74
logger = get_logger()
85

96
from common.conf import get_conf
107
conf = get_conf()
118

129
from document import ImageFeature
13-
14-
db_images = HnswDocumentIndex[ImageFeature](
15-
work_dir=conf.image_vs
16-
)
1710

1811
@functools.lru_cache
1912
def load_db():
2013
'''
2114
加载视频 & 图片数据库
2215
'''
16+
from docarray.index import HnswDocumentIndex
2317
db_images = HnswDocumentIndex[ImageFeature](
2418
work_dir=conf.image_vs
2519
)

document/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55

66
class ImageFeature(BaseDoc):
    """Record with a uid, a file url, its folder and a 1024-element embedding."""

    uid: int
    # The file this record refers to.
    url: str
    # Folder of the file. Stored as a separate field because the db does not
    # support the $regex operator; it is used for finer-grained filtering.
    folder: str
    embedding: NdArray[1024]

requirements.txt

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1-
docarray>=0.39.1
21
gradio>=4.8.0
32
cn_clip
3+
opencv-python
4+
numpy
5+
ffmpeg-python==0.2.0
6+
docarray[hnswlib]>=0.39.1
7+
8+
huggingface_hub
9+
numba
10+
cn2an
11+
pypinyin
12+
jieba
13+
g2p_en
14+
num2words
15+
transforms
16+
openi
17+
transformers
18+
pyopenjtalk
19+
jaconv
20+
sentencepiece

text/__init__.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from text.symbols import *
2+
3+
# Lookup table mapping each entry of `symbols` to its index in that sequence.
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
4+
5+
6+
def cleaned_text_to_sequence(cleaned_text, tones, language):
    """Convert cleaned symbols, tones and a language key into ID lists.

    Args:
        cleaned_text: iterable of symbols; each must be a key of
            ``_symbol_to_id`` (a ``KeyError`` is raised otherwise).
        tones: iterable of tone values; the language's tone start offset is
            added to each one.
        language: key into ``language_tone_start_map`` and
            ``language_id_map``.

    Returns:
        A ``(phones, tones, lang_ids)`` tuple of three lists: the symbol IDs,
        the offset tones, and the language ID repeated once per phone.
    """
    phone_ids = [_symbol_to_id[sym] for sym in cleaned_text]

    tone_offset = language_tone_start_map[language]
    shifted_tones = [tone + tone_offset for tone in tones]

    # One language ID per phone, so the result has the same length as phone_ids.
    lang_ids = [language_id_map[language]] * len(phone_ids)
    return phone_ids, shifted_tones, lang_ids
19+
20+
21+
def get_bert(norm_text, word2ph, language, device):
    """Compute BERT features using the backend registered for ``language``.

    Args:
        norm_text: forwarded unchanged to the selected backend.
        word2ph: forwarded unchanged to the selected backend.
        language: ``"ZH"``, ``"EN"`` or ``"JP"``; any other key raises
            ``KeyError``.
        device: forwarded unchanged to the selected backend.

    Returns:
        Whatever the selected ``get_bert_feature`` function returns.
    """
    # Backends are imported at call time; all three are imported regardless
    # of which language is requested.
    from .chinese_bert import get_bert_feature as _zh_feature
    from .english_bert_mock import get_bert_feature as _en_feature
    from .japanese_bert import get_bert_feature as _jp_feature

    backends = {"ZH": _zh_feature, "EN": _en_feature, "JP": _jp_feature}
    extract = backends[language]
    return extract(norm_text, word2ph, device)
29+
30+
from common.conf import get_conf
31+
# Module-level handle on the project configuration; read by check_bert_models().
conf = get_conf()
32+
33+
def check_bert_models():
    """Check every BERT model listed in ``./bert/bert_models.json``.

    When the configured mirror is ``"openi"``, logs in to openi first,
    passing the configured token when one is set. Then reads the model
    manifest and calls ``_check_bert`` for each entry with its ``repo_id``,
    its ``files`` list and the local directory ``./bert/<entry name>``.
    """
    import json
    from pathlib import Path

    from .bert_utils import _check_bert

    if conf.mirror.lower() == "openi":
        import openi

        login_kwargs = {"token": conf.openi_token} if conf.openi_token else {}
        openi.login(**login_kwargs)

    # Read the manifest as UTF-8 explicitly: without ``encoding`` Python uses
    # the locale's preferred encoding, which is not UTF-8 on every platform.
    with open("./bert/bert_models.json", "r", encoding="utf-8") as fp:
        models = json.load(fp)

    for name, spec in models.items():
        local_path = Path("./bert").joinpath(name)
        _check_bert(spec["repo_id"], spec["files"], local_path)

text/bert_utils.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from pathlib import Path
2+
3+
from huggingface_hub import hf_hub_download
4+
5+
from common.conf import get_conf
6+
# Project configuration; its `mirror` setting is read into MIRROR below.
conf = get_conf()
7+
8+
9+
# Mirror name taken from the config; _check_bert compares it (lower-cased) with "openi".
MIRROR: str = conf.mirror
10+
11+
12+
def _check_bert(repo_id, files, local_path):
    """Trigger a download for each of ``files`` missing under ``local_path``.

    Args:
        repo_id: repository id; with the "openi" mirror only its last
            ``/``-separated component is used as the model name.
        files: iterable of file names expected inside ``local_path``.
        local_path: directory in which the files are looked up.

    For every missing file, the "openi" mirror calls
    ``openi.model.download_model`` with target directory ``./bert``; any
    other mirror calls ``hf_hub_download`` for that file into ``local_path``.
    """
    for filename in files:
        # Already on disk: nothing to fetch for this file.
        if (Path(local_path) / filename).exists():
            continue

        if MIRROR.lower() != "openi":
            hf_hub_download(
                repo_id,
                filename,
                local_dir=local_path,
                local_dir_use_symlinks=False,
            )
            continue

        import openi

        model_name = repo_id.split("/")[-1]
        openi.model.download_model(
            "Stardust_minus/Bert-VITS2", model_name, "./bert"
        )

0 commit comments

Comments
 (0)