Skip to content

Commit

Permalink
Merge pull request #702 from Ikaros-521/owner
Browse files Browse the repository at this point in the history
图像识别 新增循环定时截图触发功能,方便实现游戏解说、画面讲解等功能,自主触发,丰富直播内容
  • Loading branch information
Ikaros-521 committed Mar 17, 2024
2 parents bd2a1f1 + 9d91cdc commit 643748c
Show file tree
Hide file tree
Showing 11 changed files with 281 additions and 62 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<a href="//github.com/Ikaros-521/AI-Vtuber/network"><img alt="GitHub forks" src="https://img.shields.io/github/forks/Ikaros-521/AI-Vtuber?color=%2300BFFF&style=flat-square"></a>
<a href="//www.python.org"><img src="https://img.shields.io/badge/python-3.10+-blue.svg" alt="python"></a>

`Luna AI` 是一款结合了最先进技术的虚拟AI主播。它的核心是一系列高效的人工智能模型,包括 `ChatterBot、GPT、Claude、langchain、chatglm、text-generation-webui、讯飞星火、智谱AI、谷歌Bard、文心一言、通义星尘、通义千问、千帆大模型、Gemini、Kimi Chat、QAnything、koboldcpp、FastGPT`。这些模型既可以在本地运行,也可以通过云端服务提供支持。
`Luna AI` 是一款结合了最先进技术的虚拟AI主播。它的核心是一系列高效的人工智能模型,包括 `ChatterBot、GPT、Claude、langchain、chatglm、text-generation-webui、讯飞星火、智谱AI、谷歌Bard、文心一言、通义星尘、通义千问、千帆大模型、Gemini、Kimi Chat、QAnything、koboldcpp、FastGPT`。这些模型既可以在本地运行,也可以通过云端服务提供支持。当然,为了让对话照进现实,还结合了多模态模型,包括 `Gemini` 的图像识别能力,获取电脑画面进行分析讲解。

`Luna AI` 的外观由 `Live2D、Vtube Studio、xuniren、UE5 结合 Audio2Face、EasyAIVtuber、数字人视频播放器(Easy-Wav2Lip)` 技术打造,为用户提供了一个生动、互动的虚拟形象。这使得 `Luna AI` 能够在各大直播平台,如 `Bilibili、抖音、快手、微信视频号、斗鱼、YouTube、Twitch 和 TikTok`,进行实时互动直播。当然,它也可以在本地环境中与您进行个性化对话。

Expand Down
4 changes: 3 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -718,12 +718,14 @@
},
"image_recognition": {
"enable": true,
"model": "gemini",
"screenshot_window_title": "任务管理器",
"img_save_path": "./out/图像识别",
"prompt": "请讲解一下图片里的内容",
"screenshot_delay": 3.0,
"loop_screenshot_enable": false,
"loop_screenshot_delay": 10,
"gemini": {
"enable": true,
"model": "gemini-pro-vision",
"api_key": "",
"http_proxy": "http://127.0.0.1:10809",
Expand Down
4 changes: 3 additions & 1 deletion config.json.bak
Original file line number Diff line number Diff line change
Expand Up @@ -718,12 +718,14 @@
},
"image_recognition": {
"enable": true,
"model": "gemini",
"screenshot_window_title": "任务管理器",
"img_save_path": "./out/图像识别",
"prompt": "请讲解一下图片里的内容",
"screenshot_delay": 3.0,
"loop_screenshot_enable": false,
"loop_screenshot_delay": 10,
"gemini": {
"enable": true,
"model": "gemini-pro-vision",
"api_key": "",
"http_proxy": "http://127.0.0.1:10809",
Expand Down
Binary file modified docs/AI Vtuber.xmind
Binary file not shown.
Binary file modified docs/xmind.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
38 changes: 38 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,44 @@ def load_data_list(type):
# 创建闲时任务子线程并启动
threading.Thread(target=lambda: asyncio.run(idle_time_task())).start()


# 图像识别 定时任务
def image_recognition_schedule_task():
    """Run one tick of the recurring image-recognition trigger.

    Builds a minimal message payload and hands it to the global handler,
    which captures a screenshot and runs the vision model on it. Executed
    from the scheduler thread started by run_image_recognition_schedule.
    """
    # Fix: the original `global config, common, my_handle` was unnecessary —
    # nothing is assigned here, and reading module globals (including
    # `platform`, which was not even declared) needs no `global` statement.
    logging.debug("图像识别 定时任务执行中...")

    data = {
        "platform": platform,
        # NOTE(review): username is None for scheduled runs; downstream log
        # lines interpolate it as the literal "None" — confirm intended.
        "username": None,
        "content": ""
    }

    # Plain string: the original used an f-string with no placeholders.
    logging.info("图像识别定时任务触发")

    my_handle.process_data(data, "image_recognition_schedule")


# 启动图像识别 定时任务
def run_image_recognition_schedule():
    """Register the recurring screenshot job and run the scheduler loop.

    The interval (seconds) comes from config key
    image_recognition.loop_screenshot_delay. Intended to run on its own
    worker thread; this function never returns.
    """
    import time  # local import so this block stands alone even if main.py reorders its imports

    try:
        # Fix: `partial(fn)` with no bound arguments was redundant — pass the
        # callable directly.
        schedule.every(config.get("image_recognition", "loop_screenshot_delay")).seconds.do(image_recognition_schedule_task)
    except Exception:
        logging.error(traceback.format_exc())

    while True:
        schedule.run_pending()
        # Fix: this sleep was commented out, so the loop busy-waited and
        # pinned a CPU core. One-second granularity is ample for a
        # multi-second screenshot interval.
        time.sleep(1)


if config.get("image_recognition", "loop_screenshot_enable"):
    # Run the recurring-screenshot scheduler on its own worker thread so the
    # endless schedule loop never blocks the main startup flow.
    image_recognition_schedule_thread = threading.Thread(
        target=run_image_recognition_schedule,
    )
    image_recognition_schedule_thread.start()


logging.info(f"当前平台:{platform}")

if platform == "bilibili":
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,5 @@ gradio==4.16.0
TikTokLive
azure-cognitiveservices-speech
pyjwt
dashscope
dashscope
pygetwindow
3 changes: 2 additions & 1 deletion requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -280,4 +280,5 @@ zhipuai==1.0.7
zstandard==0.19.0
git+https://gitee.com/ikaros-521/WenxinWorkshop-Python-SDK
git+https://gitee.com/ikaros-521/blivedm
dashscope==1.14.1
dashscope==1.14.1
pygetwindow
6 changes: 6 additions & 0 deletions utils/gpt_model/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ def set_model_config(self, model_name, config):
elif model_name in model_classes:
setattr(self, model_name, model_classes[model_name](config))

def set_vision_model_config(self, model_name, config):
    """Instantiate a vision-capable model by name and attach it to self.

    Args:
        model_name (str): key of the vision model (currently only "gemini").
        config (dict): model-specific configuration passed to the constructor.

    Raises:
        ValueError: if model_name is not a supported vision model. (The
            original raised a bare KeyError; sibling set_model_config guards
            membership, so mirror that with an explicit check.)
    """
    # Guard before touching the registry so an unknown name fails with a
    # clear message instead of a KeyError deep in the dict lookup.
    supported = ("gemini",)
    if model_name not in supported:
        raise ValueError(f"不支持的图像识别模型: {model_name}")

    # Registry of vision model constructors; extend here when adding models
    # (and add the name to `supported` above).
    model_classes = {
        "gemini": Gemini,
    }

    setattr(self, model_name, model_classes[model_name](config))

def get(self, name):
logging.info("GPT_MODEL: 进入get方法")
Expand Down
163 changes: 132 additions & 31 deletions utils/my_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ def __init__(self, config_path):
self.qanything = None
self.koboldcpp = None

self.image_recognition_model = None

self.chat_type_list = ["chatgpt", "claude", "claude2", "chatglm", "qwen", "chat_with_file", "text_generation_webui", \
"sparkdesk", "langchain_chatglm", "langchain_chatchat", "zhipu", "bard", "yiyan", "tongyi", \
"tongyixingchen", "my_qianfan", "my_wenxinworkshop", "gemini", "qanything", "koboldcpp"]
Expand Down Expand Up @@ -173,6 +175,9 @@ def get_chat_model(self, chat_type, config):
GPT_MODEL.set_model_config(chat_type, config.get(chat_type))
self.__dict__[chat_type] = GPT_MODEL.get(chat_type)

def get_vision_model(self, chat_type, config):
    # Build the vision model through the shared GPT_MODEL registry, then
    # cache the instance for image-recognition calls (used by llm_handle
    # when invoked with type="vision").
    GPT_MODEL.set_vision_model_config(chat_type, config)
    self.image_recognition_model = GPT_MODEL.get(chat_type)

def handle_chat_type(self):
chat_type = My_handle.config.get("chat_type")
Expand Down Expand Up @@ -348,6 +353,7 @@ def audio_synthesis_handle(self, data_json):
follow 用户关注
idle_time_task 闲时任务
abnormal_alarm 异常报警
image_recognition_schedule 图像识别定时任务
"""
# 如果虚拟身体-Unity,则发送数据到中转站
Expand Down Expand Up @@ -1105,49 +1111,60 @@ def tuning_handle(self, data_json):


# LLM处理
def llm_handle(self, chat_type, data):
def llm_handle(self, chat_type, data, type="chat"):
"""LLM统一处理
Args:
chat_type (str): 聊天类型
data (str): dict,含用户名和内容
type (str): 调用的类型(chat / vision)
Returns:
str: LLM返回的结果
"""
# 使用 getattr 来动态获取属性
if getattr(self, chat_type, None) is None:
self.get_chat_model(chat_type, My_handle.config)
# setattr(self, chat_type, GPT_MODEL.get(chat_type))


resp_content = None
# print(f'''data: {data}''')

# 新增LLM需要在这里追加
chat_model_methods = {
"chatgpt": lambda: self.chatgpt.get_gpt_resp(data["username"], data["content"]),
"claude": lambda: self.claude.get_resp(data["content"]),
"claude2": lambda: self.claude2.get_resp(data["content"]),
"chatterbot": lambda: self.bot.get_response(data["content"]).text,
"chatglm": lambda: self.chatglm.get_resp(data["content"]),
"qwen": lambda: self.qwen.get_resp(data["username"], data["content"]),
"chat_with_file": lambda: self.chat_with_file.get_model_resp(data["content"]),
"text_generation_webui": lambda: self.text_generation_webui.get_resp(data["content"]),
"sparkdesk": lambda: self.sparkdesk.get_resp(data["content"]),
"langchain_chatglm": lambda: self.langchain_chatglm.get_resp(data["content"]),
"langchain_chatchat": lambda: self.langchain_chatchat.get_resp(data["content"]),
"zhipu": lambda: self.zhipu.get_resp(data["content"]),
"bard": lambda: self.bard_api.get_resp(data["content"]),
"yiyan": lambda: self.yiyan.get_resp(data["content"]),
"tongyi": lambda: self.tongyi.get_resp(data["content"]),
"tongyixingchen": lambda: self.tongyixingchen.get_resp(data["content"]),
"my_qianfan": lambda: self.my_qianfan.get_resp(data["content"]),
"my_wenxinworkshop": lambda: self.my_wenxinworkshop.get_resp(data["content"]),
"gemini": lambda: self.gemini.get_resp(data["content"]),
"qanything": lambda: self.qanything.get_resp({"prompt": data["content"]}),
"koboldcpp": lambda: self.koboldcpp.get_resp({"prompt": data["content"]}),
"reread": lambda: data["content"]
}
if type == "chat":
# 使用 getattr 来动态获取属性
if getattr(self, chat_type, None) is None:
self.get_chat_model(chat_type, My_handle.config)
# setattr(self, chat_type, GPT_MODEL.get(chat_type))

# 新增LLM需要在这里追加
chat_model_methods = {
"chatgpt": lambda: self.chatgpt.get_gpt_resp(data["username"], data["content"]),
"claude": lambda: self.claude.get_resp(data["content"]),
"claude2": lambda: self.claude2.get_resp(data["content"]),
"chatterbot": lambda: self.bot.get_response(data["content"]).text,
"chatglm": lambda: self.chatglm.get_resp(data["content"]),
"qwen": lambda: self.qwen.get_resp(data["username"], data["content"]),
"chat_with_file": lambda: self.chat_with_file.get_model_resp(data["content"]),
"text_generation_webui": lambda: self.text_generation_webui.get_resp(data["content"]),
"sparkdesk": lambda: self.sparkdesk.get_resp(data["content"]),
"langchain_chatglm": lambda: self.langchain_chatglm.get_resp(data["content"]),
"langchain_chatchat": lambda: self.langchain_chatchat.get_resp(data["content"]),
"zhipu": lambda: self.zhipu.get_resp(data["content"]),
"bard": lambda: self.bard_api.get_resp(data["content"]),
"yiyan": lambda: self.yiyan.get_resp(data["content"]),
"tongyi": lambda: self.tongyi.get_resp(data["content"]),
"tongyixingchen": lambda: self.tongyixingchen.get_resp(data["content"]),
"my_qianfan": lambda: self.my_qianfan.get_resp(data["content"]),
"my_wenxinworkshop": lambda: self.my_wenxinworkshop.get_resp(data["content"]),
"gemini": lambda: self.gemini.get_resp(data["content"]),
"qanything": lambda: self.qanything.get_resp({"prompt": data["content"]}),
"koboldcpp": lambda: self.koboldcpp.get_resp({"prompt": data["content"]}),
"reread": lambda: data["content"]
}
elif type == "vision":
# 使用 getattr 来动态获取属性
if getattr(self, chat_type, None) is None:
self.get_vision_model(chat_type, My_handle.config.get("image_recognition", chat_type))
# 新增LLM需要在这里追加
chat_model_methods = {
"gemini": lambda: self.image_recognition_model.get_resp_with_img(data["content"], data["img_data"]),
}

# 使用字典映射的方式来获取响应内容
resp_content = chat_model_methods.get(chat_type, lambda: data["content"])()
Expand Down Expand Up @@ -2263,6 +2280,86 @@ def idle_time_task_handle(self, data):
logging.error(traceback.format_exc())


# 图像识别 定时任务
def image_recognition_schedule_handle(self, data):
    """Handle one cycle of the scheduled image-recognition task.

    Pipeline: screenshot -> vision LLM -> prohibition filter ->
    comment-log write -> optional key-mapping trigger -> audio synthesis.
    Any exception is logged and swallowed so the scheduler thread survives.

    Args:
        data (dict): carries "username" (None when triggered by the
            scheduler; log lines then render the literal "None" —
            NOTE(review): confirm that is intended).
    """
    try:
        username = data["username"]
        # The prompt sent to the vision model comes from config, not from data.
        content = My_handle.config.get("image_recognition", "prompt")

        # Capture a screenshot of the window matching the configured title.
        screenshot_path = My_handle.common.capture_window_by_title(My_handle.config.get("image_recognition", "img_save_path"), My_handle.config.get("image_recognition", "screenshot_window_title"))

        # Common payload for the unified LLM entry point.
        data_json = {
            "username": username,
            "content": content,
            "img_data": screenshot_path,
        }

        # Call the unified LLM interface in vision mode to get the reply.
        resp_content = self.llm_handle(My_handle.config.get("image_recognition", "model"), data_json, type="vision")

        if resp_content:
            logging.info(f"[AI回复{username}]:{resp_content}")
        else:
            logging.warning(f'警告:{My_handle.config.get("image_recognition", "model")}无返回')
            resp_content = ""

        """
        双重过滤,为您保驾护航
        """
        # Flatten newlines so the reply reads as one continuous utterance.
        resp_content = resp_content.replace('\n', '。')

        # Screen the reply for prohibited content; None means "drop it".
        resp_content = self.prohibitions_handle(resp_content)
        if resp_content is None:
            return

        # logger.info("resp_content=" + resp_content)

        # Record the AI reply in the comment log file.
        with open(self.comment_file_path, "r+", encoding="utf-8") as f:
            tmp_content = f.read()
            # Seek back to the start so the newest entry ends up on top
            # (live overlays read this file top-down).
            f.seek(0, 0)
            # Admittedly inefficient: this rewrites the whole file each call.
            # Cap line length so live-danmaku overlays don't overflow.
            max_length = 20
            resp_content_substrings = [resp_content[i:i + max_length] for i in range(0, len(resp_content), max_length)]
            resp_content_joined = '\n'.join(resp_content_substrings)

            # Write whichever log format the config selects.
            if My_handle.config.get("comment_log_type") == "问答":
                f.write(f"[{username} 提问]:\n{content}\n[AI回复{username}]:{resp_content_joined}\n" + tmp_content)
            elif My_handle.config.get("comment_log_type") == "问题":
                f.write(f"[{username} 提问]:\n{content}\n" + tmp_content)
            elif My_handle.config.get("comment_log_type") == "回答":
                f.write(f"[AI回复{username}]:\n{resp_content_joined}\n" + tmp_content)

        # Key-mapping trigger check (reply-driven mappings).
        if My_handle.config.get("key_mapping", "type") == "回复" or My_handle.config.get("key_mapping", "type") == "弹幕+回复":
            # Substitute the reply into the payload for the mapping handler.
            data["content"] = resp_content
            # NOTE(review): the original comment says later features should be
            # skipped when a mapping fires, but the body is `pass` — nothing
            # is actually skipped here; confirm intent.
            if self.key_mapping_handle("回复", data):
                pass

        # Payload consumed by the audio-synthesis pipeline.
        message = {
            "type": "image_recognition_schedule",
            "tts_type": My_handle.config.get("audio_synthesis_type"),
            "data": My_handle.config.get(My_handle.config.get("audio_synthesis_type")),
            "config": My_handle.config.get("filter"),
            "username": username,
            "content": resp_content
        }


        self.audio_synthesis_handle(message)
    except Exception as e:
        logging.error(traceback.format_exc())


"""
数据丢弃部分
增加新的处理事件时,需要进行这块部分的内容追加
Expand Down Expand Up @@ -2320,6 +2417,10 @@ def process_last_data(self, timer_flag):
for data in timer.last_data:
self.idle_time_task_handle(data)
#self.idle_time_task_handle(timer.last_data)
elif timer_flag == "image_recognition_schedule":
# 定时任务处理
for data in timer.last_data:
self.image_recognition_schedule_handle(data)

My_handle.is_handleing = 0

Expand Down
Loading

0 comments on commit 643748c

Please sign in to comment.