diff --git a/src/copaw/app/channels/base.py b/src/copaw/app/channels/base.py index c4e466bc4..2b7d6ec26 100644 --- a/src/copaw/app/channels/base.py +++ b/src/copaw/app/channels/base.py @@ -712,7 +712,7 @@ async def send_content_parts( ): body += f"\n[File: {m.file_url or m.file_id}]" elif t == ContentType.AUDIO and getattr(m, "data", None): - body += "\n[Audio]" + body += f"\n[Audio: {m.data}]" if body.strip(): logger.debug( f"channel send_content_parts: to_handle={to_handle} " diff --git a/src/copaw/app/channels/qq/channel.py b/src/copaw/app/channels/qq/channel.py index 3314139bd..bf40c0916 100644 --- a/src/copaw/app/channels/qq/channel.py +++ b/src/copaw/app/channels/qq/channel.py @@ -12,6 +12,7 @@ from __future__ import annotations import asyncio +import base64 import json import logging import os @@ -70,7 +71,10 @@ DEFAULT_API_BASE = "https://api.sgroup.qq.com" TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken" _URL_PATTERN = re.compile(r"https?://[^\s]+", re.IGNORECASE) -_IMAGE_TAG_PATTERN = re.compile(r"\[Image: (https?://[^\]]+)\]", re.IGNORECASE) +_IMAGE_TAG_PATTERN = re.compile(r"\[Image: ([^\]]+)\]", re.IGNORECASE) +_VIDEO_TAG_PATTERN = re.compile(r"\[Video: ([^\]]+)\]", re.IGNORECASE) +_AUDIO_TAG_PATTERN = re.compile(r"\[Audio: ([^\]]+)\]", re.IGNORECASE) +_FILE_TAG_PATTERN = re.compile(r"\[File: ([^\]]+)\]", re.IGNORECASE) # Rich media paths _DEFAULT_MEDIA_DIR = Path("~/.copaw/media/qq").expanduser() @@ -304,6 +308,30 @@ async def _send_group_message_async( ) +def _strip_file_scheme(url: str) -> str: + """Strip 'file://' scheme prefix, returning a plain filesystem path.""" + return url[len("file://") :] if url.startswith("file://") else url + + +def _infer_media_type(url: str) -> int: + """Infer QQ media file_type integer from URL/path extension. 
+ + QQ supported formats: + 1 image - png/jpg + 2 video - mp4 + 3 audio - silk/wav/mp3/flac + 4 file - other (including unsupported image formats like svg/gif) + """ + ext = Path(_strip_file_scheme(url)).suffix.lower() + if ext in (".jpg", ".jpeg", ".png"): + return 1 + if ext in (".mp4",): + return 2 + if ext in (".silk", ".wav", ".mp3", ".flac"): + return 3 + return 4 + + async def _upload_media_async( session: Any, access_token: str, @@ -336,11 +364,22 @@ async def _upload_media_async( ) return None - body = { - "file_type": media_type, - "url": url, - "srv_send_msg": False, - } + if url.startswith(("http://", "https://")): + body: Dict[str, Any] = { + "file_type": media_type, + "url": url, + "srv_send_msg": False, + } + else: + local_path = _strip_file_scheme(url) + async with aiofiles.open(local_path, "rb") as f: + raw = await f.read() + body = { + "file_type": media_type, + "file_data": base64.b64encode(raw).decode(), + "file_name": Path(local_path).name, + "srv_send_msg": False, + } response = await _api_request_async( session, access_token, @@ -350,7 +389,7 @@ async def _upload_media_async( ) return response.get("file_info") except Exception: - logger.exception(f"Failed to upload media from url: {url}") + logger.exception(f"Failed to upload media: {url}") return None @@ -361,6 +400,7 @@ async def _send_media_message_async( file_info: str, msg_id: Optional[str] = None, message_type: str = "c2c", + filename: Optional[str] = None, ) -> None: """Send rich media message. 
@@ -371,15 +411,18 @@ async def _send_media_message_async( file_info: file info from upload response msg_id: reply message id message_type: "c2c" or "group" + filename: optional filename shown to recipient """ msg_seq = _get_next_msg_seq(msg_id or f"{message_type}_media") - body = { + body: Dict[str, Any] = { "msg_type": 7, "media": { "file_info": file_info, }, "msg_seq": msg_seq, } + if filename: + body["content"] = filename if msg_id: body["msg_id"] = msg_id @@ -675,10 +718,16 @@ async def _dispatch(send_text: str, markdown: bool) -> None: use_markdown=markdown, ) - # Extract and process [Image: ] tags + # Extract and process media tags image_urls = _IMAGE_TAG_PATTERN.findall(text) - # Remove [Image: ] tags from text - clean_text = _IMAGE_TAG_PATTERN.sub("", text).strip() + video_urls = _VIDEO_TAG_PATTERN.findall(text) + audio_urls = _AUDIO_TAG_PATTERN.findall(text) + file_urls = _FILE_TAG_PATTERN.findall(text) + # Remove all media tags from text + clean_text = _IMAGE_TAG_PATTERN.sub("", text) + clean_text = _VIDEO_TAG_PATTERN.sub("", clean_text) + clean_text = _AUDIO_TAG_PATTERN.sub("", clean_text) + clean_text = _FILE_TAG_PATTERN.sub("", clean_text).strip() # Send text content if not empty text_sent = False @@ -711,46 +760,70 @@ async def _dispatch(send_text: str, markdown: bool) -> None: except Exception: logger.exception("send text fallback failed") - # Send images if any - if image_urls and message_type in ("c2c", "group"): - # Determine target openid + # Send all rich media (image/video/audio/file) for c2c and group + all_media: List[tuple[str, List[str]]] = [ + ("image", image_urls), + ("video", video_urls), + ("audio", audio_urls), + ("file", file_urls), + ] + if message_type in ("c2c", "group"): target_openid = ( sender_id if message_type == "c2c" else group_openid ) if target_openid: - for image_url in image_urls: - try: - # Upload image to QQ rich media - file_info = await _upload_media_async( - self._http, + for media_label, urls in all_media: + for 
async def _send_single_media(
    self,
    token: str,
    target_openid: str,
    media_label: str,
    media_url: str,
    msg_id: str | None,
    message_type: str,
) -> None:
    """Upload one media item and deliver it as a QQ rich media message.

    Args:
        token: Access token forwarded to the upload and send helpers.
        target_openid: Recipient openid forwarded to both helpers.
        media_label: Human-readable kind ("image", "video", ...) used only
            in log messages.
        media_url: Remote URL or local path of the media to send.
        msg_id: Message id to reply to, or None.
        message_type: Forwarded to both helpers as ``message_type``.

    Any exception is logged and swallowed, so one failing item does not
    propagate to the caller.
    """
    try:
        qq_file_type = _infer_media_type(media_url)
        uploaded = await _upload_media_async(
            self._http,
            token,
            target_openid,
            media_type=qq_file_type,
            url=media_url,
            message_type=message_type,
        )
        if uploaded:
            # The bare file name is passed along as the media message's
            # filename argument.
            display_name = Path(_strip_file_scheme(media_url)).name
            await _send_media_message_async(
                self._http,
                token,
                target_openid,
                uploaded,
                msg_id,
                message_type=message_type,
                filename=display_name,
            )
            logger.info(f"Successfully sent {media_label}: {media_url}")
        else:
            # Upload produced no file_info: nothing to send for this item.
            logger.warning(
                f"Failed to upload {media_label}, skipping: {media_url}",
            )
    except Exception:
        logger.exception(f"Failed to send {media_label}: {media_url}")
![1](https://img.alicdn.com/imgextra/i1/O1CN013zS1dF1hLal9IM4rc_!!6000000004261-2-tps-4082-2126.png) +### Rich media support + +The QQ channel supports sending images, video, audio, and files (C2C and group chat only). + +**Receiving**: Images sent by users are supported. Receiving video / audio / file is 🚧 (not yet verified). + +**Sending**: Multimodal content in Agent replies is delivered via the [QQ Rich Media API](https://bot.q.qq.com/wiki/develop/api-v2/server-inter/message/send-receive/rich-media.html): + +| Type | Supported formats | Notes | +| ----- | -------------------- | ----------------------------------------------------- | +| Image | png, jpg | Other formats (svg, gif, etc.) fall back to file type | +| Video | mp4 | — | +| Audio | silk, wav, mp3, flac | — | +| File | any | Uploaded as base64 with original filename | + +> **Note**: Rich media sending is only available in C2C and group chat; guild messages are not supported. Both local file paths (`file://` or absolute path) and remote HTTP(S) URLs are accepted. + --- ## WeCom (WeChat Work) @@ -745,7 +762,7 @@ done). **✗** = not supported (not possible on this channel). | Feishu | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Discord | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | | iMessage | ✓ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | -| QQ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | +| QQ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | ✓ | ✓ | ✓ | ✓ | ✓ | | WeCom | ✓ | ✓ | 🚧 | ✓ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | | Telegram | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Mattermost | ✓ | ✓ | 🚧 | 🚧 | ✓ | ✓ | ✓ | 🚧 | 🚧 | ✓ | @@ -763,8 +780,11 @@ Notes: agent; sending real media is 🚧 (currently link-only in reply). - **iMessage**: imsg + database polling; text only; attachments are ✗ (not possible on this channel). -- **QQ**: Receiving attachments as multimodal and sending real media are 🚧; - currently text + link-only. +- **QQ**: Receiving images is supported.
Sending image (png/jpg), video (mp4), + audio (silk/wav/mp3/flac), and files of any format are supported in C2C and + group chat. Other image formats (svg, gif, etc.) fall back to file type + automatically. Guild messages do not support rich media sending. Receiving + video / audio / file is 🚧 (not yet verified). - **Telegram**: Attachments are parsed as files on receive and can be opened in the corresponding format (image / voice / video / file) within the Telegram chat interface. - **WeCom**: WebSocket long connection for receiving; markdown/template_card for sending. Supports text, image, voice, and file receiving; sending media is not supported by the SDK (only text via markdown). - **Matrix**: Receives image, video, audio, and file attachments via `mxc://` media URLs. Sends media by uploading to the homeserver and sending native Matrix media messages (`m.image`, `m.video`, `m.audio`, `m.file`). diff --git a/website/public/docs/channels.zh.md b/website/public/docs/channels.zh.md index fe995623c..fe0bdf39b 100644 --- a/website/public/docs/channels.zh.md +++ b/website/public/docs/channels.zh.md @@ -425,6 +425,23 @@ ![1](https://img.alicdn.com/imgextra/i1/O1CN01kK9tSJ1MHpZmGR2o9_!!6000000001410-2-tps-4082-2126.png) +### 多媒体支持 + +QQ 频道支持图片、视频、语音、文件的发送(仅限单聊 C2C 和群聊场景)。 + +**接收**:接收图片已支持。接收视频/语音/文件尚未完整验证(🚧)。 + +**发送**:Agent 回复中的多媒体内容会通过 [QQ 富媒体 API](https://bot.q.qq.com/wiki/develop/api-v2/server-inter/message/send-receive/rich-media.html) 发送给用户: + +| 类型 | 支持格式 | 说明 | +| ---- | -------------------- | ----------------------------------------- | +| 图片 | png、jpg | 其他格式(svg、gif 等)自动降级为文件发送 | +| 视频 | mp4 | — | +| 语音 | silk、wav、mp3、flac | — | +| 文件 | 任意格式 | 通过 base64 上传,携带原始文件名 | + +> **注意**:富媒体发送仅支持单聊(C2C)和群聊,频道(guild)消息暂不支持。本地文件路径(`file://` 或绝对路径)和远程 HTTP URL 均可直接使用。 + --- ## 企业微信 @@ -732,7 +749,7 @@ Matrix 频道通过 [matrix-nio](https://github.com/poljar/matrix-nio) 库将 Co | 飞书 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Discord | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | | 
iMessage | ✓ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | -| QQ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | +| QQ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | ✓ | ✓ | ✓ | ✓ | ✓ | | 企业微信 | ✓ | ✓ | 🚧 | ✓ | ✓ | ✓ | 🚧 | 🚧 | 🚧 | 🚧 | | Telegram | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Mattermost | ✓ | ✓ | 🚧 | 🚧 | ✓ | ✓ | ✓ | 🚧 | 🚧 | ✓ | @@ -745,7 +762,7 @@ Matrix 频道通过 [matrix-nio](https://github.com/poljar/matrix-nio) 库将 Co - **飞书**:WebSocket 长连接收消息,Open API 发送;支持文本 / 图片 / 文件收发;群聊时在消息 metadata 中带 `feishu_chat_id`、`feishu_message_id` 便于下游去重与群上下文。 - **Discord**:接收时附件会解析为图片 / 视频 / 音频 / 文件并传入 Agent;回复时真实附件发送为 🚧 施工中,当前仅以链接形式附在文本中。 - **iMessage**:基于本地 imsg + 数据库轮询,仅支持文本收发;平台/实现限制,无法支持附件(✗)。 -- **QQ**:接收侧附件解析为多模态、发送侧真实媒体均为 🚧 施工中,当前仅文本 + 链接形式。 +- **QQ**:支持单聊(C2C)和群聊场景下图片(png/jpg)、视频(mp4)、语音(silk/wav/mp3/flac)、文件的发送;接收图片已支持。其他图片格式(svg、gif 等)自动降级为文件类型发送。频道(guild)消息暂不支持富媒体发送。接收视频/语音/文件尚未完整验证(🚧)。 - **Telegram**:接收时附件会解析为文件并传入,可在telegram对话界面以对应格式打开(图片 / 语音 / 视频 / 文件) - **企业微信**:WebSocket 长连接接收,markdown/template_card 发送;支持接收文本、图片、语音和文件;发送媒体暂不支持(SDK 限制,仅支持通过 markdown 发送文本)。 - **Matrix**:接收图片 / 视频 / 音频 / 文件(通过 `mxc://` 媒体 URL);发送时将文件上传至服务器后以原生 Matrix 媒体消息(`m.image`、`m.video`、`m.audio`、`m.file`)发出。