diff --git a/README.md b/README.md index 516d635..179e553 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ [![NPM Downloads](https://img.shields.io/npm/dw/exif-ai)](https://www.npmjs.com/package/exif-ai) +_Read this in other languages:_ +[_简体中文_](README.zh-CN.md), + ## About _Exif AI_ is a powerful CLI tool designed to write AI-generated image descriptions and/or tags directly into the metadata of image files. This tool leverages advanced AI models to analyze image content and generate descriptive metadata, enhancing the accessibility and searchability of your images. @@ -30,11 +33,11 @@ exif-ai -i example.jpeg -a ollama Required options: -- `-a, --api-provider ` Name of the AI provider to use (`ollama` for Ollama, `zhipu` for ZhipuAI, `google` for Google Gemini). +- `-a, --api-provider `: Name of the AI provider to use (`ollama` for Ollama, `zhipu` for ZhipuAI, `google` for Google Gemini). Optional options: -- `-T, --tasks `: List of tasks to perform ('description' and/or 'tag'). +- `-T, --tasks `: List of tasks to perform ('description', 'tag', 'face'). - `-i, --input ` Path to the input image file. - `-p, --description-prompt `: Custom prompt for the AI provider to generate description. Defaults to a generic image description prompt. - `--tag-prompt `: Custom prompt for the AI provider to generate tags. Defaults to a generic image tagging prompt. @@ -49,6 +52,7 @@ Optional options: - `--avoid-overwrite`: Avoid overwriting if EXIF tags already exist in the file. - `--ext `: File extensions to watch. Only files with this extensions will be processed. - `--concurrency `: The numbers of files to process concurrently in watch mode. +- `--face-group-ids ` List of face group IDs to use for face recognition. 
Example usage: @@ -64,24 +68,25 @@ To use Exif AI as a library in your project, import it and use the provided func import { execute } from "exif-ai"; const options = { - path: "example.jpeg", // Path to the input image file - provider: "ollama", // AI provider to use (e.g., 'ollama', 'zhipu', 'google') - model: "moondream", // Optional: Specific AI model to use (if supported by the provider) + tasks: ["description"], // List of tasks to perform + path: "example.jpg", // Path to the input image file + provider: "ollama", // Name of the AI provider to use descriptionTags: [ "XPComment", "Description", "ImageDescription", "Caption-Abstract", - ], // Optional: EXIF tags to write the description to - tagTags: ["Subject", "TagsList", "Keywords"], // Optional: EXIF tags to write the tags to - descriptionPrompt: "请使用中文描述这个图片。", // Optional: Custom prompt for the AI provider to generate description - tagPrompt: - "Tag this image based on subject, object, event, place. Output format: , , , , , ..., ", // Optional: Custom prompt for the AI provider to generate tags - verbose: false, // Optional: Enable verbose logging for debugging - dry: false, // Optional: Perform a dry run without writing to the file - writeArgs: [], // Optional: Additional arguments for EXIF write task - providerArgs: [], // Optional: Additional arguments for the AI provider - avoidOverwrite: true, // Optional: Avoid overwriting existing tags + ], // List of EXIF tags to write the description to + tagTags: ["Subject", "TagsList", "Keywords"], // List EXIF tags to write the tags to + descriptionPrompt: "Describe this landscape photo.", // Custom prompt for the AI provider to generate description + tagPrompt: "Tag this image based on subject, object, event, place.", // Custom prompt for the AI provider to generate tags + verbose: false, // Enable verbose output for debugging + dry: false, // Preview AI-generated content without writing to the image file + writeArgs: [], // Additional ExifTool arguments for 
writing metadata + providerArgs: [], // Additional arguments for the AI provider + avoidOverwrite: false, // Avoid overwriting if EXIF tags already exist in the file + doNotEndExifTool: false, // Do not end ExifTool process after writing metadata + faceGroupIds: [], // List of face group IDs to use for face recognition }; execute(options) @@ -101,9 +106,34 @@ To install Exif AI globally, use the following command: npm install -g exif-ai ``` +## Tasks + +### Description + +The `description` task generates a description of the image using the AI provider. The description is written to the specified EXIF tags defined in `descriptionTags`. + +### Tag + +The `tag` task generates tags for the image using the AI provider. The tags are written to the specified EXIF tags defined in `tagTags`. + +### Face Recognition + +The `face` task performs face recognition on the image using the [Tencent Cloud API](https://cloud.tencent.com/document/api/867/44994). The face recognition results are written to the specified EXIF tags defined in `tagTags`. + +Currently, the `face` task requires the user to enable the face recognition service on Tencent Cloud and to set the Tencent Cloud API Secret ID and Secret Key as environment variables. + +```bash +export TENCENTCLOUD_SECRET_ID=your_tencentcloud_secret_id +export TENCENTCLOUD_SECRET_KEY=your_tencentcloud_secret_key +``` + +### Note + +Please ensure that you securely manage your API keys. Do not expose them in public repositories or other public forums. + ## API Providers -Exif AI relies on API providers to generate image descriptions. Currently, we support three providers: ZhipuAI, Ollama and Google Gemini. +Exif AI relies on API providers to generate image descriptions and tags. Currently, we support three providers: ZhipuAI, Ollama and Google Gemini. ### Supported Providers @@ -137,6 +167,16 @@ export API_KEY=your_google_api_key Ollama runs locally and does not require an API key. 
Ensure that Ollama is installed and properly configured on your machine. Refer to the [Ollama GitHub repository](https://github.com/ollama/ollama) for installation and setup instructions. +To use a remote Ollama service, you can define the URL in `providerArgs`: + +```bash +exif-ai --provider-args "http://ollama.example.com:8080" -a ollama -i image.jpg +``` + +```js +providerArgs: ["http://ollama.example.com:8080"], +``` + ## Develop ### Prerequisites @@ -151,7 +191,7 @@ First, clone the Exif AI repository to your local machine: ```bash git clone https://github.com/tychenjiajun/exif-ai.git cd exif-ai -```` +``` ### Install Dependencies diff --git a/README.zh-CN.md b/README.zh-CN.md new file mode 100644 index 0000000..84afe45 --- /dev/null +++ b/README.zh-CN.md @@ -0,0 +1,205 @@ +# Exif AI + +[![NPM Downloads](https://img.shields.io/npm/dw/exif-ai)](https://www.npmjs.com/package/exif-ai) + +## 关于 + +_Exif AI_ 是一个强大的命令行工具,旨在直接将AI生成的图像描述和/或标签写入图像文件的元数据。此工具利用先进的AI模型来分析图像内容并生成描述性元数据,从而提高图像的可用性和可搜索性。 + +## 使用示例 + +### 命令行 + +#### 免安装 + +如果您不想全局安装 Exif AI,可以使用 npx 命令直接运行。 + +```bash +npx exif-ai -i example.jpeg -a ollama +``` + +#### 安装版 + +如果您已经全局安装了 Exif AI,则可以直接从命令行运行它。 + +```bash +exif-ai -i example.jpeg -a ollama +``` + +#### 选项 + +必选项: + +- `-a, --api-provider `: 要使用的AI供应商名称(`ollama`代表Ollama,`zhipu`代表ZhipuAI,`google`代表Google Gemini) + +可选项: + +- `-T, --tasks `: 要执行的任务列表(`description`代表生成描述,`tags`代表生成标签,`face`代表面部识别)。 +- `-i, --input ` : 要处理的图像文件。 +- `-p, --description-prompt `: 自定义AI供应商生成描述的提示。默认为通用的图像描述提示。 +- `--tag-prompt `: 自定义AI供应商生成标签的提示。默认为通用的图像标签提示。 +- `-m, --model `: 指定要使用的AI模型,如果AI供应商支持。 +- `-t, --description-tags `: 要写入描述的EXIF标签列表。默认为常见的描述标签。 +- `--tag-tags `: 要写入标签的EXIF标签列表。默认为常见的标签。 +- `-v, --verbose`: 启用调试输出。 +- `-d, --dry-run`: 预览AI生成的内容而不写入图像文件。 +- `--exif-tool-write-args `: 用于写入元数据的ExifTool的额外参数。 +- `--provider-args `: AI供应商的额外参数。 +- `-w, --watch `: 监视要处理的目录中的新文件。 +- `--avoid-overwrite`: 如果文件中已经存在EXIF标签,则避免覆盖。 +- `--ext `: 要监视的文件扩展名。只有具有这些扩展名的文件才会被处理。 +- 
`--concurrency `: 在监视模式下同时处理的文件数量。 +- `--face-group-ids `: 要用于面部识别的面部组ID列表。 + +示例用法: + +```bash +exif-ai -i example.jpg -a ollama -p "描述这张图片" +``` + +### 作为库使用 + +要在您的项目中将Exif AI用作库,请导入它并使用提供的函数: + +```typescript +import { execute } from "exif-ai"; + +const options = { + tasks: ["description"], // 要执行的任务列表 + input: "example.jpg", // 要处理的图像文件 + provider: "ollama", // 要使用的AI供应商名称 + descriptionTags: ["Description"], // 要写入描述的EXIF标签列表 + tagTags: ["TagsList"], // 要写入标签的EXIF标签列表 + descriptionPrompt: "描述这张图片", // 自定义AI供应商生成描述的提示 + tagPrompt: "根据主题、对象、事件、地点标记这张图片", // 自定义AI供应商生成标签的提示 + verbose: true, // 启用调试输出 + dry: false, // 预览AI生成的内容而不写入图像文件 + writeArgs: [], // 用于写入元数据的ExifTool的额外参数 + providerArgs: [], // AI供应商的额外参数 + avoidOverwrite: false, // 如果文件中已经存在EXIF标签,则避免覆盖 + doNotEndExifTool: false, // 不在写入元数据后结束ExifTool进程 +}; + +execute(options) + .then((result) => { + console.log(result); // 处理结果 + }) + .catch((error) => { + console.error(error); // 处理错误 + }); +``` + +## 安装 + +要全局安装 Exif AI,请使用以下命令: + +```bash +npm install -g exif-ai +``` + +## 任务 + +### 生成描述 + +`description`任务使用AI供应商生成图像的描述。该描述将被写入在`descriptionTags`中。 + +### 生成标签 + +`tags`任务使用AI供应商生成图像的标签。标签将被写入在`tagTags`中。 + +### 面部识别 + +`face`任务使用腾讯云API在图像上执行面部识别。面部识别结果将写入在`tagTags`中定义的EXIF标签。 + +目前,`face`任务需要腾讯云API密钥,并且需要腾讯云人脸识别服务。如果您没有腾讯云账户,请先注册一个账户并启用人脸识别服务。 + +```bash +export TENCENTCLOUD_SECRET_ID=your_tencentcloud_secret_id +export TENCENTCLOUD_SECRET_KEY=your_tencentcloud_secret_key +``` + +### 注意 + +请确保您安全地管理您的API密钥。不要在公共仓库或其他公共论坛中暴露它们。 + +## API供应商 + +Exif AI依赖于API供应商来生成图像描述和标签。目前,我们支持三个供应商:ZhipuAI、Ollama和Google Gemini。 + +### 支持的供应商 + +- ZhipuAI:领先的AI服务供应商。需要API密钥。 +- Ollama:在您的机器上运行的本地AI服务,无需API密钥。 +- Google Gemini:由Google提供的强大AI服务。 + +### 自定义供应商 + +您还可以通过实现供应商接口来开发您自己的自定义供应商。这允许您与其他AI服务集成或自定义描述生成过程。 + +## 配置 + +### 设置API密钥(适用于ZhipuAI) + +要使用[ZhipuAI](https://open.bigmodel.cn/usercenter/apikeys),您需要设置API密钥。您可以通过设置环境变量来完成此操作: + +```bash +export ZHIPUAI_API_KEY=your_zhipuai_api_key +``` + +### Google Gemini + 
+要使用[Google Gemini](https://ai.google.dev/),您需要设置API密钥。您可以通过设置环境变量来完成此操作: + +```bash +export API_KEY=your_google_api_key +``` + +### Ollama Configuration + +Ollama在本地运行,不需要API密钥。请确保Ollama已安装在您的机器上并正确配置。有关安装和设置说明,请参考[Ollama](https://github.com/ollama/ollama)。 + +要使用远程Ollama服务,您可以在`providerArgs`中定义URL: + +```bash +exif-ai --providerArgs "http://ollama.example.com:8080" -a ollama -i image.jpg +``` + +```js +providerArgs: ["http://ollama.example.com:8080"], +``` + +## 开发 + +### 前置条件 + +- Node.js >=16 +- pnpm + +### 克隆仓库 + +首先,将Exif AI仓库克隆到您的本地机器: + +```bash +git clone https://github.com/tychenjiajun/exif-ai.git +cd exif-ai +``` + +### 安装依赖 + +接下来,使用 pnpm 安装所需的依赖项。 + +```bash +pnpm install +``` + +### 构建 + +```bash +pnpm run build +``` + +### Watch + +```bash +pnpm run watch +``` diff --git a/package.json b/package.json index de03f99..26bfa8a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "exif-ai", - "version": "3.0.9", + "version": "3.1.0", "description": "A Node.js CLI and library that uses Ollama, ZhipuAI or Google Gemini to intelligently write image description and/or tags to exif metadata by it's content.", "homepage": "https://github.com/tychenjiajun/exif-ai", "repository": { @@ -60,6 +60,7 @@ "ollama": "^0.5.8", "p-limit": "^6.1.0", "sharp": "0.32.6", + "tencentcloud-sdk-nodejs-iai": "^4.0.918", "ts-extras": "^0.13.0", "xhr2": "^0.2.1" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5bcaccd..8522e11 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -44,6 +44,9 @@ importers: sharp: specifier: 0.32.6 version: 0.32.6 + tencentcloud-sdk-nodejs-iai: + specifier: ^4.0.918 + version: 4.0.918 ts-extras: specifier: ^0.13.0 version: 0.13.0 @@ -608,6 +611,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.1: resolution: {integrity: 
sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} engines: {node: '>= 14'} @@ -646,6 +653,9 @@ packages: resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} engines: {node: '>=12'} + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + b4a@1.6.6: resolution: {integrity: sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==} @@ -674,6 +684,9 @@ packages: resolution: {integrity: sha512-EreW0Vi8TwovhYUHBXXRA5tthuU2ynGsZFlboyMJHCCUXYa2AjgwnE3ubBOJs2xJLcuXFJbi6c/8pH5+FVj8Og==} engines: {node: '>=14'} + bignumber.js@9.1.2: + resolution: {integrity: sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==} + bl@4.1.0: resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} @@ -765,6 +778,10 @@ packages: resolution: {integrity: sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==} engines: {node: '>=12.5.0'} + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} + commander@11.1.0: resolution: {integrity: sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==} engines: {node: '>=16'} @@ -814,6 +831,10 @@ packages: resolution: {integrity: sha512-v2KyNk7efxhlyHpjEvfyxaAihKKK0nWCuf6ZtqZcFFpQRG0bJ12Qsr0RpvsICMjAAZ8DOVCxrlqpxISlMHC4Kg==} engines: {node: '>=14.16'} + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + engines: {node: '>=0.4.0'} + detect-libc@2.0.3: resolution: {integrity: 
sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==} engines: {node: '>=8'} @@ -1020,6 +1041,10 @@ packages: resolution: {integrity: sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==} engines: {node: '>=14'} + form-data@3.0.1: + resolution: {integrity: sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==} + engines: {node: '>= 6'} + formdata-polyfill@4.0.10: resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} engines: {node: '>=12.20.0'} @@ -1045,6 +1070,10 @@ packages: get-func-name@2.0.2: resolution: {integrity: sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ==} + get-stream@6.0.1: + resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==} + engines: {node: '>=10'} + get-stream@9.0.1: resolution: {integrity: sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==} engines: {node: '>=18'} @@ -1115,6 +1144,10 @@ packages: html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.5: resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==} engines: {node: '>= 14'} @@ -1194,6 +1227,10 @@ packages: resolution: {integrity: sha512-lJJV/5dYS+RcL8uQdBDW9c9uWFLLBNRyFhnAKXw5tVqLlKZ4RMGZKv+YQ/IA3OhD+RpbJa1LLFM1FQPGyIXvOA==} engines: {node: '>=12'} + is-stream@2.0.1: + resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} + 
engines: {node: '>=8'} + is-stream@4.0.1: resolution: {integrity: sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==} engines: {node: '>=18'} @@ -1249,6 +1286,9 @@ packages: engines: {node: '>=6'} hasBin: true + json-bigint@1.0.0: + resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} + json-buffer@3.0.1: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} @@ -1318,6 +1358,14 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + mimic-response@3.1.0: resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==} engines: {node: '>=10'} @@ -1372,6 +1420,15 @@ packages: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + node-fetch@3.3.2: resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} @@ -1708,6 +1765,14 @@ packages: tar-stream@3.1.7: resolution: {integrity: 
sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==} + tencentcloud-sdk-nodejs-common@4.0.938: + resolution: {integrity: sha512-lNYig9PSM1SaYyZ9SPamy6wJPsixVNDsvL9KUQbUyE2eiD4RgGORB9bzlLvQf9nqQX5E3GHwksQRYZKS7FZ5Bw==} + engines: {node: '>=10'} + + tencentcloud-sdk-nodejs-iai@4.0.918: + resolution: {integrity: sha512-ycyDpNkQkRcceHzXXjy4osUSKfrwd+U/R+a6FJf7WIV39QX8/5D9Ctk6ChP6boa3GF8trLagx2zguU3Yl/Uckw==} + engines: {node: '>=10'} + test-exclude@7.0.1: resolution: {integrity: sha512-pFYqmTw68LXVjeWJMST4+borgQP2AyMNbg1BpZh9LbyhUeNkeaPF9gzfPGUAnSMV3qPYdWUwDIjjCLiSDOl7vg==} engines: {node: '>=18'} @@ -1756,6 +1821,9 @@ packages: resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==} engines: {node: '>=6'} + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + ts-api-utils@1.3.0: resolution: {integrity: sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ==} engines: {node: '>=16'} @@ -1766,6 +1834,9 @@ packages: resolution: {integrity: sha512-/hRadEKsjJhW4/LXxiLWG1KYEDcF4SXB7+646Btq8ySwC2DMShHApFVYld5rbuQlA+lLSGD/L67keSC/Kdczjw==} engines: {node: '>=18'} + tslib@1.13.0: + resolution: {integrity: sha512-i/6DQjL8Xf3be4K/E6Wgpekn5Qasl1usyw++dAA35Ue5orEn65VIxOA+YvNNl9HV3qv70T7CNwjODHZrLwvd1Q==} + tunnel-agent@0.6.0: resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} @@ -1818,6 +1889,10 @@ packages: util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + uuid@9.0.1: + resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==} + hasBin: true + validate-npm-package-license@3.0.4: resolution: {integrity: 
sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==} @@ -1886,9 +1961,15 @@ packages: resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} engines: {node: '>= 8'} + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + whatwg-fetch@3.6.20: resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==} + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -2425,6 +2506,12 @@ snapshots: acorn@8.12.1: {} + agent-base@6.0.2: + dependencies: + debug: 4.3.7 + transitivePeerDependencies: + - supports-color + agent-base@7.1.1: dependencies: debug: 4.3.7 @@ -2461,6 +2548,8 @@ snapshots: assertion-error@2.0.1: {} + asynckit@0.4.0: {} + b4a@1.6.6: {} balanced-match@1.0.2: {} @@ -2493,6 +2582,8 @@ snapshots: batch-cluster@13.0.0: {} + bignumber.js@9.1.2: {} + bl@4.1.0: dependencies: buffer: 5.7.1 @@ -2591,6 +2682,10 @@ snapshots: color-convert: 2.0.1 color-string: 1.9.1 + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + commander@11.1.0: {} concat-map@0.0.1: {} @@ -2634,6 +2729,8 @@ snapshots: rimraf: 3.0.2 slash: 4.0.0 + delayed-stream@1.0.0: {} + detect-libc@2.0.3: {} dir-glob@3.0.1: @@ -2900,6 +2997,12 @@ snapshots: cross-spawn: 7.0.3 signal-exit: 4.1.0 + form-data@3.0.1: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + mime-types: 2.1.35 + formdata-polyfill@4.0.10: dependencies: fetch-blob: 3.2.0 @@ -2917,6 +3020,8 @@ snapshots: get-func-name@2.0.2: {} + get-stream@6.0.1: {} + get-stream@9.0.1: dependencies: '@sec-ant/readable-stream': 
0.4.1 @@ -2986,6 +3091,13 @@ snapshots: html-escaper@2.0.2: {} + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.3.7 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.5: dependencies: agent-base: 7.1.1 @@ -3045,6 +3157,8 @@ snapshots: is-path-inside@4.0.0: {} + is-stream@2.0.1: {} + is-stream@4.0.1: {} isexe@2.0.0: {} @@ -3100,6 +3214,10 @@ snapshots: jsesc@3.0.2: {} + json-bigint@1.0.0: + dependencies: + bignumber.js: 9.1.2 + json-buffer@3.0.1: {} json-parse-even-better-errors@2.3.1: {} @@ -3164,6 +3282,12 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 + mime-db@1.52.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + mimic-response@3.1.0: {} min-indent@1.0.1: {} @@ -3200,6 +3324,10 @@ snapshots: node-domexception@1.0.0: {} + node-fetch@2.7.0: + dependencies: + whatwg-url: 5.0.0 + node-fetch@3.3.2: dependencies: data-uri-to-buffer: 4.0.1 @@ -3567,6 +3695,28 @@ snapshots: fast-fifo: 1.3.2 streamx: 2.20.1 + tencentcloud-sdk-nodejs-common@4.0.938: + dependencies: + form-data: 3.0.1 + get-stream: 6.0.1 + https-proxy-agent: 5.0.1 + is-stream: 2.0.1 + json-bigint: 1.0.0 + node-fetch: 2.7.0 + tslib: 1.13.0 + uuid: 9.0.1 + transitivePeerDependencies: + - encoding + - supports-color + + tencentcloud-sdk-nodejs-iai@4.0.918: + dependencies: + tencentcloud-sdk-nodejs-common: 4.0.938 + tslib: 1.13.0 + transitivePeerDependencies: + - encoding + - supports-color + test-exclude@7.0.1: dependencies: '@istanbuljs/schema': 0.1.3 @@ -3607,6 +3757,8 @@ snapshots: totalist@3.0.1: {} + tr46@0.0.3: {} + ts-api-utils@1.3.0(typescript@5.6.2): dependencies: typescript: 5.6.2 @@ -3615,6 +3767,8 @@ snapshots: dependencies: type-fest: 4.26.1 + tslib@1.13.0: {} + tunnel-agent@0.6.0: dependencies: safe-buffer: 5.2.1 @@ -3658,6 +3812,8 @@ snapshots: util-deprecate@1.0.2: {} + uuid@9.0.1: {} + validate-npm-package-license@3.0.4: dependencies: spdx-correct: 3.2.0 @@ -3726,8 +3882,15 @@ snapshots: web-streams-polyfill@3.3.3: {} + 
webidl-conversions@3.0.1: {} + whatwg-fetch@3.6.20: {} + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + which@2.0.2: dependencies: isexe: 2.0.0 diff --git a/src/exif-ai.ts b/src/exif-ai.ts index 20e2a13..23ab5ba 100644 --- a/src/exif-ai.ts +++ b/src/exif-ai.ts @@ -31,7 +31,7 @@ async function findFilesRecursive( } const program = new Command(); program - .version("3.0.9") + .version("3.1.0") .description(getText("description") ?? "") .requiredOption("-a, --api-provider ", getText("api-provider")) .option("-T, --tasks ", getText("tasks")) @@ -49,6 +49,7 @@ program .option("--avoid-overwrite", getText("avoid-overwrite")) .option("--ext ", getText("ext")) .option("--concurrency ", getText("concurrency")) + .option("--face-group-ids ", getText("face-group-ids")) .parse(); const options = program.opts(); @@ -76,6 +77,7 @@ async function handleExecution(path: string) { providerArgs: options.providerArgs, avoidOverwrite: options.avoidOverwrite, doNotEndExifTool: Boolean(watchMode), + faceGroupIds: options.faceGroupIds, }); } catch (error) { console.error(`Error processing file ${path}:`, error); diff --git a/src/fluent/index.ts b/src/fluent/index.ts index 61dbead..5e78820 100644 --- a/src/fluent/index.ts +++ b/src/fluent/index.ts @@ -23,30 +23,32 @@ ext = File extensiosn to watch. Only files with this extensions will be processe description-prompt = Custom prompt for the AI provider to generate description. Defaults to a generic image description prompt. tag-prompt = Custom prompt for the AI provider to generate tags. Defaults to a generic image tagging prompt. verbose = Enable verbose output for debugging. -tasks = List of tasks to perform ('description' and/or 'tag'). +tasks = List of tasks to perform ('description', 'tag', 'face'). concurrency = The numbers of files to process concurrently in watch mode. +face-group-ids = List of face group IDs to use for face recognition. 
`), ); bundles["zh-CN"].addResource( new FluentResource(` description = 一个Node.js命令行工具,它使用Ollama或ZhipuAI根据图像内容智能地将图像描述和/或标签写入EXIF元数据。 -api-provider = 要使用的AI提供者名称('ollama'代表Ollama,'zhipu'代表ZhipuAI,'google'代表Google Gemini)。 +api-provider = 要使用的AI供应商名称('ollama'代表Ollama,'zhipu'代表ZhipuAI,'google'代表Google Gemini)。 input = 输入图像文件的路径。 -model = 指定要使用的AI模型,如果提供者支持。 +model = 指定要使用的AI模型,如果供应商支持。 description-tags = 要写入描述的EXIF标签列表。默认为常见描述标签。 tag-tags = 要写入标签的EXIF标签列表。默认为常见标签。 dry-run = 预览AI生成的描述和标签,但不写入图像文件。 exif-tool-write-args = 写入元数据时附加的ExifTool参数。 -provider-args = AI提供者的附加参数。 +provider-args = AI供应商的附加参数。 watch = 监视目录以处理新文件。 avoid-overwrite = 如果EXIF标签已在文件中存在,则避免覆盖。 ext = 要监视的文件扩展名。只有具有此扩展名的文件才会被处理。 -description-prompt = 为AI提供者定制的生成描述的提示。默认为通用图像描述提示。 -tag-prompt = 为AI提供者定制的生成标签的提示。默认为通用图像标签提示。 +description-prompt = 为AI供应商定制的生成描述的提示。默认为通用图像描述提示。 +tag-prompt = 为AI供应商定制的生成标签的提示。默认为通用图像标签提示。 verbose = 启用详细输出以进行调试。 -tasks = 要执行的任务列表('description'和/或'tag')。 +tasks = 要执行的任务列表('description','tag','face')。 concurrency = 在监视模式下同时处理文件的数目。 +face-group-ids = 人脸搜索要使用的面部组ID列表。 `), ); diff --git a/src/index.ts b/src/index.ts index 5149f23..0e42db1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,6 +16,7 @@ import fetch, { import { DescriptionKey, getDescription } from "./tasks/description.js"; import { getTags, TagKey } from "./tasks/tags.js"; import { HttpsProxyAgent } from "https-proxy-agent"; +import { getFaces } from "./tasks/face.js"; if ( !globalThis.fetch || @@ -73,9 +74,10 @@ export async function execute({ providerArgs, avoidOverwrite = false, doNotEndExifTool = false, + faceGroupIds = [], }: { /** - * Array of tasks to perform: 'description', 'tag', or 'tags' + * Array of tasks to perform: 'description', 'tag', 'face' */ tasks?: string[]; /** @@ -130,8 +132,13 @@ export async function execute({ * Do not end the ExifTool session after execution */ doNotEndExifTool?: boolean; + /** + * Array of face group IDs to use for face recognition + */ + faceGroupIds?: string[]; }) { - if 
(["description", "tag", "tags"].every((t) => !tasks.includes(t))) return; + if (["description", "tag", "tags", "face"].every((t) => !tasks.includes(t))) + return; const resolvedPath = resolve(path); @@ -164,6 +171,14 @@ export async function execute({ } if (verbose) console.log("Imported provider:", provider); + const faces = tasks.includes("face") + ? await getFaces({ + buffer, + verbose, + faceGroupIds, + }) + : undefined; + const [description, tags] = await Promise.all([ tasks.includes("description") ? getDescription({ @@ -184,27 +199,32 @@ export async function execute({ prompt: tagPrompt, providerArgs, providerModule, - verbose, tagTags, existingTags, + additionalTags: faces, }) - : undefined, + : tasks.includes("face") + ? getTags({ + buffer, + model, + prompt: tagPrompt, + providerArgs, + tagTags, + existingTags, + additionalTags: faces, + }) + : undefined, ]); + const result = { + ...description, + ...tags, + }; + if (dry) { - if (description) { - console.log(description); - } - if (tags) { - console.log(tags); - } + console.log(JSON.stringify(result)); if (verbose) console.log("Dry run - did not write to file"); } else { - const result = { - ...description, - ...tags, - }; - if (Object.keys(result).length > 0) { await exiftool.write(resolvedPath, result, { writeArgs }); if (verbose) console.log("Wrote description to file:", resolvedPath); diff --git a/src/tasks/face.ts b/src/tasks/face.ts new file mode 100644 index 0000000..fcd37e8 --- /dev/null +++ b/src/tasks/face.ts @@ -0,0 +1,109 @@ +import { Tags } from "exiftool-vendored"; +import { objectFromEntries } from "ts-extras"; +import { TagKey } from "./tags.js"; +import sharp from "sharp"; +import { env } from "node:process"; + +interface Result { + RetCode: number; + Candidates: Candidate[]; + FaceRect: FaceRect; +} + +interface Candidate { + PersonId: string; + FaceId: string; + Score: number; + PersonName: string; + Gender: number; + PersonGroupInfos: PersonGroupInfo[]; +} + +interface PersonGroupInfo { 
+ GroupId: string; + PersonExDescriptions: any[]; +} + +interface FaceRect { + X: number; + Y: number; + Width: number; + Height: number; +} + +async function sizeHandle( + buffer: Buffer, + quality = 100, + drop = 2, +): Promise { + const sharpInstance = await sharp(buffer); + const { width = 0, height = 0 } = await sharpInstance.metadata(); + let done = await sharp(buffer) + .jpeg({ + quality, + }) + .toBuffer(); + + while (done.byteLength > 5_000_000) { + quality = Math.max(quality - drop, 0); + done = await sharp(buffer) + .resize({ + ...(width > height ? { width: 4000 } : { height: 4000 }), + withoutEnlargement: true, + }) + .jpeg({ + quality, + }) + .toBuffer(); + } + + return done; +} + +export async function getFaces({ + buffer, + verbose = false, + faceGroupIds, +}: { + buffer: Buffer; + verbose?: boolean; + faceGroupIds: string[]; +}) { + const [handled, { iai }] = await Promise.all([ + sizeHandle(buffer), + import("tencentcloud-sdk-nodejs-iai"), + ] as const); + + const client = new iai.v20200303.Client({ + credential: { + secretId: env.TENCENTCLOUD_SECRET_ID, + secretKey: env.TENCENTCLOUD_SECRET_KEY, + }, + region: "ap-guangzhou", + }); + + try { + const a: { + Results?: Result[]; + FaceNum?: number; + FaceModelVersion?: string; + RequestId?: string; + } = await client.SearchFaces({ + GroupIds: faceGroupIds, + Image: handled.toString("base64"), + MaxPersonNum: 5, + NeedPersonInfo: 1, + }); + // log + if (verbose) { + console.log(a); + } + + return a?.Results?.map((k) => k.Candidates[0].PersonName).filter( + (k) => k != null, + ); + } catch (error) { + console.error("Failed to get tags from provider:", error); + return; + } +} diff --git a/src/tasks/tags.ts b/src/tasks/tags.ts index 475c413..3a43b1d 100644 --- a/src/tasks/tags.ts +++ b/src/tasks/tags.ts @@ -43,32 +43,38 @@ export async function getTags({ verbose = false, tagTags, existingTags, + additionalTags, }: { buffer: Buffer; model?: string; prompt: string; - providerModule: any; + providerModule?: 
any; providerArgs?: string[]; verbose?: boolean; tagTags: Readonly; existingTags?: Readonly; + additionalTags?: Readonly; }) { // Get tags from provider - let tags: string | string[] | undefined; + let tags: string | string[] = []; - try { - tags = await providerModule.getTags?.({ - buffer, - model, - prompt: prompt, - providerArgs, - }); - } catch (error) { - console.error("Failed to get tags from provider:", error); - return; + if (providerModule) { + try { + tags = await providerModule.getTags?.({ + buffer, + model, + prompt: prompt, + providerArgs, + }); + } catch (error) { + console.error("Failed to get tags from provider:", error); + return; + } } - const formatted = formatTags(tags); + const formatted = formatTags(tags)?.concat(additionalTags ?? []); + + if (verbose) console.log("Tags are:", formatted); return formatted == null || formatted.length === 0 ? ({} as Record)