diff --git a/PA_DATA_PATH_CONFIG.md b/PA_DATA_PATH_CONFIG.md deleted file mode 100644 index 9dd7e58..0000000 --- a/PA_DATA_PATH_CONFIG.md +++ /dev/null @@ -1,131 +0,0 @@ -# PA_DATA 路径配置说明 - -## 问题描述 - -之前版本中,`pa_data` 路径使用相对路径 `../pa_data`,这会导致以下问题: - -1. 在不同的启动位置(项目根目录 vs backend 目录)会导致路径不一致 -2. Docker 容器内的路径与本地开发环境不匹配 -3. 数据持久化不稳定,可能在错误的位置创建 pa_data 目录 - -## 解决方案 - -### 1. 统一的路径配置模块 - -创建了 `backend/config/paths.py` 模块,提供统一的路径管理: - -```python -from config.paths import ( - get_project_root, # 获取项目根目录 - get_pa_data_base, # 获取 pa_data 基础目录 - get_workspaces_path, # 获取工作空间目录 - get_templates_path, # 获取模板目录 - get_workspace_path, # 获取指定工作的工作空间路径 -) -``` - -### 2. 路径解析逻辑 - -- **环境变量优先**: 如果设置了 `PA_DATA_PATH` 环境变量,将使用该路径 -- **自动检测**: 否则自动检测项目根目录,在其下创建 `pa_data` 目录 -- **Docker 兼容**: 在容器内也能正确工作 - -### 3. 环境变量配置 - -在 `.env` 文件中可以设置(可选): - -```bash -# 自定义 pa_data 路径 -PA_DATA_PATH=/path/to/pa_data -``` - -如果不设置,系统会使用默认位置: -- **本地开发**: `<项目根>/pa_data` -- **Docker 容器**: 通过 volume 挂载到 `/app/pa_data` - -## 目录结构 - -``` -pa_data/ -├── workspaces/ # 工作空间目录 -│ └── / # 每个工作对应一个目录 -│ ├── attachment/ # 附件目录 -│ ├── code/ # 代码文件 -│ ├── logs/ # 日志文件 -│ ├── outputs/ # 输出文件 -│ ├── temp/ # 临时文件 -│ ├── paper.md # 生成的论文 -│ ├── metadata.json # 元数据 -│ └── chat_history.json # 对话历史 -└── templates/ # 模板目录 - └── _*.md # 模板文件 -``` - -## 迁移说明 - -### 1. 本地开发环境 - -如果你之前在 `backend/pa_data` 下有数据,需要移动到项目根目录: - -```bash -# 从项目根目录执行 -mv backend/pa_data ./pa_data -``` - -### 2. Docker 环境 - -docker-compose.yml 已配置好 volume 挂载: - -```yaml -volumes: - - ./pa_data:/app/pa_data # 挂载到容器内的 /app/pa_data -``` - -### 3. 自定义路径 - -如果想使用自定义路径(例如外部存储),设置环境变量: - -```bash -# .env 文件 -PA_DATA_PATH=/mnt/storage/paperagent_data -``` - -## 受影响的模块 - -以下模块已更新使用统一的路径配置: - -1. `backend/services/file_services/workspace_files.py` - 工作空间文件服务 -2. `backend/services/file_services/template_files.py` - 模板文件服务 -3. `backend/services/chat_services/chat_history_manager.py` - 聊天记录管理 -4. `backend/services/data_services/crud.py` - CRUD 操作 -5. `backend/ai_system/config/environment.py` - AI 环境配置 -6. `backend/routers/work_routes/work.py` - 工作路由 - -## 验证 - -启动应用后,查看日志输出,确认路径配置正确: - -``` -INFO: 项目根目录: /path/to/PaperAgent -INFO: PA_DATA 目录: /path/to/PaperAgent/pa_data -INFO: 工作空间目录: /path/to/PaperAgent/pa_data/workspaces -INFO: 模板目录: /path/to/PaperAgent/pa_data/templates -``` - -## 测试 - -可以通过以下方式测试路径配置: - -```python -# 在 Python shell 中 -from config.paths import get_pa_data_base, get_workspaces_path -print(f"PA_DATA: {get_pa_data_base()}") -print(f"Workspaces: {get_workspaces_path()}") -``` - -## 注意事项 - -1. **权限**: 确保运行用户对 pa_data 目录有读写权限 -2. **备份**: 在迁移数据前建议先备份 -3. **Docker**: 容器重建时,volume 数据会保留 -4. 
**开发**: 本地开发时建议在项目根目录启动应用 diff --git a/backend/Dockerfile b/backend/Dockerfile index 2b40a1b..b6dae4a 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -15,8 +15,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ && rm -rf /var/lib/apt/lists/* -# 复制依赖文件 +# 复制依赖文件和工作区成员 COPY pyproject.toml ./ +COPY office_word_mcp ./office_word_mcp # 安装 uv RUN pip install --no-cache-dir uv diff --git a/backend/ai_system/core_agents/__init__.py b/backend/ai_system/core_agents/__init__.py index 22260a3..b732db4 100644 --- a/backend/ai_system/core_agents/__init__.py +++ b/backend/ai_system/core_agents/__init__.py @@ -1,9 +1,10 @@ """ AI系统核心代理模块 - LangChain 重构版本 -包含MainAgent、CodeAgent等核心代理类 +包含MainAgent、CodeAgent、WriterAgent等核心代理类 """ from .main_agent import MainAgent from .code_agent import CodeAgent +from .writer_agent import WriterAgent -__all__ = ['MainAgent', 'CodeAgent'] +__all__ = ['MainAgent', 'CodeAgent', 'WriterAgent'] diff --git a/backend/ai_system/core_agents/agent_base.py b/backend/ai_system/core_agents/agent_base.py index 3eecd19..896db92 100644 --- a/backend/ai_system/core_agents/agent_base.py +++ b/backend/ai_system/core_agents/agent_base.py @@ -164,7 +164,7 @@ async def _execute_tool_call(self, tool_call: Dict[str, Any], index: int = 1, to # 发送工具调用完成通知 if self.stream_manager: try: - await self.stream_manager.print_main_content(f"工具 {function_name} 执行完成,结果长度: {len(tool_result)} 字符") + await self.stream_manager.print_main_content(f"工具 {function_name} 执行完成") except Exception as e: logger.warning(f"发送工具完成通知失败: {e}") diff --git a/backend/ai_system/core_agents/code_agent.py b/backend/ai_system/core_agents/code_agent.py index 9787880..be25e01 100644 --- a/backend/ai_system/core_agents/code_agent.py +++ b/backend/ai_system/core_agents/code_agent.py @@ -118,7 +118,7 @@ async def run(self, task_prompt: str) -> str: try: await self.stream_manager.send_json_block( "code_agent_result", - f"CodeAgent任务完成,结果长度: {len(output)}", + output, ) except Exception as e: logger.warning("发送CodeAgent完成通知失败: %s", e) diff --git a/backend/ai_system/core_agents/main_agent.py b/backend/ai_system/core_agents/main_agent.py index 962a174..647eab1 100644 --- a/backend/ai_system/core_agents/main_agent.py +++ b/backend/ai_system/core_agents/main_agent.py @@ -26,7 +26,8 @@ class MainAgent: def __init__(self, llm: BaseLanguageModel, stream_manager=None, workspace_dir: str = None, work_id: Optional[str] = None, - template_id: Optional[int] = None, codeagent_llm=None): + template_id: Optional[int] = None, codeagent_llm=None, + output_mode: str = "markdown", writer_llm=None): """ 初始化MainAgent @@ -36,13 +37,17 @@ def __init__(self, llm: BaseLanguageModel, stream_manager=None, workspace_dir: 工作空间目录路径 work_id: 工作ID template_id: 模板ID + codeagent_llm: CodeAgent使用的LLM实例 + output_mode: 输出模式 ("markdown", "word", "latex") + writer_llm: WriterAgent使用的LLM实例(从"writing"配置加载) """ - logger.info(f"MainAgent初始化开始,codeagent_llm: {codeagent_llm}") + logger.info(f"MainAgent初始化开始,output_mode: {output_mode}, codeagent_llm: {codeagent_llm}, writer_llm: {writer_llm}") self.llm = llm self.stream_manager = stream_manager self.work_id = work_id self.template_id = template_id self.workspace_dir = workspace_dir + self.output_mode = output_mode # 如果没有提供workspace_dir但有work_id,构建路径 if not workspace_dir and work_id: @@ -51,10 +56,12 @@ def __init__(self, llm: BaseLanguageModel, stream_manager=None, # 设置环境变量,供工具使用 os.environ["WORKSPACE_DIR"] = self.workspace_dir - # 创建所有需要的工具 - self.tools = LangChainToolFactory.create_all_tools( - 
self.workspace_dir, stream_manager, include_template=True + # 加载基础工具(不包括文档写作工具,文档操作由WriterAgent处理) + # 基础工具包括:tree, list_attachments, web_search 等通用工具 + self.tools = LangChainToolFactory.create_base_tools( + self.workspace_dir, stream_manager ) + logger.info("MainAgent加载基础工具(文档操作由WriterAgent处理)") # 添加代码执行工具(使用CodeAgent,默认复用主LLM,可指定codeagent_llm) code_llm = codeagent_llm or self.llm @@ -67,35 +74,186 @@ def __init__(self, llm: BaseLanguageModel, stream_manager=None, else: logger.warning("CodeAgent工具创建失败,代码能力可能受限") + # 添加WriterAgent工具(用于文档写作任务) + writer_agent_tool = LangChainToolFactory.create_writer_agent_tool( + workspace_dir=self.workspace_dir, + output_mode=self.output_mode, + stream_manager=stream_manager, + llm=self.llm, + writer_llm=writer_llm + ) + if writer_agent_tool: + self.tools.append(writer_agent_tool) + logger.info("成功添加WriterAgent工具,output_mode: %s", self.output_mode) + else: + logger.warning("WriterAgent工具创建失败,文档写作能力可能受限") + # 创建 LangChain Agent self.system_prompt = self._create_system_prompt() + + # 检查 LLM 是否支持工具调用 + logger.info(f"LLM 类型: {type(llm).__name__}") + logger.info(f"LLM 模型: {getattr(llm, 'model_name', getattr(llm, 'model', 'unknown'))}") + + # 检查是否有 bind_tools 方法(表示支持工具调用) + if hasattr(llm, 'bind_tools'): + logger.info("✓ LLM 支持 bind_tools 方法") + # 测试绑定工具 + try: + test_bound = llm.bind_tools(self.tools[:1]) + logger.info("✓ 工具绑定测试成功") + except Exception as e: + logger.error(f"✗ 工具绑定测试失败: {e}") + else: + logger.warning("⚠️ LLM 不支持 bind_tools 方法,工具调用可能不可用") + + # 检查 LLM 的配置 + if hasattr(llm, 'model_kwargs'): + logger.info(f"LLM model_kwargs: {llm.model_kwargs}") + + logger.info(f"创建 Agent,工具数量: {len(self.tools)}") self.agent = create_agent( model=llm, - tools=self.tools + tools=self.tools, + system_prompt=self.system_prompt, + debug=True # 启用调试模式 ) - logger.info(f"MainAgent初始化完成,work_id: {work_id}, template_id: {template_id}, 工具数量: {len(self.tools)}") + logger.info(f"MainAgent初始化完成,work_id: {work_id}, template_id: {template_id}, output_mode: {output_mode}, 工具数量: {len(self.tools)}") + logger.info(f"已注册工具: {[tool.name for tool in self.tools]}") def _create_system_prompt(self) -> str: """创建 MainAgent 的系统提示词""" # 基础系统提示 system_content = ( - "你是基于 LangChain Agent 的学术论文写作助手,负责协调整个论文生成过程。**你使用的语言需要跟模板语言一致**\n\n" + "你是基于 LangChain Agent 的学术论文写作助手(MainAgent),负责协调整个论文生成过程。**你使用的语言需要跟模板语言一致**\n\n" + "**🔴 核心行为准则**:\n" + "1. **主动执行,不要问用户要写什么内容!**\n" + "2. **根据用户的需求描述,自己思考并生成完整的论文内容**\n" + "3. **立即使用工具开始写作,不要只是回复文本说明!**\n" + "4. **如果用户说\"写论文\"、\"生成论文\",你要立即开始调用工具写入内容,而不是问用户要写什么**\n\n" + "**重要:你必须使用提供的工具来完成任务,不要只是回复文本!**\n\n" "请你记住:论文尽可能使用图表等清晰表示!涉及图表等务必使用代码执行得到!\n" "请你记住:如果最后发现没找到代码或者图片就重新执行数据分析!\n\n" - "你的职责:\n" - "1. 分析用户需求,制定论文生成计划\n" - "2. **主动检查和分析附件**:当用户上传附件时,使用read_attachment工具读取附件内容\n" - "3. 当需要代码执行、数据分析、图表生成时,使用code_agent_execute工具\n" - "4. 使用writemd工具保存论文草稿到paper.md\n\n" - "**你的工具集**:\n" - "- writemd: 保存论文草稿和内容到文件(推荐使用)\n" - "- update_template: 更新论文模板的特定章节\n" - "- read_attachment: 读取附件内容(PDF、Word、Excel等)\n" + "**你的身份和职责**:\n" + "- 你是MainAgent,负责论文写作的整体协调和文档生成\n" + "- 你有一个助手CodeAgent,专门负责编程任务(数据分析、图表生成等)\n" + "- 你需要明确区分哪些任务由你完成,哪些任务委派给CodeAgent\n" + "- **你要主动思考论文内容,不要总是问用户要写什么**\n\n" + "**核心工作流程**:\n" + "1. 分析用户需求,**立即制定论文生成计划并开始执行**\n" + "2. **委派编程任务给CodeAgent**:当需要数据分析、图表生成、统计计算时,使用code_agent_execute工具\n" + "3. **你自己负责文档生成**:创建Word文档、添加内容、格式化等由你直接使用Word工具完成\n" + "4. **主动生成内容**:根据用户需求和模板结构,自己思考并生成合适的论文内容\n" + ) + + # 根据输出模式添加文档生成指令 + if self.output_mode == "word": + system_content += ( + "4. 
**委派文档写作任务给WriterAgent**:你正在Word模式下工作,必须使用WriterAgent来处理所有文档操作\n\n" + "**🔴 核心原则:高层指令,WriterAgent自主创作**\n" + "- **你(MainAgent)只需要给WriterAgent高层次的写作目标,不要指定具体内容**\n" + "- **WriterAgent是专业的写作助手,会根据你的目标自主扩充和创作内容**\n" + "- 使用 writer_agent_execute 工具来委派文档操作\n\n" + "**WriterAgent工具使用方法**:\n" + "- 工具名称:writer_agent_execute\n" + "- 输入:高层次的写作目标(不是具体内容)\n" + "- WriterAgent会理解目标,自主创作内容,并选择合适的Word工具完成\n\n" + "**✅ 正确的指令示例(高层次目标)**:\n" + "- \"写一个Introduction章节,介绍圆周率的重要性和研究意义\"\n" + "- \"写一个History章节,讲述圆周率的历史发展\"\n" + "- \"写一个Applications章节,说明圆周率在各领域的应用\"\n" + "- \"插入图片outputs/chart.png并配上说明文字\"\n" + "- \"创建一个表格展示实验结果数据\"\n\n" + "**❌ 错误的指令示例(过于具体)**:\n" + "- \"添加一级标题Introduction\" ← 太具体,WriterAgent无法发挥\n" + "- \"添加段落内容:圆周率π是...\" ← 不要写具体内容,让WriterAgent自己写\n" + "- \"添加3行4列的表格\" ← 不要指定格式细节\n\n" + "**Word模式工作流程(立即执行,不要问用户)**:\n" + "1. 分析用户需求,确定论文需要哪些章节和内容主题\n" + "2. **给WriterAgent下达高层次的写作目标**(例如:\"写Introduction章节\")\n" + "3. WriterAgent会自主决定:\n" + " - 章节标题的具体文字\n" + " - 段落的具体内容和表述\n" + " - 内容的组织结构和逻辑\n" + " - 使用哪些Word工具(标题、段落、表格等)\n" + "4. 如果需要图表,先使用code_agent_execute生成图片,然后让WriterAgent插入并配文字\n" + "5. 文档会自动保存到 paper.docx\n\n" + "**任务分工原则(重要)**:\n" + "- **你(MainAgent)负责**:战略规划、章节划分、主题确定\n" + " * 决定论文需要哪些章节(Introduction, Methods, Results等)\n" + " * 给WriterAgent下达章节级别的写作目标\n" + " * 协调CodeAgent生成数据和图表\n" + " * 不要写具体内容,不要指定格式细节\n" + "- **WriterAgent负责**:内容创作、文档操作、格式控制\n" + " * 根据MainAgent的目标自主创作具体内容\n" + " * 决定标题文字、段落内容、表格结构等细节\n" + " * 选择合适的Word工具完成文档操作\n" + "- **CodeAgent负责**:数据分析、图表生成、复杂计算\n" + " * 使用 code_agent_execute 工具委派编程任务\n" + " * 例如:\"分析数据并生成柱状图\"、\"计算统计指标\"\n\n" + ) + else: + system_content += ( + "4. **委派文档写作任务给WriterAgent**:你正在Markdown模式下工作,必须使用WriterAgent来处理所有文档操作\n\n" + "**🔴 核心原则:高层指令,WriterAgent自主创作**\n" + "- **你(MainAgent)只需要给WriterAgent高层次的写作目标,不要指定具体内容**\n" + "- **WriterAgent是专业的写作助手,会根据你的目标自主扩充和创作内容**\n" + "- 使用 writer_agent_execute 工具来委派文档操作\n\n" + "**WriterAgent工具使用方法**:\n" + "- 工具名称:writer_agent_execute\n" + "- 输入:高层次的写作目标(不是具体内容)\n" + "- WriterAgent会理解目标,自主创作内容,并选择合适的Markdown工具完成\n\n" + "**✅ 正确的指令示例(高层次目标)**:\n" + "- \"写一个Introduction章节,介绍研究背景和意义\"\n" + "- \"写一个Methods章节,描述研究方法\"\n" + "- \"更新Abstract章节,总结全文要点\"\n\n" + "**❌ 错误的指令示例(过于具体)**:\n" + "- \"将以下内容追加到paper.md:# Introduction...\" ← 不要写具体内容\n" + "- \"添加段落:本文研究...\" ← 让WriterAgent自己写\n\n" + "**Markdown模式工作流程(立即执行,不要问用户)**:\n" + "1. 分析用户需求,确定论文需要哪些章节\n" + "2. **给WriterAgent下达高层次的写作目标**\n" + "3. WriterAgent会自主创作内容并选择合适的工具\n" + "4. 
文档会自动保存到 paper.md\n\n" + "**任务分工原则(重要)**:\n" + "- **你(MainAgent)负责**:战略规划、章节划分、主题确定\n" + " * 决定论文结构和章节主题\n" + " * 给WriterAgent下达章节级别的写作目标\n" + " * 不要写具体内容\n" + "- **WriterAgent负责**:内容创作、文档操作\n" + " * 根据目标自主创作具体内容\n" + " * 选择合适的Markdown工具完成操作\n" + "- **CodeAgent负责**:数据分析、图表生成\n" + " * 使用 code_agent_execute 工具委派编程任务\n" + ) + + # 通用工具 + system_content += ( + "\n**通用工具**:\n" "- list_attachments: 列出所有附件文件\n" "- web_search: 搜索最新的学术资料和背景信息\n" - "- code_agent_execute: 使用专用CodeAgent执行复杂的代码任务,包括数据分析、图表生成、统计计算等(推荐用于复杂任务)\n" - "- tree: 显示工作空间目录结构\n" + "- tree: 显示工作空间目录结构\n\n" + "**WriterAgent工具(用于所有文档写作任务)**:\n" + "- writer_agent_execute: 委派给专用WriterAgent执行文档写作任务\n" + " * ✅ 适用场景:创建文档、添加标题、添加段落、添加表格、插入图片、格式化文档等所有文档操作\n" + " * 示例任务:\"添加一级标题Introduction\"、\"添加段落:This paper...\"、\"插入图片outputs/chart.png\"\n" + " * **所有文档写作任务必须通过WriterAgent完成**\n" + " * WriterAgent会根据output_mode自动使用Word或Markdown工具\n\n" + "**CodeAgent工具(仅用于编程任务)**:\n" + "- code_agent_execute: 委派给专用CodeAgent执行编程任务\n" + " * ✅ 适用场景:数据分析、图表生成(matplotlib/seaborn)、统计计算、文件处理、Python脚本执行\n" + " * 示例任务:\"读取data.csv并生成销售趋势图\"、\"计算数据的均值和标准差\"、\"处理Excel文件并提取关键信息\"\n" + " * ❌ 禁止场景:**绝对不要使用CodeAgent来创建、编辑、修改文档**\n" + " * ❌ 禁止场景:**绝对不要使用CodeAgent来添加文档内容、格式化文档**\n" + " * 文档操作必须委派给WriterAgent\n\n" + "**🚫 严格禁止事项**:\n" + "- **永远不要让CodeAgent操作文档(Word或Markdown)!**\n" + "- **永远不要让CodeAgent使用python-docx库或直接写入.md文件!**\n" + "- **所有文档操作必须通过writer_agent_execute委派给WriterAgent!**\n" + "- **你(MainAgent)不应该直接调用Word工具或writemd工具**\n" + "- 如果需要编辑文档,使用writer_agent_execute委派给WriterAgent\n" + "- CodeAgent只负责生成数据、图表等内容,不负责将内容写入文档\n" ) # 根据模板添加额外信息 @@ -118,13 +276,21 @@ def _create_system_prompt(self) -> str: ) system_content += ( - "\n\n重要原则:\n" + "\n\n**🎯 重要原则**:\n" + "- **主动执行,不要问用户要写什么!根据需求自己思考并生成内容!**\n" + "- **立即使用工具开始写作,不要只是说明你会怎么做!**\n" "- 保持对话连贯性,按步骤执行任务\n" - "- **充分利用用户上传的附件内容,确保论文基于真实的资料和数据**\n" "- 生成的图表要保存在outputs目录,并在论文中正确引用\n" "- 论文不要杜撰,确保科学性和准确性\n" "- 每完成一个重要章节,使用writemd保存一次\n" - "- 最终输出应该是完整的paper.md文件\n" + "- 最终输出应该是完整的paper.md或paper.docx文件\n" + "\n**🔴 关键要求:任务完成标准**\n" + "- **你的任务只有在将最终结果输出到文件后才算真正完成!**\n" + "- Word模式:必须使用Word工具将所有内容写入paper.docx文件\n" + "- Markdown模式:必须使用writemd工具将所有内容写入paper.md文件\n" + "- **不要只是在对话中回复内容,必须调用相应的工具将内容保存到文件中**\n" + "- 在完成文件输出后,向用户确认文件已生成并说明文件路径\n" + "- 如果没有将内容写入docx或md文件,任务视为未完成\n" ) return system_content @@ -147,12 +313,39 @@ async def run(self, user_input: str) -> str: logger.warning(f"发送开始通知失败: {e}") # 使用 LangChain Agent 执行 + logger.info(f"调用 Agent,可用工具数量: {len(self.tools)}") + logger.info(f"工具列表: {[tool.name for tool in self.tools]}") + inputs = {"messages": [HumanMessage(content=user_input)]} result = await self.agent.ainvoke(inputs) # 提取最后的AI回复 messages = result.get("messages", []) output = "" + + # 记录所有消息用于调试 + logger.info(f"Agent返回了 {len(messages)} 条消息") + tool_calls_count = 0 + for i, message in enumerate(messages): + msg_type = type(message).__name__ + logger.info(f"消息 {i}: 类型={msg_type}") + + # 检查是否有工具调用 + if hasattr(message, 'tool_calls') and message.tool_calls: + tool_calls_count += len(message.tool_calls) + logger.info(f" 包含 {len(message.tool_calls)} 个工具调用") + for tc in message.tool_calls: + logger.info(f" 工具: {tc.get('name', 'unknown')}") + + if hasattr(message, 'content') and message.content: + content_preview = str(message.content)[:100] + logger.info(f" 内容预览: {content_preview}") + + if tool_calls_count == 0: + logger.warning("⚠️ 没有检测到任何工具调用!") + else: + logger.info(f"✓ 总共执行了 {tool_calls_count} 个工具调用") + for message in reversed(messages): if hasattr(message, 'content') and message.content: output = message.content @@ 
-168,7 +361,7 @@ async def run(self, user_input: str) -> str: try: await self.stream_manager.send_json_block( "main_agent_complete", - f"任务完成,结果长度: {len(output)} 字符" + output ) except Exception as e: logger.warning(f"发送完成通知失败: {e}") diff --git a/backend/ai_system/core_agents/writer_agent.py b/backend/ai_system/core_agents/writer_agent.py new file mode 100644 index 0000000..a73900a --- /dev/null +++ b/backend/ai_system/core_agents/writer_agent.py @@ -0,0 +1,373 @@ +""" +WriterAgent - Specialized agent for document writing operations +Handles Word and Markdown document generation by directly calling appropriate tools +""" + +import logging +from typing import Any, Dict, Optional, List + +from langchain.agents import create_agent +from langchain_core.language_models import BaseLanguageModel +from langchain_core.messages import HumanMessage +from langchain_core.tools import BaseTool + +from ..core_managers.langchain_tools import LangChainToolFactory + +logger = logging.getLogger(__name__) + + +class WriterAgent: + """ + Specialized LangChain-based agent for document writing operations. + Separates document writing concerns from MainAgent orchestration. + + Supports multiple output modes: + - word: Uses Word tools for .docx document generation + - markdown: Uses Markdown tools for .md document generation + - latex: Not yet supported (logs warning) + """ + + def __init__( + self, + llm: BaseLanguageModel, + output_mode: str, + workspace_dir: str, + stream_manager=None, + work_id: Optional[str] = None, + ): + """ + Initialize WriterAgent with specified output mode and tools. + + Args: + llm: LangChain LLM instance for agent execution + output_mode: Document format ("word", "markdown", or "latex") + workspace_dir: Workspace directory path for file operations + stream_manager: Stream manager for output notifications + work_id: Work ID for tracking and logging + + Raises: + ValueError: If workspace_dir is empty or llm is None + """ + # Validate required parameters + if not workspace_dir: + raise ValueError("WriterAgent must be provided with workspace_dir parameter") + if llm is None: + raise ValueError("WriterAgent requires a valid LLM instance") + + self.llm = llm + self.output_mode = output_mode.lower() + self.workspace_dir = workspace_dir + self.stream_manager = stream_manager + self.work_id = work_id + + # Log LaTeX mode warning + if self.output_mode == "latex": + logger.warning( + "WriterAgent initialized with output_mode='latex', but LaTeX is not yet supported. " + "Please use 'word' or 'markdown' mode instead." + ) + + # Load tools based on output mode + try: + self.tools = self._load_tools() + logger.info( + "WriterAgent loaded %d tools for output_mode='%s'", + len(self.tools), + self.output_mode + ) + except Exception as e: + logger.error("Failed to load tools for WriterAgent: %s", e, exc_info=True) + raise ValueError(f"Failed to load tools: {str(e)}") + + # Create LangChain agent + try: + self.agent = create_agent( + model=self.llm, + tools=self.tools, + system_prompt=self._get_system_prompt(), + ) + logger.info( + "WriterAgent initialized successfully, workspace_dir: %s, work_id: %s, output_mode: %s, tools: %d", + workspace_dir, + work_id, + self.output_mode, + len(self.tools), + ) + except Exception as e: + logger.error("Failed to create WriterAgent: %s", e, exc_info=True) + raise ValueError(f"Failed to create agent: {str(e)}") + + def _load_tools(self) -> List[BaseTool]: + """ + Load appropriate tools based on output_mode. 
+ + Returns: + List of LangChain tools for the specified output mode + + Raises: + ValueError: If output_mode is unsupported + """ + if self.output_mode == "word": + # Load Word tools + word_tools = LangChainToolFactory.create_word_tools( + workspace_dir=self.workspace_dir, + stream_manager=self.stream_manager + ) + if not word_tools: + raise ValueError("Failed to create Word tools") + return word_tools + + elif self.output_mode == "markdown": + # Load Markdown tools (writemd, update_template) + markdown_tools = LangChainToolFactory.create_file_tools( + workspace_dir=self.workspace_dir, + stream_manager=self.stream_manager + ) + if not markdown_tools: + raise ValueError("Failed to create Markdown tools") + return markdown_tools + + elif self.output_mode == "latex": + # LaTeX not yet supported - return empty list + logger.warning("LaTeX mode requested but not yet supported") + return [] + + else: + raise ValueError( + f"Unsupported output_mode: '{self.output_mode}'. " + f"Supported modes: 'word', 'markdown', 'latex'" + ) + + def _get_system_prompt(self) -> str: + """ + Generate system prompt based on output_mode. + + Returns: + System prompt string tailored to the output mode + """ + base_prompt = ( + "你是一个专业的学术写作助手(WriterAgent),负责根据高层次的写作目标自主创作内容。\n" + "**你使用的语言需要跟模板语言一致**\n\n" + "**🎯 核心职责**:\n" + "1. **理解写作目标**:MainAgent会给你高层次的写作目标(例如:\"写Introduction章节\")\n" + "2. **自主创作内容**:你需要根据目标自己思考并创作具体的文字内容\n" + "3. **选择合适工具**:根据内容类型选择合适的文档工具完成操作\n" + "4. **确保质量**:内容要专业、准确、符合学术规范\n\n" + "**🚫 重要提醒**:\n" + "- MainAgent只会告诉你\"写什么主题\",不会告诉你\"写什么内容\"\n" + "- 你需要自己扩充和发挥,创作具体的段落文字\n" + "- 不要只是简单执行指令,要展现你的写作能力\n\n" + ) + + if self.output_mode == "word": + return base_prompt + ( + "**输出模式:Word (.docx)**\n\n" + "你可以使用以下Word工具:\n" + "1. word_add_heading - 添加标题(1-5级)\n" + "2. word_add_paragraph - 添加段落\n" + "3. word_add_table - 添加表格\n" + "4. word_add_picture - 插入图片\n" + "5. word_add_page_break - 插入分页符\n" + "6. 其他格式化工具\n\n" + "**工作流程示例**:\n" + "收到指令:\"写一个Introduction章节,介绍圆周率的重要性\"\n" + "你应该:\n" + "1. 思考:Introduction应该包含什么内容?\n" + " - 圆周率的定义\n" + " - 历史重要性\n" + " - 现代应用价值\n" + " - 本文研究意义\n" + "2. 创作具体内容:\n" + " - 调用 word_add_heading(\"Introduction\", level=1)\n" + " - 调用 word_add_paragraph(\"圆周率π是数学中最重要的常数之一...\")\n" + " - 调用 word_add_paragraph(\"自古以来,人类对圆周率的研究...\")\n" + " - 调用 word_add_paragraph(\"本文旨在探讨...\")\n" + "3. 确认完成并报告\n\n" + "**内容创作要求**:\n" + "- 段落要充实,每段至少3-5句话\n" + "- 逻辑清晰,层次分明\n" + "- 语言专业,符合学术规范\n" + "- 适当使用过渡句连接段落\n" + "- 如果需要,可以添加多个段落来充分展开主题\n\n" + ) + + elif self.output_mode == "markdown": + return base_prompt + ( + "**输出模式:Markdown (.md)**\n\n" + "你可以使用以下Markdown工具:\n" + "1. writemd - 写入Markdown内容(支持append、overwrite、modify等模式)\n" + "2. update_template - 更新模板章节\n\n" + "**工作流程示例**:\n" + "收到指令:\"写一个Introduction章节,介绍研究背景\"\n" + "你应该:\n" + "1. 思考:Introduction应该包含什么内容?\n" + "2. 创作具体的Markdown内容:\n" + " ```markdown\n" + " # Introduction\n" + " \n" + " 研究背景的第一段...\n" + " \n" + " 研究背景的第二段...\n" + " \n" + " 本文的研究意义...\n" + " ```\n" + "3. 调用 writemd 或 update_template 工具写入\n" + "4. 确认完成并报告\n\n" + "**内容创作要求**:\n" + "- 使用标准Markdown格式\n" + "- 段落要充实,逻辑清晰\n" + "- 适当使用标题层级(#, ##, ###)\n" + "- 语言专业,符合学术规范\n\n" + ) + + elif self.output_mode == "latex": + return base_prompt + ( + "**Output Mode: LaTeX**\n\n" + "LaTeX mode is not yet supported. Please inform the user to use 'word' or 'markdown' mode instead.\n" + ) + + else: + return base_prompt + "Unknown output mode. Please check configuration.\n" + + async def run(self, instruction: str) -> str: + """ + Execute a writing instruction by calling appropriate tools. 
+ + Args: + instruction: Natural language writing instruction specifying what to write + + Returns: + Execution result message or error description + """ + logger.info("WriterAgent starting task execution: %s", instruction[:100]) + + # Send start notification + if self.stream_manager: + try: + await self.stream_manager.send_json_block( + "writer_agent_start", + f"WriterAgent starting: {instruction[:100]}..." + ) + except Exception as e: + logger.warning("Failed to send WriterAgent start notification: %s", e) + + # Validate instruction + if not instruction or not instruction.strip(): + error_msg = ( + "Error: Instruction validation failed: Empty instruction\n" + "Details: Instruction must specify what content to write\n" + "Suggestion: Provide instruction in format: 'Add [type] with content: [text]'" + ) + logger.error("WriterAgent received empty instruction") + return error_msg + + # Execute instruction + try: + inputs = {"messages": [HumanMessage(content=instruction)]} + result = await self.agent.ainvoke(inputs) + output = self._extract_output(result) + + # Send completion notification + if self.stream_manager: + try: + await self.stream_manager.send_json_block( + "writer_agent_result", + output + ) + except Exception as e: + logger.warning("Failed to send WriterAgent completion notification: %s", e) + + logger.info("WriterAgent task completed successfully") + return output + + except Exception as e: + # Format error message + error_msg = self._format_error(e, instruction) + logger.error("WriterAgent execution failed: %s", e, exc_info=True) + + # Send error notification + if self.stream_manager: + try: + await self.stream_manager.send_json_block( + "writer_agent_error", + f"WriterAgent execution failed: {str(e)}" + ) + except Exception: + pass + + return error_msg + + def _extract_output(self, result: Any) -> str: + """ + Extract final output from agent execution result. + + Args: + result: Agent execution result (typically a dict) + + Returns: + Extracted output string + """ + if isinstance(result, dict): + # Try to get output field + if result.get("output"): + return str(result["output"]) + + # Try to extract from messages + messages = result.get("messages") + if messages: + for msg in reversed(messages): + content = getattr(msg, "content", None) + if not content and isinstance(msg, dict): + content = msg.get("content") + if content: + return str(content) + + return str(result) + + def _format_error(self, error: Exception, instruction: str) -> str: + """ + Format error message with operation details and suggestions. 
+ + Args: + error: Exception that occurred + instruction: Original instruction that failed + + Returns: + Formatted error message string + """ + error_type = type(error).__name__ + error_str = str(error) + + # Categorize error and provide suggestions + if "file" in error_str.lower() and "not found" in error_str.lower(): + suggestion = "Ensure the file exists or generate it first using CodeAgent" + elif "path" in error_str.lower() and "invalid" in error_str.lower(): + suggestion = "Check that the file path is valid and within the workspace" + elif "unsupported" in error_str.lower() or "not supported" in error_str.lower(): + suggestion = "Use a supported output mode: 'word' or 'markdown'" + else: + suggestion = "Check the instruction format and try again, or contact support" + + return ( + f"Error: WriterAgent execution failed: {error_type}\n" + f"Details: {error_str}\n" + f"Instruction: {instruction[:200]}\n" + f"Suggestion: {suggestion}" + ) + + def get_execution_summary(self) -> Dict[str, Any]: + """ + Get execution summary with agent metadata. + + Returns: + Dictionary containing agent type, configuration, and tool information + """ + return { + "agent_type": "WriterAgent", + "output_mode": self.output_mode, + "workspace_dir": self.workspace_dir, + "work_id": self.work_id, + "tools_count": len(self.tools), + "tool_names": [tool.name for tool in self.tools], + "langchain_based": True, + } diff --git a/backend/ai_system/core_managers/langchain_tools.py b/backend/ai_system/core_managers/langchain_tools.py index 68cd213..543311e 100644 --- a/backend/ai_system/core_managers/langchain_tools.py +++ b/backend/ai_system/core_managers/langchain_tools.py @@ -43,52 +43,22 @@ def create_file_tools(workspace_dir: str, stream_manager=None) -> List[Structure StructuredTool.from_function( func=file_tools.writemd, name="writemd", - description="写入Markdown文件到工作空间。支持多种模式:append(追加)、overwrite(覆盖)、modify(修改)、insert(插入)、section_update(章节更新)", - args_schema={ - "filename": {"type": "string", "description": "文件名(不需要.md后缀)"}, - "content": {"type": "string", "description": "Markdown内容"}, - "mode": {"type": "string", "description": "写入模式:append/overwrite/modify/insert/section_update/smart_replace", "default": "overwrite"} - } + description="写入Markdown文件到工作空间。支持多种模式:append(追加)、overwrite(覆盖)、modify(修改)、insert(插入)、section_update(章节更新)" ), StructuredTool.from_function( func=file_tools.update_template, name="update_template", - description="专门用于更新论文模板的特定章节,只支持章节级别的更新", - args_schema={ - "template_name": {"type": "string", "description": "论文文件名,默认为paper.md", "default": "paper.md"}, - "content": {"type": "string", "description": "要更新的章节内容"}, - "section": {"type": "string", "description": "要更新的章节名称(必需)"} - } + description="专门用于更新论文模板的特定章节,只支持章节级别的更新" ), StructuredTool.from_function( func=file_tools.tree, name="tree", - description="显示工作空间的目录树结构,帮助了解文件组织", - args_schema={ - "directory": {"type": "string", "description": "要显示的目录路径(可选,默认显示整个工作空间)"} - } + description="显示工作空间的目录树结构,帮助了解文件组织" ), StructuredTool.from_function( func=file_tools.list_attachments, name="list_attachments", description="列出工作空间中所有附件文件,包括文件类型、大小等信息" - ), - StructuredTool.from_function( - func=file_tools.read_attachment, - name="read_attachment", - description="读取附件文件内容,支持多种格式(PDF、Word、Excel、文本等)", - args_schema={ - "file_path": {"type": "string", "description": "附件文件路径(相对于attachment目录)"} - } - ), - StructuredTool.from_function( - func=file_tools.search_attachments, - name="search_attachments", - description="在附件文件中搜索关键词", - args_schema={ - "keyword": 
{"type": "string", "description": "搜索关键词"}, - "file_type": {"type": "string", "description": "可选的文件类型过滤(如pdf、docx、txt等)"} - } ) ] @@ -145,6 +115,101 @@ async def run_code_agent(task_description: str) -> str: logger.error(f"创建 CodeAgent 工具失败: {e}") return None + @staticmethod + def create_writer_agent_tool( + workspace_dir: str, + output_mode: str, + stream_manager=None, + llm=None, + writer_llm=None + ) -> Optional[StructuredTool]: + """ + Create LangChain WriterAgent tool for document writing operations + + The WriterAgent is a specialized agent that handles document writing by directly + calling Word or Markdown tools based on the output mode. This allows the MainAgent + to delegate document writing tasks through a single tool interface. + + Args: + workspace_dir: Workspace directory path + output_mode: Document format mode ("word", "markdown", or "latex") + stream_manager: Stream manager for output notifications + llm: LangChain LLM instance for WriterAgent (fallback if writer_llm not provided) + writer_llm: Dedicated LangChain LLM instance for WriterAgent from "writing" config + + Returns: + WriterAgent tool or None if creation fails + """ + try: + # Use writer_llm if provided, otherwise fall back to llm + writer_model = writer_llm if writer_llm is not None else llm + + if writer_model is None: + logger.error("创建WriterAgent工具失败:未提供LLM实例") + return None + + if not workspace_dir: + logger.error("创建WriterAgent工具失败:未提供workspace_dir") + return None + + # Log which LLM is being used + if writer_llm is not None: + logger.info("WriterAgent使用专用的'writing'配置LLM") + else: + logger.info("WriterAgent使用MainAgent的LLM(未找到'writing'配置)") + + # Import WriterAgent + from ..core_agents.writer_agent import WriterAgent + + # Instantiate WriterAgent with provided parameters + try: + writer_agent = WriterAgent( + llm=writer_model, + output_mode=output_mode, + workspace_dir=workspace_dir, + stream_manager=stream_manager, + ) + except Exception as e: + logger.error(f"WriterAgent 实例化失败: {e}", exc_info=True) + return None + + # Create async wrapper function for WriterAgent.run() + async def run_writer_agent(instruction: str) -> str: + """ + Execute writing instruction through WriterAgent + + Args: + instruction: Natural language writing instruction + + Returns: + Execution result or error message + """ + try: + return await writer_agent.run(instruction) + except Exception as e: + logger.error(f"WriterAgent 工具执行失败: {e}", exc_info=True) + return f"WriterAgent 执行失败: {str(e)}" + + # Create StructuredTool with clear description + tool = StructuredTool.from_function( + coroutine=run_writer_agent, + name="writer_agent_execute", + description=( + f"使用专用WriterAgent执行文档写作任务(当前模式: {output_mode})。" + "WriterAgent可以处理各种文档操作,包括添加标题、段落、表格、图片、分页符等。" + "输入详细的写作指令,例如:'添加一级标题Introduction'、'添加段落内容:...'、" + "'添加3行4列的表格,包含以下数据:...'、'插入图片outputs/chart.png,宽度6英寸'。" + "WriterAgent会自动选择合适的工具来完成任务。" + ), + ) + + logger.info(f"创建了 LangChain WriterAgent 工具 (output_mode: {output_mode})") + return tool + + except Exception as e: + logger.error(f"创建 WriterAgent 工具失败: {e}", exc_info=True) + return None + @staticmethod def create_template_tools(workspace_dir: str, stream_manager=None) -> List[StructuredTool]: """ @@ -169,37 +234,22 @@ def create_template_tools(workspace_dir: str, stream_manager=None) -> List[Struc StructuredTool.from_function( func=template_tools.get_section_content, name="get_section_content", - description="获取paper.md文件中指定章节的内容", - args_schema={ - "section_title": {"type": "string", "description": "章节标题"} - } + 
description="获取paper.md文件中指定章节的内容" ), StructuredTool.from_function( func=template_tools.update_section_content, name="update_section_content", - description="更新paper.md文件中指定章节的内容", - args_schema={ - "section_title": {"type": "string", "description": "章节标题"}, - "new_content": {"type": "string", "description": "新的章节内容"} - } + description="更新paper.md文件中指定章节的内容" ), StructuredTool.from_function( func=template_tools.add_section, name="add_section", - description="在paper.md文件末尾添加新章节", - args_schema={ - "section_title": {"type": "string", "description": "新章节标题"}, - "content": {"type": "string", "description": "章节内容", "default": ""} - } + description="在paper.md文件末尾添加新章节" ), StructuredTool.from_function( func=template_tools.rename_section_title, name="rename_section_title", - description="修改paper.md文件中指定章节的标题", - args_schema={ - "old_title": {"type": "string", "description": "原标题"}, - "new_title": {"type": "string", "description": "新标题"} - } + description="修改paper.md文件中指定章节的标题" ) ] @@ -210,6 +260,214 @@ def create_template_tools(workspace_dir: str, stream_manager=None) -> List[Struc logger.error(f"创建模板工具失败: {e}") return [] + @staticmethod + def create_word_tools(workspace_dir: str, stream_manager=None) -> List[StructuredTool]: + """ + 创建 Word 文档工具(直接调用) + + Tool Categories: + 1. Document Creation and Properties + 2. Content Addition + 3. Advanced Content Manipulation + 4. Content Extraction + 5. Text Formatting + 6. Table Formatting + 7. Comment Extraction + + Args: + workspace_dir: 工作空间目录 + stream_manager: 流式输出管理器 + + Returns: + LangChain 格式的 Word 工具列表 + """ + try: + from ..core_tools.word_tools import WordTools + + # Create WordTools instance + word_tools = WordTools(workspace_dir, stream_manager) + + # Category 1: Document Creation and Properties + doc_creation_tools = [ + StructuredTool.from_function( + coroutine=word_tools.create_document, + name="word_create_document", + description="[Document Creation] Create a new Word document with optional title and author" + ), + StructuredTool.from_function( + coroutine=word_tools.get_document_info, + name="word_get_document_info", + description="[Document Properties] Get document metadata and properties including title, author, and statistics" + ), + StructuredTool.from_function( + coroutine=word_tools.get_document_text, + name="word_get_document_text", + description="[Document Properties] Extract all text content from the document" + ), + StructuredTool.from_function( + coroutine=word_tools.get_document_outline, + name="word_get_document_outline", + description="[Document Properties] Get the document structure and outline showing headings hierarchy" + ), + StructuredTool.from_function( + coroutine=word_tools.list_available_documents, + name="word_list_available_documents", + description="[Document Properties] List all .docx files available in the workspace" + ), + StructuredTool.from_function( + coroutine=word_tools.copy_document, + name="word_copy_document", + description="[Document Creation] Create a copy of the current document with a new filename" + ), + ] + + # Category 2: Content Addition + content_tools = [ + StructuredTool.from_function( + coroutine=word_tools.add_heading, + name="word_add_heading", + description="[Content Addition] Add a heading to the document with specified level (1-5) and optional formatting" + ), + StructuredTool.from_function( + coroutine=word_tools.add_paragraph, + name="word_add_paragraph", + description="[Content Addition] Add a paragraph to the document with optional style and formatting" + ), + 
StructuredTool.from_function( + coroutine=word_tools.add_table, + name="word_add_table", + description="[Content Addition] Add a table to the document with specified rows and columns, optionally filled with data" + ), + StructuredTool.from_function( + coroutine=word_tools.add_picture, + name="word_add_picture", + description="[Content Addition] Add an image to the document from a file path with optional width specification" + ), + StructuredTool.from_function( + coroutine=word_tools.add_page_break, + name="word_add_page_break", + description="[Content Addition] Insert a page break in the document" + ), + ] + + # Category 3: Advanced Content Manipulation + manipulation_tools = [ + StructuredTool.from_function( + coroutine=word_tools.insert_header_near_text, + name="word_insert_header_near_text", + description="[Advanced Content Manipulation] Insert a header before or after specific text or paragraph index" + ), + StructuredTool.from_function( + coroutine=word_tools.insert_line_or_paragraph_near_text, + name="word_insert_line_or_paragraph_near_text", + description="[Advanced Content Manipulation] Insert a line or paragraph before or after specific text or paragraph index" + ), + StructuredTool.from_function( + coroutine=word_tools.insert_numbered_list_near_text, + name="word_insert_numbered_list_near_text", + description="[Advanced Content Manipulation] Insert a numbered or bulleted list before or after specific text or paragraph index" + ), + ] + + # Category 4: Content Extraction + extraction_tools = [ + StructuredTool.from_function( + coroutine=word_tools.get_paragraph_text_from_document, + name="word_get_paragraph_text", + description="[Content Extraction] Get text content from a specific paragraph by index" + ), + StructuredTool.from_function( + coroutine=word_tools.find_text_in_document, + name="word_find_text", + description="[Content Extraction] Find text in the document with options for case-sensitive and whole-word matching" + ), + ] + + # Category 5: Text Formatting + text_format_tools = [ + StructuredTool.from_function( + coroutine=word_tools.format_text, + name="word_format_text", + description="[Text Formatting] Format text in a specific paragraph with bold, italic, underline, color, font size, and font name" + ), + StructuredTool.from_function( + coroutine=word_tools.search_and_replace, + name="word_search_and_replace", + description="[Text Formatting] Search for text and replace it with new text throughout the document" + ), + StructuredTool.from_function( + coroutine=word_tools.delete_paragraph, + name="word_delete_paragraph", + description="[Text Formatting] Delete a specific paragraph from the document by index" + ), + StructuredTool.from_function( + coroutine=word_tools.create_custom_style, + name="word_create_custom_style", + description="[Text Formatting] Create a custom style with specified formatting options" + ), + ] + + # Category 6: Table Formatting + table_format_tools = [ + StructuredTool.from_function( + coroutine=word_tools.format_table, + name="word_format_table", + description="[Table Formatting] Format a table with header row, border style, and cell shading" + ), + StructuredTool.from_function( + coroutine=word_tools.set_table_cell_shading, + name="word_set_table_cell_shading", + description="[Table Formatting] Set shading color for a specific table cell" + ), + StructuredTool.from_function( + coroutine=word_tools.merge_table_cells, + name="word_merge_table_cells", + description="[Table Formatting] Merge a range of cells in a table" + ), + 
StructuredTool.from_function( + coroutine=word_tools.set_table_cell_alignment, + name="word_set_table_cell_alignment", + description="[Table Formatting] Set horizontal and vertical alignment for a table cell" + ), + StructuredTool.from_function( + coroutine=word_tools.set_table_column_width, + name="word_set_table_column_width", + description="[Table Formatting] Set the width of a table column in points or percentage" + ), + ] + + # Category 7: Comment Extraction + comment_tools = [ + StructuredTool.from_function( + coroutine=word_tools.get_all_comments, + name="word_get_all_comments", + description="[Comment Extraction] Get all comments from the document" + ), + StructuredTool.from_function( + coroutine=word_tools.get_comments_by_author, + name="word_get_comments_by_author", + description="[Comment Extraction] Get comments from the document filtered by author name" + ), + ] + + # Combine all tools + all_tools = ( + doc_creation_tools + + content_tools + + manipulation_tools + + extraction_tools + + text_format_tools + + table_format_tools + + comment_tools + ) + + logger.info(f"创建了 {len(all_tools)} 个 Word 工具 (直接调用)") + return all_tools + + except Exception as e: + logger.error(f"创建 Word 工具失败: {e}", exc_info=True) + return [] + @staticmethod def create_standard_tools() -> List[BaseTool]: """ @@ -233,11 +491,52 @@ def create_standard_tools() -> List[BaseTool]: logger.error(f"创建标准工具失败: {e}") return [] + @staticmethod + def create_base_tools(workspace_dir: str, stream_manager=None) -> List[BaseTool]: + """ + 创建基础工具(不包含writemd等Markdown工具) + 用于Word模式,只包含附件读取、搜索、目录树等通用工具 + + Args: + workspace_dir: 工作空间目录 + stream_manager: 流式输出管理器 + + Returns: + 基础工具列表 + """ + try: + os.environ["WORKSPACE_DIR"] = workspace_dir + file_tools_instance = FileTools(stream_manager) + + base_tools = [ + StructuredTool.from_function( + func=file_tools_instance.tree, + name="tree", + description="显示工作空间的目录树结构,帮助了解文件组织" + ), + StructuredTool.from_function( + func=file_tools_instance.list_attachments, + name="list_attachments", + description="列出工作空间中所有附件文件,包括文件类型、大小等信息" + ) + ] + + # 添加标准工具(搜索) + standard_tools = LangChainToolFactory.create_standard_tools() + base_tools.extend(standard_tools) + + logger.info(f"创建了 {len(base_tools)} 个基础工具(不含writemd)") + return base_tools + + except Exception as e: + logger.error(f"创建基础工具失败: {e}") + return [] + @staticmethod def create_all_tools(workspace_dir: str, stream_manager=None, include_template: bool = False) -> List[BaseTool]: """ - 创建论文写作需要的所有工具 + 创建论文写作需要的所有工具(包括writemd等Markdown工具) Args: workspace_dir: 工作空间目录 @@ -250,7 +549,7 @@ def create_all_tools(workspace_dir: str, stream_manager=None, try: all_tools = [] - # 添加文件工具 + # 添加文件工具(包含writemd) file_tools = LangChainToolFactory.create_file_tools(workspace_dir, stream_manager) all_tools.extend(file_tools) diff --git a/backend/ai_system/core_tools/file_tools.py b/backend/ai_system/core_tools/file_tools.py index e6af4a9..e224947 100644 --- a/backend/ai_system/core_tools/file_tools.py +++ b/backend/ai_system/core_tools/file_tools.py @@ -441,205 +441,7 @@ def list_attachments(self) -> str: logger.error(error_msg) return error_msg - def read_attachment(self, file_path: str) -> str: - """ - 读取附件文件内容 - - Args: - file_path: 附件文件路径(相对于attachment目录) - - Returns: - 文件内容或错误信息 - """ - try: - # 构建完整的文件路径 - full_path = os.path.join(self.workspace_dir, "attachment", file_path) - - if not os.path.exists(full_path): - return f"附件文件不存在: {file_path}" - - if not os.path.isfile(full_path): - return f"指定的路径不是文件: {file_path}" - - # 检查文件大小限制(10MB) - file_size 
= os.path.getsize(full_path) - if file_size > 10 * 1024 * 1024: - return f"文件过大 ({self._format_file_size(file_size)}),超过10MB限制" - - # 获取文件扩展名 - file_ext = Path(full_path).suffix.lower() - - # 根据文件类型选择合适的读取方法 - content = self._extract_file_content(full_path, file_ext) - - # 准备返回信息 - result = f"**文件信息:**\n" - result += f"- 文件名: {Path(full_path).name}\n" - result += f"- 文件路径: {file_path}\n" - result += f"- 文件大小: {self._format_file_size(file_size)}\n" - result += f"- 文件类型: {self._get_file_type_description(file_ext)}\n\n" - result += f"**文件内容:**\n{content}" - - if self.stream_manager: - import json - json_data = { - "file_path": file_path, - "file_size": file_size, - "file_type": self._get_file_type_description(file_ext), - "content": content[:1000] + "..." if len(content) > 1000 else content, - "truncated": len(content) > 1000 - } - json_content = json.dumps(json_data, ensure_ascii=False) - self._send_json_block_sync("attachment_content", json_content) - - return result - - except Exception as e: - error_msg = f"读取附件失败: {str(e)}" - logger.error(error_msg) - return error_msg - - def get_attachment_info(self, file_path: str) -> str: - """ - 获取附件文件的详细信息 - - Args: - file_path: 附件文件路径(相对于attachment目录) - - Returns: - 文件详细信息 - """ - try: - # 构建完整的文件路径 - full_path = os.path.join(self.workspace_dir, "attachment", file_path) - - if not os.path.exists(full_path): - return f"附件文件不存在: {file_path}" - - if not os.path.isfile(full_path): - return f"指定的路径不是文件: {file_path}" - - # 获取文件统计信息 - stat_info = os.stat(full_path) - file_ext = Path(full_path).suffix.lower() - - result = f"**附件文件详细信息:**\n\n" - result += f"- **文件名**: {Path(full_path).name}\n" - result += f"- **相对路径**: {file_path}\n" - result += f"- **完整路径**: {full_path}\n" - result += f"- **文件大小**: {self._format_file_size(stat_info.st_size)}\n" - result += f"- **文件类型**: {self._get_file_type_description(file_ext)}\n" - result += f"- **扩展名**: {file_ext}\n" - result += f"- **创建时间**: {self._format_timestamp(stat_info.st_ctime)}\n" - result += f"- **修改时间**: {self._format_timestamp(stat_info.st_mtime)}\n" - - # 如果是可读的文本文件,显示前几行内容预览 - if self._is_text_file(file_ext) and stat_info.st_size < 1024 * 1024: # 1MB以下 - try: - with open(full_path, 'r', encoding='utf-8') as f: - preview = f.read(500) - if len(preview) == 500: - preview += "..." 
- result += f"- **内容预览**:\n```\n{preview}\n```\n" - except UnicodeDecodeError: - result += f"- **内容预览**: 二进制文件无法预览\n" - - return result - - except Exception as e: - error_msg = f"获取附件信息失败: {str(e)}" - logger.error(error_msg) - return error_msg - def search_attachments(self, keyword: str, file_type: Optional[str] = None) -> str: - """ - 在附件文件中搜索关键词 - - Args: - keyword: 搜索关键词 - file_type: 可选的文件类型过滤(如 'pdf', 'docx', 'txt') - - Returns: - 搜索结果 - """ - try: - attachment_dir = os.path.join(self.workspace_dir, "attachment") - - if not os.path.exists(attachment_dir): - return "工作空间中没有附件目录" - - search_results = [] - keyword_lower = keyword.lower() - - # 递归搜索附件 - for root, dirs, files in os.walk(attachment_dir): - for file in files: - file_path = os.path.join(root, file) - relative_path = os.path.relpath(file_path, attachment_dir) - file_ext = Path(file).suffix.lower() - - # 文件类型过滤 - if file_type and file_ext != f".{file_type.lower()}": - continue - - # 文件名匹配 - if keyword_lower in file.lower(): - search_results.append({ - "file": relative_path, - "match_type": "文件名", - "match_text": file - }) - continue - - # 文件内容匹配(仅对文本文件) - if self._is_text_file(file_ext): - try: - content = self._extract_file_content(file_path, file_ext) - if keyword_lower in content.lower(): - # 找到匹配的行 - lines = content.split('\n') - matching_lines = [] - for i, line in enumerate(lines, 1): - if keyword_lower in line.lower(): - matching_lines.append(f"第{i}行: {line.strip()}") - if len(matching_lines) >= 3: # 最多显示3个匹配行 - break - - search_results.append({ - "file": relative_path, - "match_type": "文件内容", - "match_text": "\n".join(matching_lines) - }) - except Exception as e: - logger.warning(f"搜索文件内容失败 {file_path}: {e}") - - if not search_results: - return f"未找到包含关键词 '{keyword}' 的附件文件" - - # 格式化搜索结果 - result = f"**搜索结果** (关键词: '{keyword}'):\n\n" - for i, item in enumerate(search_results, 1): - result += f"{i}. 
**{item['file']}** (匹配类型: {item['match_type']})\n" - if item['match_type'] == '文件内容': - result += f" 匹配内容:\n {item['match_text']}\n" - result += "\n" - - if self.stream_manager: - import json - json_data = { - "keyword": keyword, - "file_type": file_type, - "results": search_results - } - json_content = json.dumps(json_data, ensure_ascii=False) - self._send_json_block_sync("search_results", json_content) - - return result.strip() - - except Exception as e: - error_msg = f"搜索附件失败: {str(e)}" - logger.error(error_msg) - return error_msg # 辅助方法 def _get_file_type_description(self, file_ext: str) -> str: @@ -700,82 +502,9 @@ def _format_timestamp(self, timestamp: float) -> str: import datetime return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') - def _is_text_file(self, file_ext: str) -> bool: - """判断是否为文本文件""" - text_extensions = { - '.txt', '.md', '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp', - '.html', '.css', '.vue', '.json', '.xml', '.yaml', '.yml', '.toml', '.ini', - '.sql', '.sh', '.bat', '.cmd', '.log', '.csv', '.rtf' - } - return file_ext in text_extensions - - def _extract_file_content(self, file_path: str, file_ext: str) -> str: - """根据文件类型提取内容""" - try: - if file_ext in ['.txt', '.md', '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp', - '.html', '.css', '.vue', '.json', '.xml', '.yaml', '.yml', '.toml', '.ini', - '.sql', '.sh', '.bat', '.cmd', '.log', '.rtf']: - # 直接读取文本文件 - with open(file_path, 'r', encoding='utf-8') as f: - return f.read() - - elif file_ext == '.csv': - # 使用pandas读取CSV文件 - import pandas as pd - df = pd.read_csv(file_path) - return f"CSV文件内容预览:\n{df.head().to_string()}\n\n总行数: {len(df)}\n总列数: {len(df.columns)}\n列名: {list(df.columns)}" - - elif file_ext in ['.xlsx', '.xls']: - # 使用pandas读取Excel文件 - import pandas as pd - df = pd.read_excel(file_path) - return f"Excel文件内容预览:\n{df.head().to_string()}\n\n总行数: {len(df)}\n总列数: {len(df.columns)}\n列名: {list(df.columns)}" - - elif file_ext == '.docx': - # 使用python-docx读取Word文档 - from docx import Document - doc = Document(file_path) - content = [] - for para in doc.paragraphs: - if para.text.strip(): - content.append(para.text) - return "\n".join(content) - - elif file_ext == '.pdf': - # 使用PyPDF2读取PDF文件 - try: - import PyPDF2 - with open(file_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - content = [] - for page_num in range(min(len(pdf_reader.pages), 10)): # 限制读取前10页 - page = pdf_reader.pages[page_num] - text = page.extract_text() - if text.strip(): - content.append(f"--- 第{page_num + 1}页 ---\n{text}") - return "\n".join(content) - except Exception as e: - # 如果PyPDF2失败,尝试pdfplumber - try: - import pdfplumber - with pdfplumber.open(file_path) as pdf: - content = [] - for page_num in range(min(len(pdf.pages), 10)): # 限制读取前10页 - page = pdf.pages[page_num] - text = page.extract_text() - if text.strip(): - content.append(f"--- 第{page_num + 1}页 ---\n{text}") - return "\n".join(content) - except Exception as e2: - return f"PDF文件读取失败:\nPyPDF2错误: {str(e)}\npdfplumber错误: {str(e2)}" - else: - return f"不支持的文件类型: {file_ext}" - except Exception as e: - logger.error(f"提取文件内容失败 {file_path}: {e}") - return f"读取文件内容失败: {str(e)}" def _send_json_block_sync(self, block_type: str, data: Any): """同步发送JSON块到stream_manager""" diff --git a/backend/ai_system/core_tools/word_tools.py b/backend/ai_system/core_tools/word_tools.py new file mode 100644 index 0000000..3d52284 --- /dev/null +++ b/backend/ai_system/core_tools/word_tools.py @@ -0,0 +1,511 @@ +""" +Direct Word document tools wrapper 
+Wraps existing office_word_mcp tool functions with workspace context +""" + +import logging +import asyncio +from typing import List, Optional, Dict, Any +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class WordTools: + """ + Direct Word document tools wrapper + Wraps existing office_word_mcp tool functions with workspace context + """ + + def __init__(self, workspace_dir: str, stream_manager=None): + """ + Initialize Word tools + + Args: + workspace_dir: Absolute path to workspace directory + stream_manager: Optional stream manager for output notifications + """ + self.workspace_dir = Path(workspace_dir).resolve() + self.stream_manager = stream_manager + self.document_path = str(self.workspace_dir / "paper.docx") + + logger.info(f"WordTools initialized for workspace: {self.workspace_dir}") + + def _resolve_path(self, relative_path: str) -> str: + """ + Resolve relative path to absolute path within workspace + + Args: + relative_path: Path relative to workspace or absolute path + + Returns: + Absolute path string + + Raises: + ValueError: If path is outside workspace + """ + try: + # Convert to Path object + path = Path(relative_path) + + # If already absolute, validate it's within workspace + if path.is_absolute(): + resolved = path.resolve() + else: + # Resolve relative to workspace + resolved = (self.workspace_dir / path).resolve() + + # Security check: ensure path is within workspace + try: + resolved.relative_to(self.workspace_dir) + except ValueError: + raise ValueError(f"Path {relative_path} is outside workspace") + + return str(resolved) + + except Exception as e: + logger.error(f"Path resolution failed for {relative_path}: {e}") + raise + + def _send_notification(self, notification_type: str, message: str): + """ + Send notification through stream manager + + Args: + notification_type: Type of notification + message: Notification message + """ + if not self.stream_manager: + return + + # Skip word_tool_result notifications + if notification_type == "word_tool_result": + return + + try: + # Send notification asynchronously + asyncio.create_task( + self.stream_manager.send_json_block(notification_type, message) + ) + except Exception as e: + logger.warning(f"Failed to send notification: {e}") + # Don't fail the operation due to notification failure + + def _handle_error(self, operation: str, error: Exception) -> str: + """ + Centralized error handling + + Args: + operation: Name of the operation that failed + error: The exception that occurred + + Returns: + User-friendly error message + """ + error_msg = f"{operation} failed: {str(error)}" + logger.error(error_msg, exc_info=True) + + # Send notification if stream manager available + self._send_notification("error", error_msg) + + return error_msg + + # ===== Category 1: Document Creation and Properties ===== + + async def create_document(self, title: Optional[str] = None, author: Optional[str] = None) -> str: + """Create new Word document""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", "Creating Word document") + result = await document_tools.create_document(self.document_path, title, author) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("create_document", e) + + async def get_document_info(self) -> str: + """Get document metadata and properties""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", "Getting 
document info") + result = await document_tools.get_document_info(self.document_path) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_document_info", e) + + async def get_document_text(self) -> str: + """Extract all text from document""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", "Extracting document text") + result = await document_tools.get_document_text(self.document_path) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_document_text", e) + + async def get_document_outline(self) -> str: + """Get document structure""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", "Getting document outline") + result = await document_tools.get_document_outline(self.document_path) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_document_outline", e) + + async def list_available_documents(self) -> str: + """List all .docx files in workspace""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", "Listing available documents") + result = await document_tools.list_available_documents(str(self.workspace_dir)) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("list_available_documents", e) + + async def copy_document(self, destination_filename: Optional[str] = None) -> str: + """Create a copy of the document""" + from word_document_server.tools import document_tools + + try: + self._send_notification("word_tool_call", f"Copying document to {destination_filename}") + result = await document_tools.copy_document(self.document_path, destination_filename) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("copy_document", e) + + # ===== Category 2: Content Addition ===== + + async def add_heading(self, text: str, level: int = 1, font_name: Optional[str] = None, + font_size: Optional[int] = None, bold: Optional[bool] = None, + italic: Optional[bool] = None, border_bottom: bool = False) -> str: + """Add heading to document""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Adding heading: {text}") + result = await content_tools.add_heading( + self.document_path, text, level, font_name, font_size, bold, italic, border_bottom + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("add_heading", e) + + async def add_paragraph(self, text: str, style: Optional[str] = None, + font_name: Optional[str] = None, font_size: Optional[int] = None, + bold: Optional[bool] = None, italic: Optional[bool] = None, + color: Optional[str] = None) -> str: + """Add paragraph to document""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", "Adding paragraph") + result = await content_tools.add_paragraph( + self.document_path, text, style, font_name, font_size, bold, italic, color + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("add_paragraph", e) + + async def add_table(self, rows: int, cols: int, data: 
Optional[List[List[str]]] = None) -> str: + """Add table to document""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Adding table ({rows}x{cols})") + result = await content_tools.add_table(self.document_path, rows, cols, data) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("add_table", e) + + async def add_picture(self, image_path: str, width: Optional[float] = None) -> str: + """Add picture to document with path resolution""" + from word_document_server.tools import content_tools + + try: + # Resolve path relative to workspace + resolved_path = self._resolve_path(image_path) + + self._send_notification("word_tool_call", f"Adding picture: {image_path}") + result = await content_tools.add_picture(self.document_path, resolved_path, width) + self._send_notification("word_tool_result", result) + return result + except ValueError as e: + # Path security violation + error_msg = f"Invalid image path: {str(e)}" + logger.error(error_msg) + self._send_notification("error", error_msg) + return f"Error: {error_msg}" + except Exception as e: + return self._handle_error("add_picture", e) + + async def add_page_break(self) -> str: + """Add page break to document""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", "Adding page break") + result = await content_tools.add_page_break(self.document_path) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("add_page_break", e) + + # ===== Category 3: Advanced Content Manipulation ===== + + async def insert_header_near_text(self, target_text: Optional[str] = None, header_title: str = "", + position: str = 'after', header_style: str = 'Heading 1', + target_paragraph_index: Optional[int] = None) -> str: + """Insert header near text""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Inserting header near text: {target_text}") + result = await content_tools.insert_header_near_text_tool( + self.document_path, target_text, header_title, position, header_style, target_paragraph_index + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("insert_header_near_text", e) + + async def insert_line_or_paragraph_near_text(self, target_text: Optional[str] = None, line_text: str = "", + position: str = 'after', line_style: Optional[str] = None, + target_paragraph_index: Optional[int] = None) -> str: + """Insert line or paragraph near text""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Inserting line near text: {target_text}") + result = await content_tools.insert_line_or_paragraph_near_text_tool( + self.document_path, target_text, line_text, position, line_style, target_paragraph_index + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("insert_line_or_paragraph_near_text", e) + + async def insert_numbered_list_near_text(self, target_text: Optional[str] = None, list_items: Optional[list] = None, + position: str = 'after', target_paragraph_index: Optional[int] = None, + bullet_type: str = 'bullet') -> str: + """Insert numbered list near text""" + from word_document_server.tools import content_tools + + try: + 
self._send_notification("word_tool_call", f"Inserting list near text: {target_text}") + result = await content_tools.insert_numbered_list_near_text_tool( + self.document_path, target_text, list_items, position, target_paragraph_index, bullet_type + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("insert_numbered_list_near_text", e) + + # ===== Category 4: Content Extraction ===== + + async def get_paragraph_text_from_document(self, paragraph_index: int) -> str: + """Get text from specific paragraph""" + from word_document_server.tools import extended_document_tools + + try: + self._send_notification("word_tool_call", f"Getting paragraph {paragraph_index}") + result = await extended_document_tools.get_paragraph_text_from_document(self.document_path, paragraph_index) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_paragraph_text_from_document", e) + + async def find_text_in_document(self, text_to_find: str, match_case: bool = True, whole_word: bool = False) -> str: + """Find text in document""" + from word_document_server.tools import extended_document_tools + + try: + self._send_notification("word_tool_call", f"Finding text: {text_to_find}") + result = await extended_document_tools.find_text_in_document( + self.document_path, text_to_find, match_case, whole_word + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("find_text_in_document", e) + + # ===== Category 5: Text Formatting ===== + + async def format_text(self, paragraph_index: int, start_pos: int, end_pos: int, + bold: Optional[bool] = None, italic: Optional[bool] = None, + underline: Optional[bool] = None, color: Optional[str] = None, + font_size: Optional[int] = None, font_name: Optional[str] = None) -> str: + """Format text in paragraph""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Formatting text in paragraph {paragraph_index}") + result = await format_tools.format_text( + self.document_path, paragraph_index, start_pos, end_pos, + bold, italic, underline, color, font_size, font_name + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("format_text", e) + + async def search_and_replace(self, find_text: str, replace_text: str) -> str: + """Search and replace text""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Searching and replacing: {find_text}") + result = await content_tools.search_and_replace(self.document_path, find_text, replace_text) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("search_and_replace", e) + + async def delete_paragraph(self, paragraph_index: int) -> str: + """Delete paragraph from document""" + from word_document_server.tools import content_tools + + try: + self._send_notification("word_tool_call", f"Deleting paragraph {paragraph_index}") + result = await content_tools.delete_paragraph(self.document_path, paragraph_index) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("delete_paragraph", e) + + async def create_custom_style(self, style_name: str, bold: Optional[bool] = None, + italic: Optional[bool] = None, font_size: 
Optional[int] = None, + font_name: Optional[str] = None, color: Optional[str] = None, + base_style: Optional[str] = None) -> str: + """Create custom style""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Creating custom style: {style_name}") + result = await format_tools.create_custom_style( + self.document_path, style_name, bold, italic, font_size, font_name, color, base_style + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("create_custom_style", e) + + # ===== Category 6: Table Formatting ===== + + async def format_table(self, table_index: int, has_header_row: Optional[bool] = None, + border_style: Optional[str] = None, shading: Optional[List[List[str]]] = None) -> str: + """Format table""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Formatting table {table_index}") + result = await format_tools.format_table( + self.document_path, table_index, has_header_row, border_style, shading + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("format_table", e) + + async def set_table_cell_shading(self, table_index: int, row_index: int, col_index: int, + fill_color: str, pattern: str = "clear") -> str: + """Set table cell shading""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Setting cell shading for table {table_index}") + result = await format_tools.set_table_cell_shading( + self.document_path, table_index, row_index, col_index, fill_color, pattern + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("set_table_cell_shading", e) + + async def merge_table_cells(self, table_index: int, start_row: int, start_col: int, + end_row: int, end_col: int) -> str: + """Merge table cells""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Merging cells in table {table_index}") + result = await format_tools.merge_table_cells( + self.document_path, table_index, start_row, start_col, end_row, end_col + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("merge_table_cells", e) + + async def set_table_cell_alignment(self, table_index: int, row_index: int, col_index: int, + horizontal: str = "left", vertical: str = "top") -> str: + """Set table cell alignment""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Setting cell alignment for table {table_index}") + result = await format_tools.set_table_cell_alignment( + self.document_path, table_index, row_index, col_index, horizontal, vertical + ) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("set_table_cell_alignment", e) + + async def set_table_column_width(self, table_index: int, col_index: int, width: float, + width_type: str = "points") -> str: + """Set table column width""" + from word_document_server.tools import format_tools + + try: + self._send_notification("word_tool_call", f"Setting column width for table {table_index}") + result = await format_tools.set_table_column_width( + self.document_path, table_index, col_index, width, width_type + ) + 
self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("set_table_column_width", e) + + # ===== Category 7: Comment Extraction ===== + + async def get_all_comments(self) -> str: + """Get all comments from document""" + from word_document_server.tools import comment_tools + + try: + self._send_notification("word_tool_call", "Getting all comments") + result = await comment_tools.get_all_comments(self.document_path) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_all_comments", e) + + async def get_comments_by_author(self, author: str) -> str: + """Get comments by author""" + from word_document_server.tools import comment_tools + + try: + self._send_notification("word_tool_call", f"Getting comments by author: {author}") + result = await comment_tools.get_comments_by_author(self.document_path, author) + self._send_notification("word_tool_result", result) + return result + except Exception as e: + return self._handle_error("get_comments_by_author", e) + + def __repr__(self) -> str: + """String representation""" + return f"WordTools(workspace={self.workspace_dir})" diff --git a/backend/alembic/versions/906ca23fbfe1_add_output_mode_to_works.py b/backend/alembic/versions/906ca23fbfe1_add_output_mode_to_works.py new file mode 100644 index 0000000..af2f8ae --- /dev/null +++ b/backend/alembic/versions/906ca23fbfe1_add_output_mode_to_works.py @@ -0,0 +1,32 @@ +"""add_output_mode_to_works + +Revision ID: 906ca23fbfe1 +Revises: 1db1ebc908e4 +Create Date: 2025-11-30 00:26:33.895695 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '906ca23fbfe1' +down_revision: Union[str, Sequence[str], None] = '1db1ebc908e4' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('works', sa.Column('output_mode', sa.String(length=20), server_default='markdown', nullable=False)) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('works', 'output_mode') + # ### end Alembic commands ### diff --git a/backend/main.py b/backend/main.py index 0560d59..a207b05 100644 --- a/backend/main.py +++ b/backend/main.py @@ -37,9 +37,13 @@ async def lifespan(app: FastAPI): logger.info(f"工作空间目录: {get_workspaces_path()}") logger.info(f"模板目录: {get_templates_path()}") + # Word tools are now directly integrated, no MCP initialization needed + logger.info("Word tools ready (direct integration)") + yield # 关闭时执行 logger.info("正在关闭应用...") + shutdown_async_config() logger.info("异步配置已关闭") @@ -90,6 +94,10 @@ async def async_status(): for router in all_routers: app.include_router(router) +# 设置app实例引用,供WebSocket路由使用 +from routers.chat_routes.chat import set_app_instance +set_app_instance(app) + if __name__ == "__main__": # 优化uvicorn配置,提高并发性能 uvicorn.run( diff --git a/backend/models/models.py b/backend/models/models.py index 7858abb..4fd1fd0 100644 --- a/backend/models/models.py +++ b/backend/models/models.py @@ -42,11 +42,12 @@ class ModelConfig(Base): class PaperTemplate(Base): __tablename__ = "paper_templates" - + id = Column(Integer, primary_key=True, index=True) name = Column(String(100), nullable=False) description = Column(Text) category = Column(String(50)) + output_format = Column(String(10), nullable=False, default="markdown") # 输出格式:md, word, latex file_path = Column(String(500), nullable=False) # 模板文件路径 created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) @@ -71,6 +72,7 @@ class Work(Base): progress = Column(Integer, default=0) # 进度百分比 (0-100) tags = Column(Text) # 标签,JSON格式存储 template_id = Column(Integer, ForeignKey("paper_templates.id"), nullable=True) # 关联的论文模板ID + output_mode = Column(String(20), nullable=False, default="markdown") # 输出模式:markdown, word, latex created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) created_by = Column(Integer, ForeignKey("users.id"), nullable=False) diff --git a/backend/office_word_mcp/.gitignore b/backend/office_word_mcp/.gitignore new file mode 100644 index 0000000..f4fde46 --- /dev/null +++ b/backend/office_word_mcp/.gitignore @@ -0,0 +1,16 @@ +# Project files +.idea +.DS_Store + +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv +.env.example +.idea diff --git a/backend/office_word_mcp/Dockerfile b/backend/office_word_mcp/Dockerfile new file mode 100644 index 0000000..666236c --- /dev/null +++ b/backend/office_word_mcp/Dockerfile @@ -0,0 +1,22 @@ +# Generated by https://smithery.ai. See: https://smithery.ai/docs/build/project-config +# syntax=docker/dockerfile:1 + +# Use official Python runtime +FROM python:3.11-slim + +# Set working directory +WORKDIR /app + +# Install build dependencies +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Copy project files +COPY . /app + +# Install Python dependencies +RUN pip install --no-cache-dir . 
+ +# Default command +ENTRYPOINT ["word_mcp_server"] diff --git a/backend/office_word_mcp/LICENSE b/backend/office_word_mcp/LICENSE new file mode 100644 index 0000000..31323d1 --- /dev/null +++ b/backend/office_word_mcp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 GongRzhe + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/backend/office_word_mcp/README.md b/backend/office_word_mcp/README.md new file mode 100644 index 0000000..20b5d25 --- /dev/null +++ b/backend/office_word_mcp/README.md @@ -0,0 +1,394 @@ +# Office-Word-MCP-Server + +[![smithery badge](https://smithery.ai/badge/@GongRzhe/Office-Word-MCP-Server)](https://smithery.ai/server/@GongRzhe/Office-Word-MCP-Server) + +A Model Context Protocol (MCP) server for creating, reading, and manipulating Microsoft Word documents. This server enables AI assistants to work with Word documents through a standardized interface, providing rich document editing capabilities. + + + Office Word Server MCP server + + +![](https://badge.mcpx.dev?type=server "MCP Server") + +## Overview + +Office-Word-MCP-Server implements the [Model Context Protocol](https://modelcontextprotocol.io/) to expose Word document operations as tools and resources. It serves as a bridge between AI assistants and Microsoft Word documents, allowing for document creation, content addition, formatting, and analysis. + +The server features a modular architecture that separates concerns into core functionality, tools, and utilities, making it highly maintainable and extensible for future enhancements. 
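+
+Because the underlying tool functions are plain async Python functions, they can also be imported and awaited directly, without going through an MCP client. A minimal sketch (the file name, title, and text below are illustrative placeholders, not part of the API):
+
+```python
+import asyncio
+from word_document_server.tools import document_tools, content_tools
+
+async def demo():
+    # Create a document, then add a heading and a body paragraph to it.
+    # Each tool function returns a status string describing the result.
+    print(await document_tools.create_document("demo.docx", title="Demo", author="Example"))
+    print(await content_tools.add_heading("demo.docx", "Introduction", level=1))
+    print(await content_tools.add_paragraph("demo.docx", "Hello from the direct Python API."))
+
+asyncio.run(demo())
+```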
+
+### Example
+
+#### Prompt
+
+![image](https://github.com/user-attachments/assets/f49b0bcc-88b2-4509-bf50-995b9a40038c)
+
+#### Output
+
+![image](https://github.com/user-attachments/assets/ff64385d-3822-4160-8cdf-f8a484ccc01a)
+
+## Features
+
+### Document Management
+
+- Create new Word documents with metadata
+- Extract text and analyze document structure
+- View document properties and statistics
+- List available documents in a directory
+- Create copies of existing documents
+- Merge multiple documents into a single document
+- Convert Word documents to PDF format
+
+### Content Creation
+
+- Add headings with different levels and direct formatting (font, size, bold, italic, borders)
+- Insert paragraphs with optional styling and direct formatting (font, size, bold, italic, color)
+- Create tables with custom data
+- Add images with proportional scaling
+- Insert page breaks
+- Insert bulleted and numbered lists with proper XML formatting
+- Add footnotes and endnotes to documents
+- Convert footnotes to endnotes
+- Customize footnote and endnote styling
+- Create professional table layouts for technical documentation
+- Design callout boxes and formatted content for instructional materials
+- Build structured data tables for business reports with consistent styling
+- Insert content relative to existing text or paragraph indices
+
+### Rich Text Formatting
+
+- Format specific text sections (bold, italic, underline)
+- Change text color and font properties
+- Apply custom styles to text elements
+- Search and replace text throughout documents
+- Individual cell text formatting within tables
+- Multiple formatting combinations for enhanced visual appeal
+- Font customization with family and size control
+- Direct formatting during content creation (paragraphs and headings)
+- Reduce function calls by combining content creation with formatting
+- Add section header borders for visual separation
+
+### Table Formatting
+
+- Format tables with borders and styles
+- Create header rows with distinct formatting
+- Apply cell shading and custom borders
+- Structure tables for better readability
+- Individual cell background shading with color support
+- Alternating row colors for improved readability
+- Enhanced header row highlighting with custom colors
+- Cell text formatting with bold, italic, underline, color, font size, and font family
+- Comprehensive color support with named colors and hex color codes
+- Cell padding management with independent control of all sides
+- Cell alignment (horizontal and vertical positioning)
+- Cell merging (horizontal, vertical, and rectangular areas)
+- Column width management with multiple units (points, percentage, auto-fit)
+- Auto-fit capabilities for dynamic column sizing
+- Professional callout table support with icon cells and styled content
+
+### Advanced Document Manipulation
+
+- Delete paragraphs
+- Insert content relative to specific text or paragraph indices
+- Insert bulleted and numbered lists with proper XML numbering structure
+- Insert headers and paragraphs before or after target locations
+- Create custom document styles
+- Apply consistent formatting throughout documents
+- Format specific ranges of text with detailed control
+- Flexible padding units with support for points and percentage-based measurements
+- Clear, readable table presentation with proper alignment and spacing
+
+### Document Protection
+
+- Add password protection to documents
+- Implement restricted editing with editable sections
+- Add digital signatures
to documents +- Verify document authenticity and integrity + +### Comment Extraction + +- Extract all comments from a document +- Filter comments by author +- Get comments for specific paragraphs +- Access comment metadata (author, date, text) + +## Installation + +### Installing via Smithery + +To install Office Word Document Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@GongRzhe/Office-Word-MCP-Server): + +```bash +npx -y @smithery/cli install @GongRzhe/Office-Word-MCP-Server --client claude +``` + +### Prerequisites + +- Python 3.8 or higher +- pip package manager + +### Basic Installation + +```bash +# Clone the repository +git clone https://github.com/GongRzhe/Office-Word-MCP-Server.git +cd Office-Word-MCP-Server + +# Install dependencies +pip install -r requirements.txt +``` + +### Using the Setup Script + +Alternatively, you can use the provided setup script which handles: + +- Checking prerequisites +- Setting up a virtual environment +- Installing dependencies +- Generating MCP configuration + +```bash +python setup_mcp.py +``` + +## Usage with Claude for Desktop + +### Configuration + +#### Method 1: After Local Installation + +1. After installation, add the server to your Claude for Desktop configuration file: + +```json +{ + "mcpServers": { + "word-document-server": { + "command": "python", + "args": ["/path/to/word_mcp_server.py"] + } + } +} +``` + +#### Method 2: Without Installation (Using uvx) + +1. You can also configure Claude for Desktop to use the server without local installation by using the uvx package manager: + +```json +{ + "mcpServers": { + "word-document-server": { + "command": "uvx", + "args": ["--from", "office-word-mcp-server", "word_mcp_server"] + } + } +} +``` + +2. Configuration file locations: + + - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json` + - Windows: `%APPDATA%\Claude\claude_desktop_config.json` + +3. Restart Claude for Desktop to load the configuration. 
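+
+As an optional sanity check before restarting Claude for Desktop, the Method 2 entry point can be launched by hand; with no transport configured it starts in STDIO mode (the default) and waits for a client on standard input. This assumes the PyPI package name used in the configuration above:
+
+```bash
+# Illustrative manual launch; stop it with Ctrl+C.
+uvx --from office-word-mcp-server word_mcp_server
+```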
+ +### Example Operations + +Once configured, you can ask Claude to perform operations like: + +- "Create a new document called 'report.docx' with a title page" +- "Add a heading and three paragraphs to my document" +- "Add my name in Helvetica 36pt bold at the top of the document" +- "Add a section heading 'Summary' in Helvetica 14pt bold with a bottom border" +- "Add a paragraph in Times New Roman 14pt with italic blue text" +- "Insert a bulleted list after the paragraph containing 'Introduction'" +- "Insert a numbered list with items: 'First step', 'Second step', 'Third step'" +- "Add bullet points after the 'Summary' heading" +- "Insert a 4x4 table with sales data" +- "Format the word 'important' in paragraph 2 to be bold and red" +- "Search and replace all instances of 'old term' with 'new term'" +- "Create a custom style for section headings" +- "Apply formatting to the table in my document" +- "Extract all comments from my document" +- "Show me all comments by John Doe" +- "Get comments for paragraph 3" +- "Make the text in table cell (1,2) bold and blue with 14pt font" +- "Add 10 points of padding to all sides of the header cells" +- "Create a callout table with a blue checkmark icon and white text" +- "Set the first column width to 50 points and auto-fit the remaining columns" +- "Apply alternating row colors to make the table more readable" + + +## API Reference + +### Document Creation and Properties + +```python +create_document(filename, title=None, author=None) +get_document_info(filename) +get_document_text(filename) +get_document_outline(filename) +list_available_documents(directory=".") +copy_document(source_filename, destination_filename=None) +convert_to_pdf(filename, output_filename=None) +``` + +### Content Addition + +```python +add_heading(filename, text, level=1, font_name=None, font_size=None, + bold=None, italic=None, border_bottom=False) +add_paragraph(filename, text, style=None, font_name=None, font_size=None, + bold=None, italic=None, color=None) +add_table(filename, rows, cols, data=None) +add_picture(filename, image_path, width=None) +add_page_break(filename) +``` + +### Advanced Content Manipulation + +```python +# Insert content relative to existing text or paragraph index +insert_header_near_text(filename, target_text=None, header_title=None, + position='after', header_style='Heading 1', + target_paragraph_index=None) + +insert_line_or_paragraph_near_text(filename, target_text=None, line_text=None, + position='after', line_style=None, + target_paragraph_index=None) + +# Insert bulleted or numbered lists with proper XML formatting +insert_numbered_list_near_text(filename, target_text=None, list_items=None, + position='after', target_paragraph_index=None, + bullet_type='bullet') +# bullet_type options: +# 'bullet' - Creates bulleted list with bullets (•) +# 'number' - Creates numbered list (1, 2, 3, ...) 
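+
+# Illustrative call (file name, target text, and list items are placeholders):
+# insert a numbered procedure after the paragraph containing "Methods".
+insert_numbered_list_near_text("report.docx",
+                               target_text="Methods",
+                               list_items=["Collect data", "Run analysis", "Summarize results"],
+                               position='after',
+                               bullet_type='number')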
+``` + +### Content Extraction + +```python +get_document_text(filename) +get_paragraph_text_from_document(filename, paragraph_index) +find_text_in_document(filename, text_to_find, match_case=True, whole_word=False) +``` + +### Text Formatting + +```python +format_text(filename, paragraph_index, start_pos, end_pos, bold=None, + italic=None, underline=None, color=None, font_size=None, font_name=None) +search_and_replace(filename, find_text, replace_text) +delete_paragraph(filename, paragraph_index) +create_custom_style(filename, style_name, bold=None, italic=None, + font_size=None, font_name=None, color=None, base_style=None) +``` + +### Table Formatting + +```python +format_table(filename, table_index, has_header_row=None, + border_style=None, shading=None) +set_table_cell_shading(filename, table_index, row_index, col_index, + fill_color, pattern="clear") +apply_table_alternating_rows(filename, table_index, + color1="FFFFFF", color2="F2F2F2") +highlight_table_header(filename, table_index, + header_color="4472C4", text_color="FFFFFF") + +# Cell merging tools +merge_table_cells(filename, table_index, start_row, start_col, end_row, end_col) +merge_table_cells_horizontal(filename, table_index, row_index, start_col, end_col) +merge_table_cells_vertical(filename, table_index, col_index, start_row, end_row) + +# Cell alignment tools +set_table_cell_alignment(filename, table_index, row_index, col_index, + horizontal="left", vertical="top") +set_table_alignment_all(filename, table_index, + horizontal="left", vertical="top") + +# Cell text formatting tools +format_table_cell_text(filename, table_index, row_index, col_index, + text_content=None, bold=None, italic=None, underline=None, + color=None, font_size=None, font_name=None) + +# Cell padding tools +set_table_cell_padding(filename, table_index, row_index, col_index, + top=None, bottom=None, left=None, right=None, unit="points") + +# Column width management +set_table_column_width(filename, table_index, col_index, width, width_type="points") +set_table_column_widths(filename, table_index, widths, width_type="points") +set_table_width(filename, table_index, width, width_type="points") +auto_fit_table_columns(filename, table_index) +``` + +### Comment Extraction + +```python +get_all_comments(filename) +get_comments_by_author(filename, author) +get_comments_for_paragraph(filename, paragraph_index) +``` + +## Troubleshooting + +### Common Issues + +1. **Missing Styles** + + - Some documents may lack required styles for heading and table operations + - The server will attempt to create missing styles or use direct formatting + - For best results, use templates with standard Word styles + +2. **Permission Issues** + + - Ensure the server has permission to read/write to the document paths + - Use the `copy_document` function to create editable copies of locked documents + - Check file ownership and permissions if operations fail + +3. **Image Insertion Problems** + - Use absolute paths for image files + - Verify image format compatibility (JPEG, PNG recommended) + - Check image file size and permissions + +4. 
**Table Formatting Issues** + + - **Cell index errors**: Ensure row and column indices are within table bounds (0-based indexing) + - **Color format problems**: Use hex colors without '#' prefix (e.g., "FF0000" for red) or standard color names + - **Padding unit confusion**: Specify "points" or "percent" explicitly when setting cell padding + - **Column width conflicts**: Auto-fit may override manual column width settings + - **Text formatting persistence**: Apply cell text formatting after setting cell content for best results + +### Debugging + +Enable detailed logging by setting the environment variable: + +```bash +export MCP_DEBUG=1 # Linux/macOS +set MCP_DEBUG=1 # Windows +``` + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -m 'Add some amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. Open a Pull Request + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Acknowledgments + +- [Model Context Protocol](https://modelcontextprotocol.io/) for the protocol specification +- [python-docx](https://python-docx.readthedocs.io/) for Word document manipulation +- [FastMCP](https://github.com/modelcontextprotocol/python-sdk) for the Python MCP implementation + +--- + +_Note: This server interacts with document files on your system. Always verify that requested operations are appropriate before confirming them in Claude for Desktop or other MCP clients._ diff --git a/backend/office_word_mcp/RENDER_DEPLOYMENT.md b/backend/office_word_mcp/RENDER_DEPLOYMENT.md new file mode 100644 index 0000000..70b1270 --- /dev/null +++ b/backend/office_word_mcp/RENDER_DEPLOYMENT.md @@ -0,0 +1,59 @@ +# Render Deployment Guide + +This document explains how to deploy the Office Word MCP Server on Render. + +## Required Environment Variables + +Set the following environment variables in your Render service: + +### `MCP_TRANSPORT` +- **Value**: `sse` +- **Description**: Sets the transport type to Server-Sent Events (SSE) for HTTP communication +- **Required**: Yes (for Render deployment) + +### `MCP_HOST` +- **Value**: `0.0.0.0` +- **Description**: Binds the server to all network interfaces +- **Required**: No (defaults to 0.0.0.0) + +### `FASTMCP_LOG_LEVEL` +- **Value**: `INFO` +- **Description**: Sets the logging level for FastMCP +- **Required**: No (defaults to INFO) + +## How to Set Environment Variables + +1. Go to your Render dashboard: https://dashboard.render.com +2. Navigate to your service: `Office-Word-MCP-Server` +3. Click on "Environment" in the left sidebar +4. Add the environment variable: + - Key: `MCP_TRANSPORT` + - Value: `sse` +5. Click "Save Changes" + +## Deployment + +After setting the environment variables: +1. Render will automatically redeploy your service +2. The server will start with SSE transport on the port provided by Render +3. 
Access your server at: `https://office-word-mcp-server-bzlp.onrender.com/sse` + +## Health Check Endpoint + +The FastMCP server with SSE transport automatically provides a health check endpoint at: +- `https://your-service.onrender.com/health` + +## Troubleshooting + +### Server exits with status 1 +- **Cause**: Server is running in STDIO mode instead of SSE +- **Fix**: Ensure `MCP_TRANSPORT=sse` is set in environment variables + +### Port binding errors +- **Cause**: Server not using Render's PORT environment variable +- **Fix**: This has been fixed in the latest version of main.py + +### Cannot connect to server +- **Cause**: Health checks failing +- **Fix**: Ensure SSE transport is enabled and server is listening on 0.0.0.0 + diff --git a/backend/office_word_mcp/__init__.py b/backend/office_word_mcp/__init__.py new file mode 100644 index 0000000..d9b86b6 --- /dev/null +++ b/backend/office_word_mcp/__init__.py @@ -0,0 +1,4 @@ +"""Office Word MCP Server package entry point.""" +from word_document_server.main import run_server + +__all__ = ["run_server"] diff --git a/backend/office_word_mcp/mcp-config.json b/backend/office_word_mcp/mcp-config.json new file mode 100644 index 0000000..246a3ef --- /dev/null +++ b/backend/office_word_mcp/mcp-config.json @@ -0,0 +1,14 @@ +{ + "mcpServers": { + "word-document-server": { + "command": "/Users/gongzhe/GitRepos/Office-Word-MCP-Server/.venv/bin/python", + "args": [ + "/Users/gongzhe/GitRepos/Office-Word-MCP-Server/word_mcp_server.py" + ], + "env": { + "PYTHONPATH": "/Users/gongzhe/GitRepos/Office-Word-MCP-Server", + "MCP_TRANSPORT": "stdio" + } + } + } +} \ No newline at end of file diff --git a/backend/office_word_mcp/office_word_mcp_server/__init__.py b/backend/office_word_mcp/office_word_mcp_server/__init__.py new file mode 100644 index 0000000..d71a049 --- /dev/null +++ b/backend/office_word_mcp/office_word_mcp_server/__init__.py @@ -0,0 +1,3 @@ +from word_document_server.main import run_server + +__all__ = ["run_server"] diff --git a/backend/office_word_mcp/pyproject.toml b/backend/office_word_mcp/pyproject.toml new file mode 100644 index 0000000..d19fe97 --- /dev/null +++ b/backend/office_word_mcp/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "office-word-mcp-server" +version = "1.1.10" +description = "MCP server for manipulating Microsoft Word documents" +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name = "GongRzhe", email = "gongrzhe@gmail.com"} +] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +requires-python = ">=3.11" +dependencies = [ + "python-docx>=1.1.2", + "fastmcp>=2.8.1", + "msoffcrypto-tool>=5.4.2", + "docx2pdf>=0.1.8", + "pytest>=8.4.2", +] + +[project.urls] +"Homepage" = "https://github.com/GongRzhe/Office-Word-MCP-Server.git" +"Bug Tracker" = "https://github.com/GongRzhe/Office-Word-MCP-Server.git/issues" + +[tool.hatch.build.targets.wheel] +only-include = [ + "word_document_server", + "office_word_mcp_server", +] +sources = ["."] + +[project.scripts] +word_mcp_server = "word_document_server.main:run_server" diff --git a/backend/office_word_mcp/requirements.txt b/backend/office_word_mcp/requirements.txt new file mode 100644 index 0000000..7077dbb --- /dev/null +++ b/backend/office_word_mcp/requirements.txt @@ -0,0 +1,5 @@ +fastmcp +python-docx +msoffcrypto-tool +docx2pdf +python-dotenv \ No newline at end of file diff --git 
a/backend/office_word_mcp/setup_mcp.py b/backend/office_word_mcp/setup_mcp.py new file mode 100644 index 0000000..f0caa5c --- /dev/null +++ b/backend/office_word_mcp/setup_mcp.py @@ -0,0 +1,524 @@ +# Import necessary Python standard libraries +import os +import json +import subprocess +import sys +import shutil +import platform + +def check_prerequisites(): + """ + Check if necessary prerequisites are installed + + Returns: + tuple: (python_ok, uv_installed, uvx_installed, word_server_installed) + """ + # Check Python version + python_version = sys.version_info + python_ok = python_version.major >= 3 and python_version.minor >= 8 + + # Check if uv/uvx is installed + uv_installed = shutil.which("uv") is not None + uvx_installed = shutil.which("uvx") is not None + + # Check if word-document-server is already installed via pip + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "show", "word-document-server"], + capture_output=True, + text=True, + check=False + ) + word_server_installed = result.returncode == 0 + except Exception: + word_server_installed = False + + return (python_ok, uv_installed, uvx_installed, word_server_installed) + +def get_transport_choice(): + """ + Ask user to choose transport type + + Returns: + dict: Transport configuration + """ + print("\nTransport Configuration:") + print("1. STDIO (default, local execution)") + print("2. Streamable HTTP (modern, recommended for web deployment)") + print("3. SSE (Server-Sent Events, for compatibility)") + + choice = input("\nSelect transport type (1-3, default: 1): ").strip() + + if choice == "2": + host = input("Host (default: 127.0.0.1): ").strip() or "127.0.0.1" + port = input("Port (default: 8000): ").strip() or "8000" + path = input("Path (default: /mcp): ").strip() or "/mcp" + + return { + "transport": "streamable-http", + "host": host, + "port": port, + "path": path + } + elif choice == "3": + host = input("Host (default: 127.0.0.1): ").strip() or "127.0.0.1" + port = input("Port (default: 8000): ").strip() or "8000" + sse_path = input("SSE Path (default: /sse): ").strip() or "/sse" + + return { + "transport": "sse", + "host": host, + "port": port, + "sse_path": sse_path + } + else: + # Default to stdio + return { + "transport": "stdio" + } + +def setup_venv(): + """ + Function to set up Python virtual environment + + Features: + - Checks if Python version meets requirements (3.8+) + - Creates Python virtual environment (if it doesn't exist) + - Installs required dependencies in the newly created virtual environment + + No parameters required + + Returns: Path to Python interpreter in the virtual environment + """ + # Check Python version + python_version = sys.version_info + if python_version.major < 3 or (python_version.major == 3 and python_version.minor < 8): + print("Error: Python 3.8 or higher is required.") + sys.exit(1) + + # Get absolute path of the directory containing the current script + base_path = os.path.abspath(os.path.dirname(__file__)) + # Set virtual environment directory path + venv_path = os.path.join(base_path, '.venv') + + # Determine pip and python executable paths based on operating system + is_windows = platform.system() == "Windows" + if is_windows: + pip_path = os.path.join(venv_path, 'Scripts', 'pip.exe') + python_path = os.path.join(venv_path, 'Scripts', 'python.exe') + else: + pip_path = os.path.join(venv_path, 'bin', 'pip') + python_path = os.path.join(venv_path, 'bin', 'python') + + # Check if virtual environment already exists and is valid + venv_exists = 
os.path.exists(venv_path) + pip_exists = os.path.exists(pip_path) + + if not venv_exists or not pip_exists: + print("Creating new virtual environment...") + # Remove existing venv if it's invalid + if venv_exists and not pip_exists: + print("Existing virtual environment is incomplete, recreating it...") + try: + shutil.rmtree(venv_path) + except Exception as e: + print(f"Warning: Could not remove existing virtual environment: {e}") + print("Please delete the .venv directory manually and try again.") + sys.exit(1) + + # Create virtual environment + try: + subprocess.run([sys.executable, '-m', 'venv', venv_path], check=True) + print("Virtual environment created successfully!") + except subprocess.CalledProcessError as e: + print(f"Error creating virtual environment: {e}") + sys.exit(1) + else: + print("Valid virtual environment already exists.") + + # Double-check that pip exists after creating venv + if not os.path.exists(pip_path): + print(f"Error: pip executable not found at {pip_path}") + print("Try creating the virtual environment manually with: python -m venv .venv") + sys.exit(1) + + # Install or update dependencies + print("\nInstalling requirements...") + try: + # Install FastMCP package (standalone library) + subprocess.run([pip_path, 'install', 'fastmcp'], check=True) + # Install python-docx package + subprocess.run([pip_path, 'install', 'python-docx'], check=True) + + # Also install dependencies from requirements.txt if it exists + requirements_path = os.path.join(base_path, 'requirements.txt') + if os.path.exists(requirements_path): + subprocess.run([pip_path, 'install', '-r', requirements_path], check=True) + + print("Requirements installed successfully!") + except subprocess.CalledProcessError as e: + print(f"Error installing requirements: {e}") + sys.exit(1) + except FileNotFoundError: + print(f"Error: Could not execute {pip_path}") + print("Try activating the virtual environment manually and installing requirements:") + if is_windows: + print(f".venv\\Scripts\\activate") + else: + print("source .venv/bin/activate") + print("pip install mcp[cli] python-docx") + sys.exit(1) + + return python_path + +def generate_mcp_config_local(python_path, transport_config): + """ + Generate MCP configuration for locally installed word-document-server + + Parameters: + - python_path: Path to Python interpreter in the virtual environment + - transport_config: Transport configuration dictionary + + Returns: Path to the generated config file + """ + # Get absolute path of the directory containing the current script + base_path = os.path.abspath(os.path.dirname(__file__)) + + # Path to Word Document Server script + server_script_path = os.path.join(base_path, 'word_mcp_server.py') + + # Build environment variables + env = { + "PYTHONPATH": base_path, + "MCP_TRANSPORT": transport_config["transport"] + } + + # Add transport-specific environment variables + if transport_config["transport"] == "streamable-http": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_PATH": transport_config["path"] + }) + elif transport_config["transport"] == "sse": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_SSE_PATH": transport_config["sse_path"] + }) + # For stdio transport, no additional environment variables needed + + # Create MCP configuration dictionary + config = { + "mcpServers": { + "word-document-server": { + "command": python_path, + "args": [server_script_path], + "env": env + } + } + } + + # Save 
configuration to JSON file + config_path = os.path.join(base_path, 'mcp-config.json') + with open(config_path, 'w') as f: + json.dump(config, f, indent=2) + + return config_path + +def generate_mcp_config_uvx(transport_config): + """ + Generate MCP configuration for PyPI-installed word-document-server using UVX + + Parameters: + - transport_config: Transport configuration dictionary + + Returns: Path to the generated config file + """ + # Get absolute path of the directory containing the current script + base_path = os.path.abspath(os.path.dirname(__file__)) + + # Build environment variables + env = { + "MCP_TRANSPORT": transport_config["transport"] + } + + # Add transport-specific environment variables + if transport_config["transport"] == "streamable-http": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_PATH": transport_config["path"] + }) + elif transport_config["transport"] == "sse": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_SSE_PATH": transport_config["sse_path"] + }) + # For stdio transport, no additional environment variables needed + + # Create MCP configuration dictionary + config = { + "mcpServers": { + "word-document-server": { + "command": "uvx", + "args": ["--from", "word-mcp-server", "word_mcp_server"], + "env": env + } + } + } + + # Save configuration to JSON file + config_path = os.path.join(base_path, 'mcp-config.json') + with open(config_path, 'w') as f: + json.dump(config, f, indent=2) + + return config_path + +def generate_mcp_config_module(transport_config): + """ + Generate MCP configuration for PyPI-installed word-document-server using Python module + + Parameters: + - transport_config: Transport configuration dictionary + + Returns: Path to the generated config file + """ + # Get absolute path of the directory containing the current script + base_path = os.path.abspath(os.path.dirname(__file__)) + + # Build environment variables + env = { + "MCP_TRANSPORT": transport_config["transport"] + } + + # Add transport-specific environment variables + if transport_config["transport"] == "streamable-http": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_PATH": transport_config["path"] + }) + elif transport_config["transport"] == "sse": + env.update({ + "MCP_HOST": transport_config["host"], + "MCP_PORT": transport_config["port"], + "MCP_SSE_PATH": transport_config["sse_path"] + }) + + + # Create MCP configuration dictionary + config = { + "mcpServers": { + "word-document-server": { + "command": sys.executable, + "args": ["-m", "word_document_server"], + "env": env + } + } + } + + # Save configuration to JSON file + config_path = os.path.join(base_path, 'mcp-config.json') + with open(config_path, 'w') as f: + json.dump(config, f, indent=2) + + return config_path + +def install_from_pypi(): + """ + Install word-document-server from PyPI + + Returns: True if successful, False otherwise + """ + print("\nInstalling word-document-server from PyPI...") + try: + subprocess.run([sys.executable, "-m", "pip", "install", "word-mcp-server"], check=True) + print("word-mcp-server successfully installed from PyPI!") + return True + except subprocess.CalledProcessError: + print("Failed to install word-mcp-server from PyPI.") + return False + +def print_config_instructions(config_path, transport_config): + """ + Print instructions for using the generated config + + Parameters: + - config_path: Path to the generated config file + 
- transport_config: Transport configuration dictionary + """ + print(f"\nMCP configuration has been written to: {config_path}") + + with open(config_path, 'r') as f: + config = json.load(f) + + print("\nMCP configuration for Claude Desktop:") + print(json.dumps(config, indent=2)) + + # Print transport-specific instructions + if transport_config["transport"] == "streamable-http": + print(f"\n📡 Streamable HTTP Transport Configuration:") + print(f" Server will be accessible at: http://{transport_config['host']}:{transport_config['port']}{transport_config['path']}") + print(f" \n To test the server manually:") + print(f" curl -X POST http://{transport_config['host']}:{transport_config['port']}{transport_config['path']}") + + elif transport_config["transport"] == "sse": + print(f"\n📡 SSE Transport Configuration:") + print(f" Server will be accessible at: http://{transport_config['host']}:{transport_config['port']}{transport_config['sse_path']}") + print(f" \n To test the server manually:") + print(f" curl http://{transport_config['host']}:{transport_config['port']}{transport_config['sse_path']}") + + else: # stdio + print(f"\n💻 STDIO Transport Configuration:") + print(f" Server runs locally with standard input/output") + + # Provide instructions for adding configuration to Claude Desktop configuration file + if platform.system() == "Windows": + claude_config_path = os.path.expandvars("%APPDATA%\\Claude\\claude_desktop_config.json") + else: # macOS + claude_config_path = os.path.expanduser("~/Library/Application Support/Claude/claude_desktop_config.json") + + print(f"\nTo use with Claude Desktop, merge this configuration into: {claude_config_path}") + +def create_package_structure(): + """ + Create necessary package structure and environment files + """ + # Get absolute path of the directory containing the current script + base_path = os.path.abspath(os.path.dirname(__file__)) + + # Create __init__.py file + init_path = os.path.join(base_path, '__init__.py') + if not os.path.exists(init_path): + with open(init_path, 'w') as f: + f.write('# Word Document MCP Server') + print(f"Created __init__.py at: {init_path}") + + # Create requirements.txt file + requirements_path = os.path.join(base_path, 'requirements.txt') + if not os.path.exists(requirements_path): + with open(requirements_path, 'w') as f: + f.write('fastmcp\npython-docx\nmsoffcrypto-tool\ndocx2pdf\nhttpx\ncryptography\n') + print(f"Created requirements.txt at: {requirements_path}") + + # Create .env.example file + env_example_path = os.path.join(base_path, '.env.example') + if not os.path.exists(env_example_path): + with open(env_example_path, 'w') as f: + f.write("""# Transport Configuration +# Valid options: stdio, streamable-http, sse +MCP_TRANSPORT=stdio + +# HTTP/SSE Configuration (when not using stdio) +MCP_HOST=127.0.0.1 +MCP_PORT=8000 + +# Streamable HTTP specific +MCP_PATH=/mcp + +# SSE specific +MCP_SSE_PATH=/sse + +""") + print(f"Created .env.example at: {env_example_path}") + +# Main execution entry point +if __name__ == '__main__': + # Check prerequisites + python_ok, uv_installed, uvx_installed, word_server_installed = check_prerequisites() + + if not python_ok: + print("Error: Python 3.8 or higher is required.") + sys.exit(1) + + print("Word Document MCP Server Setup (Multi-Transport)") + print("===============================================\n") + + # Create necessary files + create_package_structure() + + # Get transport configuration + transport_config = get_transport_choice() + + # If word-document-server is already 
installed, offer config options + if word_server_installed: + print("word-document-server is already installed via pip.") + + if uvx_installed: + print("\nOptions:") + print("1. Generate MCP config for UVX (recommended)") + print("2. Generate MCP config for Python module") + print("3. Set up local development environment") + + choice = input("\nEnter your choice (1-3): ") + + if choice == "1": + config_path = generate_mcp_config_uvx(transport_config) + print_config_instructions(config_path, transport_config) + elif choice == "2": + config_path = generate_mcp_config_module(transport_config) + print_config_instructions(config_path, transport_config) + elif choice == "3": + python_path = setup_venv() + config_path = generate_mcp_config_local(python_path, transport_config) + print_config_instructions(config_path, transport_config) + else: + print("Invalid choice. Exiting.") + sys.exit(1) + else: + print("\nOptions:") + print("1. Generate MCP config for Python module") + print("2. Set up local development environment") + + choice = input("\nEnter your choice (1-2): ") + + if choice == "1": + config_path = generate_mcp_config_module(transport_config) + print_config_instructions(config_path, transport_config) + elif choice == "2": + python_path = setup_venv() + config_path = generate_mcp_config_local(python_path, transport_config) + print_config_instructions(config_path, transport_config) + else: + print("Invalid choice. Exiting.") + sys.exit(1) + + # If word-document-server is not installed, offer installation options + else: + print("word-document-server is not installed.") + + print("\nOptions:") + print("1. Install from PyPI (recommended)") + print("2. Set up local development environment") + + choice = input("\nEnter your choice (1-2): ") + + if choice == "1": + if install_from_pypi(): + if uvx_installed: + print("\nNow generating MCP config for UVX...") + config_path = generate_mcp_config_uvx(transport_config) + else: + print("\nUVX not found. Generating MCP config for Python module...") + config_path = generate_mcp_config_module(transport_config) + print_config_instructions(config_path, transport_config) + elif choice == "2": + python_path = setup_venv() + config_path = generate_mcp_config_local(python_path, transport_config) + print_config_instructions(config_path, transport_config) + else: + print("Invalid choice. Exiting.") + sys.exit(1) + + print("\nSetup complete! You can now use the Word Document MCP server with compatible clients like Claude Desktop.") + print("\nTransport Summary:") + print(f" - Transport: {transport_config['transport']}") + if transport_config['transport'] != 'stdio': + print(f" - Host: {transport_config.get('host', 'N/A')}") + print(f" - Port: {transport_config.get('port', 'N/A')}") + if transport_config['transport'] == 'streamable-http': + print(f" - Path: {transport_config.get('path', 'N/A')}") + elif transport_config['transport'] == 'sse': + print(f" - SSE Path: {transport_config.get('sse_path', 'N/A')}") \ No newline at end of file diff --git a/backend/office_word_mcp/smithery.yaml b/backend/office_word_mcp/smithery.yaml new file mode 100644 index 0000000..4971951 --- /dev/null +++ b/backend/office_word_mcp/smithery.yaml @@ -0,0 +1,13 @@ +# Smithery configuration file: https://smithery.ai/docs/build/project-config + +startCommand: + type: stdio + configSchema: + # JSON Schema defining the configuration options for the MCP. 
+ type: object + description: No configuration options required + commandFunction: + # A JS function that produces the CLI command based on the given config to start the MCP on stdio. + |- + (config) => ({command:'word_mcp_server', args:[]}) + exampleConfig: {} diff --git a/backend/office_word_mcp/test_formatting.py b/backend/office_word_mcp/test_formatting.py new file mode 100644 index 0000000..f29f932 --- /dev/null +++ b/backend/office_word_mcp/test_formatting.py @@ -0,0 +1,108 @@ +""" +Test script for add_paragraph and add_heading formatting parameters. +""" +import asyncio +from docx import Document +from word_document_server.tools.content_tools import add_paragraph, add_heading +from word_document_server.tools.document_tools import create_document + + +async def test_formatting(): + """Test the new formatting parameters.""" + test_doc = 'test_formatting.docx' + + # Create test document + print("Creating test document...") + await create_document(test_doc, title="Formatting Test", author="Test Script") + + # Test 1: Name with large font + print("Test 1: Adding name with large Helvetica 36pt bold...") + result = await add_paragraph( + test_doc, + "JAMES MEHORTER", + font_name="Helvetica", + font_size=36, + bold=True + ) + print(f" Result: {result}") + + # Test 2: Title line + print("Test 2: Adding title with Helvetica 14pt...") + result = await add_paragraph( + test_doc, + "Principal Software Engineer | Technical Team Lead", + font_name="Helvetica", + font_size=14 + ) + print(f" Result: {result}") + + # Test 3: Section header with border + print("Test 3: Adding section header with border...") + result = await add_heading( + test_doc, + "PROFESSIONAL SUMMARY", + level=2, + font_name="Helvetica", + font_size=14, + bold=True, + border_bottom=True + ) + print(f" Result: {result}") + + # Test 4: Body text in Times New Roman + print("Test 4: Adding body text in Times New Roman 14pt...") + result = await add_paragraph( + test_doc, + "This is body text that should be in Times New Roman at 14pt. " + "It demonstrates the ability to apply different fonts to different paragraphs.", + font_name="Times New Roman", + font_size=14 + ) + print(f" Result: {result}") + + # Test 5: Another section header + print("Test 5: Adding another section header with border...") + result = await add_heading( + test_doc, + "SKILLS", + level=2, + font_name="Helvetica", + font_size=14, + bold=True, + border_bottom=True + ) + print(f" Result: {result}") + + # Test 6: Italic text with color + print("Test 6: Adding italic text with color...") + result = await add_paragraph( + test_doc, + "This text is italic and colored blue.", + font_name="Arial", + font_size=12, + italic=True, + color="0000FF" + ) + print(f" Result: {result}") + + print(f"\n✅ Test document created: {test_doc}") + + # Verify formatting + print("\nVerifying formatting...") + verify_doc = Document(test_doc) + for i, para in enumerate(verify_doc.paragraphs): + if para.runs: + run = para.runs[0] + text_preview = para.text[:50] + "..." 
if len(para.text) > 50 else para.text + print(f"\nParagraph {i}: {text_preview}") + print(f" Font: {run.font.name}") + print(f" Size: {run.font.size}") + print(f" Bold: {run.font.bold}") + print(f" Italic: {run.font.italic}") + + print("\n✅ All tests completed successfully!") + print(f"Open {test_doc} in Word to verify the formatting visually.") + + +if __name__ == "__main__": + asyncio.run(test_formatting()) diff --git a/backend/office_word_mcp/tests/test_convert_to_pdf.py b/backend/office_word_mcp/tests/test_convert_to_pdf.py new file mode 100644 index 0000000..c692fc8 --- /dev/null +++ b/backend/office_word_mcp/tests/test_convert_to_pdf.py @@ -0,0 +1,84 @@ +import asyncio +from pathlib import Path + +import pytest +from docx import Document + +# Target for testing: convert_to_pdf (async function) +from word_document_server.tools.extended_document_tools import convert_to_pdf + + +def _make_sample_docx(path: Path) -> None: + """Generates a simple .docx file in a temporary directory.""" + doc = Document() + doc.add_heading("Conversion Test Document", level=1) + doc.add_paragraph("This is a test paragraph for PDF conversion. Contains ASCII too.") + doc.add_paragraph("Second paragraph: Contains special characters and spaces to cover path/content edge cases.") + doc.save(path) + + +def test_convert_to_pdf_with_temp_docx(tmp_path: Path): + """ + End-to-end test: Create a temporary .docx -> call convert_to_pdf -> validate the PDF output. + + Notes: + - On Linux/macOS, it first tries LibreOffice (soffice/libreoffice), + and falls back to docx2pdf on failure (requires Microsoft Word). + - If these tools are missing or the command is unavailable, the test is skipped with a reason. + """ + # 1) Generate a docx file with spaces in its name in the temp directory + src_doc = tmp_path / "sample document with spaces.docx" + _make_sample_docx(src_doc) + + # 2) Define the output PDF path (also in the temp directory) + out_pdf = tmp_path / "converted output.pdf" + + # 3) Run the asynchronous function under test + result_msg = asyncio.run(convert_to_pdf(str(src_doc), output_filename=str(out_pdf))) + + # 4) Success condition: the return message contains success keywords, or the target PDF exists + success_keywords = ["successfully converted", "converted to PDF"] + success = any(k.lower() in result_msg.lower() for k in success_keywords) or out_pdf.exists() + + if not success: + # When LibreOffice or Microsoft Word is not installed, the tool returns a hint. + # In this case, skip the test instead of failing. + pytest.skip(f"PDF conversion tool unavailable or conversion failed: {result_msg}") + + # 5) Assert: The PDF file was generated and is not empty + # Some environments (especially docx2pdf) might ignore the exact output filename + # and just generate a PDF with the same name as the source in the output or source directory, + # so we check multiple possible locations. 
+ candidates = [ + out_pdf, + # Common: A PDF with the same name as the source file in the output directory + out_pdf.parent / f"{src_doc.stem}.pdf", + # Fallback: A PDF in the same directory as the source file + src_doc.with_suffix(".pdf"), + ] + + # If none of the above paths exist, search for any newly generated PDF in the temp directory + found = None + for p in candidates: + if p.exists(): + found = p + break + if not found: + pdfs = sorted(tmp_path.glob("*.pdf"), key=lambda p: p.stat().st_mtime, reverse=True) + if pdfs: + found = pdfs[0] + + if not found: + # If the tool returns success but the output can't be found, + # treat it as an environment/tooling difference and skip instead of failing. + pytest.skip(f"Could not find the generated PDF. Function output: {result_msg}") + + assert found.exists(), f"Generated PDF not found: {found}, function output: {result_msg}" + assert found.stat().st_size > 0, f"The generated PDF file is empty: {found}" + + +if __name__ == "__main__": + # Allow running this file directly for quick verification: + # python tests/test_convert_to_pdf.py + import sys + sys.exit(pytest.main([__file__, "-q"])) diff --git a/backend/office_word_mcp/uv.lock b/backend/office_word_mcp/uv.lock new file mode 100644 index 0000000..baa64d4 --- /dev/null +++ b/backend/office_word_mcp/uv.lock @@ -0,0 +1,639 @@ +version = 1 +revision = 2 +requires-python = ">=3.11" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload_time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload_time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload_time = "2025-03-17T00:02:54.77Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916, upload_time = "2025-03-17T00:02:52.713Z" }, +] + +[[package]] +name = "appscript" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/84/5c0aec149c6a002d46af17e3d2c5efbe5e8258ef7574cfc17cd1b26c726e/appscript-1.3.0.tar.gz", hash = "sha256:80943118bc97f9f78a8aa55f85565752ed4d82c7893427d7d9ebfdf401c12b2c", size = 295205, upload_time = "2024-10-13T12:34:00.57Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/99/64/db8dddd3c561fe5085e5b3a60419bfb560f07e1ca0dc1c7027cbaa5fb582/appscript-1.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:76a3507b27c78bf79af83a5f6fac49664b53d530d75632c023e53df1bd350caf", size = 99353, upload_time = "2024-10-13T12:33:51.589Z" }, + { url = "https://files.pythonhosted.org/packages/40/ee/4e0dee488d3dd35aab03c2f6ecb6dc0161fad200077cca68afe041079d2b/appscript-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:94ca097d672de5b8cfc82b4179b00cabd21588dbfd939347cf14a9e81955b2d5", size = 85401, upload_time = "2024-10-13T12:33:52.46Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e2/05fd221bea1d309211569130a1a8f0966eb56394e46df068a69df0f29d61/appscript-1.3.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c0b5c160908de728072d4a0ae57f286608c5d7692bfccbc6eadde868aac2742b", size = 99575, upload_time = "2024-10-13T12:33:53.629Z" }, + { url = "https://files.pythonhosted.org/packages/df/2f/3ee4190ce97b0b39df58184210d3baaa5fe59ae0972e63c2c85f122ca887/appscript-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2a287b81030c81017127d4fb1c24729623576c50d2ff41694476b9af3ce0a97", size = 85496, upload_time = "2024-10-13T12:33:55.108Z" }, + { url = "https://files.pythonhosted.org/packages/92/5a/3b642e3e904fb37d45e40bb07b4362979160bdecb0d37aa74f2506b1a47e/appscript-1.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:13094640e2694b888827d4e133f33dad1e08c9d7102b447c3cc8a73246fdab40", size = 99574, upload_time = "2024-10-13T12:33:56.317Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bc/d8558bec737e02a9c404fb3b985b8636c313bb65a176375d551cb839e876/appscript-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e7b4760105810e9b1ecd5b40aba7617e0a047346fb94ee4370e9d37e4383b78d", size = 85503, upload_time = "2024-10-13T12:33:57.54Z" }, +] + +[[package]] +name = "certifi" +version = "2025.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577, upload_time = "2025-01-31T02:16:47.166Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393, upload_time = "2025-01-31T02:16:45.015Z" }, +] + +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload_time = "2024-09-04T20:45:21.852Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264, upload_time = "2024-09-04T20:43:51.124Z" }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651, upload_time = "2024-09-04T20:43:52.872Z" }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259, upload_time = "2024-09-04T20:43:56.123Z" }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200, upload_time = "2024-09-04T20:43:57.891Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235, upload_time = "2024-09-04T20:44:00.18Z" }, + { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721, upload_time = "2024-09-04T20:44:01.585Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242, upload_time = "2024-09-04T20:44:03.467Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999, upload_time = "2024-09-04T20:44:05.023Z" }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242, upload_time = "2024-09-04T20:44:06.444Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604, upload_time = "2024-09-04T20:44:08.206Z" }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727, upload_time = "2024-09-04T20:44:09.481Z" }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400, upload_time = "2024-09-04T20:44:10.873Z" }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload_time = "2024-09-04T20:44:12.232Z" }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload_time = "2024-09-04T20:44:13.739Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload_time = "2024-09-04T20:44:15.231Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload_time = "2024-09-04T20:44:17.188Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload_time = "2024-09-04T20:44:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload_time = "2024-09-04T20:44:20.248Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload_time = "2024-09-04T20:44:21.673Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload_time = "2024-09-04T20:44:23.245Z" }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload_time = "2024-09-04T20:44:24.757Z" }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload_time = "2024-09-04T20:44:26.208Z" }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload_time = "2024-09-04T20:44:27.578Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989, upload_time = "2024-09-04T20:44:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802, upload_time = "2024-09-04T20:44:30.289Z" }, + { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792, upload_time = "2024-09-04T20:44:32.01Z" }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893, upload_time = "2024-09-04T20:44:33.606Z" }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810, upload_time = "2024-09-04T20:44:35.191Z" }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200, upload_time = "2024-09-04T20:44:36.743Z" }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447, upload_time = "2024-09-04T20:44:38.492Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358, upload_time = "2024-09-04T20:44:40.046Z" }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469, upload_time = "2024-09-04T20:44:41.616Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload_time = "2024-09-04T20:44:43.733Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload_time = "2024-09-04T20:44:45.309Z" }, +] + +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload_time = "2024-12-21T18:38:44.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload_time = "2024-12-21T18:38:41.666Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload_time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload_time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cryptography" +version = "44.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/25/4ce80c78963834b8a9fd1cc1266be5ed8d1840785c0f2e1b73b8d128d505/cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0", size = 710807, upload_time = "2025-03-02T00:01:37.692Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361, upload_time = "2025-03-02T00:00:06.528Z" }, + { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350, upload_time = "2025-03-02T00:00:09.537Z" }, + { url = "https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572, upload_time = "2025-03-02T00:00:12.03Z" }, + { url = "https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124, upload_time = "2025-03-02T00:00:14.518Z" }, + { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122, upload_time = "2025-03-02T00:00:17.212Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831, upload_time = "2025-03-02T00:00:19.696Z" }, + { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583, upload_time = "2025-03-02T00:00:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753, upload_time = "2025-03-02T00:00:25.038Z" }, + { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550, upload_time = "2025-03-02T00:00:26.929Z" }, + { url = "https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367, upload_time = "2025-03-02T00:00:28.735Z" }, + { url = "https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843, upload_time = "2025-03-02T00:00:30.592Z" }, + { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057, upload_time = "2025-03-02T00:00:33.393Z" }, + { url = "https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789, upload_time = "2025-03-02T00:00:36.009Z" }, + { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919, upload_time = "2025-03-02T00:00:38.581Z" }, + { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812, upload_time = "2025-03-02T00:00:42.934Z" }, + { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571, upload_time = "2025-03-02T00:00:46.026Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832, upload_time = "2025-03-02T00:00:48.647Z" }, + { url = "https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719, upload_time = "2025-03-02T00:00:51.397Z" }, + { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852, upload_time = "2025-03-02T00:00:53.317Z" }, + { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906, upload_time = "2025-03-02T00:00:56.49Z" }, + { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572, upload_time = "2025-03-02T00:00:59.995Z" }, + { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631, upload_time = "2025-03-02T00:01:01.623Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792, upload_time = "2025-03-02T00:01:04.133Z" }, + { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957, upload_time = "2025-03-02T00:01:06.987Z" }, + { url = "https://files.pythonhosted.org/packages/d6/d7/f30e75a6aa7d0f65031886fa4a1485c2fbfe25a1896953920f6a9cfe2d3b/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d", size = 3887513, upload_time = "2025-03-02T00:01:22.911Z" }, + { url = "https://files.pythonhosted.org/packages/9c/b4/7a494ce1032323ca9db9a3661894c66e0d7142ad2079a4249303402d8c71/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471", size = 4107432, upload_time = "2025-03-02T00:01:24.701Z" }, + { url = "https://files.pythonhosted.org/packages/45/f8/6b3ec0bc56123b344a8d2b3264a325646d2dcdbdd9848b5e6f3d37db90b3/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615", size = 3891421, upload_time = "2025-03-02T00:01:26.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/ff/f3b4b2d007c2a646b0f69440ab06224f9cf37a977a72cdb7b50632174e8a/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390", size = 4107081, upload_time = "2025-03-02T00:01:28.938Z" }, +] + +[[package]] +name = "docx2pdf" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appscript", marker = "sys_platform == 'darwin'" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ab/5d/112531fff53cf60513e14fa1707755c874d47880ec4de7b2235302ad19a0/docx2pdf-0.1.8.tar.gz", hash = "sha256:6d2c20f9ad36eec75f4da017dc7a97622946954a6124ca0b11772875fa86fbed", size = 6483, upload_time = "2021-12-11T16:56:36.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/4f/1155781308281e67f80b829738a29e5354e03664c62311f753056afc873b/docx2pdf-0.1.8-py3-none-any.whl", hash = "sha256:00be1401fd486640314e993423a0a1cbdbc21142186f68549d962d505b2e8a12", size = 6741, upload_time = "2021-12-11T16:56:35.163Z" }, +] + +[[package]] +name = "h11" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418, upload_time = "2022-09-25T15:40:01.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259, upload_time = "2022-09-25T15:39:59.68Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/45/ad3e1b4d448f22c0cff4f5692f5ed0666658578e358b8d58a19846048059/httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad", size = 85385, upload_time = "2025-04-11T14:42:46.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/8d/f052b1e336bb2c1fc7ed1aaed898aa570c0b61a09707b108979d9fc6e308/httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be", size = 78732, upload_time = "2025-04-11T14:42:44.896Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload_time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload_time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/4c/60/8f4281fa9bbf3c8034fd54c0e7412e66edbab6bc74c4996bd616f8d0406e/httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721", size = 12624, upload_time = "2023-12-22T08:01:21.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/9b/a181f281f65d776426002f330c31849b86b31fc9d848db62e16f03ff739f/httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f", size = 7819, upload_time = "2023-12-22T08:01:19.89Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload_time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload_time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "lxml" +version = "5.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479, upload_time = "2025-04-23T01:50:29.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/2d/67693cc8a605a12e5975380d7ff83020dcc759351b5a066e1cced04f797b/lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9", size = 8083240, upload_time = "2025-04-23T01:45:18.566Z" }, + { url = "https://files.pythonhosted.org/packages/73/53/b5a05ab300a808b72e848efd152fe9c022c0181b0a70b8bca1199f1bed26/lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7", size = 4387685, upload_time = "2025-04-23T01:45:21.387Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cb/1a3879c5f512bdcd32995c301886fe082b2edd83c87d41b6d42d89b4ea4d/lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa", size = 4991164, upload_time = "2025-04-23T01:45:23.849Z" }, + { url = "https://files.pythonhosted.org/packages/f9/94/bbc66e42559f9d04857071e3b3d0c9abd88579367fd2588a4042f641f57e/lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df", size = 4746206, upload_time = "2025-04-23T01:45:26.361Z" }, + { url = "https://files.pythonhosted.org/packages/66/95/34b0679bee435da2d7cae895731700e519a8dfcab499c21662ebe671603e/lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e", size = 5342144, upload_time = "2025-04-23T01:45:28.939Z" }, + { url = "https://files.pythonhosted.org/packages/e0/5d/abfcc6ab2fa0be72b2ba938abdae1f7cad4c632f8d552683ea295d55adfb/lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44", size = 4825124, upload_time = "2025-04-23T01:45:31.361Z" }, + { url = "https://files.pythonhosted.org/packages/5a/78/6bd33186c8863b36e084f294fc0a5e5eefe77af95f0663ef33809cc1c8aa/lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba", size = 4876520, upload_time = "2025-04-23T01:45:34.191Z" }, + { url = "https://files.pythonhosted.org/packages/3b/74/4d7ad4839bd0fc64e3d12da74fc9a193febb0fae0ba6ebd5149d4c23176a/lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba", size = 4765016, upload_time = "2025-04-23T01:45:36.7Z" }, + { url = "https://files.pythonhosted.org/packages/24/0d/0a98ed1f2471911dadfc541003ac6dd6879fc87b15e1143743ca20f3e973/lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c", size = 5362884, upload_time = "2025-04-23T01:45:39.291Z" }, + { url = "https://files.pythonhosted.org/packages/48/de/d4f7e4c39740a6610f0f6959052b547478107967362e8424e1163ec37ae8/lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8", size = 4902690, upload_time = "2025-04-23T01:45:42.386Z" }, + { url = "https://files.pythonhosted.org/packages/07/8c/61763abd242af84f355ca4ef1ee096d3c1b7514819564cce70fd18c22e9a/lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86", size = 4944418, upload_time = "2025-04-23T01:45:46.051Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c5/6d7e3b63e7e282619193961a570c0a4c8a57fe820f07ca3fe2f6bd86608a/lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056", size = 4827092, upload_time = "2025-04-23T01:45:48.943Z" }, + { url = "https://files.pythonhosted.org/packages/71/4a/e60a306df54680b103348545706a98a7514a42c8b4fbfdcaa608567bb065/lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7", size = 5418231, upload_time = "2025-04-23T01:45:51.481Z" }, + { url = "https://files.pythonhosted.org/packages/27/f2/9754aacd6016c930875854f08ac4b192a47fe19565f776a64004aa167521/lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd", size = 5261798, upload_time = "2025-04-23T01:45:54.146Z" }, + { url = "https://files.pythonhosted.org/packages/38/a2/0c49ec6941428b1bd4f280650d7b11a0f91ace9db7de32eb7aa23bcb39ff/lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751", size = 4988195, upload_time = "2025-04-23T01:45:56.685Z" }, + { url = "https://files.pythonhosted.org/packages/7a/75/87a3963a08eafc46a86c1131c6e28a4de103ba30b5ae903114177352a3d7/lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4", size = 3474243, upload_time = "2025-04-23T01:45:58.863Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f9/1f0964c4f6c2be861c50db380c554fb8befbea98c6404744ce243a3c87ef/lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539", size = 3815197, upload_time = "2025-04-23T01:46:01.096Z" }, + { url = 
"https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392, upload_time = "2025-04-23T01:46:04.09Z" }, + { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103, upload_time = "2025-04-23T01:46:07.227Z" }, + { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224, upload_time = "2025-04-23T01:46:10.237Z" }, + { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913, upload_time = "2025-04-23T01:46:12.757Z" }, + { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441, upload_time = "2025-04-23T01:46:16.037Z" }, + { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165, upload_time = "2025-04-23T01:46:19.137Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580, upload_time = "2025-04-23T01:46:21.963Z" }, + { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493, upload_time = "2025-04-23T01:46:24.316Z" }, + { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679, upload_time = "2025-04-23T01:46:27.097Z" }, + { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691, upload_time = "2025-04-23T01:46:30.009Z" }, + { url = "https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075, upload_time = "2025-04-23T01:46:32.33Z" }, + { 
url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680, upload_time = "2025-04-23T01:46:34.852Z" }, + { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253, upload_time = "2025-04-23T01:46:37.608Z" }, + { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651, upload_time = "2025-04-23T01:46:40.183Z" }, + { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315, upload_time = "2025-04-23T01:46:43.333Z" }, + { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149, upload_time = "2025-04-23T01:46:45.684Z" }, + { url = "https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095, upload_time = "2025-04-23T01:46:48.521Z" }, + { url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086, upload_time = "2025-04-23T01:46:52.218Z" }, + { url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613, upload_time = "2025-04-23T01:46:55.281Z" }, + { url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008, upload_time = "2025-04-23T01:46:57.817Z" }, + { url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915, upload_time = "2025-04-23T01:47:00.745Z" }, + { url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890, upload_time = "2025-04-23T01:47:04.702Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644, upload_time = "2025-04-23T01:47:07.833Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817, upload_time = "2025-04-23T01:47:10.317Z" }, + { url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916, upload_time = "2025-04-23T01:47:12.823Z" }, + { url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274, upload_time = "2025-04-23T01:47:15.916Z" }, + { url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757, upload_time = "2025-04-23T01:47:19.793Z" }, + { url = "https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028, upload_time = "2025-04-23T01:47:22.401Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487, upload_time = "2025-04-23T01:47:25.513Z" }, + { url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688, upload_time = "2025-04-23T01:47:28.454Z" }, + { url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043, upload_time = "2025-04-23T01:47:31.208Z" }, + { url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569, upload_time = "2025-04-23T01:47:33.805Z" }, + { url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270, upload_time = "2025-04-23T01:47:36.133Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606, upload_time = "2025-04-23T01:47:39.028Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload_time = "2023-06-03T06:41:14.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload_time = "2023-06-03T06:41:11.019Z" }, +] + +[[package]] +name = "mcp" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/d2/f587cb965a56e992634bebc8611c5b579af912b74e04eb9164bd49527d21/mcp-1.6.0.tar.gz", hash = "sha256:d9324876de2c5637369f43161cd71eebfd803df5a95e46225cab8d280e366723", size = 200031, upload_time = "2025-03-27T16:46:32.336Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/30/20a7f33b0b884a9d14dd3aa94ff1ac9da1479fe2ad66dd9e2736075d2506/mcp-1.6.0-py3-none-any.whl", hash = "sha256:7bd24c6ea042dbec44c754f100984d186620d8b841ec30f1b19eda9b93a634d0", size = 76077, upload_time = "2025-03-27T16:46:29.919Z" }, +] + +[package.optional-dependencies] +cli = [ + { name = "python-dotenv" }, + { name = "typer" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload_time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload_time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "msoffcrypto-tool" +version = "5.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "olefile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370", size = 41183, upload_time = "2024-08-08T15:50:28.462Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e", size = 48713, upload_time = "2024-08-08T15:50:27.093Z" }, +] + 
+[[package]] +name = "office-word-mcp-server" +version = "1.1.0" +source = { editable = "." } +dependencies = [ + { name = "docx2pdf" }, + { name = "mcp", extra = ["cli"] }, + { name = "msoffcrypto-tool" }, + { name = "python-docx" }, +] + +[package.metadata] +requires-dist = [ + { name = "docx2pdf", specifier = ">=0.1.8" }, + { name = "mcp", extras = ["cli"], specifier = ">=1.3.0" }, + { name = "msoffcrypto-tool", specifier = ">=5.4.2" }, + { name = "python-docx", specifier = ">=0.8.11" }, +] + +[[package]] +name = "olefile" +version = "0.47" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240, upload_time = "2023-12-01T16:22:53.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565, upload_time = "2023-12-01T16:22:51.518Z" }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736, upload_time = "2024-03-30T13:22:22.564Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload_time = "2024-03-30T13:22:20.476Z" }, +] + +[[package]] +name = "pydantic" +version = "2.11.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/2e/ca897f093ee6c5f3b0bee123ee4465c50e75431c3d5b6a3b44a47134e891/pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3", size = 785513, upload_time = "2025-04-08T13:27:06.399Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/1d/407b29780a289868ed696d1616f4aad49d6388e5a77f567dcd2629dcd7b8/pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f", size = 443591, upload_time = "2025-04-08T13:27:03.789Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/19/ed6a078a5287aea7922de6841ef4c06157931622c89c2a47940837b5eecd/pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df", size = 434395, upload_time = "2025-04-02T09:49:41.8Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/7f/c6298830cb780c46b4f46bb24298d01019ffa4d21769f39b908cd14bbd50/pydantic_core-2.33.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e966fc3caaf9f1d96b349b0341c70c8d6573bf1bac7261f7b0ba88f96c56c24", size = 2044224, upload_time = "2025-04-02T09:47:04.199Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/65/6ab3a536776cad5343f625245bd38165d6663256ad43f3a200e5936afd6c/pydantic_core-2.33.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfd0adeee563d59c598ceabddf2c92eec77abcb3f4a391b19aa7366170bd9e30", size = 1858845, upload_time = "2025-04-02T09:47:05.686Z" }, + { url = "https://files.pythonhosted.org/packages/e9/15/9a22fd26ba5ee8c669d4b8c9c244238e940cd5d818649603ca81d1c69861/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91815221101ad3c6b507804178a7bb5cb7b2ead9ecd600041669c8d805ebd595", size = 1910029, upload_time = "2025-04-02T09:47:07.042Z" }, + { url = "https://files.pythonhosted.org/packages/d5/33/8cb1a62818974045086f55f604044bf35b9342900318f9a2a029a1bec460/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9fea9c1869bb4742d174a57b4700c6dadea951df8b06de40c2fedb4f02931c2e", size = 1997784, upload_time = "2025-04-02T09:47:08.63Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ca/49958e4df7715c71773e1ea5be1c74544923d10319173264e6db122543f9/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d20eb4861329bb2484c021b9d9a977566ab16d84000a57e28061151c62b349a", size = 2141075, upload_time = "2025-04-02T09:47:10.267Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a6/0b3a167a9773c79ba834b959b4e18c3ae9216b8319bd8422792abc8a41b1/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb935c5591573ae3201640579f30128ccc10739b45663f93c06796854405505", size = 2745849, upload_time = "2025-04-02T09:47:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/0b/60/516484135173aa9e5861d7a0663dce82e4746d2e7f803627d8c25dfa5578/pydantic_core-2.33.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c964fd24e6166420d18fb53996d8c9fd6eac9bf5ae3ec3d03015be4414ce497f", size = 2005794, upload_time = "2025-04-02T09:47:13.099Z" }, + { url = "https://files.pythonhosted.org/packages/86/70/05b1eb77459ad47de00cf78ee003016da0cedf8b9170260488d7c21e9181/pydantic_core-2.33.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:681d65e9011f7392db5aa002b7423cc442d6a673c635668c227c6c8d0e5a4f77", size = 2123237, upload_time = "2025-04-02T09:47:14.355Z" }, + { url = "https://files.pythonhosted.org/packages/c7/57/12667a1409c04ae7dc95d3b43158948eb0368e9c790be8b095cb60611459/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e100c52f7355a48413e2999bfb4e139d2977a904495441b374f3d4fb4a170961", size = 2086351, upload_time = "2025-04-02T09:47:15.676Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/cc6d1d1c1664b58fdd6ecc64c84366c34ec9b606aeb66cafab6f4088974c/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:048831bd363490be79acdd3232f74a0e9951b11b2b4cc058aeb72b22fdc3abe1", size = 2258914, upload_time = "2025-04-02T09:47:17Z" }, + { url = "https://files.pythonhosted.org/packages/d1/0a/edb137176a1f5419b2ddee8bde6a0a548cfa3c74f657f63e56232df8de88/pydantic_core-2.33.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bdc84017d28459c00db6f918a7272a5190bec3090058334e43a76afb279eac7c", size = 2257385, upload_time = "2025-04-02T09:47:18.631Z" }, + { url = "https://files.pythonhosted.org/packages/26/3c/48ca982d50e4b0e1d9954919c887bdc1c2b462801bf408613ccc641b3daa/pydantic_core-2.33.1-cp311-cp311-win32.whl", hash = "sha256:32cd11c5914d1179df70406427097c7dcde19fddf1418c787540f4b730289896", 
size = 1923765, upload_time = "2025-04-02T09:47:20.34Z" }, + { url = "https://files.pythonhosted.org/packages/33/cd/7ab70b99e5e21559f5de38a0928ea84e6f23fdef2b0d16a6feaf942b003c/pydantic_core-2.33.1-cp311-cp311-win_amd64.whl", hash = "sha256:2ea62419ba8c397e7da28a9170a16219d310d2cf4970dbc65c32faf20d828c83", size = 1950688, upload_time = "2025-04-02T09:47:22.029Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ae/db1fc237b82e2cacd379f63e3335748ab88b5adde98bf7544a1b1bd10a84/pydantic_core-2.33.1-cp311-cp311-win_arm64.whl", hash = "sha256:fc903512177361e868bc1f5b80ac8c8a6e05fcdd574a5fb5ffeac5a9982b9e89", size = 1908185, upload_time = "2025-04-02T09:47:23.385Z" }, + { url = "https://files.pythonhosted.org/packages/c8/ce/3cb22b07c29938f97ff5f5bb27521f95e2ebec399b882392deb68d6c440e/pydantic_core-2.33.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8", size = 2026640, upload_time = "2025-04-02T09:47:25.394Z" }, + { url = "https://files.pythonhosted.org/packages/19/78/f381d643b12378fee782a72126ec5d793081ef03791c28a0fd542a5bee64/pydantic_core-2.33.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498", size = 1852649, upload_time = "2025-04-02T09:47:27.417Z" }, + { url = "https://files.pythonhosted.org/packages/9d/2b/98a37b80b15aac9eb2c6cfc6dbd35e5058a352891c5cce3a8472d77665a6/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939", size = 1892472, upload_time = "2025-04-02T09:47:29.006Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d4/3c59514e0f55a161004792b9ff3039da52448f43f5834f905abef9db6e4a/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d", size = 1977509, upload_time = "2025-04-02T09:47:33.464Z" }, + { url = "https://files.pythonhosted.org/packages/a9/b6/c2c7946ef70576f79a25db59a576bce088bdc5952d1b93c9789b091df716/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e", size = 2128702, upload_time = "2025-04-02T09:47:34.812Z" }, + { url = "https://files.pythonhosted.org/packages/88/fe/65a880f81e3f2a974312b61f82a03d85528f89a010ce21ad92f109d94deb/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3", size = 2679428, upload_time = "2025-04-02T09:47:37.315Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ff/4459e4146afd0462fb483bb98aa2436d69c484737feaceba1341615fb0ac/pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d", size = 2008753, upload_time = "2025-04-02T09:47:39.013Z" }, + { url = "https://files.pythonhosted.org/packages/7c/76/1c42e384e8d78452ededac8b583fe2550c84abfef83a0552e0e7478ccbc3/pydantic_core-2.33.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b", size = 2114849, upload_time = "2025-04-02T09:47:40.427Z" }, + { url = "https://files.pythonhosted.org/packages/00/72/7d0cf05095c15f7ffe0eb78914b166d591c0eed72f294da68378da205101/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39", size = 2069541, upload_time = "2025-04-02T09:47:42.01Z" }, + { url = "https://files.pythonhosted.org/packages/b3/69/94a514066bb7d8be499aa764926937409d2389c09be0b5107a970286ef81/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a", size = 2239225, upload_time = "2025-04-02T09:47:43.425Z" }, + { url = "https://files.pythonhosted.org/packages/84/b0/e390071eadb44b41f4f54c3cef64d8bf5f9612c92686c9299eaa09e267e2/pydantic_core-2.33.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db", size = 2248373, upload_time = "2025-04-02T09:47:44.979Z" }, + { url = "https://files.pythonhosted.org/packages/d6/b2/288b3579ffc07e92af66e2f1a11be3b056fe1214aab314748461f21a31c3/pydantic_core-2.33.1-cp312-cp312-win32.whl", hash = "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda", size = 1907034, upload_time = "2025-04-02T09:47:46.843Z" }, + { url = "https://files.pythonhosted.org/packages/02/28/58442ad1c22b5b6742b992ba9518420235adced665513868f99a1c2638a5/pydantic_core-2.33.1-cp312-cp312-win_amd64.whl", hash = "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4", size = 1956848, upload_time = "2025-04-02T09:47:48.404Z" }, + { url = "https://files.pythonhosted.org/packages/a1/eb/f54809b51c7e2a1d9f439f158b8dd94359321abcc98767e16fc48ae5a77e/pydantic_core-2.33.1-cp312-cp312-win_arm64.whl", hash = "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea", size = 1903986, upload_time = "2025-04-02T09:47:49.839Z" }, + { url = "https://files.pythonhosted.org/packages/7a/24/eed3466a4308d79155f1cdd5c7432c80ddcc4530ba8623b79d5ced021641/pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a", size = 2033551, upload_time = "2025-04-02T09:47:51.648Z" }, + { url = "https://files.pythonhosted.org/packages/ab/14/df54b1a0bc9b6ded9b758b73139d2c11b4e8eb43e8ab9c5847c0a2913ada/pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266", size = 1852785, upload_time = "2025-04-02T09:47:53.149Z" }, + { url = "https://files.pythonhosted.org/packages/fa/96/e275f15ff3d34bb04b0125d9bc8848bf69f25d784d92a63676112451bfb9/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3", size = 1897758, upload_time = "2025-04-02T09:47:55.006Z" }, + { url = "https://files.pythonhosted.org/packages/b7/d8/96bc536e975b69e3a924b507d2a19aedbf50b24e08c80fb00e35f9baaed8/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a", size = 1986109, upload_time = "2025-04-02T09:47:56.532Z" }, + { url = "https://files.pythonhosted.org/packages/90/72/ab58e43ce7e900b88cb571ed057b2fcd0e95b708a2e0bed475b10130393e/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516", size = 2129159, upload_time = "2025-04-02T09:47:58.088Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/3f/52d85781406886c6870ac995ec0ba7ccc028b530b0798c9080531b409fdb/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764", size = 2680222, upload_time = "2025-04-02T09:47:59.591Z" }, + { url = "https://files.pythonhosted.org/packages/f4/56/6e2ef42f363a0eec0fd92f74a91e0ac48cd2e49b695aac1509ad81eee86a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d", size = 2006980, upload_time = "2025-04-02T09:48:01.397Z" }, + { url = "https://files.pythonhosted.org/packages/4c/c0/604536c4379cc78359f9ee0aa319f4aedf6b652ec2854953f5a14fc38c5a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4", size = 2120840, upload_time = "2025-04-02T09:48:03.056Z" }, + { url = "https://files.pythonhosted.org/packages/1f/46/9eb764814f508f0edfb291a0f75d10854d78113fa13900ce13729aaec3ae/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde", size = 2072518, upload_time = "2025-04-02T09:48:04.662Z" }, + { url = "https://files.pythonhosted.org/packages/42/e3/fb6b2a732b82d1666fa6bf53e3627867ea3131c5f39f98ce92141e3e3dc1/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e", size = 2248025, upload_time = "2025-04-02T09:48:06.226Z" }, + { url = "https://files.pythonhosted.org/packages/5c/9d/fbe8fe9d1aa4dac88723f10a921bc7418bd3378a567cb5e21193a3c48b43/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd", size = 2254991, upload_time = "2025-04-02T09:48:08.114Z" }, + { url = "https://files.pythonhosted.org/packages/aa/99/07e2237b8a66438d9b26482332cda99a9acccb58d284af7bc7c946a42fd3/pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f", size = 1915262, upload_time = "2025-04-02T09:48:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f4/e457a7849beeed1e5defbcf5051c6f7b3c91a0624dd31543a64fc9adcf52/pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40", size = 1956626, upload_time = "2025-04-02T09:48:11.288Z" }, + { url = "https://files.pythonhosted.org/packages/20/d0/e8d567a7cff7b04e017ae164d98011f1e1894269fe8e90ea187a3cbfb562/pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523", size = 1909590, upload_time = "2025-04-02T09:48:12.861Z" }, + { url = "https://files.pythonhosted.org/packages/ef/fd/24ea4302d7a527d672c5be06e17df16aabfb4e9fdc6e0b345c21580f3d2a/pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d", size = 1812963, upload_time = "2025-04-02T09:48:14.553Z" }, + { url = "https://files.pythonhosted.org/packages/5f/95/4fbc2ecdeb5c1c53f1175a32d870250194eb2fdf6291b795ab08c8646d5d/pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c", size = 1986896, upload_time = "2025-04-02T09:48:16.222Z" }, + { 
url = "https://files.pythonhosted.org/packages/71/ae/fe31e7f4a62431222d8f65a3bd02e3fa7e6026d154a00818e6d30520ea77/pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18", size = 1931810, upload_time = "2025-04-02T09:48:17.97Z" }, + { url = "https://files.pythonhosted.org/packages/0b/76/1794e440c1801ed35415238d2c728f26cd12695df9057154ad768b7b991c/pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3a371dc00282c4b84246509a5ddc808e61b9864aa1eae9ecc92bb1268b82db4a", size = 2042858, upload_time = "2025-04-02T09:49:03.419Z" }, + { url = "https://files.pythonhosted.org/packages/73/b4/9cd7b081fb0b1b4f8150507cd59d27b275c3e22ad60b35cb19ea0977d9b9/pydantic_core-2.33.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:f59295ecc75a1788af8ba92f2e8c6eeaa5a94c22fc4d151e8d9638814f85c8fc", size = 1873745, upload_time = "2025-04-02T09:49:05.391Z" }, + { url = "https://files.pythonhosted.org/packages/e1/d7/9ddb7575d4321e40d0363903c2576c8c0c3280ebea137777e5ab58d723e3/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08530b8ac922003033f399128505f513e30ca770527cc8bbacf75a84fcc2c74b", size = 1904188, upload_time = "2025-04-02T09:49:07.352Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/3194ccfe461bb08da19377ebec8cb4f13c9bd82e13baebc53c5c7c39a029/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae370459da6a5466978c0eacf90690cb57ec9d533f8e63e564ef3822bfa04fe", size = 2083479, upload_time = "2025-04-02T09:49:09.304Z" }, + { url = "https://files.pythonhosted.org/packages/42/c7/84cb569555d7179ca0b3f838cef08f66f7089b54432f5b8599aac6e9533e/pydantic_core-2.33.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e3de2777e3b9f4d603112f78006f4ae0acb936e95f06da6cb1a45fbad6bdb4b5", size = 2118415, upload_time = "2025-04-02T09:49:11.25Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/72abb8c73e0837716afbb58a59cc9e3ae43d1aa8677f3b4bc72c16142716/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a64e81e8cba118e108d7126362ea30e021291b7805d47e4896e52c791be2761", size = 2079623, upload_time = "2025-04-02T09:49:13.292Z" }, + { url = "https://files.pythonhosted.org/packages/0b/cd/c59707e35a47ba4cbbf153c3f7c56420c58653b5801b055dc52cccc8e2dc/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:52928d8c1b6bda03cc6d811e8923dffc87a2d3c8b3bfd2ce16471c7147a24850", size = 2250175, upload_time = "2025-04-02T09:49:15.597Z" }, + { url = "https://files.pythonhosted.org/packages/84/32/e4325a6676b0bed32d5b084566ec86ed7fd1e9bcbfc49c578b1755bde920/pydantic_core-2.33.1-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1b30d92c9412beb5ac6b10a3eb7ef92ccb14e3f2a8d7732e2d739f58b3aa7544", size = 2254674, upload_time = "2025-04-02T09:49:17.61Z" }, + { url = "https://files.pythonhosted.org/packages/12/6f/5596dc418f2e292ffc661d21931ab34591952e2843e7168ea5a52591f6ff/pydantic_core-2.33.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f995719707e0e29f0f41a8aa3bcea6e761a36c9136104d3189eafb83f5cec5e5", size = 2080951, upload_time = "2025-04-02T09:49:19.559Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/67/1d/42628a2c33e93f8e9acbde0d5d735fa0850f3e6a2f8cb1eb6c40b9a732ac/pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268", size = 163234, upload_time = "2025-04-18T16:44:48.265Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/5f/d6d641b490fd3ec2c4c13b4244d68deea3a1b970a97be64f34fb5504ff72/pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef", size = 44356, upload_time = "2025-04-18T16:44:46.617Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload_time = "2025-01-06T17:26:30.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload_time = "2025-01-06T17:26:25.553Z" }, +] + +[[package]] +name = "python-docx" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/e4/386c514c53684772885009c12b67a7edd526c15157778ac1b138bc75063e/python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd", size = 5656581, upload_time = "2024-05-01T19:41:57.772Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/3d/330d9efbdb816d3f60bf2ad92f05e1708e4a1b9abe80461ac3444c83f749/python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe", size = 244315, upload_time = "2024-05-01T19:41:47.006Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920, upload_time = "2025-03-25T10:14:56.835Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256, upload_time = "2025-03-25T10:14:55.034Z" }, +] + +[[package]] +name = "pywin32" +version = "310" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/b1/68aa2986129fb1011dabbe95f0136f44509afaf072b12b8f815905a39f33/pywin32-310-cp311-cp311-win32.whl", hash = "sha256:1e765f9564e83011a63321bb9d27ec456a0ed90d3732c4b2e312b855365ed8bd", size = 8784284, upload_time = "2025-03-17T00:55:53.124Z" }, + { url = "https://files.pythonhosted.org/packages/b3/bd/d1592635992dd8db5bb8ace0551bc3a769de1ac8850200cfa517e72739fb/pywin32-310-cp311-cp311-win_amd64.whl", hash = "sha256:126298077a9d7c95c53823934f000599f66ec9296b09167810eb24875f32689c", size = 9520748, upload_time = "2025-03-17T00:55:55.203Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/b1/ac8b1ffce6603849eb45a91cf126c0fa5431f186c2e768bf56889c46f51c/pywin32-310-cp311-cp311-win_arm64.whl", hash = "sha256:19ec5fc9b1d51c4350be7bb00760ffce46e6c95eaf2f0b2f1150657b1a43c582", size = 8455941, upload_time = "2025-03-17T00:55:57.048Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ec/4fdbe47932f671d6e348474ea35ed94227fb5df56a7c30cbbb42cd396ed0/pywin32-310-cp312-cp312-win32.whl", hash = "sha256:8a75a5cc3893e83a108c05d82198880704c44bbaee4d06e442e471d3c9ea4f3d", size = 8796239, upload_time = "2025-03-17T00:55:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/e3/e5/b0627f8bb84e06991bea89ad8153a9e50ace40b2e1195d68e9dff6b03d0f/pywin32-310-cp312-cp312-win_amd64.whl", hash = "sha256:bf5c397c9a9a19a6f62f3fb821fbf36cac08f03770056711f765ec1503972060", size = 9503839, upload_time = "2025-03-17T00:56:00.8Z" }, + { url = "https://files.pythonhosted.org/packages/1f/32/9ccf53748df72301a89713936645a664ec001abd35ecc8578beda593d37d/pywin32-310-cp312-cp312-win_arm64.whl", hash = "sha256:2349cc906eae872d0663d4d6290d13b90621eaf78964bb1578632ff20e152966", size = 8459470, upload_time = "2025-03-17T00:56:02.601Z" }, + { url = "https://files.pythonhosted.org/packages/1c/09/9c1b978ffc4ae53999e89c19c77ba882d9fce476729f23ef55211ea1c034/pywin32-310-cp313-cp313-win32.whl", hash = "sha256:5d241a659c496ada3253cd01cfaa779b048e90ce4b2b38cd44168ad555ce74ab", size = 8794384, upload_time = "2025-03-17T00:56:04.383Z" }, + { url = "https://files.pythonhosted.org/packages/45/3c/b4640f740ffebadd5d34df35fecba0e1cfef8fde9f3e594df91c28ad9b50/pywin32-310-cp313-cp313-win_amd64.whl", hash = "sha256:667827eb3a90208ddbdcc9e860c81bde63a135710e21e4cb3348968e4bd5249e", size = 9503039, upload_time = "2025-03-17T00:56:06.207Z" }, + { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload_time = "2025-03-17T00:56:07.819Z" }, +] + +[[package]] +name = "rich" +version = "14.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload_time = "2025-03-30T14:15:14.23Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload_time = "2025-03-30T14:15:12.283Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload_time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload_time = 
"2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload_time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload_time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "sse-starlette" +version = "2.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/35/7d8d94eb0474352d55f60f80ebc30f7e59441a29e18886a6425f0bccd0d3/sse_starlette-2.3.3.tar.gz", hash = "sha256:fdd47c254aad42907cfd5c5b83e2282be15be6c51197bf1a9b70b8e990522072", size = 17499, upload_time = "2025-04-23T19:28:25.558Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/20/52fdb5ebb158294b0adb5662235dd396fc7e47aa31c293978d8d8942095a/sse_starlette-2.3.3-py3-none-any.whl", hash = "sha256:8b0a0ced04a329ff7341b01007580dd8cf71331cc21c0ccea677d500618da1e0", size = 10235, upload_time = "2025-04-23T19:28:24.115Z" }, +] + +[[package]] +name = "starlette" +version = "0.46.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload_time = "2025-04-13T13:56:17.942Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload_time = "2025-04-13T13:56:16.21Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload_time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" }, +] + +[[package]] +name = "typer" +version = "0.15.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/6f/3991f0f1c7fcb2df31aef28e0594d8d54b05393a0e4e34c65e475c2a5d41/typer-0.15.2.tar.gz", hash = "sha256:ab2fab47533a813c49fe1f16b1a370fd5819099c00b119e0633df65f22144ba5", size = 100711, upload_time = "2025-02-27T19:17:34.807Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/fc/5b29fea8cee020515ca82cc68e3b8e1e34bb19a3535ad854cac9257b414c/typer-0.15.2-py3-none-any.whl", hash = "sha256:46a499c6107d645a9c13f7ee46c5d5096cae6f5fc57dd11eccbbb9ae3e44ddfc", size = 45061, upload_time = "2025-02-27T19:17:32.111Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, upload_time = "2025-04-10T14:19:05.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload_time = "2025-04-10T14:19:03.967Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222, upload_time = "2025-02-25T17:27:59.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125, upload_time = "2025-02-25T17:27:57.754Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.34.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/ae/9bbb19b9e1c450cf9ecaef06463e40234d98d95bf572fab11b4f19ae5ded/uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328", size = 76815, upload_time = "2025-04-19T06:02:50.101Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/4b/4cef6ce21a2aaca9d852a6e84ef4f135d99fcd74fa75105e2fc0c8308acd/uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403", size = 62483, upload_time = "2025-04-19T06:02:48.42Z" }, +] diff --git a/backend/office_word_mcp/word_document_server/__init__.py b/backend/office_word_mcp/word_document_server/__init__.py new file mode 100644 index 0000000..85d71f3 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/__init__.py @@ -0,0 +1,15 @@ +""" +Word Document Server - MCP server for Microsoft Word document manipulation. + +This package provides tools for creating, reading, and manipulating Microsoft Word +documents through the Model Context Protocol (MCP). 
+ +Features: +- Document creation and management +- Content addition (headings, paragraphs, tables, images) +- Text and table formatting +- Document protection (password, restricted editing, signatures) +- Footnote and endnote management +""" + +__version__ = "1.0.0" diff --git a/backend/office_word_mcp/word_document_server/core/__init__.py b/backend/office_word_mcp/word_document_server/core/__init__.py new file mode 100644 index 0000000..a0aa41d --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/__init__.py @@ -0,0 +1,10 @@ +""" +Core functionality for the Word Document Server. + +This package contains the core functionality modules used by the Word Document Server. +""" + +from word_document_server.core.styles import ensure_heading_style, ensure_table_style, create_style +from word_document_server.core.protection import add_protection_info, verify_document_protection, is_section_editable, create_signature_info, verify_signature +from word_document_server.core.footnotes import add_footnote, add_endnote, convert_footnotes_to_endnotes, find_footnote_references, get_format_symbols, customize_footnote_formatting +from word_document_server.core.tables import set_cell_border, apply_table_style, copy_table diff --git a/backend/office_word_mcp/word_document_server/core/comments.py b/backend/office_word_mcp/word_document_server/core/comments.py new file mode 100644 index 0000000..9695c8b --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/comments.py @@ -0,0 +1,210 @@ +""" +Core comment extraction functionality for Word documents. + +This module provides low-level functions to extract and process comments +from Word documents using the python-docx library. +""" +import datetime +from typing import Dict, List, Optional, Any +from docx import Document +from docx.document import Document as DocumentType +from docx.text.paragraph import Paragraph + + +def extract_all_comments(doc: DocumentType) -> List[Dict[str, Any]]: + """ + Extract all comments from a Word document. + + Args: + doc: The Document object to extract comments from + + Returns: + List of dictionaries containing comment information + """ + comments = [] + + # Access the document's comment part if it exists + try: + # Get the document part + document_part = doc.part + + # Find comments part through relationships + comments_part = None + for rel_id, rel in document_part.rels.items(): + if 'comments' in rel.reltype and 'comments' == rel.reltype.split('/')[-1]: + comments_part = rel.target_part + break + + if comments_part: + # Extract comments from the comments part using proper xpath syntax + comment_elements = comments_part.element.xpath('.//w:comment') + + for idx, comment_element in enumerate(comment_elements): + comment_data = extract_comment_data(comment_element, idx) + if comment_data: + comments.append(comment_data) + + # If no comments found, try alternative approach + if not comments: + # Fallback: scan paragraphs for comment references + comments = extract_comments_from_paragraphs(doc) + + except Exception as e: + # If direct access fails, try alternative approach + comments = extract_comments_from_paragraphs(doc) + + return comments + + +def extract_comments_from_paragraphs(doc: DocumentType) -> List[Dict[str, Any]]: + """ + Extract comments by scanning paragraphs for comment references. 
+ + Args: + doc: The Document object + + Returns: + List of comment dictionaries + """ + comments = [] + comment_id = 1 + + # Check all paragraphs in the document + for para_idx, paragraph in enumerate(doc.paragraphs): + para_comments = find_paragraph_comments(paragraph, para_idx, comment_id) + comments.extend(para_comments) + comment_id += len(para_comments) + + # Check paragraphs in tables + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for para_idx, paragraph in enumerate(cell.paragraphs): + para_comments = find_paragraph_comments(paragraph, para_idx, comment_id, in_table=True) + comments.extend(para_comments) + comment_id += len(para_comments) + + return comments + + +def extract_comment_data(comment_element, index: int) -> Optional[Dict[str, Any]]: + """ + Extract data from a comment XML element. + + Args: + comment_element: The XML comment element + index: Index for generating a unique ID + + Returns: + Dictionary with comment data or None + """ + try: + # Extract comment attributes + comment_id = comment_element.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}id', str(index)) + author = comment_element.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}author', 'Unknown') + initials = comment_element.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}initials', '') + date_str = comment_element.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}date', '') + + # Parse date if available + date = None + if date_str: + try: + date = datetime.datetime.fromisoformat(date_str.replace('Z', '+00:00')) + date = date.isoformat() + except: + date = date_str + + # Extract comment text + text_elements = comment_element.xpath('.//w:t') + text = ''.join(elem.text or '' for elem in text_elements) + + return { + 'id': f'comment_{index + 1}', + 'comment_id': comment_id, + 'author': author, + 'initials': initials, + 'date': date, + 'text': text.strip(), + 'paragraph_index': None, # Will be filled if we can determine it + 'in_table': False, + 'reference_text': '' + } + + except Exception as e: + return None + + +def find_paragraph_comments(paragraph: Paragraph, para_index: int, + start_id: int, in_table: bool = False) -> List[Dict[str, Any]]: + """ + Find comments associated with a specific paragraph. + + Args: + paragraph: The paragraph to check + para_index: The index of the paragraph + start_id: Starting ID for comments + in_table: Whether the paragraph is in a table + + Returns: + List of comment dictionaries + """ + comments = [] + + try: + # Access the paragraph's XML element + para_xml = paragraph._element + + # Look for comment range markers (simplified approach) + # This is a basic implementation - the full version would need more sophisticated XML parsing + xml_text = str(para_xml) + + # Simple check for comment references in the XML + if 'commentRangeStart' in xml_text or 'commentReference' in xml_text: + # Create a placeholder comment entry + comment_info = { + 'id': f'comment_{start_id}', + 'comment_id': f'{start_id}', + 'author': 'Unknown', + 'initials': '', + 'date': None, + 'text': 'Comment detected but content not accessible', + 'paragraph_index': para_index, + 'in_table': in_table, + 'reference_text': paragraph.text[:50] + '...' 
if len(paragraph.text) > 50 else paragraph.text + } + comments.append(comment_info) + + except Exception: + # If we can't access the XML, skip this paragraph + pass + + return comments + + +def filter_comments_by_author(comments: List[Dict[str, Any]], author: str) -> List[Dict[str, Any]]: + """ + Filter comments by author name. + + Args: + comments: List of comment dictionaries + author: Author name to filter by (case-insensitive) + + Returns: + Filtered list of comments + """ + author_lower = author.lower() + return [c for c in comments if c.get('author', '').lower() == author_lower] + + +def get_comments_for_paragraph(comments: List[Dict[str, Any]], paragraph_index: int) -> List[Dict[str, Any]]: + """ + Get all comments for a specific paragraph. + + Args: + comments: List of all comments + paragraph_index: Index of the paragraph + + Returns: + Comments for the specified paragraph + """ + return [c for c in comments if c.get('paragraph_index') == paragraph_index] \ No newline at end of file diff --git a/backend/office_word_mcp/word_document_server/core/footnotes.py b/backend/office_word_mcp/word_document_server/core/footnotes.py new file mode 100644 index 0000000..f5147e0 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/footnotes.py @@ -0,0 +1,842 @@ +""" +Consolidated footnote functionality for Word documents. +This module combines all footnote implementations with proper namespace handling and Word compliance. +""" + +import os +import zipfile +import tempfile +from typing import Optional, Tuple, Dict, Any, List +from lxml import etree +from docx import Document +from docx.oxml.ns import qn + +# Namespace definitions +W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' +R_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' +CT_NS = 'http://schemas.openxmlformats.org/package/2006/content-types' +REL_NS = 'http://schemas.openxmlformats.org/package/2006/relationships' + +# Constants +RESERVED_FOOTNOTE_IDS = {-1, 0, 1} # Reserved for separators and Word internals +MIN_FOOTNOTE_ID = -2147483648 +MAX_FOOTNOTE_ID = 32767 +MAX_RELATIONSHIP_ID_LENGTH = 255 +FOOTNOTE_REF_STYLE_INDEX = 38 +FOOTNOTE_TEXT_STYLE_INDEX = 29 + + +# ============================================================================ +# BASIC UTILITIES (from footnotes.py) +# ============================================================================ + +def find_footnote_references(doc): + """Find all footnote references in the document.""" + footnote_refs = [] + for para_idx, para in enumerate(doc.paragraphs): + for run_idx, run in enumerate(para.runs): + # Check if this run has superscript formatting + if run.font.superscript: + # Check if it's likely a footnote reference + if run.text.isdigit() or run.text in "¹²³⁴⁵⁶⁷⁸⁹⁰†‡§¶": + footnote_refs.append({ + 'paragraph_index': para_idx, + 'run_index': run_idx, + 'text': run.text, + 'paragraph': para, + 'run': run + }) + return footnote_refs + + +def get_format_symbols(format_type: str, count: int) -> list: + """Generate format symbols for footnote numbering.""" + symbols = [] + + if format_type == "1, 2, 3": + symbols = [str(i) for i in range(1, count + 1)] + elif format_type == "i, ii, iii": + # Roman numerals + roman_map = [(10, 'x'), (9, 'ix'), (5, 'v'), (4, 'iv'), (1, 'i')] + for i in range(1, count + 1): + result = '' + num = i + for value, numeral in roman_map: + count_sym, num = divmod(num, value) + result += numeral * count_sym + symbols.append(result) + elif format_type == "a, b, c": + # Alphabetic + for i in 
range(1, count + 1): + if i <= 26: + symbols.append(chr(96 + i)) + else: + # For numbers > 26, use aa, ab, etc. + first = (i - 1) // 26 + second = (i - 1) % 26 + 1 + symbols.append(chr(96 + first) + chr(96 + second)) + elif format_type == "*, †, ‡": + # Special symbols + special = ['*', '†', '‡', '§', '¶', '#'] + for i in range(1, count + 1): + if i <= len(special): + symbols.append(special[i - 1]) + else: + # Repeat symbols with numbers + symbols.append(special[(i - 1) % len(special)] + str((i - 1) // len(special) + 1)) + else: + # Default to numeric + symbols = [str(i) for i in range(1, count + 1)] + + return symbols + + +def customize_footnote_formatting(doc, footnote_refs, format_symbols, start_number, footnote_style): + """Apply custom formatting to footnotes.""" + count = 0 + for i, ref in enumerate(footnote_refs): + if i < len(format_symbols): + # Update the footnote reference text + ref['run'].text = format_symbols[i] + ref['run'].font.superscript = True + + # Apply style if available + if footnote_style: + try: + ref['paragraph'].style = footnote_style + except: + pass + count += 1 + return count + + +# ============================================================================ +# ROBUST IMPLEMENTATION (consolidated from footnotes_robust.py) +# ============================================================================ + +def _get_safe_footnote_id(footnotes_root) -> int: + """Get a safe footnote ID avoiding conflicts and reserved values.""" + nsmap = {'w': W_NS} + existing_footnotes = footnotes_root.xpath('//w:footnote', namespaces=nsmap) + + used_ids = set() + for fn in existing_footnotes: + fn_id = fn.get(f'{{{W_NS}}}id') + if fn_id: + try: + used_ids.add(int(fn_id)) + except ValueError: + pass + + # Start from 2 to avoid reserved IDs + candidate_id = 2 + while candidate_id in used_ids or candidate_id in RESERVED_FOOTNOTE_IDS: + candidate_id += 1 + if candidate_id > MAX_FOOTNOTE_ID: + raise ValueError("No available footnote IDs") + + return candidate_id + + +def _ensure_content_types(content_types_xml: bytes) -> bytes: + """Ensure content types with proper namespace handling.""" + ct_tree = etree.fromstring(content_types_xml) + + # Content Types uses default namespace - must use namespace-aware XPath + nsmap = {'ct': CT_NS} + + # Check for existing override with proper namespace + existing_overrides = ct_tree.xpath( + "//ct:Override[@PartName='/word/footnotes.xml']", + namespaces=nsmap + ) + + if existing_overrides: + return content_types_xml # Already exists + + # Add override with proper namespace + override = etree.Element(f'{{{CT_NS}}}Override', + PartName='/word/footnotes.xml', + ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml' + ) + ct_tree.append(override) + + return etree.tostring(ct_tree, encoding='UTF-8', xml_declaration=True, standalone="yes") + + +def _ensure_document_rels(document_rels_xml: bytes) -> bytes: + """Ensure document relationships with proper namespace handling.""" + rels_tree = etree.fromstring(document_rels_xml) + + # Relationships uses default namespace - must use namespace-aware XPath + nsmap = {'r': REL_NS} + + # Check for existing footnotes relationship with proper namespace + existing_footnote_rels = rels_tree.xpath( + "//r:Relationship[contains(@Type, 'footnotes')]", + namespaces=nsmap + ) + + if existing_footnote_rels: + return document_rels_xml # Already exists + + # Generate unique rId using namespace-aware XPath + all_rels = rels_tree.xpath('//r:Relationship', namespaces=nsmap) + existing_ids = 
{rel.get('Id') for rel in all_rels if rel.get('Id')}
+    rid_num = 1
+    while f'rId{rid_num}' in existing_ids:
+        rid_num += 1
+
+    # Validate ID length
+    new_rid = f'rId{rid_num}'
+    if len(new_rid) > MAX_RELATIONSHIP_ID_LENGTH:
+        raise ValueError(f"Relationship ID too long: {new_rid}")
+
+    # Create relationship with proper namespace
+    rel = etree.Element(f'{{{REL_NS}}}Relationship',
+        Id=new_rid,
+        Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
+        Target='footnotes.xml'
+    )
+    rels_tree.append(rel)
+
+    return etree.tostring(rels_tree, encoding='UTF-8', xml_declaration=True, standalone="yes")
+
+
+def _create_minimal_footnotes_xml() -> bytes:
+    """Create minimal footnotes.xml with separators."""
+    xml = f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:footnotes xmlns:w="{W_NS}">
+    <w:footnote w:type="separator" w:id="-1">
+        <w:p>
+            <w:pPr>
+                <w:spacing w:after="0" w:line="240" w:lineRule="auto"/>
+            </w:pPr>
+            <w:r>
+                <w:separator/>
+            </w:r>
+        </w:p>
+    </w:footnote>
+    <w:footnote w:type="continuationSeparator" w:id="0">
+        <w:p>
+            <w:pPr>
+                <w:spacing w:after="0" w:line="240" w:lineRule="auto"/>
+            </w:pPr>
+            <w:r>
+                <w:continuationSeparator/>
+            </w:r>
+        </w:p>
+    </w:footnote>
+</w:footnotes>'''
+    return xml.encode('utf-8')
+
+
+def _ensure_footnote_styles(styles_root):
+    """Ensure both FootnoteReference and FootnoteText styles exist."""
+    nsmap = {'w': W_NS}
+
+    # Check for FootnoteReference style
+    ref_style = styles_root.xpath('//w:style[@w:styleId="FootnoteReference"]', namespaces=nsmap)
+    if not ref_style:
+        # Create FootnoteReference character style
+        style = etree.Element(f'{{{W_NS}}}style',
+            attrib={
+                f'{{{W_NS}}}type': 'character',
+                f'{{{W_NS}}}styleId': 'FootnoteReference'
+            }
+        )
+        name = etree.SubElement(style, f'{{{W_NS}}}name')
+        name.set(f'{{{W_NS}}}val', 'footnote reference')
+
+        base = etree.SubElement(style, f'{{{W_NS}}}basedOn')
+        base.set(f'{{{W_NS}}}val', 'DefaultParagraphFont')
+
+        rPr = etree.SubElement(style, f'{{{W_NS}}}rPr')
+        vert_align = etree.SubElement(rPr, f'{{{W_NS}}}vertAlign')
+        vert_align.set(f'{{{W_NS}}}val', 'superscript')
+
+        styles_root.append(style)
+
+    # Check for FootnoteText style
+    text_style = styles_root.xpath('//w:style[@w:styleId="FootnoteText"]', namespaces=nsmap)
+    if not text_style:
+        # Create FootnoteText paragraph style
+        style = etree.Element(f'{{{W_NS}}}style',
+            attrib={
+                f'{{{W_NS}}}type': 'paragraph',
+                f'{{{W_NS}}}styleId': 'FootnoteText'
+            }
+        )
+        name = etree.SubElement(style, f'{{{W_NS}}}name')
+        name.set(f'{{{W_NS}}}val', 'footnote text')
+
+        base = etree.SubElement(style, f'{{{W_NS}}}basedOn')
+        base.set(f'{{{W_NS}}}val', 'Normal')
+
+        pPr = etree.SubElement(style, f'{{{W_NS}}}pPr')
+        sz = etree.SubElement(pPr, f'{{{W_NS}}}sz')
+        sz.set(f'{{{W_NS}}}val', '20')  # 10pt
+
+        styles_root.append(style)
+
+
+def add_footnote_robust(
+    filename: str,
+    search_text: Optional[str] = None,
+    paragraph_index: Optional[int] = None,
+    footnote_text: str = "",
+    output_filename: Optional[str] = None,
+    position: str = "after",
+    validate_location: bool = True,
+    auto_repair: bool = False
+) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
+    """
+    Add a footnote with robust validation and error handling.
+
+    This is the main production-ready function with all fixes applied.
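+
+    Example (illustrative sketch only; the document path, search text and
+    footnote text below are hypothetical placeholders):
+
+        ok, message, details = add_footnote_robust(
+            "draft.docx",
+            search_text="as shown in Table 2",
+            footnote_text="Source: authors' calculations.",
+            output_filename="draft_with_footnote.docx",
+        )
+        if ok:
+            print(message, details["footnote_id"])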
+ """ + + # Validate inputs + if not search_text and paragraph_index is None: + return False, "Must provide either search_text or paragraph_index", None + + if search_text and paragraph_index is not None: + return False, "Cannot provide both search_text and paragraph_index", None + + if not os.path.exists(filename): + return False, f"File not found: {filename}", None + + # Set working file + working_file = output_filename if output_filename else filename + if output_filename and filename != output_filename: + import shutil + shutil.copy2(filename, output_filename) + + try: + # Read document parts + doc_parts = {} + with zipfile.ZipFile(filename, 'r') as zin: + doc_parts['document'] = zin.read('word/document.xml') + doc_parts['content_types'] = zin.read('[Content_Types].xml') + doc_parts['document_rels'] = zin.read('word/_rels/document.xml.rels') + + # Read or create footnotes.xml + if 'word/footnotes.xml' in zin.namelist(): + doc_parts['footnotes'] = zin.read('word/footnotes.xml') + else: + doc_parts['footnotes'] = _create_minimal_footnotes_xml() + + # Read styles + if 'word/styles.xml' in zin.namelist(): + doc_parts['styles'] = zin.read('word/styles.xml') + else: + # Create minimal styles + doc_parts['styles'] = b'' + + # Parse XML documents + doc_root = etree.fromstring(doc_parts['document']) + footnotes_root = etree.fromstring(doc_parts['footnotes']) + styles_root = etree.fromstring(doc_parts['styles']) + + # Find target location + nsmap = {'w': W_NS} + + if search_text: + # Search for text in paragraphs + found = False + for para in doc_root.xpath('//w:p', namespaces=nsmap): + para_text = ''.join(para.xpath('.//w:t/text()', namespaces=nsmap)) + if search_text in para_text: + target_para = para + found = True + break + + if not found: + return False, f"Text '{search_text}' not found in document", None + else: + # Use paragraph index + paragraphs = doc_root.xpath('//w:p', namespaces=nsmap) + if paragraph_index >= len(paragraphs): + return False, f"Paragraph index {paragraph_index} out of range", None + target_para = paragraphs[paragraph_index] + + # Validate location if requested + if validate_location: + # Check if paragraph is in header/footer + parent = target_para.getparent() + while parent is not None: + if parent.tag in [f'{{{W_NS}}}hdr', f'{{{W_NS}}}ftr']: + return False, "Cannot add footnote in header/footer", None + parent = parent.getparent() + + # Get safe footnote ID + footnote_id = _get_safe_footnote_id(footnotes_root) + + # Add footnote reference to document + if position == "after": + # Find last run in paragraph or create one + runs = target_para.xpath('.//w:r', namespaces=nsmap) + if runs: + last_run = runs[-1] + # Insert after last run + insert_pos = target_para.index(last_run) + 1 + else: + insert_pos = len(target_para) + else: # before + # Find first run with text + runs = target_para.xpath('.//w:r[w:t]', namespaces=nsmap) + if runs: + first_run = runs[0] + insert_pos = target_para.index(first_run) + else: + insert_pos = 0 + + # Create footnote reference run + ref_run = etree.Element(f'{{{W_NS}}}r') + + # Add run properties with superscript + rPr = etree.SubElement(ref_run, f'{{{W_NS}}}rPr') + rStyle = etree.SubElement(rPr, f'{{{W_NS}}}rStyle') + rStyle.set(f'{{{W_NS}}}val', 'FootnoteReference') + + # Add footnote reference + fn_ref = etree.SubElement(ref_run, f'{{{W_NS}}}footnoteReference') + fn_ref.set(f'{{{W_NS}}}id', str(footnote_id)) + + # Insert the reference run + target_para.insert(insert_pos, ref_run) + + # Add footnote content + new_footnote = 
etree.Element(f'{{{W_NS}}}footnote',
+            attrib={f'{{{W_NS}}}id': str(footnote_id)}
+        )
+
+        # Add paragraph to footnote
+        fn_para = etree.SubElement(new_footnote, f'{{{W_NS}}}p')
+
+        # Add paragraph properties
+        pPr = etree.SubElement(fn_para, f'{{{W_NS}}}pPr')
+        pStyle = etree.SubElement(pPr, f'{{{W_NS}}}pStyle')
+        pStyle.set(f'{{{W_NS}}}val', 'FootnoteText')
+
+        # Add the footnote reference marker
+        marker_run = etree.SubElement(fn_para, f'{{{W_NS}}}r')
+        marker_rPr = etree.SubElement(marker_run, f'{{{W_NS}}}rPr')
+        marker_rStyle = etree.SubElement(marker_rPr, f'{{{W_NS}}}rStyle')
+        marker_rStyle.set(f'{{{W_NS}}}val', 'FootnoteReference')
+        marker_ref = etree.SubElement(marker_run, f'{{{W_NS}}}footnoteRef')
+
+        # Add space after marker
+        space_run = etree.SubElement(fn_para, f'{{{W_NS}}}r')
+        space_text = etree.SubElement(space_run, f'{{{W_NS}}}t')
+        # xml:space lives in the XML namespace, not the WordprocessingML (w:) namespace
+        space_text.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+        space_text.text = ' '
+
+        # Add footnote text
+        text_run = etree.SubElement(fn_para, f'{{{W_NS}}}r')
+        text_elem = etree.SubElement(text_run, f'{{{W_NS}}}t')
+        text_elem.text = footnote_text
+
+        # Append footnote to footnotes.xml
+        footnotes_root.append(new_footnote)
+
+        # Ensure styles exist
+        _ensure_footnote_styles(styles_root)
+
+        # Ensure coherence
+        content_types_xml = _ensure_content_types(doc_parts['content_types'])
+        document_rels_xml = _ensure_document_rels(doc_parts['document_rels'])
+
+        # Write modified document
+        temp_file = working_file + '.tmp'
+        with zipfile.ZipFile(temp_file, 'w', zipfile.ZIP_DEFLATED) as zout:
+            with zipfile.ZipFile(filename, 'r') as zin:
+                # Copy unchanged files
+                for item in zin.infolist():
+                    if item.filename not in [
+                        'word/document.xml', 'word/footnotes.xml', 'word/styles.xml',
+                        '[Content_Types].xml', 'word/_rels/document.xml.rels'
+                    ]:
+                        zout.writestr(item, zin.read(item.filename))
+
+            # Write modified files
+            zout.writestr('word/document.xml',
+                etree.tostring(doc_root, encoding='UTF-8', xml_declaration=True, standalone="yes"))
+            zout.writestr('word/footnotes.xml',
+                etree.tostring(footnotes_root, encoding='UTF-8', xml_declaration=True, standalone="yes"))
+            zout.writestr('word/styles.xml',
+                etree.tostring(styles_root, encoding='UTF-8', xml_declaration=True, standalone="yes"))
+            zout.writestr('[Content_Types].xml', content_types_xml)
+            zout.writestr('word/_rels/document.xml.rels', document_rels_xml)
+
+        # Replace original with temp file
+        os.replace(temp_file, working_file)
+
+        details = {
+            'footnote_id': footnote_id,
+            'location': 'search_text' if search_text else 'paragraph_index',
+            'styles_created': ['FootnoteReference', 'FootnoteText'],
+            'coherence_verified': True
+        }
+
+        return True, f"Successfully added footnote (ID: {footnote_id}) to {working_file}", details
+
+    except Exception as e:
+        # Clean up temp file if exists
+        temp_file = working_file + '.tmp'
+        if os.path.exists(temp_file):
+            os.remove(temp_file)
+        return False, f"Error adding footnote: {str(e)}", None
+
+
+def delete_footnote_robust(
+    filename: str,
+    footnote_id: Optional[int] = None,
+    search_text: Optional[str] = None,
+    output_filename: Optional[str] = None,
+    clean_orphans: bool = True
+) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
+    """Delete a footnote with comprehensive cleanup."""
+
+    if not footnote_id and not search_text:
+        return False, "Must provide either footnote_id or search_text", None
+
+    if not os.path.exists(filename):
+        return False, f"File not found: {filename}", None
+
+    # Set working file
+    working_file = output_filename if output_filename else filename
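+    # When a separate output file is requested, work on a copy so the original
+    # document is left untouched.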
+ if output_filename and filename != output_filename: + import shutil + shutil.copy2(filename, output_filename) + + try: + # Read document parts + with zipfile.ZipFile(filename, 'r') as zin: + doc_xml = zin.read('word/document.xml') + + if 'word/footnotes.xml' not in zin.namelist(): + return False, "No footnotes in document", None + + footnotes_xml = zin.read('word/footnotes.xml') + + # Parse documents + doc_root = etree.fromstring(doc_xml) + footnotes_root = etree.fromstring(footnotes_xml) + nsmap = {'w': W_NS} + + # Find footnote to delete + if search_text: + # Find footnote reference near text + for para in doc_root.xpath('//w:p', namespaces=nsmap): + para_text = ''.join(para.xpath('.//w:t/text()', namespaces=nsmap)) + if search_text in para_text: + # Look for footnote reference in this paragraph + fn_refs = para.xpath('.//w:footnoteReference', namespaces=nsmap) + if fn_refs: + footnote_id = int(fn_refs[0].get(f'{{{W_NS}}}id')) + break + + if not footnote_id: + return False, f"No footnote found near text '{search_text}'", None + + # Remove footnote reference from document + refs_removed = 0 + for fn_ref in doc_root.xpath(f'//w:footnoteReference[@w:id="{footnote_id}"]', namespaces=nsmap): + # Remove the entire run containing the reference + run = fn_ref.getparent() + if run is not None and run.tag == f'{{{W_NS}}}r': + para = run.getparent() + if para is not None: + para.remove(run) + refs_removed += 1 + + if refs_removed == 0: + return False, f"Footnote {footnote_id} not found", None + + # Remove footnote content + content_removed = 0 + for fn in footnotes_root.xpath(f'//w:footnote[@w:id="{footnote_id}"]', namespaces=nsmap): + footnotes_root.remove(fn) + content_removed += 1 + + # Clean orphans if requested + orphans_removed = [] + if clean_orphans: + # Find all referenced IDs + referenced_ids = set() + for ref in doc_root.xpath('//w:footnoteReference', namespaces=nsmap): + ref_id = ref.get(f'{{{W_NS}}}id') + if ref_id: + referenced_ids.add(ref_id) + + # Remove unreferenced footnotes (except separators) + for fn in footnotes_root.xpath('//w:footnote', namespaces=nsmap): + fn_id = fn.get(f'{{{W_NS}}}id') + if fn_id and fn_id not in referenced_ids and fn_id not in ['-1', '0']: + footnotes_root.remove(fn) + orphans_removed.append(fn_id) + + # Write modified document + temp_file = working_file + '.tmp' + with zipfile.ZipFile(temp_file, 'w', zipfile.ZIP_DEFLATED) as zout: + with zipfile.ZipFile(filename, 'r') as zin: + for item in zin.infolist(): + if item.filename == 'word/document.xml': + zout.writestr(item, + etree.tostring(doc_root, encoding='UTF-8', xml_declaration=True, standalone="yes")) + elif item.filename == 'word/footnotes.xml': + zout.writestr(item, + etree.tostring(footnotes_root, encoding='UTF-8', xml_declaration=True, standalone="yes")) + else: + zout.writestr(item, zin.read(item.filename)) + + os.replace(temp_file, working_file) + + details = { + 'footnote_id': footnote_id, + 'references_removed': refs_removed, + 'content_removed': content_removed, + 'orphans_removed': orphans_removed + } + + message = f"Successfully deleted footnote {footnote_id}" + if orphans_removed: + message += f" and {len(orphans_removed)} orphaned footnotes" + + return True, message, details + + except Exception as e: + return False, f"Error deleting footnote: {str(e)}", None + + +def validate_document_footnotes(filename: str) -> Tuple[bool, str, Dict[str, Any]]: + """Validate all footnotes in a document for coherence and compliance.""" + + if not os.path.exists(filename): + return False, f"File not 
found: {filename}", {} + + report = { + 'total_references': 0, + 'total_content': 0, + 'id_conflicts': [], + 'orphaned_content': [], + 'missing_references': [], + 'invalid_locations': [], + 'missing_styles': [], + 'coherence_issues': [] + } + + try: + with zipfile.ZipFile(filename, 'r') as zf: + # Check document.xml + doc_xml = zf.read('word/document.xml') + doc_root = etree.fromstring(doc_xml) + nsmap = {'w': W_NS} + + # Get all footnote references + ref_ids = set() + for ref in doc_root.xpath('//w:footnoteReference', namespaces=nsmap): + ref_id = ref.get(f'{{{W_NS}}}id') + if ref_id: + ref_ids.add(ref_id) + report['total_references'] += 1 + + # Check location + parent = ref.getparent() + while parent is not None: + if parent.tag in [f'{{{W_NS}}}hdr', f'{{{W_NS}}}ftr']: + report['invalid_locations'].append(ref_id) + break + parent = parent.getparent() + + # Check footnotes.xml + if 'word/footnotes.xml' in zf.namelist(): + footnotes_xml = zf.read('word/footnotes.xml') + footnotes_root = etree.fromstring(footnotes_xml) + + content_ids = set() + for fn in footnotes_root.xpath('//w:footnote', namespaces=nsmap): + fn_id = fn.get(f'{{{W_NS}}}id') + if fn_id: + content_ids.add(fn_id) + if fn_id not in ['-1', '0']: # Exclude separators + report['total_content'] += 1 + + # Find orphans and missing + report['orphaned_content'] = list(content_ids - ref_ids - {'-1', '0'}) + report['missing_references'] = list(ref_ids - content_ids) + else: + if report['total_references'] > 0: + report['coherence_issues'].append('References exist but no footnotes.xml') + + # Check relationships + if 'word/_rels/document.xml.rels' in zf.namelist(): + rels_xml = zf.read('word/_rels/document.xml.rels') + rels_root = etree.fromstring(rels_xml) + rel_nsmap = {'r': REL_NS} + + fn_rels = rels_root.xpath( + "//r:Relationship[contains(@Type, 'footnotes')]", + namespaces=rel_nsmap + ) + + if report['total_content'] > 0 and len(fn_rels) == 0: + report['coherence_issues'].append('Missing footnotes relationship') + elif len(fn_rels) > 1: + report['coherence_issues'].append(f'Multiple footnote relationships: {len(fn_rels)}') + + # Check content types + if '[Content_Types].xml' in zf.namelist(): + ct_xml = zf.read('[Content_Types].xml') + ct_root = etree.fromstring(ct_xml) + ct_nsmap = {'ct': CT_NS} + + fn_overrides = ct_root.xpath( + "//ct:Override[@PartName='/word/footnotes.xml']", + namespaces=ct_nsmap + ) + + if report['total_content'] > 0 and len(fn_overrides) == 0: + report['coherence_issues'].append('Missing footnotes content type') + elif len(fn_overrides) > 1: + report['coherence_issues'].append(f'Multiple footnote content types: {len(fn_overrides)}') + + # Check styles + if 'word/styles.xml' in zf.namelist(): + styles_xml = zf.read('word/styles.xml') + styles_root = etree.fromstring(styles_xml) + + ref_style = styles_root.xpath('//w:style[@w:styleId="FootnoteReference"]', namespaces=nsmap) + text_style = styles_root.xpath('//w:style[@w:styleId="FootnoteText"]', namespaces=nsmap) + + if not ref_style: + report['missing_styles'].append('FootnoteReference') + if not text_style: + report['missing_styles'].append('FootnoteText') + + # Determine if valid + is_valid = ( + len(report['id_conflicts']) == 0 and + len(report['orphaned_content']) == 0 and + len(report['missing_references']) == 0 and + len(report['invalid_locations']) == 0 and + len(report['coherence_issues']) == 0 + ) + + if is_valid: + message = "Document footnotes are valid" + else: + message = "Document has footnote issues" + + return is_valid, message, report + + 
except Exception as e: + return False, f"Error validating document: {str(e)}", report + + +# ============================================================================ +# COMPATIBILITY FUNCTIONS (for backward compatibility) +# ============================================================================ + +def add_footnote_at_paragraph_end( + filename: str, + paragraph_index: int, + footnote_text: str, + output_filename: Optional[str] = None +) -> Tuple[bool, str]: + """Add footnote at the end of a specific paragraph (backward compatibility).""" + success, message, _ = add_footnote_robust( + filename=filename, + paragraph_index=paragraph_index, + footnote_text=footnote_text, + output_filename=output_filename, + position="after" + ) + return success, message + + +def add_footnote_with_proper_formatting( + filename: str, + search_text: str, + footnote_text: str, + output_filename: Optional[str] = None, + position: str = "after" +) -> Tuple[bool, str]: + """Add footnote with proper formatting (backward compatibility).""" + success, message, _ = add_footnote_robust( + filename=filename, + search_text=search_text, + footnote_text=footnote_text, + output_filename=output_filename, + position=position + ) + return success, message + + +def delete_footnote( + filename: str, + footnote_id: Optional[int] = None, + search_text: Optional[str] = None, + output_filename: Optional[str] = None +) -> Tuple[bool, str]: + """Delete a footnote (backward compatibility).""" + success, message, _ = delete_footnote_robust( + filename=filename, + footnote_id=footnote_id, + search_text=search_text, + output_filename=output_filename + ) + return success, message + + +# ============================================================================ +# LEGACY FUNCTIONS (for core/__init__.py compatibility) +# ============================================================================ + +def add_footnote(doc, paragraph_index: int, footnote_text: str): + """Legacy function for adding footnotes to python-docx Document objects. 
+ Note: This is a simplified version that doesn't create proper Word footnotes.""" + if paragraph_index >= len(doc.paragraphs): + raise IndexError(f"Paragraph index {paragraph_index} out of range") + + para = doc.paragraphs[paragraph_index] + # Add superscript number + run = para.add_run() + run.text = "¹" + run.font.superscript = True + + # Add footnote text at document end + doc.add_paragraph("_" * 50) + footnote_para = doc.add_paragraph(f"¹ {footnote_text}") + footnote_para.style = "Caption" + + return doc + + +def add_endnote(doc, paragraph_index: int, endnote_text: str): + """Legacy function for adding endnotes.""" + if paragraph_index >= len(doc.paragraphs): + raise IndexError(f"Paragraph index {paragraph_index} out of range") + + para = doc.paragraphs[paragraph_index] + run = para.add_run() + run.text = "†" + run.font.superscript = True + + # Endnotes go at the very end + doc.add_page_break() + doc.add_heading("Endnotes", level=1) + endnote_para = doc.add_paragraph(f"† {endnote_text}") + + return doc + + +def convert_footnotes_to_endnotes(doc): + """Legacy function to convert footnotes to endnotes in a Document object.""" + # This is a placeholder - real conversion requires XML manipulation + return doc + + +# Define XML_NS if needed +XML_NS = 'http://www.w3.org/XML/1998/namespace' \ No newline at end of file diff --git a/backend/office_word_mcp/word_document_server/core/protection.py b/backend/office_word_mcp/word_document_server/core/protection.py new file mode 100644 index 0000000..e706fd7 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/protection.py @@ -0,0 +1,242 @@ +""" +Document protection functionality for Word Document Server. +""" +import os +import json +import hashlib +import datetime +from typing import Dict, List, Tuple, Optional, Any + + +def add_protection_info(doc_path: str, protection_type: str, password_hash: str, + sections: Optional[List[str]] = None, + signature_info: Optional[Dict[str, Any]] = None, + raw_password: Optional[str] = None) -> bool: + """ + Add document protection information to a separate metadata file and encrypt the document. 
+ + Args: + doc_path: Path to the document + protection_type: Type of protection ('password', 'restricted', 'signature') + password_hash: Hashed password for security + sections: List of section names that can be edited (for restricted editing) + signature_info: Information about digital signature + raw_password: The actual password for document encryption + + Returns: + True if protection info was successfully added, False otherwise + """ + # Create metadata filename based on document path + base_path, _ = os.path.splitext(doc_path) + metadata_path = f"{base_path}.protection" + + # Prepare protection data + protection_data = { + "type": protection_type, + "password_hash": password_hash, + "applied_date": datetime.datetime.now().isoformat(), + } + + if sections: + protection_data["editable_sections"] = sections + + if signature_info: + protection_data["signature"] = signature_info + + # Write protection info to metadata file + try: + with open(metadata_path, 'w') as f: + json.dump(protection_data, f, indent=2) + + # Apply actual document encryption if raw_password is provided + if protection_type == "password" and raw_password: + import msoffcrypto + import tempfile + import shutil + + # Create a temporary file for the encrypted output + temp_fd, temp_path = tempfile.mkstemp(suffix='.docx') + os.close(temp_fd) + + try: + # Open the document + with open(doc_path, 'rb') as f: + office_file = msoffcrypto.OfficeFile(f) + + # Encrypt with password + office_file.load_key(password=raw_password) + + # Write the encrypted file to the temp path + with open(temp_path, 'wb') as out_file: + office_file.encrypt(out_file) + + # Replace original with encrypted version + shutil.move(temp_path, doc_path) + + # Update metadata to note that true encryption was applied + protection_data["true_encryption"] = True + with open(metadata_path, 'w') as f: + json.dump(protection_data, f, indent=2) + + except Exception as e: + print(f"Encryption error: {str(e)}") + if os.path.exists(temp_path): + os.unlink(temp_path) + return False + + return True + except Exception as e: + print(f"Protection error: {str(e)}") + return False + + +def verify_document_protection(doc_path: str, password: Optional[str] = None) -> Tuple[bool, str]: + """ + Verify if a document is protected and if the password is correct. + + Args: + doc_path: Path to the document + password: Password to verify + + Returns: + Tuple of (is_protected_and_verified, message) + """ + base_path, _ = os.path.splitext(doc_path) + metadata_path = f"{base_path}.protection" + + # Check if protection metadata exists + if not os.path.exists(metadata_path): + return False, "Document is not protected" + + try: + # Read protection data + with open(metadata_path, 'r') as f: + protection_data = json.load(f) + + # If password is provided, verify it + if password: + password_hash = hashlib.sha256(password.encode()).hexdigest() + if password_hash != protection_data.get("password_hash"): + return False, "Incorrect password" + + # Return protection type + protection_type = protection_data.get("type", "unknown") + return True, f"Document is protected with {protection_type} protection" + + except Exception as e: + return False, f"Error verifying protection: {str(e)}" + + +def is_section_editable(doc_path: str, section_name: str) -> bool: + """ + Check if a specific section of a document is editable. 
+ + Args: + doc_path: Path to the document + section_name: Name of the section to check + + Returns: + True if section is editable, False otherwise + """ + base_path, _ = os.path.splitext(doc_path) + metadata_path = f"{base_path}.protection" + + # Check if protection metadata exists + if not os.path.exists(metadata_path): + # If no protection exists, all sections are editable + return True + + try: + # Read protection data + with open(metadata_path, 'r') as f: + protection_data = json.load(f) + + # Check protection type + if protection_data.get("type") != "restricted": + # If not restricted editing, return based on protection type + return protection_data.get("type") != "password" + + # Check if the section is in the list of editable sections + editable_sections = protection_data.get("editable_sections", []) + return section_name in editable_sections + + except Exception: + # In case of error, default to not editable for security + return False + + +def create_signature_info(doc, signer_name: str, reason: Optional[str] = None) -> Dict[str, Any]: + """ + Create signature information for a document. + + Args: + doc: Document object + signer_name: Name of the person signing the document + reason: Optional reason for signing + + Returns: + Dictionary containing signature information + """ + # Create signature info + signature_info = { + "signer": signer_name, + "timestamp": datetime.datetime.now().isoformat(), + } + + if reason: + signature_info["reason"] = reason + + # Generate a simple signature hash based on document content and metadata + text_content = "\n".join([p.text for p in doc.paragraphs]) + content_hash = hashlib.sha256(text_content.encode()).hexdigest() + signature_info["content_hash"] = content_hash + + return signature_info + + +def verify_signature(doc_path: str) -> Tuple[bool, str]: + """ + Verify a document's digital signature. + + Args: + doc_path: Path to the document + + Returns: + Tuple of (is_valid, message) + """ + from docx import Document + + base_path, _ = os.path.splitext(doc_path) + metadata_path = f"{base_path}.protection" + + if not os.path.exists(metadata_path): + return False, "Document is not signed" + + try: + # Read protection data + with open(metadata_path, 'r') as f: + protection_data = json.load(f) + + if protection_data.get("type") != "signature": + return False, f"Document is protected with {protection_data.get('type')} protection, not a signature" + + # Get the original content hash + signature_info = protection_data.get("signature", {}) + original_hash = signature_info.get("content_hash") + + if not original_hash: + return False, "Invalid signature: missing content hash" + + # Calculate current content hash + doc = Document(doc_path) + text_content = "\n".join([p.text for p in doc.paragraphs]) + current_hash = hashlib.sha256(text_content.encode()).hexdigest() + + # Compare hashes + if current_hash != original_hash: + return False, f"Document has been modified since it was signed by {signature_info.get('signer')}" + + return True, f"Document signature is valid. Signed by {signature_info.get('signer')} on {signature_info.get('timestamp')}" + + except Exception as e: + return False, f"Error verifying signature: {str(e)}" diff --git a/backend/office_word_mcp/word_document_server/core/styles.py b/backend/office_word_mcp/word_document_server/core/styles.py new file mode 100644 index 0000000..49c5d51 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/styles.py @@ -0,0 +1,134 @@ +""" +Style-related functions for Word Document Server. 
+""" +from docx.shared import Pt +from docx.enum.style import WD_STYLE_TYPE + + +def ensure_heading_style(doc): + """ + Ensure Heading styles exist in the document. + + Args: + doc: Document object + """ + for i in range(1, 10): # Create Heading 1 through Heading 9 + style_name = f'Heading {i}' + try: + # Try to access the style to see if it exists + style = doc.styles[style_name] + except KeyError: + # Create the style if it doesn't exist + try: + style = doc.styles.add_style(style_name, WD_STYLE_TYPE.PARAGRAPH) + if i == 1: + style.font.size = Pt(16) + style.font.bold = True + elif i == 2: + style.font.size = Pt(14) + style.font.bold = True + else: + style.font.size = Pt(12) + style.font.bold = True + except Exception: + # If style creation fails, we'll just use default formatting + pass + + +def ensure_table_style(doc): + """ + Ensure Table Grid style exists in the document. + + Args: + doc: Document object + """ + try: + # Try to access the style to see if it exists + style = doc.styles['Table Grid'] + except KeyError: + # If style doesn't exist, we'll handle it at usage time + pass + + +def create_style(doc, style_name, style_type, base_style=None, font_properties=None, paragraph_properties=None): + """ + Create a new style in the document. + + Args: + doc: Document object + style_name: Name for the new style + style_type: Type of style (WD_STYLE_TYPE) + base_style: Optional base style to inherit from + font_properties: Dictionary of font properties (bold, italic, size, name, color) + paragraph_properties: Dictionary of paragraph properties (alignment, spacing) + + Returns: + The created style + """ + from docx.shared import Pt + + try: + # Check if style already exists + style = doc.styles.get_by_id(style_name, WD_STYLE_TYPE.PARAGRAPH) + return style + except: + # Create new style + new_style = doc.styles.add_style(style_name, style_type) + + # Set base style if specified + if base_style: + new_style.base_style = doc.styles[base_style] + + # Set font properties + if font_properties: + font = new_style.font + if 'bold' in font_properties: + font.bold = font_properties['bold'] + if 'italic' in font_properties: + font.italic = font_properties['italic'] + if 'size' in font_properties: + font.size = Pt(font_properties['size']) + if 'name' in font_properties: + font.name = font_properties['name'] + if 'color' in font_properties: + from docx.shared import RGBColor + + # Define common RGB colors + color_map = { + 'red': RGBColor(255, 0, 0), + 'blue': RGBColor(0, 0, 255), + 'green': RGBColor(0, 128, 0), + 'yellow': RGBColor(255, 255, 0), + 'black': RGBColor(0, 0, 0), + 'gray': RGBColor(128, 128, 128), + 'white': RGBColor(255, 255, 255), + 'purple': RGBColor(128, 0, 128), + 'orange': RGBColor(255, 165, 0) + } + + color_value = font_properties['color'] + try: + # Handle string color names + if isinstance(color_value, str) and color_value.lower() in color_map: + font.color.rgb = color_map[color_value.lower()] + # Handle RGBColor objects + elif hasattr(color_value, 'rgb'): + font.color.rgb = color_value + # Try to parse as RGB string + elif isinstance(color_value, str): + font.color.rgb = RGBColor.from_string(color_value) + # Use directly if it's already an RGB value + else: + font.color.rgb = color_value + except Exception as e: + # Fallback to black if all else fails + font.color.rgb = RGBColor(0, 0, 0) + + # Set paragraph properties + if paragraph_properties: + if 'alignment' in paragraph_properties: + new_style.paragraph_format.alignment = paragraph_properties['alignment'] + if 'spacing' in 
paragraph_properties:
+            new_style.paragraph_format.line_spacing = paragraph_properties['spacing']
+
+        return new_style
diff --git a/backend/office_word_mcp/word_document_server/core/tables.py b/backend/office_word_mcp/word_document_server/core/tables.py
new file mode 100644
index 0000000..9cc8dcf
--- /dev/null
+++ b/backend/office_word_mcp/word_document_server/core/tables.py
@@ -0,0 +1,866 @@
+"""
+Table-related operations for Word Document Server.
+"""
+from docx.oxml.shared import OxmlElement, qn
+from docx.oxml.ns import nsdecls
+from docx.oxml import parse_xml
+from docx.shared import RGBColor, Inches, Cm, Pt
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT
+
+
+def set_cell_border(cell, **kwargs):
+    """
+    Set cell border properties.
+
+    Args:
+        cell: The cell to modify
+        **kwargs: Border properties (top, bottom, left, right, val, color)
+    """
+    tc = cell._tc
+    tcPr = tc.get_or_add_tcPr()
+
+    # Create border elements
+    for key, value in kwargs.items():
+        if key in ['top', 'left', 'bottom', 'right']:
+            tag = 'w:{}'.format(key)
+
+            element = OxmlElement(tag)
+            element.set(qn('w:val'), kwargs.get('val', 'single'))
+            element.set(qn('w:sz'), kwargs.get('sz', '4'))
+            element.set(qn('w:space'), kwargs.get('space', '0'))
+            element.set(qn('w:color'), kwargs.get('color', 'auto'))
+
+            tcBorders = tcPr.first_child_found_in("w:tcBorders")
+            if tcBorders is None:
+                tcBorders = OxmlElement('w:tcBorders')
+                tcPr.append(tcBorders)
+
+            tcBorders.append(element)
+
+
+def apply_table_style(table, has_header_row=False, border_style=None, shading=None):
+    """
+    Apply formatting to a table.
+
+    Args:
+        table: The table to format
+        has_header_row: If True, formats the first row as a header
+        border_style: Style for borders ('none', 'single', 'double', 'thick')
+        shading: 2D list of cell background colors (by row and column)
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        # Format header row if requested
+        if has_header_row and table.rows:
+            header_row = table.rows[0]
+            for cell in header_row.cells:
+                for paragraph in cell.paragraphs:
+                    if paragraph.runs:
+                        for run in paragraph.runs:
+                            run.bold = True
+
+        # Apply border style if specified
+        if border_style:
+            val_map = {
+                'none': 'nil',
+                'single': 'single',
+                'double': 'double',
+                'thick': 'thick'
+            }
+            val = val_map.get(border_style.lower(), 'single')
+
+            # Apply to all cells
+            for row in table.rows:
+                for cell in row.cells:
+                    set_cell_border(
+                        cell,
+                        top=True,
+                        bottom=True,
+                        left=True,
+                        right=True,
+                        val=val,
+                        color="000000"
+                    )
+
+        # Apply cell shading if specified
+        if shading:
+            for i, row_colors in enumerate(shading):
+                if i >= len(table.rows):
+                    break
+                for j, color in enumerate(row_colors):
+                    if j >= len(table.rows[i].cells):
+                        break
+                    try:
+                        # Apply shading to cell
+                        cell = table.rows[i].cells[j]
+                        shading_elm = parse_xml(f'<w:shd {nsdecls("w")} w:fill="{color}"/>')
+                        cell._tc.get_or_add_tcPr().append(shading_elm)
+                    except:
+                        # Skip if color format is invalid
+                        pass
+
+        return True
+    except Exception:
+        return False
+
+
+def copy_table(source_table, target_doc):
+    """
+    Copy a table from one document to another.
+
+    Args:
+        source_table: The table to copy
+        target_doc: The document to copy the table to
+
+    Returns:
+        The new table in the target document
+    """
+    # Create a new table with the same dimensions
+    new_table = target_doc.add_table(rows=len(source_table.rows), cols=len(source_table.columns))
+
+    # Try to apply the same style
+    try:
+        if source_table.style:
+            new_table.style = source_table.style
+    except:
+        # Fall back to default grid style
+        try:
+            new_table.style = 'Table Grid'
+        except:
+            pass
+
+    # Copy cell contents
+    for i, row in enumerate(source_table.rows):
+        for j, cell in enumerate(row.cells):
+            for paragraph in cell.paragraphs:
+                if paragraph.text:
+                    new_table.cell(i, j).text = paragraph.text
+
+    return new_table
+
+
+def set_cell_shading(cell, fill_color=None, pattern="clear", pattern_color="auto"):
+    """
+    Apply shading/filling to a table cell.
+
+    Args:
+        cell: The table cell to format
+        fill_color: Background color (hex string like "FF0000" or RGBColor)
+        pattern: Shading pattern ("clear", "solid", "pct10", "pct20", etc.)
+        pattern_color: Pattern color for patterned fills
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        # Get or create table cell properties
+        tc_pr = cell._tc.get_or_add_tcPr()
+
+        # Remove existing shading
+        existing_shd = tc_pr.find(qn('w:shd'))
+        if existing_shd is not None:
+            tc_pr.remove(existing_shd)
+
+        # Create shading element
+        shd_attrs = {
+            'w:val': pattern,
+            'w:color': pattern_color if pattern_color != "auto" else "auto"
+        }
+
+        # Set fill color
+        if fill_color:
+            if isinstance(fill_color, str):
+                # Hex color string - remove # if present
+                fill_color = fill_color.lstrip('#').upper()
+                if len(fill_color) == 6:  # Valid hex color
+                    shd_attrs['w:fill'] = fill_color
+            elif isinstance(fill_color, RGBColor):
+                # RGBColor object
+                hex_color = f"{fill_color.r:02X}{fill_color.g:02X}{fill_color.b:02X}"
+                shd_attrs['w:fill'] = hex_color
+
+        # Build XML string
+        attr_str = ' '.join([f'{k}="{v}"' for k, v in shd_attrs.items()])
+        shd_xml = f'<w:shd {nsdecls("w")} {attr_str}/>'
+
+        # Parse and append shading element
+        shading_elm = parse_xml(shd_xml)
+        tc_pr.append(shading_elm)
+
+        return True
+
+    except Exception as e:
+        print(f"Error setting cell shading: {e}")
+        return False
+
+
+def apply_alternating_row_shading(table, color1="FFFFFF", color2="F2F2F2"):
+    """
+    Apply alternating row colors for better readability.
+
+    Args:
+        table: The table to format
+        color1: Color for odd rows (hex string)
+        color2: Color for even rows (hex string)
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        for i, row in enumerate(table.rows):
+            fill_color = color1 if i % 2 == 0 else color2
+            for cell in row.cells:
+                set_cell_shading(cell, fill_color=fill_color)
+        return True
+    except Exception as e:
+        print(f"Error applying alternating row shading: {e}")
+        return False
+
+
+def highlight_header_row(table, header_color="4472C4", text_color="FFFFFF"):
+    """
+    Apply special shading to header row.
+ + Args: + table: The table to format + header_color: Background color for header (hex string) + text_color: Text color for header (hex string) + + Returns: + True if successful, False otherwise + """ + try: + if table.rows: + for cell in table.rows[0].cells: + # Apply background shading + set_cell_shading(cell, fill_color=header_color) + + # Apply text formatting + for paragraph in cell.paragraphs: + for run in paragraph.runs: + run.bold = True + if text_color and text_color != "auto": + # Convert hex to RGB + try: + text_color = text_color.lstrip('#') + r = int(text_color[0:2], 16) + g = int(text_color[2:4], 16) + b = int(text_color[4:6], 16) + run.font.color.rgb = RGBColor(r, g, b) + except: + pass # Skip if color format is invalid + return True + except Exception as e: + print(f"Error highlighting header row: {e}") + return False + + +def set_cell_shading_by_position(table, row_index, col_index, fill_color, pattern="clear"): + """ + Apply shading to a specific cell by row/column position. + + Args: + table: The table containing the cell + row_index: Row index (0-based) + col_index: Column index (0-based) + fill_color: Background color (hex string) + pattern: Shading pattern + + Returns: + True if successful, False otherwise + """ + try: + if (0 <= row_index < len(table.rows) and + 0 <= col_index < len(table.rows[row_index].cells)): + cell = table.rows[row_index].cells[col_index] + return set_cell_shading(cell, fill_color=fill_color, pattern=pattern) + else: + return False + except Exception as e: + print(f"Error setting cell shading by position: {e}") + return False + + +def merge_cells(table, start_row, start_col, end_row, end_col): + """ + Merge cells in a rectangular area. + + Args: + table: The table containing cells to merge + start_row: Starting row index (0-based) + start_col: Starting column index (0-based) + end_row: Ending row index (0-based, inclusive) + end_col: Ending column index (0-based, inclusive) + + Returns: + True if successful, False otherwise + """ + try: + # Validate indices + if (start_row < 0 or start_col < 0 or end_row < 0 or end_col < 0 or + start_row >= len(table.rows) or end_row >= len(table.rows) or + start_row > end_row or start_col > end_col): + return False + + # Check if all rows have enough columns + for row_idx in range(start_row, end_row + 1): + if (start_col >= len(table.rows[row_idx].cells) or + end_col >= len(table.rows[row_idx].cells)): + return False + + # Get the start and end cells + start_cell = table.cell(start_row, start_col) + end_cell = table.cell(end_row, end_col) + + # Merge the cells + start_cell.merge(end_cell) + + return True + + except Exception as e: + print(f"Error merging cells: {e}") + return False + + +def merge_cells_horizontal(table, row_index, start_col, end_col): + """ + Merge cells horizontally in a single row. + + Args: + table: The table containing cells to merge + row_index: Row index (0-based) + start_col: Starting column index (0-based) + end_col: Ending column index (0-based, inclusive) + + Returns: + True if successful, False otherwise + """ + return merge_cells(table, row_index, start_col, row_index, end_col) + + +def merge_cells_vertical(table, col_index, start_row, end_row): + """ + Merge cells vertically in a single column. 
+ + Args: + table: The table containing cells to merge + col_index: Column index (0-based) + start_row: Starting row index (0-based) + end_row: Ending row index (0-based, inclusive) + + Returns: + True if successful, False otherwise + """ + return merge_cells(table, start_row, col_index, end_row, col_index) + + +def set_cell_alignment(cell, horizontal="left", vertical="top"): + """ + Set text alignment within a cell. + + Args: + cell: The table cell to format + horizontal: Horizontal alignment ("left", "center", "right", "justify") + vertical: Vertical alignment ("top", "center", "bottom") + + Returns: + True if successful, False otherwise + """ + try: + # Set horizontal alignment for all paragraphs in the cell + for paragraph in cell.paragraphs: + if horizontal.lower() == "center": + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif horizontal.lower() == "right": + paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT + elif horizontal.lower() == "justify": + paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY + else: # default to left + paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT + + # Set vertical alignment for the cell using XML manipulation + tc_pr = cell._tc.get_or_add_tcPr() + + # Remove existing vertical alignment + existing_valign = tc_pr.find(qn('w:vAlign')) + if existing_valign is not None: + tc_pr.remove(existing_valign) + + # Create vertical alignment element + valign_element = OxmlElement('w:vAlign') + if vertical.lower() == "center": + valign_element.set(qn('w:val'), 'center') + elif vertical.lower() == "bottom": + valign_element.set(qn('w:val'), 'bottom') + else: # default to top + valign_element.set(qn('w:val'), 'top') + + tc_pr.append(valign_element) + + return True + + except Exception as e: + print(f"Error setting cell alignment: {e}") + return False + + +def set_cell_alignment_by_position(table, row_index, col_index, horizontal="left", vertical="top"): + """ + Set text alignment for a specific cell by position. + + Args: + table: The table containing the cell + row_index: Row index (0-based) + col_index: Column index (0-based) + horizontal: Horizontal alignment ("left", "center", "right", "justify") + vertical: Vertical alignment ("top", "center", "bottom") + + Returns: + True if successful, False otherwise + """ + try: + if (0 <= row_index < len(table.rows) and + 0 <= col_index < len(table.rows[row_index].cells)): + cell = table.rows[row_index].cells[col_index] + return set_cell_alignment(cell, horizontal, vertical) + else: + return False + except Exception as e: + print(f"Error setting cell alignment by position: {e}") + return False + + +def set_table_alignment(table, horizontal="left", vertical="top"): + """ + Set text alignment for all cells in a table. + + Args: + table: The table to format + horizontal: Horizontal alignment ("left", "center", "right", "justify") + vertical: Vertical alignment ("top", "center", "bottom") + + Returns: + True if successful, False otherwise + """ + try: + for row in table.rows: + for cell in row.cells: + set_cell_alignment(cell, horizontal, vertical) + return True + except Exception as e: + print(f"Error setting table alignment: {e}") + return False + + +def set_column_width(table, col_index, width, width_type="dxa"): + """ + Set the width of a specific column in a table. 
+ + Args: + table: The table to modify + col_index: Column index (0-based) + width: Column width value + width_type: Width type ("dxa" for points*20, "pct" for percentage*50, "auto") + + Returns: + True if successful, False otherwise + """ + try: + # Validate column index + if col_index < 0 or col_index >= len(table.columns): + return False + + # Convert width based on type + if width_type == "dxa": + # DXA units (twentieths of a point) + if isinstance(width, (int, float)): + width_value = str(int(width * 20)) + else: + width_value = str(width) + elif width_type == "pct": + # Percentage (multiply by 50 for Word format) + if isinstance(width, (int, float)): + width_value = str(int(width * 50)) + else: + width_value = str(width) + else: + width_value = str(width) + + # Iterate through all rows and set width for cells in the specified column + for row in table.rows: + if col_index < len(row.cells): + cell = row.cells[col_index] + tc_pr = cell._tc.get_or_add_tcPr() + + # Remove existing width + existing_width = tc_pr.find(qn('w:tcW')) + if existing_width is not None: + tc_pr.remove(existing_width) + + # Create new width element + width_element = OxmlElement('w:tcW') + width_element.set(qn('w:w'), width_value) + width_element.set(qn('w:type'), width_type) + + tc_pr.append(width_element) + + return True + + except Exception as e: + print(f"Error setting column width: {e}") + return False + + +def set_column_width_by_position(table, col_index, width, width_type="dxa"): + """ + Set the width of a specific column by position. + + Args: + table: The table containing the column + col_index: Column index (0-based) + width: Column width value + width_type: Width type ("dxa" for points*20, "pct" for percentage*50, "auto") + + Returns: + True if successful, False otherwise + """ + return set_column_width(table, col_index, width, width_type) + + +def set_column_widths(table, widths, width_type="dxa"): + """ + Set widths for multiple columns in a table. + + Args: + table: The table to modify + widths: List of width values for each column + width_type: Width type ("dxa" for points*20, "pct" for percentage*50, "auto") + + Returns: + True if successful, False otherwise + """ + try: + for col_index, width in enumerate(widths): + if col_index >= len(table.columns): + break + if not set_column_width(table, col_index, width, width_type): + return False + return True + except Exception as e: + print(f"Error setting column widths: {e}") + return False + + +def set_table_width(table, width, width_type="dxa"): + """ + Set the overall width of a table. 
+ + Args: + table: The table to modify + width: Table width value + width_type: Width type ("dxa" for points*20, "pct" for percentage*50, "auto") + + Returns: + True if successful, False otherwise + """ + try: + # Convert width based on type + if width_type == "dxa": + # DXA units (twentieths of a point) + if isinstance(width, (int, float)): + width_value = str(int(width * 20)) + else: + width_value = str(width) + elif width_type == "pct": + # Percentage (multiply by 50 for Word format) + if isinstance(width, (int, float)): + width_value = str(int(width * 50)) + else: + width_value = str(width) + else: + width_value = str(width) + + # Get table element and properties + tbl = table._tbl + + # Get or create table properties + tbl_pr = tbl.find(qn('w:tblPr')) + if tbl_pr is None: + tbl_pr = OxmlElement('w:tblPr') + tbl.insert(0, tbl_pr) + + # Remove existing table width + existing_width = tbl_pr.find(qn('w:tblW')) + if existing_width is not None: + tbl_pr.remove(existing_width) + + # Create new table width element + width_element = OxmlElement('w:tblW') + width_element.set(qn('w:w'), width_value) + width_element.set(qn('w:type'), width_type) + + tbl_pr.append(width_element) + + return True + + except Exception as e: + print(f"Error setting table width: {e}") + return False + + +def auto_fit_table(table): + """ + Set table to auto-fit columns based on content. + + Args: + table: The table to modify + + Returns: + True if successful, False otherwise + """ + try: + # Get table element and properties + tbl = table._tbl + + # Get or create table properties + tbl_pr = tbl.find(qn('w:tblPr')) + if tbl_pr is None: + tbl_pr = OxmlElement('w:tblPr') + tbl.insert(0, tbl_pr) + + # Remove existing layout + existing_layout = tbl_pr.find(qn('w:tblLayout')) + if existing_layout is not None: + tbl_pr.remove(existing_layout) + + # Create auto layout element + layout_element = OxmlElement('w:tblLayout') + layout_element.set(qn('w:type'), 'autofit') + + tbl_pr.append(layout_element) + + # Set all column widths to auto + for col_index in range(len(table.columns)): + set_column_width(table, col_index, 0, "auto") + + return True + + except Exception as e: + print(f"Error setting auto-fit table: {e}") + return False + + +def format_cell_text(cell, text_content=None, bold=None, italic=None, underline=None, + color=None, font_size=None, font_name=None): + """ + Format text within a table cell. 
+ + Args: + cell: The table cell to format + text_content: Optional new text content for the cell + bold: Set text bold (True/False) + italic: Set text italic (True/False) + underline: Set text underlined (True/False) + color: Text color (hex string like "FF0000" or color name) + font_size: Font size in points + font_name: Font name/family + + Returns: + True if successful, False otherwise + """ + try: + # Set text content if provided + if text_content is not None: + cell.text = str(text_content) + + # Apply formatting to all paragraphs and runs in the cell + for paragraph in cell.paragraphs: + for run in paragraph.runs: + if bold is not None: + run.bold = bold + if italic is not None: + run.italic = italic + if underline is not None: + run.underline = underline + + if font_size is not None: + from docx.shared import Pt + run.font.size = Pt(font_size) + + if font_name is not None: + run.font.name = font_name + + if color is not None: + from docx.shared import RGBColor + # Define common RGB colors + color_map = { + 'red': RGBColor(255, 0, 0), + 'blue': RGBColor(0, 0, 255), + 'green': RGBColor(0, 128, 0), + 'yellow': RGBColor(255, 255, 0), + 'black': RGBColor(0, 0, 0), + 'gray': RGBColor(128, 128, 128), + 'grey': RGBColor(128, 128, 128), + 'white': RGBColor(255, 255, 255), + 'purple': RGBColor(128, 0, 128), + 'orange': RGBColor(255, 165, 0) + } + + try: + if color.lower() in color_map: + # Use predefined RGB color + run.font.color.rgb = color_map[color.lower()] + elif color.startswith('#'): + # Hex color string + hex_color = color.lstrip('#') + if len(hex_color) == 6: + r = int(hex_color[0:2], 16) + g = int(hex_color[2:4], 16) + b = int(hex_color[4:6], 16) + run.font.color.rgb = RGBColor(r, g, b) + else: + # Try hex without # + if len(color) == 6: + r = int(color[0:2], 16) + g = int(color[2:4], 16) + b = int(color[4:6], 16) + run.font.color.rgb = RGBColor(r, g, b) + except Exception: + # If color parsing fails, default to black + run.font.color.rgb = RGBColor(0, 0, 0) + + return True + + except Exception as e: + print(f"Error formatting cell text: {e}") + return False + + +def format_cell_text_by_position(table, row_index, col_index, text_content=None, + bold=None, italic=None, underline=None, color=None, + font_size=None, font_name=None): + """ + Format text in a specific table cell by position. + + Args: + table: The table containing the cell + row_index: Row index (0-based) + col_index: Column index (0-based) + text_content: Optional new text content for the cell + bold: Set text bold (True/False) + italic: Set text italic (True/False) + underline: Set text underlined (True/False) + color: Text color (hex string or color name) + font_size: Font size in points + font_name: Font name/family + + Returns: + True if successful, False otherwise + """ + try: + if (0 <= row_index < len(table.rows) and + 0 <= col_index < len(table.rows[row_index].cells)): + cell = table.rows[row_index].cells[col_index] + return format_cell_text(cell, text_content, bold, italic, underline, + color, font_size, font_name) + else: + return False + except Exception as e: + print(f"Error formatting cell text by position: {e}") + return False + + +def set_cell_padding(cell, top=None, bottom=None, left=None, right=None, unit="dxa"): + """ + Set padding/margins for a table cell. 
+ + Args: + cell: The table cell to format + top: Top padding value + bottom: Bottom padding value + left: Left padding value + right: Right padding value + unit: Unit type ("dxa" for twentieths of a point, "pct" for percentage) + + Returns: + True if successful, False otherwise + """ + try: + # Get or create table cell properties + tc_pr = cell._tc.get_or_add_tcPr() + + # Remove existing margins + existing_margins = tc_pr.find(qn('w:tcMar')) + if existing_margins is not None: + tc_pr.remove(existing_margins) + + # Create margins element if any padding is specified + if any(p is not None for p in [top, bottom, left, right]): + margins_element = OxmlElement('w:tcMar') + + # Add individual margin elements + margin_sides = { + 'w:top': top, + 'w:bottom': bottom, + 'w:left': left, + 'w:right': right + } + + for side, value in margin_sides.items(): + if value is not None: + margin_el = OxmlElement(side) + if unit == "dxa": + # DXA units (twentieths of a point) + margin_el.set(qn('w:w'), str(int(value * 20))) + margin_el.set(qn('w:type'), 'dxa') + elif unit == "pct": + # Percentage + margin_el.set(qn('w:w'), str(int(value * 50))) + margin_el.set(qn('w:type'), 'pct') + else: + # Default to DXA + margin_el.set(qn('w:w'), str(int(value * 20))) + margin_el.set(qn('w:type'), 'dxa') + + margins_element.append(margin_el) + + tc_pr.append(margins_element) + + return True + + except Exception as e: + print(f"Error setting cell padding: {e}") + return False + + +def set_cell_padding_by_position(table, row_index, col_index, top=None, bottom=None, + left=None, right=None, unit="dxa"): + """ + Set padding for a specific table cell by position. + + Args: + table: The table containing the cell + row_index: Row index (0-based) + col_index: Column index (0-based) + top: Top padding value + bottom: Bottom padding value + left: Left padding value + right: Right padding value + unit: Unit type ("dxa" for twentieths of a point, "pct" for percentage) + + Returns: + True if successful, False otherwise + """ + try: + if (0 <= row_index < len(table.rows) and + 0 <= col_index < len(table.rows[row_index].cells)): + cell = table.rows[row_index].cells[col_index] + return set_cell_padding(cell, top, bottom, left, right, unit) + else: + return False + except Exception as e: + print(f"Error setting cell padding by position: {e}") + return False diff --git a/backend/office_word_mcp/word_document_server/core/unprotect.py b/backend/office_word_mcp/word_document_server/core/unprotect.py new file mode 100644 index 0000000..8daddef --- /dev/null +++ b/backend/office_word_mcp/word_document_server/core/unprotect.py @@ -0,0 +1,78 @@ +""" +Unprotect document functionality for the Word Document Server. + +This module handles removing document protection. +""" +import os +import json +import hashlib +import tempfile +import shutil +from typing import Tuple, Optional + +def remove_protection_info(filename: str, password: Optional[str] = None) -> Tuple[bool, str]: + """ + Remove protection information from a document and decrypt it if necessary. 
+ + Args: + filename: Path to the Word document + password: Password to verify before removing protection + + Returns: + Tuple of (success, message) + """ + base_path, _ = os.path.splitext(filename) + metadata_path = f"{base_path}.protection" + + # Check if protection metadata exists + if not os.path.exists(metadata_path): + return False, "Document is not protected" + + try: + # Load protection data + with open(metadata_path, 'r') as f: + protection_data = json.load(f) + + # Verify password if provided and required + if password and protection_data.get("password_hash"): + password_hash = hashlib.sha256(password.encode()).hexdigest() + if password_hash != protection_data.get("password_hash"): + return False, "Incorrect password" + + # Handle true encryption if it was applied + if protection_data.get("true_encryption") and password: + try: + import msoffcrypto + + # Create a temporary file for the decrypted output + temp_fd, temp_path = tempfile.mkstemp(suffix='.docx') + os.close(temp_fd) + + # Open the encrypted document + with open(filename, 'rb') as f: + office_file = msoffcrypto.OfficeFile(f) + + # Decrypt with provided password + try: + office_file.load_key(password=password) + + # Write the decrypted file to the temp path + with open(temp_path, 'wb') as out_file: + office_file.decrypt(out_file) + + # Replace encrypted file with decrypted version + shutil.move(temp_path, filename) + except Exception as decrypt_error: + if os.path.exists(temp_path): + os.unlink(temp_path) + return False, f"Failed to decrypt document: {str(decrypt_error)}" + except ImportError: + return False, "Missing msoffcrypto package required for encryption/decryption" + except Exception as e: + return False, f"Error decrypting document: {str(e)}" + + # Remove the protection metadata file + os.remove(metadata_path) + return True, "Protection removed successfully" + except Exception as e: + return False, f"Error removing protection: {str(e)}" diff --git a/backend/office_word_mcp/word_document_server/main.py b/backend/office_word_mcp/word_document_server/main.py new file mode 100644 index 0000000..6f1b2f8 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/main.py @@ -0,0 +1,523 @@ +""" +Main entry point for the Word Document MCP Server. +Acts as the central controller for the MCP server that handles Word document operations. +Supports multiple transports: stdio, sse, and streamable-http using standalone FastMCP. +""" + +import os +import sys +from dotenv import load_dotenv + +# Load environment variables from .env file +print("Loading configuration from .env file...") +load_dotenv() +# Set required environment variable for FastMCP 2.8.1+ +os.environ.setdefault('FASTMCP_LOG_LEVEL', 'INFO') +from fastmcp import FastMCP +from word_document_server.tools import ( + document_tools, + content_tools, + format_tools, + protection_tools, + footnote_tools, + extended_document_tools, + comment_tools +) +from word_document_server.tools.content_tools import replace_paragraph_block_below_header_tool +from word_document_server.tools.content_tools import replace_block_between_manual_anchors_tool + +def get_transport_config(): + """ + Get transport configuration from environment variables. 
+ + Returns: + dict: Transport configuration with type, host, port, and other settings + """ + # Default configuration + config = { + 'transport': 'stdio', # Default to stdio for backward compatibility + 'host': '0.0.0.0', + 'port': 8000, + 'path': '/mcp', + 'sse_path': '/sse' + } + + # Override with environment variables if provided + transport = os.getenv('MCP_TRANSPORT', 'stdio').lower() + print(f"Transport: {transport}") + # Validate transport type + valid_transports = ['stdio', 'streamable-http', 'sse'] + if transport not in valid_transports: + print(f"Warning: Invalid transport '{transport}'. Falling back to 'stdio'.") + transport = 'stdio' + + config['transport'] = transport + config['host'] = os.getenv('MCP_HOST', config['host']) + # Use PORT from Render if available, otherwise fall back to MCP_PORT or default + config['port'] = int(os.getenv('PORT', os.getenv('MCP_PORT', config['port']))) + config['path'] = os.getenv('MCP_PATH', config['path']) + config['sse_path'] = os.getenv('MCP_SSE_PATH', config['sse_path']) + + return config + + +def setup_logging(debug_mode): + """ + Setup logging based on debug mode. + + Args: + debug_mode (bool): Whether to enable debug logging + """ + import logging + + if debug_mode: + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + print("Debug logging enabled") + else: + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + +# Initialize FastMCP server +mcp = FastMCP("Word Document Server") + + +def register_tools(): + """Register all tools with the MCP server using FastMCP decorators.""" + + # Document tools (create, copy, info, etc.) + @mcp.tool() + def create_document(filename: str, title: str = None, author: str = None): + """Create a new Word document with optional metadata.""" + return document_tools.create_document(filename, title, author) + + @mcp.tool() + def copy_document(source_filename: str, destination_filename: str = None): + """Create a copy of a Word document.""" + return document_tools.copy_document(source_filename, destination_filename) + + @mcp.tool() + def get_document_info(filename: str): + """Get information about a Word document.""" + return document_tools.get_document_info(filename) + + @mcp.tool() + def get_document_text(filename: str): + """Extract all text from a Word document.""" + return document_tools.get_document_text(filename) + + @mcp.tool() + def get_document_outline(filename: str): + """Get the structure of a Word document.""" + return document_tools.get_document_outline(filename) + + @mcp.tool() + def list_available_documents(directory: str = "."): + """List all .docx files in the specified directory.""" + return document_tools.list_available_documents(directory) + + @mcp.tool() + def get_document_xml(filename: str): + """Get the raw XML structure of a Word document.""" + return document_tools.get_document_xml_tool(filename) + + @mcp.tool() + def insert_header_near_text(filename: str, target_text: str = None, header_title: str = None, position: str = 'after', header_style: str = 'Heading 1', target_paragraph_index: int = None): + """Insert a header (with specified style) before or after the target paragraph. Specify by text or paragraph index. 
Args: filename (str), target_text (str, optional), header_title (str), position ('before' or 'after'), header_style (str, default 'Heading 1'), target_paragraph_index (int, optional).""" + return content_tools.insert_header_near_text_tool(filename, target_text, header_title, position, header_style, target_paragraph_index) + + @mcp.tool() + def insert_line_or_paragraph_near_text(filename: str, target_text: str = None, line_text: str = None, position: str = 'after', line_style: str = None, target_paragraph_index: int = None): + """ + Insert a new line or paragraph (with specified or matched style) before or after the target paragraph. Specify by text or paragraph index. Args: filename (str), target_text (str, optional), line_text (str), position ('before' or 'after'), line_style (str, optional), target_paragraph_index (int, optional). + """ + return content_tools.insert_line_or_paragraph_near_text_tool(filename, target_text, line_text, position, line_style, target_paragraph_index) + + @mcp.tool() + def insert_numbered_list_near_text(filename: str, target_text: str = None, list_items: list = None, position: str = 'after', target_paragraph_index: int = None, bullet_type: str = 'bullet'): + """Insert a bulleted or numbered list before or after the target paragraph. Specify by text or paragraph index. Args: filename (str), target_text (str, optional), list_items (list of str), position ('before' or 'after'), target_paragraph_index (int, optional), bullet_type ('bullet' for bullets or 'number' for numbered lists, default: 'bullet').""" + return content_tools.insert_numbered_list_near_text_tool(filename, target_text, list_items, position, target_paragraph_index, bullet_type) + # Content tools (paragraphs, headings, tables, etc.) + @mcp.tool() + def add_paragraph(filename: str, text: str, style: str = None, + font_name: str = None, font_size: int = None, + bold: bool = None, italic: bool = None, color: str = None): + """Add a paragraph to a Word document with optional formatting. + + Args: + filename: Path to Word document + text: Paragraph text content + style: Optional paragraph style name + font_name: Font family (e.g., 'Helvetica', 'Times New Roman') + font_size: Font size in points (e.g., 14, 36) + bold: Make text bold + italic: Make text italic + color: Text color as hex RGB (e.g., '000000') + """ + return content_tools.add_paragraph(filename, text, style, font_name, font_size, bold, italic, color) + + @mcp.tool() + def add_heading(filename: str, text: str, level: int = 1, + font_name: str = None, font_size: int = None, + bold: bool = None, italic: bool = None, border_bottom: bool = False): + """Add a heading to a Word document with optional formatting. 
+ + Args: + filename: Path to Word document + text: Heading text + level: Heading level (1-9) + font_name: Font family (e.g., 'Helvetica') + font_size: Font size in points (e.g., 14) + bold: Make heading bold + italic: Make heading italic + border_bottom: Add bottom border (for section headers) + """ + return content_tools.add_heading(filename, text, level, font_name, font_size, bold, italic, border_bottom) + + @mcp.tool() + def add_picture(filename: str, image_path: str, width: float = None): + """Add an image to a Word document.""" + return content_tools.add_picture(filename, image_path, width) + + @mcp.tool() + def add_table(filename: str, rows: int, cols: int, data: list = None): + """Add a table to a Word document.""" + return content_tools.add_table(filename, rows, cols, data) + + @mcp.tool() + def add_page_break(filename: str): + """Add a page break to the document.""" + return content_tools.add_page_break(filename) + + @mcp.tool() + def delete_paragraph(filename: str, paragraph_index: int): + """Delete a paragraph from a document.""" + return content_tools.delete_paragraph(filename, paragraph_index) + + @mcp.tool() + def search_and_replace(filename: str, find_text: str, replace_text: str): + """Search for text and replace all occurrences.""" + return content_tools.search_and_replace(filename, find_text, replace_text) + + # Format tools (styling, text formatting, etc.) + @mcp.tool() + def create_custom_style(filename: str, style_name: str, bold: bool = None, + italic: bool = None, font_size: int = None, + font_name: str = None, color: str = None, + base_style: str = None): + """Create a custom style in the document.""" + return format_tools.create_custom_style( + filename, style_name, bold, italic, font_size, font_name, color, base_style + ) + + @mcp.tool() + def format_text(filename: str, paragraph_index: int, start_pos: int, end_pos: int, + bold: bool = None, italic: bool = None, underline: bool = None, + color: str = None, font_size: int = None, font_name: str = None): + """Format a specific range of text within a paragraph.""" + return format_tools.format_text( + filename, paragraph_index, start_pos, end_pos, bold, italic, + underline, color, font_size, font_name + ) + + @mcp.tool() + def format_table(filename: str, table_index: int, has_header_row: bool = None, + border_style: str = None, shading: list = None): + """Format a table with borders, shading, and structure.""" + return format_tools.format_table(filename, table_index, has_header_row, border_style, shading) + + # New table cell shading tools + @mcp.tool() + def set_table_cell_shading(filename: str, table_index: int, row_index: int, + col_index: int, fill_color: str, pattern: str = "clear"): + """Apply shading/filling to a specific table cell.""" + return format_tools.set_table_cell_shading(filename, table_index, row_index, col_index, fill_color, pattern) + + @mcp.tool() + def apply_table_alternating_rows(filename: str, table_index: int, + color1: str = "FFFFFF", color2: str = "F2F2F2"): + """Apply alternating row colors to a table for better readability.""" + return format_tools.apply_table_alternating_rows(filename, table_index, color1, color2) + + @mcp.tool() + def highlight_table_header(filename: str, table_index: int, + header_color: str = "4472C4", text_color: str = "FFFFFF"): + """Apply special highlighting to table header row.""" + return format_tools.highlight_table_header(filename, table_index, header_color, text_color) + + # Cell merging tools + @mcp.tool() + def merge_table_cells(filename: str, table_index: 
int, start_row: int, start_col: int, + end_row: int, end_col: int): + """Merge cells in a rectangular area of a table.""" + return format_tools.merge_table_cells(filename, table_index, start_row, start_col, end_row, end_col) + + @mcp.tool() + def merge_table_cells_horizontal(filename: str, table_index: int, row_index: int, + start_col: int, end_col: int): + """Merge cells horizontally in a single row.""" + return format_tools.merge_table_cells_horizontal(filename, table_index, row_index, start_col, end_col) + + @mcp.tool() + def merge_table_cells_vertical(filename: str, table_index: int, col_index: int, + start_row: int, end_row: int): + """Merge cells vertically in a single column.""" + return format_tools.merge_table_cells_vertical(filename, table_index, col_index, start_row, end_row) + + # Cell alignment tools + @mcp.tool() + def set_table_cell_alignment(filename: str, table_index: int, row_index: int, col_index: int, + horizontal: str = "left", vertical: str = "top"): + """Set text alignment for a specific table cell.""" + return format_tools.set_table_cell_alignment(filename, table_index, row_index, col_index, horizontal, vertical) + + @mcp.tool() + def set_table_alignment_all(filename: str, table_index: int, + horizontal: str = "left", vertical: str = "top"): + """Set text alignment for all cells in a table.""" + return format_tools.set_table_alignment_all(filename, table_index, horizontal, vertical) + + # Protection tools + @mcp.tool() + def protect_document(filename: str, password: str): + """Add password protection to a Word document.""" + return protection_tools.protect_document(filename, password) + + @mcp.tool() + def unprotect_document(filename: str, password: str): + """Remove password protection from a Word document.""" + return protection_tools.unprotect_document(filename, password) + + # Footnote tools + @mcp.tool() + def add_footnote_to_document(filename: str, paragraph_index: int, footnote_text: str): + """Add a footnote to a specific paragraph in a Word document.""" + return footnote_tools.add_footnote_to_document(filename, paragraph_index, footnote_text) + + @mcp.tool() + def add_footnote_after_text(filename: str, search_text: str, footnote_text: str, + output_filename: str = None): + """Add a footnote after specific text with proper superscript formatting. + This enhanced function ensures footnotes display correctly as superscript.""" + return footnote_tools.add_footnote_after_text(filename, search_text, footnote_text, output_filename) + + @mcp.tool() + def add_footnote_before_text(filename: str, search_text: str, footnote_text: str, + output_filename: str = None): + """Add a footnote before specific text with proper superscript formatting. + This enhanced function ensures footnotes display correctly as superscript.""" + return footnote_tools.add_footnote_before_text(filename, search_text, footnote_text, output_filename) + + @mcp.tool() + def add_footnote_enhanced(filename: str, paragraph_index: int, footnote_text: str, + output_filename: str = None): + """Enhanced footnote addition with guaranteed superscript formatting. 
+ Adds footnote at the end of a specific paragraph with proper style handling.""" + return footnote_tools.add_footnote_enhanced(filename, paragraph_index, footnote_text, output_filename) + + @mcp.tool() + def add_endnote_to_document(filename: str, paragraph_index: int, endnote_text: str): + """Add an endnote to a specific paragraph in a Word document.""" + return footnote_tools.add_endnote_to_document(filename, paragraph_index, endnote_text) + + @mcp.tool() + def customize_footnote_style(filename: str, numbering_format: str = "1, 2, 3", + start_number: int = 1, font_name: str = None, + font_size: int = None): + """Customize footnote numbering and formatting in a Word document.""" + return footnote_tools.customize_footnote_style( + filename, numbering_format, start_number, font_name, font_size + ) + + @mcp.tool() + def delete_footnote_from_document(filename: str, footnote_id: int = None, + search_text: str = None, output_filename: str = None): + """Delete a footnote from a Word document. + Identify the footnote either by ID (1, 2, 3, etc.) or by searching for text near it.""" + return footnote_tools.delete_footnote_from_document( + filename, footnote_id, search_text, output_filename + ) + + # Robust footnote tools - Production-ready with comprehensive validation + @mcp.tool() + def add_footnote_robust(filename: str, search_text: str = None, + paragraph_index: int = None, footnote_text: str = "", + validate_location: bool = True, auto_repair: bool = False): + """Add footnote with robust validation and Word compliance. + This is the production-ready version with comprehensive error handling.""" + return footnote_tools.add_footnote_robust_tool( + filename, search_text, paragraph_index, footnote_text, + validate_location, auto_repair + ) + + @mcp.tool() + def validate_document_footnotes(filename: str): + """Validate all footnotes in document for coherence and compliance. + Returns detailed report on ID conflicts, orphaned content, missing styles, etc.""" + return footnote_tools.validate_footnotes_tool(filename) + + @mcp.tool() + def delete_footnote_robust(filename: str, footnote_id: int = None, + search_text: str = None, clean_orphans: bool = True): + """Delete footnote with comprehensive cleanup and orphan removal. 
+        Ensures complete removal from document.xml, footnotes.xml, and relationships."""
+        return footnote_tools.delete_footnote_robust_tool(
+            filename, footnote_id, search_text, clean_orphans
+        )
+
+    # Extended document tools
+    @mcp.tool()
+    def get_paragraph_text_from_document(filename: str, paragraph_index: int):
+        """Get text from a specific paragraph in a Word document."""
+        return extended_document_tools.get_paragraph_text_from_document(filename, paragraph_index)
+
+    @mcp.tool()
+    def find_text_in_document(filename: str, text_to_find: str, match_case: bool = True,
+                              whole_word: bool = False):
+        """Find occurrences of specific text in a Word document."""
+        return extended_document_tools.find_text_in_document(
+            filename, text_to_find, match_case, whole_word
+        )
+
+    @mcp.tool()
+    def convert_to_pdf(filename: str, output_filename: str = None):
+        """Convert a Word document to PDF format."""
+        return extended_document_tools.convert_to_pdf(filename, output_filename)
+
+    @mcp.tool()
+    def replace_paragraph_block_below_header(filename: str, header_text: str, new_paragraphs: list, detect_block_end_fn=None):
+        """Replace the block of paragraphs below a header, avoiding modifying the TOC."""
+        return replace_paragraph_block_below_header_tool(filename, header_text, new_paragraphs, detect_block_end_fn)
+
+    @mcp.tool()
+    def replace_block_between_manual_anchors(filename: str, start_anchor_text: str, new_paragraphs: list, end_anchor_text: str = None, match_fn=None, new_paragraph_style: str = None):
+        """Replace all content between start_anchor_text and end_anchor_text (or next logical header if not provided)."""
+        return replace_block_between_manual_anchors_tool(filename, start_anchor_text, new_paragraphs, end_anchor_text, match_fn, new_paragraph_style)
+
+    # Comment tools
+    @mcp.tool()
+    def get_all_comments(filename: str):
+        """Extract all comments from a Word document."""
+        return comment_tools.get_all_comments(filename)
+
+    @mcp.tool()
+    def get_comments_by_author(filename: str, author: str):
+        """Extract comments from a specific author in a Word document."""
+        return comment_tools.get_comments_by_author(filename, author)
+
+    @mcp.tool()
+    def get_comments_for_paragraph(filename: str, paragraph_index: int):
+        """Extract comments for a specific paragraph in a Word document."""
+        return comment_tools.get_comments_for_paragraph(filename, paragraph_index)
+    # New table column width tools
+    @mcp.tool()
+    def set_table_column_width(filename: str, table_index: int, col_index: int,
+                               width: float, width_type: str = "points"):
+        """Set the width of a specific table column."""
+        return format_tools.set_table_column_width(filename, table_index, col_index, width, width_type)
+
+    @mcp.tool()
+    def set_table_column_widths(filename: str, table_index: int, widths: list,
+                                width_type: str = "points"):
+        """Set the widths of multiple table columns."""
+        return format_tools.set_table_column_widths(filename, table_index, widths, width_type)
+
+    @mcp.tool()
+    def set_table_width(filename: str, table_index: int, width: float,
+                        width_type: str = "points"):
+        """Set the overall width of a table."""
+        return format_tools.set_table_width(filename, table_index, width, width_type)
+
+    @mcp.tool()
+    def auto_fit_table_columns(filename: str, table_index: int):
+        """Set table columns to auto-fit based on content."""
+        return format_tools.auto_fit_table_columns(filename, table_index)
+
+    # New table cell text formatting and padding tools
+    @mcp.tool()
+    def format_table_cell_text(filename: str, table_index: int, row_index:
int, col_index: int, + text_content: str = None, bold: bool = None, italic: bool = None, + underline: bool = None, color: str = None, font_size: int = None, + font_name: str = None): + """Format text within a specific table cell.""" + return format_tools.format_table_cell_text(filename, table_index, row_index, col_index, + text_content, bold, italic, underline, color, font_size, font_name) + + @mcp.tool() + def set_table_cell_padding(filename: str, table_index: int, row_index: int, col_index: int, + top: float = None, bottom: float = None, left: float = None, + right: float = None, unit: str = "points"): + """Set padding/margins for a specific table cell.""" + return format_tools.set_table_cell_padding(filename, table_index, row_index, col_index, + top, bottom, left, right, unit) + + + +def run_server(): + """Run the Word Document MCP Server with configurable transport.""" + # Get transport configuration + config = get_transport_config() + + # Setup logging + # setup_logging(config['debug']) + + # Register all tools + register_tools() + + # Print startup information + transport_type = config['transport'] + print(f"Starting Word Document MCP Server with {transport_type} transport...") + + # if config['debug']: + # print(f"Configuration: {config}") + + try: + if transport_type == 'stdio': + # Run with stdio transport (default, backward compatible) + print("Server running on stdio transport") + mcp.run(transport='stdio') + + elif transport_type == 'streamable-http': + # Run with streamable HTTP transport + print(f"Server running on streamable-http transport at http://{config['host']}:{config['port']}{config['path']}") + mcp.run( + transport='streamable-http', + host=config['host'], + port=config['port'], + path=config['path'] + ) + + elif transport_type == 'sse': + # Run with SSE transport + print(f"Server running on SSE transport at http://{config['host']}:{config['port']}{config['sse_path']}") + mcp.run( + transport='sse', + host=config['host'], + port=config['port'], + path=config['sse_path'] + ) + + except KeyboardInterrupt: + print("\nShutting down server...") + except Exception as e: + print(f"Error starting server: {e}") + if config['debug']: + import traceback + traceback.print_exc() + sys.exit(1) + + return mcp + + +def main(): + """Main entry point for the server.""" + run_server() + + +if __name__ == "__main__": + main() diff --git a/backend/office_word_mcp/word_document_server/tools/__init__.py b/backend/office_word_mcp/word_document_server/tools/__init__.py new file mode 100644 index 0000000..e183253 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/__init__.py @@ -0,0 +1,42 @@ +""" +MCP tool implementations for the Word Document Server. + +This package contains the MCP tool implementations that expose functionality +to clients through the Model Context Protocol. 
+""" + +# Document tools +from word_document_server.tools.document_tools import ( + create_document, get_document_info, get_document_text, + get_document_outline, list_available_documents, + copy_document, merge_documents +) + +# Content tools +from word_document_server.tools.content_tools import ( + add_heading, add_paragraph, add_table, add_picture, + add_page_break, add_table_of_contents, delete_paragraph, + search_and_replace +) + +# Format tools +from word_document_server.tools.format_tools import ( + format_text, create_custom_style, format_table +) + +# Protection tools +from word_document_server.tools.protection_tools import ( + protect_document, add_restricted_editing, + add_digital_signature, verify_document +) + +# Footnote tools +from word_document_server.tools.footnote_tools import ( + add_footnote_to_document, add_endnote_to_document, + convert_footnotes_to_endnotes_in_document, customize_footnote_style +) + +# Comment tools +from word_document_server.tools.comment_tools import ( + get_all_comments, get_comments_by_author, get_comments_for_paragraph +) diff --git a/backend/office_word_mcp/word_document_server/tools/comment_tools.py b/backend/office_word_mcp/word_document_server/tools/comment_tools.py new file mode 100644 index 0000000..ffe8812 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/comment_tools.py @@ -0,0 +1,168 @@ +""" +Comment extraction tools for Word Document Server. + +These tools provide high-level interfaces for extracting and analyzing +comments from Word documents through the MCP protocol. +""" +import os +import json +from typing import Dict, List, Optional, Any +from docx import Document + +from word_document_server.utils.file_utils import ensure_docx_extension +from word_document_server.core.comments import ( + extract_all_comments, + filter_comments_by_author, + get_comments_for_paragraph +) + + +async def get_all_comments(filename: str) -> str: + """ + Extract all comments from a Word document. + + Args: + filename: Path to the Word document + + Returns: + JSON string containing all comments with metadata + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return json.dumps({ + 'success': False, + 'error': f'Document {filename} does not exist' + }, indent=2) + + try: + # Load the document + doc = Document(filename) + + # Extract all comments + comments = extract_all_comments(doc) + + # Return results + return json.dumps({ + 'success': True, + 'comments': comments, + 'total_comments': len(comments) + }, indent=2) + + except Exception as e: + return json.dumps({ + 'success': False, + 'error': f'Failed to extract comments: {str(e)}' + }, indent=2) + + +async def get_comments_by_author(filename: str, author: str) -> str: + """ + Extract comments from a specific author in a Word document. 
+ + Args: + filename: Path to the Word document + author: Name of the comment author to filter by + + Returns: + JSON string containing filtered comments + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return json.dumps({ + 'success': False, + 'error': f'Document {filename} does not exist' + }, indent=2) + + if not author or not author.strip(): + return json.dumps({ + 'success': False, + 'error': 'Author name cannot be empty' + }, indent=2) + + try: + # Load the document + doc = Document(filename) + + # Extract all comments + all_comments = extract_all_comments(doc) + + # Filter by author + author_comments = filter_comments_by_author(all_comments, author) + + # Return results + return json.dumps({ + 'success': True, + 'author': author, + 'comments': author_comments, + 'total_comments': len(author_comments) + }, indent=2) + + except Exception as e: + return json.dumps({ + 'success': False, + 'error': f'Failed to extract comments: {str(e)}' + }, indent=2) + + +async def get_comments_for_paragraph(filename: str, paragraph_index: int) -> str: + """ + Extract comments for a specific paragraph in a Word document. + + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph (0-based) + + Returns: + JSON string containing comments for the specified paragraph + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return json.dumps({ + 'success': False, + 'error': f'Document {filename} does not exist' + }, indent=2) + + if paragraph_index < 0: + return json.dumps({ + 'success': False, + 'error': 'Paragraph index must be non-negative' + }, indent=2) + + try: + # Load the document + doc = Document(filename) + + # Check if paragraph index is valid + if paragraph_index >= len(doc.paragraphs): + return json.dumps({ + 'success': False, + 'error': f'Paragraph index {paragraph_index} is out of range. Document has {len(doc.paragraphs)} paragraphs.' + }, indent=2) + + # Extract all comments + all_comments = extract_all_comments(doc) + + # Filter for the specific paragraph + from word_document_server.core.comments import get_comments_for_paragraph as core_get_comments_for_paragraph + para_comments = core_get_comments_for_paragraph(all_comments, paragraph_index) + + # Get the paragraph text for context + paragraph_text = doc.paragraphs[paragraph_index].text + + # Return results + return json.dumps({ + 'success': True, + 'paragraph_index': paragraph_index, + 'paragraph_text': paragraph_text, + 'comments': para_comments, + 'total_comments': len(para_comments) + }, indent=2) + + except Exception as e: + return json.dumps({ + 'success': False, + 'error': f'Failed to extract comments: {str(e)}' + }, indent=2) \ No newline at end of file diff --git a/backend/office_word_mcp/word_document_server/tools/content_tools.py b/backend/office_word_mcp/word_document_server/tools/content_tools.py new file mode 100644 index 0000000..d825000 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/content_tools.py @@ -0,0 +1,481 @@ +""" +Content tools for Word Document Server. + +These tools add various types of content to Word documents, +including headings, paragraphs, tables, images, and page breaks. 
+""" +import os +from typing import List, Optional, Dict, Any +from docx import Document +from docx.shared import Inches, Pt, RGBColor + +from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension +from word_document_server.utils.document_utils import find_and_replace_text, insert_header_near_text, insert_numbered_list_near_text, insert_line_or_paragraph_near_text, replace_paragraph_block_below_header, replace_block_between_manual_anchors +from word_document_server.core.styles import ensure_heading_style, ensure_table_style + + +async def add_heading(filename: str, text: str, level: int = 1, + font_name: Optional[str] = None, font_size: Optional[int] = None, + bold: Optional[bool] = None, italic: Optional[bool] = None, + border_bottom: bool = False) -> str: + """Add a heading to a Word document with optional formatting. + + Args: + filename: Path to the Word document + text: Heading text + level: Heading level (1-9, where 1 is the highest level) + font_name: Font family (e.g., 'Helvetica') + font_size: Font size in points (e.g., 14) + bold: True/False for bold text + italic: True/False for italic text + border_bottom: True to add bottom border (for section headers) + """ + filename = ensure_docx_extension(filename) + + # Ensure level is converted to integer + try: + level = int(level) + except (ValueError, TypeError): + return "Invalid parameter: level must be an integer between 1 and 9" + + # Validate level range + if level < 1 or level > 9: + return f"Invalid heading level: {level}. Level must be between 1 and 9." + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + # Suggest creating a copy + return f"Cannot modify document: {error_message}. Consider creating a copy first or creating a new document." 
+ + try: + doc = Document(filename) + + # Ensure heading styles exist + ensure_heading_style(doc) + + # Try to add heading with style + try: + heading = doc.add_heading(text, level=level) + except Exception as style_error: + # If style-based approach fails, use direct formatting + heading = doc.add_paragraph(text) + heading.style = doc.styles['Normal'] + if heading.runs: + run = heading.runs[0] + run.bold = True + # Adjust size based on heading level + if level == 1: + run.font.size = Pt(16) + elif level == 2: + run.font.size = Pt(14) + else: + run.font.size = Pt(12) + + # Apply formatting to all runs in the heading + if any([font_name, font_size, bold is not None, italic is not None]): + for run in heading.runs: + if font_name: + run.font.name = font_name + if font_size: + run.font.size = Pt(font_size) + if bold is not None: + run.font.bold = bold + if italic is not None: + run.font.italic = italic + + # Add bottom border if requested + if border_bottom: + from docx.oxml import OxmlElement + from docx.oxml.ns import qn + + pPr = heading._element.get_or_add_pPr() + pBdr = OxmlElement('w:pBdr') + + bottom = OxmlElement('w:bottom') + bottom.set(qn('w:val'), 'single') + bottom.set(qn('w:sz'), '4') # 0.5pt border + bottom.set(qn('w:space'), '0') + bottom.set(qn('w:color'), '000000') + + pBdr.append(bottom) + pPr.append(pBdr) + + doc.save(filename) + return f"Heading '{text}' (level {level}) added to {filename}" + except Exception as e: + return f"Failed to add heading: {str(e)}" + + +async def add_paragraph(filename: str, text: str, style: Optional[str] = None, + font_name: Optional[str] = None, font_size: Optional[int] = None, + bold: Optional[bool] = None, italic: Optional[bool] = None, + color: Optional[str] = None) -> str: + """Add a paragraph to a Word document with optional formatting. + + Args: + filename: Path to the Word document + text: Paragraph text + style: Optional paragraph style name + font_name: Font family (e.g., 'Helvetica', 'Times New Roman') + font_size: Font size in points (e.g., 14, 36) + bold: True/False for bold text + italic: True/False for italic text + color: RGB color as hex string (e.g., '000000' for black) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + # Suggest creating a copy + return f"Cannot modify document: {error_message}. Consider creating a copy first or creating a new document." 
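+    # Note: if the requested style does not exist, the paragraph is saved with the default style and the function returns before the font/color options below are applied.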
+ + try: + doc = Document(filename) + paragraph = doc.add_paragraph(text) + + if style: + try: + paragraph.style = style + except KeyError: + # Style doesn't exist, use normal and report it + paragraph.style = doc.styles['Normal'] + doc.save(filename) + return f"Style '{style}' not found, paragraph added with default style to {filename}" + + # Apply formatting to all runs in the paragraph + if any([font_name, font_size, bold is not None, italic is not None, color]): + for run in paragraph.runs: + if font_name: + run.font.name = font_name + if font_size: + run.font.size = Pt(font_size) + if bold is not None: + run.font.bold = bold + if italic is not None: + run.font.italic = italic + if color: + # Remove any '#' prefix if present + color_hex = color.lstrip('#') + run.font.color.rgb = RGBColor.from_string(color_hex) + + doc.save(filename) + return f"Paragraph added to {filename}" + except Exception as e: + return f"Failed to add paragraph: {str(e)}" + + +async def add_table(filename: str, rows: int, cols: int, data: Optional[List[List[str]]] = None) -> str: + """Add a table to a Word document. + + Args: + filename: Path to the Word document + rows: Number of rows in the table + cols: Number of columns in the table + data: Optional 2D array of data to fill the table + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + # Suggest creating a copy + return f"Cannot modify document: {error_message}. Consider creating a copy first or creating a new document." + + try: + doc = Document(filename) + table = doc.add_table(rows=rows, cols=cols) + + # Try to set the table style + try: + table.style = 'Table Grid' + except KeyError: + # If style doesn't exist, add basic borders + pass + + # Fill table with data if provided + if data: + for i, row_data in enumerate(data): + if i >= rows: + break + for j, cell_text in enumerate(row_data): + if j >= cols: + break + table.cell(i, j).text = str(cell_text) + + doc.save(filename) + return f"Table ({rows}x{cols}) added to {filename}" + except Exception as e: + return f"Failed to add table: {str(e)}" + + +async def add_picture(filename: str, image_path: str, width: Optional[float] = None) -> str: + """Add an image to a Word document. + + Args: + filename: Path to the Word document + image_path: Path to the image file + width: Optional width in inches (proportional scaling) + """ + filename = ensure_docx_extension(filename) + + # Validate document existence + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Get absolute paths for better diagnostics + abs_filename = os.path.abspath(filename) + abs_image_path = os.path.abspath(image_path) + + # Validate image existence with improved error message + if not os.path.exists(abs_image_path): + return f"Image file not found: {abs_image_path}" + + # Check image file size + try: + image_size = os.path.getsize(abs_image_path) / 1024 # Size in KB + if image_size <= 0: + return f"Image file appears to be empty: {abs_image_path} (0 KB)" + except Exception as size_error: + return f"Error checking image file: {str(size_error)}" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(abs_filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first or creating a new document." 
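+    # The image is inserted at its native size unless width (in inches) is given, in which case the height scales proportionally.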
+ + try: + doc = Document(abs_filename) + # Additional diagnostic info + diagnostic = f"Attempting to add image ({abs_image_path}, {image_size:.2f} KB) to document ({abs_filename})" + + try: + if width: + doc.add_picture(abs_image_path, width=Inches(width)) + else: + doc.add_picture(abs_image_path) + doc.save(abs_filename) + return f"Picture {image_path} added to {filename}" + except Exception as inner_error: + # More detailed error for the specific operation + error_type = type(inner_error).__name__ + error_msg = str(inner_error) + return f"Failed to add picture: {error_type} - {error_msg or 'No error details available'}\nDiagnostic info: {diagnostic}" + except Exception as outer_error: + # Fallback error handling + error_type = type(outer_error).__name__ + error_msg = str(outer_error) + return f"Document processing error: {error_type} - {error_msg or 'No error details available'}" + + +async def add_page_break(filename: str) -> str: + """Add a page break to the document. + + Args: + filename: Path to the Word document + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + doc.add_page_break() + doc.save(filename) + return f"Page break added to {filename}." + except Exception as e: + return f"Failed to add page break: {str(e)}" + + +async def add_table_of_contents(filename: str, title: str = "Table of Contents", max_level: int = 3) -> str: + """Add a table of contents to a Word document based on heading styles. + + Args: + filename: Path to the Word document + title: Optional title for the table of contents + max_level: Maximum heading level to include (1-9) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + # Ensure max_level is within valid range + max_level = max(1, min(max_level, 9)) + + doc = Document(filename) + + # Collect headings and their positions + headings = [] + for i, paragraph in enumerate(doc.paragraphs): + # Check if paragraph style is a heading + if paragraph.style and paragraph.style.name.startswith('Heading '): + try: + # Extract heading level from style name + level = int(paragraph.style.name.split(' ')[1]) + if level <= max_level: + headings.append({ + 'level': level, + 'text': paragraph.text, + 'position': i + }) + except (ValueError, IndexError): + # Skip if heading level can't be determined + pass + + if not headings: + return f"No headings found in document {filename}. Table of contents not created." 
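+        # Note: the TOC is produced by rebuilding the document from plain paragraph text below, so run-level formatting (bold, italic, colors) in the body is not preserved.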
+ + # Create a new document with the TOC + toc_doc = Document() + + # Add title + if title: + toc_doc.add_heading(title, level=1) + + # Add TOC entries + for heading in headings: + # Indent based on level (using tab characters) + indent = ' ' * (heading['level'] - 1) + toc_doc.add_paragraph(f"{indent}{heading['text']}") + + # Add page break + toc_doc.add_page_break() + + # Get content from original document + for paragraph in doc.paragraphs: + p = toc_doc.add_paragraph(paragraph.text) + # Copy style if possible + try: + if paragraph.style: + p.style = paragraph.style.name + except: + pass + + # Copy tables + for table in doc.tables: + # Create a new table with the same dimensions + new_table = toc_doc.add_table(rows=len(table.rows), cols=len(table.columns)) + # Copy cell contents + for i, row in enumerate(table.rows): + for j, cell in enumerate(row.cells): + for paragraph in cell.paragraphs: + new_table.cell(i, j).text = paragraph.text + + # Save the new document with TOC + toc_doc.save(filename) + + return f"Table of contents with {len(headings)} entries added to {filename}" + except Exception as e: + return f"Failed to add table of contents: {str(e)}" + + +async def delete_paragraph(filename: str, paragraph_index: int) -> str: + """Delete a paragraph from a document. + + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph to delete (0-based) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate paragraph index + if paragraph_index < 0 or paragraph_index >= len(doc.paragraphs): + return f"Invalid paragraph index. Document has {len(doc.paragraphs)} paragraphs (0-{len(doc.paragraphs)-1})." + + # Delete the paragraph (by removing its content and setting it empty) + # Note: python-docx doesn't support true paragraph deletion, this is a workaround + paragraph = doc.paragraphs[paragraph_index] + p = paragraph._p + p.getparent().remove(p) + + doc.save(filename) + return f"Paragraph at index {paragraph_index} deleted successfully." + except Exception as e: + return f"Failed to delete paragraph: {str(e)}" + + +async def search_and_replace(filename: str, find_text: str, replace_text: str) -> str: + """Search for text and replace all occurrences. + + Args: + filename: Path to the Word document + find_text: Text to search for + replace_text: Text to replace with + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Perform find and replace + count = find_and_replace_text(doc, find_text, replace_text) + + if count > 0: + doc.save(filename) + return f"Replaced {count} occurrence(s) of '{find_text}' with '{replace_text}'." + else: + return f"No occurrences of '{find_text}' found." 
+    except Exception as e:
+        return f"Failed to search and replace: {str(e)}"
+
+async def insert_header_near_text_tool(filename: str, target_text: str = None, header_title: str = "", position: str = 'after', header_style: str = 'Heading 1', target_paragraph_index: int = None) -> str:
+    """Insert a header (with specified style) before or after the target paragraph. Specify by text or paragraph index."""
+    return insert_header_near_text(filename, target_text, header_title, position, header_style, target_paragraph_index)
+
+async def insert_numbered_list_near_text_tool(filename: str, target_text: str = None, list_items: list = None, position: str = 'after', target_paragraph_index: int = None, bullet_type: str = 'bullet') -> str:
+    """Insert a bulleted or numbered list before or after the target paragraph. Specify by text or paragraph index."""
+    return insert_numbered_list_near_text(filename, target_text, list_items, position, target_paragraph_index, bullet_type)
+
+async def insert_line_or_paragraph_near_text_tool(filename: str, target_text: str = None, line_text: str = "", position: str = 'after', line_style: str = None, target_paragraph_index: int = None) -> str:
+    """Insert a new line or paragraph (with specified or matched style) before or after the target paragraph. Specify by text or paragraph index."""
+    return insert_line_or_paragraph_near_text(filename, target_text, line_text, position, line_style, target_paragraph_index)
+
+async def replace_paragraph_block_below_header_tool(filename: str, header_text: str, new_paragraphs: list, detect_block_end_fn=None) -> str:
+    """Replace the block of paragraphs below a header, avoiding modifying the TOC."""
+    return replace_paragraph_block_below_header(filename, header_text, new_paragraphs, detect_block_end_fn)
+
+async def replace_block_between_manual_anchors_tool(filename: str, start_anchor_text: str, new_paragraphs: list, end_anchor_text: str = None, match_fn=None, new_paragraph_style: str = None) -> str:
+    """Replace all content between start_anchor_text and end_anchor_text (or next logical header if not provided)."""
+    return replace_block_between_manual_anchors(filename, start_anchor_text, new_paragraphs, end_anchor_text, match_fn, new_paragraph_style)
diff --git a/backend/office_word_mcp/word_document_server/tools/document_tools.py b/backend/office_word_mcp/word_document_server/tools/document_tools.py
new file mode 100644
index 0000000..c15ad38
--- /dev/null
+++ b/backend/office_word_mcp/word_document_server/tools/document_tools.py
@@ -0,0 +1,214 @@
+"""
+Document creation and manipulation tools for Word Document Server.
+"""
+import os
+import json
+from typing import Dict, List, Optional, Any
+from docx import Document
+
+from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension, create_document_copy
+from word_document_server.utils.document_utils import get_document_properties, extract_document_text, get_document_structure, get_document_xml, insert_header_near_text, insert_line_or_paragraph_near_text
+from word_document_server.core.styles import ensure_heading_style, ensure_table_style
+
+
+async def create_document(filename: str, title: Optional[str] = None, author: Optional[str] = None) -> str:
+    """Create a new Word document with optional metadata.
+ + Args: + filename: Name of the document to create (with or without .docx extension) + title: Optional title for the document metadata + author: Optional author for the document metadata + """ + filename = ensure_docx_extension(filename) + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot create document: {error_message}" + + try: + doc = Document() + + # Set properties if provided + if title: + doc.core_properties.title = title + if author: + doc.core_properties.author = author + + # Ensure necessary styles exist + ensure_heading_style(doc) + ensure_table_style(doc) + + # Save the document + doc.save(filename) + + return f"Document {filename} created successfully" + except Exception as e: + return f"Failed to create document: {str(e)}" + + +async def get_document_info(filename: str) -> str: + """Get information about a Word document. + + Args: + filename: Path to the Word document + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + try: + properties = get_document_properties(filename) + return json.dumps(properties, indent=2) + except Exception as e: + return f"Failed to get document info: {str(e)}" + + +async def get_document_text(filename: str) -> str: + """Extract all text from a Word document. + + Args: + filename: Path to the Word document + """ + filename = ensure_docx_extension(filename) + + return extract_document_text(filename) + + +async def get_document_outline(filename: str) -> str: + """Get the structure of a Word document. + + Args: + filename: Path to the Word document + """ + filename = ensure_docx_extension(filename) + + structure = get_document_structure(filename) + return json.dumps(structure, indent=2) + + +async def list_available_documents(directory: str = ".") -> str: + """List all .docx files in the specified directory. + + Args: + directory: Directory to search for Word documents + """ + try: + if not os.path.exists(directory): + return f"Directory {directory} does not exist" + + docx_files = [f for f in os.listdir(directory) if f.endswith('.docx')] + + if not docx_files: + return f"No Word documents found in {directory}" + + result = f"Found {len(docx_files)} Word documents in {directory}:\n" + for file in docx_files: + file_path = os.path.join(directory, file) + size = os.path.getsize(file_path) / 1024 # KB + result += f"- {file} ({size:.2f} KB)\n" + + return result + except Exception as e: + return f"Failed to list documents: {str(e)}" + + +async def copy_document(source_filename: str, destination_filename: Optional[str] = None) -> str: + """Create a copy of a Word document. + + Args: + source_filename: Path to the source document + destination_filename: Optional path for the copy. If not provided, a default name will be generated. + """ + source_filename = ensure_docx_extension(source_filename) + + if destination_filename: + destination_filename = ensure_docx_extension(destination_filename) + + success, message, new_path = create_document_copy(source_filename, destination_filename) + if success: + return message + else: + return f"Failed to copy document: {message}" + + +async def merge_documents(target_filename: str, source_filenames: List[str], add_page_breaks: bool = True) -> str: + """Merge multiple Word documents into a single document. 
+ + Args: + target_filename: Path to the target document (will be created or overwritten) + source_filenames: List of paths to source documents to merge + add_page_breaks: If True, add page breaks between documents + """ + from word_document_server.core.tables import copy_table + + target_filename = ensure_docx_extension(target_filename) + + # Check if target file is writeable + is_writeable, error_message = check_file_writeable(target_filename) + if not is_writeable: + return f"Cannot create target document: {error_message}" + + # Validate all source documents exist + missing_files = [] + for filename in source_filenames: + doc_filename = ensure_docx_extension(filename) + if not os.path.exists(doc_filename): + missing_files.append(doc_filename) + + if missing_files: + return f"Cannot merge documents. The following source files do not exist: {', '.join(missing_files)}" + + try: + # Create a new document for the merged result + target_doc = Document() + + # Process each source document + for i, filename in enumerate(source_filenames): + doc_filename = ensure_docx_extension(filename) + source_doc = Document(doc_filename) + + # Add page break between documents (except before the first one) + if add_page_breaks and i > 0: + target_doc.add_page_break() + + # Copy all paragraphs + for paragraph in source_doc.paragraphs: + # Create a new paragraph with the same text and style + new_paragraph = target_doc.add_paragraph(paragraph.text) + new_paragraph.style = target_doc.styles['Normal'] # Default style + + # Try to match the style if possible + try: + if paragraph.style and paragraph.style.name in target_doc.styles: + new_paragraph.style = target_doc.styles[paragraph.style.name] + except: + pass + + # Copy run formatting + for i, run in enumerate(paragraph.runs): + if i < len(new_paragraph.runs): + new_run = new_paragraph.runs[i] + # Copy basic formatting + new_run.bold = run.bold + new_run.italic = run.italic + new_run.underline = run.underline + # Font size if specified + if run.font.size: + new_run.font.size = run.font.size + + # Copy all tables + for table in source_doc.tables: + copy_table(table, target_doc) + + # Save the merged document + target_doc.save(target_filename) + return f"Successfully merged {len(source_filenames)} documents into {target_filename}" + except Exception as e: + return f"Failed to merge documents: {str(e)}" + + +async def get_document_xml_tool(filename: str) -> str: + """Get the raw XML structure of a Word document.""" + return get_document_xml(filename) diff --git a/backend/office_word_mcp/word_document_server/tools/extended_document_tools.py b/backend/office_word_mcp/word_document_server/tools/extended_document_tools.py new file mode 100644 index 0000000..8e51b23 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/extended_document_tools.py @@ -0,0 +1,184 @@ +""" +Extended document tools for Word Document Server. + +These tools provide enhanced document content extraction and search capabilities. +""" +import os +import json +import subprocess +import platform +import shutil +from typing import Dict, List, Optional, Any, Union, Tuple +from docx import Document + +from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension +from word_document_server.utils.extended_document_utils import get_paragraph_text, find_text + + +async def get_paragraph_text_from_document(filename: str, paragraph_index: int) -> str: + """Get text from a specific paragraph in a Word document. 
+ + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph to retrieve (0-based) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + + if paragraph_index < 0: + return "Invalid parameter: paragraph_index must be a non-negative integer" + + try: + result = get_paragraph_text(filename, paragraph_index) + return json.dumps(result, indent=2) + except Exception as e: + return f"Failed to get paragraph text: {str(e)}" + + +async def find_text_in_document(filename: str, text_to_find: str, match_case: bool = True, whole_word: bool = False) -> str: + """Find occurrences of specific text in a Word document. + + Args: + filename: Path to the Word document + text_to_find: Text to search for in the document + match_case: Whether to match case (True) or ignore case (False) + whole_word: Whether to match whole words only (True) or substrings (False) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + if not text_to_find: + return "Search text cannot be empty" + + try: + + result = find_text(filename, text_to_find, match_case, whole_word) + return json.dumps(result, indent=2) + except Exception as e: + return f"Failed to search for text: {str(e)}" + + +async def convert_to_pdf(filename: str, output_filename: Optional[str] = None) -> str: + """Convert a Word document to PDF format. + + Args: + filename: Path to the Word document + output_filename: Optional path for the output PDF. If not provided, + will use the same name with .pdf extension + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Generate output filename if not provided + if not output_filename: + base_name, _ = os.path.splitext(filename) + output_filename = f"{base_name}.pdf" + elif not output_filename.lower().endswith('.pdf'): + output_filename = f"{output_filename}.pdf" + + # Convert to absolute path if not already + if not os.path.isabs(output_filename): + output_filename = os.path.abspath(output_filename) + + # Ensure the output directory exists + output_dir = os.path.dirname(output_filename) + if not output_dir: + output_dir = os.path.abspath('.') + + # Create the directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + # Check if output file can be written + is_writeable, error_message = check_file_writeable(output_filename) + if not is_writeable: + return f"Cannot create PDF: {error_message} (Path: {output_filename}, Dir: {output_dir})" + + try: + # Determine platform for appropriate conversion method + system = platform.system() + + if system == "Windows": + # On Windows, try docx2pdf which uses Microsoft Word + try: + from docx2pdf import convert + convert(filename, output_filename) + return f"Document successfully converted to PDF: {output_filename}" + except (ImportError, Exception) as e: + return f"Failed to convert document to PDF: {str(e)}\nNote: docx2pdf requires Microsoft Word to be installed." + + elif system in ["Linux", "Darwin"]: # Linux or macOS + errors = [] + + # --- Attempt 1: LibreOffice --- + lo_commands = [] + if system == "Darwin": # macOS + lo_commands = ["soffice", "/Applications/LibreOffice.app/Contents/MacOS/soffice"] + else: # Linux + lo_commands = ["libreoffice", "soffice"] + + for cmd_name in lo_commands: + try: + output_dir_for_lo = os.path.dirname(output_filename) or '.' 
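+                    # LibreOffice writes the PDF into --outdir under the source file's base name; the result is moved to the requested output path below if the two differ.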
+ os.makedirs(output_dir_for_lo, exist_ok=True) + + cmd = [cmd_name, '--headless', '--convert-to', 'pdf', '--outdir', output_dir_for_lo, filename] + result = subprocess.run(cmd, capture_output=True, text=True, timeout=60, check=False) + + if result.returncode == 0: + # LibreOffice typically creates a PDF with the same base name as the source file. + # e.g., 'mydoc.docx' -> 'mydoc.pdf' + base_name = os.path.splitext(os.path.basename(filename))[0] + created_pdf_name = f"{base_name}.pdf" + created_pdf_path = os.path.join(output_dir_for_lo, created_pdf_name) + + # If the created file exists, move it to the desired output_filename if necessary. + if os.path.exists(created_pdf_path): + if created_pdf_path != output_filename: + shutil.move(created_pdf_path, output_filename) + + # Final check: does the target file now exist? + if os.path.exists(output_filename): + return f"Document successfully converted to PDF via {cmd_name}: {output_filename}" + + # If we get here, soffice returned 0 but the expected file wasn't created. + errors.append(f"{cmd_name} returned success code, but output file '{created_pdf_path}' was not found.") + # Continue to the next command or fallback. + else: + errors.append(f"{cmd_name} failed. Stderr: {result.stderr.strip()}") + except FileNotFoundError: + errors.append(f"Command '{cmd_name}' not found.") + except (subprocess.SubprocessError, Exception) as e: + errors.append(f"An error occurred with {cmd_name}: {str(e)}") + + # --- Attempt 2: docx2pdf (Fallback) --- + try: + from docx2pdf import convert + convert(filename, output_filename) + if os.path.exists(output_filename) and os.path.getsize(output_filename) > 0: + return f"Document successfully converted to PDF via docx2pdf: {output_filename}" + else: + errors.append("docx2pdf fallback was executed but failed to create a valid output file.") + except ImportError: + errors.append("docx2pdf is not installed, skipping fallback.") + except Exception as e: + errors.append(f"docx2pdf fallback failed with an exception: {str(e)}") + + # --- If all attempts failed --- + error_summary = "Failed to convert document to PDF using all available methods.\n" + error_summary += "Recorded errors: " + "; ".join(errors) + "\n" + error_summary += "To convert documents to PDF, please install either:\n" + error_summary += "1. LibreOffice (recommended for Linux/macOS)\n" + error_summary += "2. Microsoft Word (required for docx2pdf on Windows/macOS)" + return error_summary + else: + return f"PDF conversion not supported on {system} platform" + + except Exception as e: + return f"Failed to convert document to PDF: {str(e)}" diff --git a/backend/office_word_mcp/word_document_server/tools/footnote_tools.py b/backend/office_word_mcp/word_document_server/tools/footnote_tools.py new file mode 100644 index 0000000..72b0190 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/footnote_tools.py @@ -0,0 +1,709 @@ +""" +Footnote and endnote tools for Word Document Server. + +These tools handle footnote and endnote functionality, +including adding, customizing, and converting between them. 
+ +This module combines both standard and robust implementations: +- String-return functions for backward compatibility +- Dict-return robust functions for structured responses +""" +import os +from typing import Optional, Dict, Any +from docx import Document +from docx.shared import Pt +from docx.enum.style import WD_STYLE_TYPE + +from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension +from word_document_server.core.footnotes import ( + find_footnote_references, + get_format_symbols, + customize_footnote_formatting, + add_footnote_robust, + delete_footnote_robust, + validate_document_footnotes, + add_footnote_at_paragraph_end # Compatibility function +) + + +async def add_footnote_to_document(filename: str, paragraph_index: int, footnote_text: str) -> str: + """Add a footnote to a specific paragraph in a Word document. + + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph to add footnote to (0-based) + footnote_text: Text content of the footnote + """ + filename = ensure_docx_extension(filename) + + # Ensure paragraph_index is an integer + try: + paragraph_index = int(paragraph_index) + except (ValueError, TypeError): + return "Invalid parameter: paragraph_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate paragraph index + if paragraph_index < 0 or paragraph_index >= len(doc.paragraphs): + return f"Invalid paragraph index. Document has {len(doc.paragraphs)} paragraphs (0-{len(doc.paragraphs)-1})." + + paragraph = doc.paragraphs[paragraph_index] + + # In python-docx, we'd use paragraph.add_footnote(), but we'll use a more robust approach + try: + footnote = paragraph.add_run() + footnote.text = "" + + # Create the footnote reference + reference = footnote.add_footnote(footnote_text) + + doc.save(filename) + return f"Footnote added to paragraph {paragraph_index} in {filename}" + except AttributeError: + # Fall back to a simpler approach if direct footnote addition fails + last_run = paragraph.add_run() + last_run.text = "¹" # Unicode superscript 1 + last_run.font.superscript = True + + # Add a footnote section at the end if it doesn't exist + found_footnote_section = False + for p in doc.paragraphs: + if p.text.startswith("Footnotes:"): + found_footnote_section = True + break + + if not found_footnote_section: + doc.add_paragraph("\n").add_run() + doc.add_paragraph("Footnotes:").bold = True + + # Add footnote text + footnote_para = doc.add_paragraph("¹ " + footnote_text) + footnote_para.style = "Footnote Text" if "Footnote Text" in doc.styles else "Normal" + + doc.save(filename) + return f"Footnote added to paragraph {paragraph_index} in {filename} (simplified approach)" + except Exception as e: + return f"Failed to add footnote: {str(e)}" + + +async def add_endnote_to_document(filename: str, paragraph_index: int, endnote_text: str) -> str: + """Add an endnote to a specific paragraph in a Word document. 
+ + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph to add endnote to (0-based) + endnote_text: Text content of the endnote + """ + filename = ensure_docx_extension(filename) + + # Ensure paragraph_index is an integer + try: + paragraph_index = int(paragraph_index) + except (ValueError, TypeError): + return "Invalid parameter: paragraph_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate paragraph index + if paragraph_index < 0 or paragraph_index >= len(doc.paragraphs): + return f"Invalid paragraph index. Document has {len(doc.paragraphs)} paragraphs (0-{len(doc.paragraphs)-1})." + + paragraph = doc.paragraphs[paragraph_index] + + # Add endnote reference + last_run = paragraph.add_run() + last_run.text = "†" # Unicode dagger symbol common for endnotes + last_run.font.superscript = True + + # Check if endnotes section exists, if not create it + endnotes_heading_found = False + for para in doc.paragraphs: + if para.text == "Endnotes:" or para.text == "ENDNOTES": + endnotes_heading_found = True + break + + if not endnotes_heading_found: + # Add a page break before endnotes section + doc.add_page_break() + doc.add_heading("Endnotes:", level=1) + + # Add the endnote text + endnote_para = doc.add_paragraph("† " + endnote_text) + endnote_para.style = "Endnote Text" if "Endnote Text" in doc.styles else "Normal" + + doc.save(filename) + return f"Endnote added to paragraph {paragraph_index} in {filename}" + except Exception as e: + return f"Failed to add endnote: {str(e)}" + + +async def convert_footnotes_to_endnotes_in_document(filename: str) -> str: + """Convert all footnotes to endnotes in a Word document. + + Args: + filename: Path to the Word document + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." 
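+    # Footnote references are detected heuristically below: superscript runs whose text is a digit or one of ¹²³⁴⁵⁶⁷⁸⁹.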
+ + try: + doc = Document(filename) + + + # Find all runs that might be footnote references + footnote_references = [] + + for para_idx, para in enumerate(doc.paragraphs): + for run_idx, run in enumerate(para.runs): + # Check if this run is likely a footnote reference + # (superscript number or special character) + if run.font.superscript and (run.text.isdigit() or run.text in "¹²³⁴⁵⁶⁷⁸⁹"): + footnote_references.append({ + "paragraph_index": para_idx, + "run_index": run_idx, + "text": run.text + }) + + if not footnote_references: + return f"No footnote references found in {filename}" + + # Create endnotes section + doc.add_page_break() + doc.add_heading("Endnotes:", level=1) + + # Create a placeholder for endnote content, we'll fill it later + endnote_content = [] + + # Find the footnote text at the bottom of the page + + found_footnote_section = False + footnote_text = [] + + for para in doc.paragraphs: + if not found_footnote_section and para.text.startswith("Footnotes:"): + found_footnote_section = True + continue + + if found_footnote_section: + footnote_text.append(para.text) + + # Create endnotes based on footnote references + for i, ref in enumerate(footnote_references): + # Add a new endnote + endnote_para = doc.add_paragraph() + + # Try to match with footnote text, or use placeholder + if i < len(footnote_text): + endnote_para.text = f"†{i+1} {footnote_text[i]}" + else: + endnote_para.text = f"†{i+1} Converted from footnote {ref['text']}" + + # Change the footnote reference to an endnote reference + try: + paragraph = doc.paragraphs[ref["paragraph_index"]] + paragraph.runs[ref["run_index"]].text = f"†{i+1}" + except IndexError: + # Skip if we can't locate the reference + pass + + # Save the document + doc.save(filename) + + return f"Converted {len(footnote_references)} footnotes to endnotes in {filename}" + except Exception as e: + return f"Failed to convert footnotes to endnotes: {str(e)}" + + +async def add_footnote_after_text(filename: str, search_text: str, footnote_text: str, + output_filename: Optional[str] = None) -> str: + """Add a footnote after specific text in a Word document with proper formatting. + + This enhanced function ensures proper superscript formatting by managing styles at the XML level. + + Args: + filename: Path to the Word document + search_text: Text to search for (footnote will be added after this text) + footnote_text: Content of the footnote + output_filename: Optional output filename (if None, modifies in place) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + # Use robust implementation + success, message, details = add_footnote_robust( + filename=filename, + search_text=search_text, + footnote_text=footnote_text, + output_filename=output_filename, + position="after", + validate_location=True + ) + return message + except Exception as e: + return f"Failed to add footnote: {str(e)}" + + +async def add_footnote_before_text(filename: str, search_text: str, footnote_text: str, + output_filename: Optional[str] = None) -> str: + """Add a footnote before specific text in a Word document with proper formatting. + + This enhanced function ensures proper superscript formatting by managing styles at the XML level. 
+ + Args: + filename: Path to the Word document + search_text: Text to search for (footnote will be added before this text) + footnote_text: Content of the footnote + output_filename: Optional output filename (if None, modifies in place) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + # Use robust implementation + success, message, details = add_footnote_robust( + filename=filename, + search_text=search_text, + footnote_text=footnote_text, + output_filename=output_filename, + position="before", + validate_location=True + ) + return message + except Exception as e: + return f"Failed to add footnote: {str(e)}" + + +async def add_footnote_enhanced(filename: str, paragraph_index: int, footnote_text: str, + output_filename: Optional[str] = None) -> str: + """Enhanced version of add_footnote_to_document with proper superscript formatting. + + Now uses the robust implementation for better reliability. + + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph to add footnote to (0-based) + footnote_text: Text content of the footnote + output_filename: Optional output filename (if None, modifies in place) + """ + filename = ensure_docx_extension(filename) + + # Ensure paragraph_index is an integer + try: + paragraph_index = int(paragraph_index) + except (ValueError, TypeError): + return "Invalid parameter: paragraph_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + # Use robust implementation + success, message, details = add_footnote_robust( + filename=filename, + paragraph_index=paragraph_index, + footnote_text=footnote_text, + output_filename=output_filename, + validate_location=True + ) + return message + except Exception as e: + return f"Failed to add footnote: {str(e)}" + + +async def customize_footnote_style(filename: str, numbering_format: str = "1, 2, 3", + start_number: int = 1, font_name: Optional[str] = None, + font_size: Optional[int] = None) -> str: + """Customize footnote numbering and formatting in a Word document. + + Args: + filename: Path to the Word document + numbering_format: Format for footnote numbers (e.g., "1, 2, 3", "i, ii, iii", "a, b, c") + start_number: Number to start footnote numbering from + font_name: Optional font name for footnotes + font_size: Optional font size for footnotes (in points) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." 
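+    # The numbering_format string (e.g. 'i, ii, iii') is expanded into a symbol sequence by get_format_symbols and applied to existing footnote references below.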
+ + try: + doc = Document(filename) + + # Create or get footnote style + footnote_style_name = "Footnote Text" + footnote_style = None + + try: + footnote_style = doc.styles[footnote_style_name] + except KeyError: + # Create the style if it doesn't exist + footnote_style = doc.styles.add_style(footnote_style_name, WD_STYLE_TYPE.PARAGRAPH) + + # Apply formatting to footnote style + if footnote_style: + if font_name: + footnote_style.font.name = font_name + if font_size: + footnote_style.font.size = Pt(font_size) + + # Find all existing footnote references + footnote_refs = find_footnote_references(doc) + + # Generate format symbols for the specified numbering format + format_symbols = get_format_symbols(numbering_format, len(footnote_refs) + start_number) + + # Apply custom formatting to footnotes + count = customize_footnote_formatting(doc, footnote_refs, format_symbols, start_number, footnote_style) + + # Save the document + doc.save(filename) + + return f"Footnote style and numbering customized in {filename}" + except Exception as e: + return f"Failed to customize footnote style: {str(e)}" + + +async def delete_footnote_from_document(filename: str, footnote_id: Optional[int] = None, + search_text: Optional[str] = None, + output_filename: Optional[str] = None) -> str: + """Delete a footnote from a Word document. + + You can identify the footnote to delete either by: + 1. footnote_id: The numeric ID of the footnote (1, 2, 3, etc.) + 2. search_text: Text near the footnote reference to find and delete + + Args: + filename: Path to the Word document + footnote_id: Optional ID of the footnote to delete (1-based) + search_text: Optional text to search near the footnote reference + output_filename: Optional output filename (if None, modifies in place) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + # Use robust implementation with orphan cleanup + success, message, details = delete_footnote_robust( + filename=filename, + footnote_id=footnote_id, + search_text=search_text, + output_filename=output_filename, + clean_orphans=True + ) + return message + except Exception as e: + return f"Failed to delete footnote: {str(e)}" + + +# ============================================================================ +# Robust tool functions with Dict returns for structured responses +# ============================================================================ + + +async def add_footnote_robust_tool( + filename: str, + search_text: Optional[str] = None, + paragraph_index: Optional[int] = None, + footnote_text: str = "", + validate_location: bool = True, + auto_repair: bool = False +) -> Dict[str, Any]: + """ + Add a footnote with robust validation and error handling. + + This is the production-ready version with comprehensive Word compliance. 
+ + Args: + filename: Path to the Word document + search_text: Text to search for (mutually exclusive with paragraph_index) + paragraph_index: Index of paragraph (mutually exclusive with search_text) + footnote_text: Content of the footnote + validate_location: Whether to validate placement restrictions + auto_repair: Whether to attempt automatic document repair + + Returns: + Dict with success status, message, and optional details + """ + filename = ensure_docx_extension(filename) + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return { + "success": False, + "message": f"Cannot modify document: {error_message}", + "details": None + } + + # Convert paragraph_index if provided as string + if paragraph_index is not None: + try: + paragraph_index = int(paragraph_index) + except (ValueError, TypeError): + return { + "success": False, + "message": "Invalid parameter: paragraph_index must be an integer", + "details": None + } + + # Call robust implementation + success, message, details = add_footnote_robust( + filename=filename, + search_text=search_text, + paragraph_index=paragraph_index, + footnote_text=footnote_text, + validate_location=validate_location, + auto_repair=auto_repair + ) + + return { + "success": success, + "message": message, + "details": details + } + + +async def delete_footnote_robust_tool( + filename: str, + footnote_id: Optional[int] = None, + search_text: Optional[str] = None, + clean_orphans: bool = True +) -> Dict[str, Any]: + """ + Delete a footnote with comprehensive cleanup. + + Args: + filename: Path to the Word document + footnote_id: ID of footnote to delete + search_text: Text near footnote reference + clean_orphans: Whether to remove orphaned content + + Returns: + Dict with success status, message, and optional details + """ + filename = ensure_docx_extension(filename) + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return { + "success": False, + "message": f"Cannot modify document: {error_message}", + "details": None + } + + # Convert footnote_id if provided as string + if footnote_id is not None: + try: + footnote_id = int(footnote_id) + except (ValueError, TypeError): + return { + "success": False, + "message": "Invalid parameter: footnote_id must be an integer", + "details": None + } + + # Call robust implementation + success, message, details = delete_footnote_robust( + filename=filename, + footnote_id=footnote_id, + search_text=search_text, + clean_orphans=clean_orphans + ) + + return { + "success": success, + "message": message, + "details": details + } + + +async def validate_footnotes_tool(filename: str) -> Dict[str, Any]: + """ + Validate all footnotes in a document. 
+ + Provides comprehensive validation report including: + - ID conflicts + - Orphaned content + - Missing styles + - Invalid locations + - Coherence issues + + Args: + filename: Path to the Word document + + Returns: + Dict with validation status and detailed report + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return { + "valid": False, + "message": f"Document {filename} does not exist", + "report": {} + } + + # Call validation + is_valid, message, report = validate_document_footnotes(filename) + + return { + "valid": is_valid, + "message": message, + "report": report + } + + +# ============================================================================ +# Compatibility wrappers for robust tools (maintain backward compatibility) +# ============================================================================ + +async def add_footnote_to_document_robust( + filename: str, + paragraph_index: int, + footnote_text: str +) -> str: + """ + Robust version of add_footnote_to_document. + Maintains backward compatibility with existing API. + """ + result = await add_footnote_robust_tool( + filename=filename, + paragraph_index=paragraph_index, + footnote_text=footnote_text + ) + return result["message"] + + +async def add_footnote_after_text_robust( + filename: str, + search_text: str, + footnote_text: str, + output_filename: Optional[str] = None +) -> str: + """ + Robust version of add_footnote_after_text. + Maintains backward compatibility with existing API. + """ + # Handle output filename by copying first if needed + working_file = filename + if output_filename: + import shutil + shutil.copy2(filename, output_filename) + working_file = output_filename + + result = await add_footnote_robust_tool( + filename=working_file, + search_text=search_text, + footnote_text=footnote_text + ) + return result["message"] + + +async def add_footnote_before_text_robust( + filename: str, + search_text: str, + footnote_text: str, + output_filename: Optional[str] = None +) -> str: + """ + Robust version of add_footnote_before_text. + Note: Current robust implementation defaults to 'after' position. + """ + # Handle output filename + working_file = filename + if output_filename: + import shutil + shutil.copy2(filename, output_filename) + working_file = output_filename + + result = await add_footnote_robust_tool( + filename=working_file, + search_text=search_text, + footnote_text=footnote_text + ) + return result["message"] + + +async def delete_footnote_from_document_robust( + filename: str, + footnote_id: Optional[int] = None, + search_text: Optional[str] = None, + output_filename: Optional[str] = None +) -> str: + """ + Robust version of delete_footnote_from_document. + Maintains backward compatibility with existing API. + """ + # Handle output filename + working_file = filename + if output_filename: + import shutil + shutil.copy2(filename, output_filename) + working_file = output_filename + + result = await delete_footnote_robust_tool( + filename=working_file, + footnote_id=footnote_id, + search_text=search_text + ) + return result["message"] diff --git a/backend/office_word_mcp/word_document_server/tools/format_tools.py b/backend/office_word_mcp/word_document_server/tools/format_tools.py new file mode 100644 index 0000000..a60fc0c --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/format_tools.py @@ -0,0 +1,1112 @@ +""" +Formatting tools for Word Document Server. 
+ +These tools handle formatting operations for Word documents, +including text formatting, table formatting, and custom styles. +""" +import os +from typing import List, Optional, Dict, Any +from docx import Document +from docx.shared import Pt, RGBColor +from docx.enum.text import WD_COLOR_INDEX +from docx.enum.style import WD_STYLE_TYPE + +from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension +from word_document_server.core.styles import create_style +from word_document_server.core.tables import ( + apply_table_style, set_cell_shading_by_position, apply_alternating_row_shading, + highlight_header_row, merge_cells, merge_cells_horizontal, merge_cells_vertical, + set_cell_alignment_by_position, set_table_alignment, set_column_width_by_position, + set_column_widths, set_table_width as set_table_width_func, auto_fit_table, + format_cell_text_by_position, set_cell_padding_by_position +) + + +async def format_text(filename: str, paragraph_index: int, start_pos: int, end_pos: int, + bold: Optional[bool] = None, italic: Optional[bool] = None, + underline: Optional[bool] = None, color: Optional[str] = None, + font_size: Optional[int] = None, font_name: Optional[str] = None) -> str: + """Format a specific range of text within a paragraph. + + Args: + filename: Path to the Word document + paragraph_index: Index of the paragraph (0-based) + start_pos: Start position within the paragraph text + end_pos: End position within the paragraph text + bold: Set text bold (True/False) + italic: Set text italic (True/False) + underline: Set text underlined (True/False) + color: Text color (e.g., 'red', 'blue', etc.) + font_size: Font size in points + font_name: Font name/family + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + paragraph_index = int(paragraph_index) + start_pos = int(start_pos) + end_pos = int(end_pos) + if font_size is not None: + font_size = int(font_size) + except (ValueError, TypeError): + return "Invalid parameter: paragraph_index, start_pos, end_pos, and font_size must be integers" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate paragraph index + if paragraph_index < 0 or paragraph_index >= len(doc.paragraphs): + return f"Invalid paragraph index. Document has {len(doc.paragraphs)} paragraphs (0-{len(doc.paragraphs)-1})." + + paragraph = doc.paragraphs[paragraph_index] + text = paragraph.text + + # Validate text positions + if start_pos < 0 or end_pos > len(text) or start_pos >= end_pos: + return f"Invalid text positions. Paragraph has {len(text)} characters." 
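+ # The approach below rebuilds the paragraph as three runs: the text before
+ # start_pos, the target span (which receives the requested formatting), and
+ # the text after end_pos. Existing runs are cleared first, so any per-run
+ # formatting the paragraph previously carried is replaced by these new runs.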
+ + # Get the text to format + target_text = text[start_pos:end_pos] + + # Clear existing runs and create three runs: before, target, after + for run in paragraph.runs: + run.clear() + + # Add text before target + if start_pos > 0: + run_before = paragraph.add_run(text[:start_pos]) + + # Add target text with formatting + run_target = paragraph.add_run(target_text) + if bold is not None: + run_target.bold = bold + if italic is not None: + run_target.italic = italic + if underline is not None: + run_target.underline = underline + if color: + # Define common RGB colors + color_map = { + 'red': RGBColor(255, 0, 0), + 'blue': RGBColor(0, 0, 255), + 'green': RGBColor(0, 128, 0), + 'yellow': RGBColor(255, 255, 0), + 'black': RGBColor(0, 0, 0), + 'gray': RGBColor(128, 128, 128), + 'white': RGBColor(255, 255, 255), + 'purple': RGBColor(128, 0, 128), + 'orange': RGBColor(255, 165, 0) + } + + try: + if color.lower() in color_map: + # Use predefined RGB color + run_target.font.color.rgb = color_map[color.lower()] + else: + # Try to set color by name + run_target.font.color.rgb = RGBColor.from_string(color) + except Exception as e: + # If all else fails, default to black + run_target.font.color.rgb = RGBColor(0, 0, 0) + if font_size: + run_target.font.size = Pt(font_size) + if font_name: + run_target.font.name = font_name + + # Add text after target + if end_pos < len(text): + run_after = paragraph.add_run(text[end_pos:]) + + doc.save(filename) + return f"Text '{target_text}' formatted successfully in paragraph {paragraph_index}." + except Exception as e: + return f"Failed to format text: {str(e)}" + + +async def create_custom_style(filename: str, style_name: str, + bold: Optional[bool] = None, italic: Optional[bool] = None, + font_size: Optional[int] = None, font_name: Optional[str] = None, + color: Optional[str] = None, base_style: Optional[str] = None) -> str: + """Create a custom style in the document. + + Args: + filename: Path to the Word document + style_name: Name for the new style + bold: Set text bold (True/False) + italic: Set text italic (True/False) + font_size: Font size in points + font_name: Font name/family + color: Text color (e.g., 'red', 'blue') + base_style: Optional existing style to base this on + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Build font properties dictionary + font_properties = {} + if bold is not None: + font_properties['bold'] = bold + if italic is not None: + font_properties['italic'] = italic + if font_size is not None: + font_properties['size'] = font_size + if font_name is not None: + font_properties['name'] = font_name + if color is not None: + font_properties['color'] = color + + # Create the style + new_style = create_style( + doc, + style_name, + WD_STYLE_TYPE.PARAGRAPH, + base_style=base_style, + font_properties=font_properties + ) + + doc.save(filename) + return f"Style '{style_name}' created successfully." + except Exception as e: + return f"Failed to create style: {str(e)}" + + +async def format_table(filename: str, table_index: int, + has_header_row: Optional[bool] = None, + border_style: Optional[str] = None, + shading: Optional[List[List[str]]] = None) -> str: + """Format a table with borders, shading, and structure. 
+ + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + has_header_row: If True, formats the first row as a header + border_style: Style for borders ('none', 'single', 'double', 'thick') + shading: 2D list of cell background colors (by row and column) + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply formatting + success = apply_table_style(table, has_header_row or False, border_style, shading) + + if success: + doc.save(filename) + return f"Table at index {table_index} formatted successfully." + else: + return f"Failed to format table at index {table_index}." + except Exception as e: + return f"Failed to format table: {str(e)}" + + +async def set_table_cell_shading(filename: str, table_index: int, row_index: int, + col_index: int, fill_color: str, pattern: str = "clear") -> str: + """Apply shading/filling to a specific table cell. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + row_index: Row index of the cell (0-based) + col_index: Column index of the cell (0-based) + fill_color: Background color (hex string like "FF0000" or "red") + pattern: Shading pattern ("clear", "solid", "pct10", "pct20", etc.) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + row_index = int(row_index) + col_index = int(col_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index, row_index, and col_index must be integers" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Validate row and column indices + if row_index < 0 or row_index >= len(table.rows): + return f"Invalid row index. Table has {len(table.rows)} rows (0-{len(table.rows)-1})." + + if col_index < 0 or col_index >= len(table.rows[row_index].cells): + return f"Invalid column index. Row has {len(table.rows[row_index].cells)} cells (0-{len(table.rows[row_index].cells)-1})." + + # Apply cell shading + success = set_cell_shading_by_position(table, row_index, col_index, fill_color, pattern) + + if success: + doc.save(filename) + return f"Cell shading applied successfully to table {table_index}, row {row_index}, column {col_index}." + else: + return f"Failed to apply cell shading." 
+ except Exception as e: + return f"Failed to apply cell shading: {str(e)}" + + +async def apply_table_alternating_rows(filename: str, table_index: int, + color1: str = "FFFFFF", color2: str = "F2F2F2") -> str: + """Apply alternating row colors to a table for better readability. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + color1: Color for odd rows (hex string, default white) + color2: Color for even rows (hex string, default light gray) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply alternating row shading + success = apply_alternating_row_shading(table, color1, color2) + + if success: + doc.save(filename) + return f"Alternating row shading applied successfully to table {table_index}." + else: + return f"Failed to apply alternating row shading." + except Exception as e: + return f"Failed to apply alternating row shading: {str(e)}" + + +async def highlight_table_header(filename: str, table_index: int, + header_color: str = "4472C4", text_color: str = "FFFFFF") -> str: + """Apply special highlighting to table header row. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + header_color: Background color for header (hex string, default blue) + text_color: Text color for header (hex string, default white) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply header highlighting + success = highlight_header_row(table, header_color, text_color) + + if success: + doc.save(filename) + return f"Header highlighting applied successfully to table {table_index}." + else: + return f"Failed to apply header highlighting." + except Exception as e: + return f"Failed to apply header highlighting: {str(e)}" + + +async def merge_table_cells(filename: str, table_index: int, start_row: int, start_col: int, + end_row: int, end_col: int) -> str: + """Merge cells in a rectangular area of a table. 
+ + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + start_row: Starting row index (0-based) + start_col: Starting column index (0-based) + end_row: Ending row index (0-based, inclusive) + end_col: Ending column index (0-based, inclusive) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + start_row = int(start_row) + start_col = int(start_col) + end_row = int(end_row) + end_col = int(end_col) + except (ValueError, TypeError): + return "Invalid parameter: all indices must be integers" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Validate merge parameters + if start_row > end_row or start_col > end_col: + return "Invalid merge range: start indices must be <= end indices" + + if start_row == end_row and start_col == end_col: + return "Invalid merge range: cannot merge a single cell with itself" + + # Apply cell merge + success = merge_cells(table, start_row, start_col, end_row, end_col) + + if success: + doc.save(filename) + return f"Cells merged successfully in table {table_index} from ({start_row},{start_col}) to ({end_row},{end_col})." + else: + return f"Failed to merge cells. Check that indices are valid." + except Exception as e: + return f"Failed to merge cells: {str(e)}" + + +async def merge_table_cells_horizontal(filename: str, table_index: int, row_index: int, + start_col: int, end_col: int) -> str: + """Merge cells horizontally in a single row. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + row_index: Row index (0-based) + start_col: Starting column index (0-based) + end_col: Ending column index (0-based, inclusive) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + row_index = int(row_index) + start_col = int(start_col) + end_col = int(end_col) + except (ValueError, TypeError): + return "Invalid parameter: all indices must be integers" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply horizontal cell merge + success = merge_cells_horizontal(table, row_index, start_col, end_col) + + if success: + doc.save(filename) + return f"Cells merged horizontally in table {table_index}, row {row_index}, columns {start_col}-{end_col}." + else: + return f"Failed to merge cells horizontally. Check that indices are valid." 
+ except Exception as e: + return f"Failed to merge cells horizontally: {str(e)}" + + +async def merge_table_cells_vertical(filename: str, table_index: int, col_index: int, + start_row: int, end_row: int) -> str: + """Merge cells vertically in a single column. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + col_index: Column index (0-based) + start_row: Starting row index (0-based) + end_row: Ending row index (0-based, inclusive) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + col_index = int(col_index) + start_row = int(start_row) + end_row = int(end_row) + except (ValueError, TypeError): + return "Invalid parameter: all indices must be integers" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply vertical cell merge + success = merge_cells_vertical(table, col_index, start_row, end_row) + + if success: + doc.save(filename) + return f"Cells merged vertically in table {table_index}, column {col_index}, rows {start_row}-{end_row}." + else: + return f"Failed to merge cells vertically. Check that indices are valid." + except Exception as e: + return f"Failed to merge cells vertically: {str(e)}" + + +async def set_table_cell_alignment(filename: str, table_index: int, row_index: int, col_index: int, + horizontal: str = "left", vertical: str = "top") -> str: + """Set text alignment for a specific table cell. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + row_index: Row index (0-based) + col_index: Column index (0-based) + horizontal: Horizontal alignment ("left", "center", "right", "justify") + vertical: Vertical alignment ("top", "center", "bottom") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + row_index = int(row_index) + col_index = int(col_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index, row_index, and col_index must be integers" + + # Validate alignment parameters + valid_horizontal = ["left", "center", "right", "justify"] + valid_vertical = ["top", "center", "bottom"] + + if horizontal.lower() not in valid_horizontal: + return f"Invalid horizontal alignment. Valid options: {', '.join(valid_horizontal)}" + + if vertical.lower() not in valid_vertical: + return f"Invalid vertical alignment. Valid options: {', '.join(valid_vertical)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." 
+ + table = doc.tables[table_index] + + # Apply cell alignment + success = set_cell_alignment_by_position(table, row_index, col_index, horizontal, vertical) + + if success: + doc.save(filename) + return f"Cell alignment set successfully for table {table_index}, cell ({row_index},{col_index}) to {horizontal}/{vertical}." + else: + return f"Failed to set cell alignment. Check that indices are valid." + except Exception as e: + return f"Failed to set cell alignment: {str(e)}" + + +async def set_table_alignment_all(filename: str, table_index: int, + horizontal: str = "left", vertical: str = "top") -> str: + """Set text alignment for all cells in a table. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + horizontal: Horizontal alignment ("left", "center", "right", "justify") + vertical: Vertical alignment ("top", "center", "bottom") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer" + + # Validate alignment parameters + valid_horizontal = ["left", "center", "right", "justify"] + valid_vertical = ["top", "center", "bottom"] + + if horizontal.lower() not in valid_horizontal: + return f"Invalid horizontal alignment. Valid options: {', '.join(valid_horizontal)}" + + if vertical.lower() not in valid_vertical: + return f"Invalid vertical alignment. Valid options: {', '.join(valid_vertical)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply table alignment + success = set_table_alignment(table, horizontal, vertical) + + if success: + doc.save(filename) + return f"Table alignment set successfully for table {table_index} to {horizontal}/{vertical} for all cells." + else: + return f"Failed to set table alignment." + except Exception as e: + return f"Failed to set table alignment: {str(e)}" + + +async def set_table_column_width(filename: str, table_index: int, col_index: int, + width: float, width_type: str = "points") -> str: + """Set the width of a specific table column. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + col_index: Column index (0-based) + width: Column width value + width_type: Width type ("points", "inches", "cm", "percent", "auto") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + col_index = int(col_index) + if width_type != "auto": + width = float(width) + except (ValueError, TypeError): + return "Invalid parameter: table_index and col_index must be integers, width must be a number" + + # Validate width type + valid_width_types = ["points", "inches", "cm", "percent", "auto"] + if width_type.lower() not in valid_width_types: + return f"Invalid width type. 
Valid options: {', '.join(valid_width_types)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Validate column index + if col_index < 0 or col_index >= len(table.columns): + return f"Invalid column index. Table has {len(table.columns)} columns (0-{len(table.columns)-1})." + + # Convert width and type for Word format + if width_type.lower() == "points": + # Points to DXA (twentieths of a point) + word_width = width + word_type = "dxa" + elif width_type.lower() == "inches": + # Inches to points, then to DXA + word_width = width * 72 # 72 points per inch + word_type = "dxa" + elif width_type.lower() == "cm": + # CM to points, then to DXA + word_width = width * 28.35 # ~28.35 points per cm + word_type = "dxa" + elif width_type.lower() == "percent": + # Percentage (Word uses 50x the percentage value) + word_width = width + word_type = "pct" + else: # auto + word_width = 0 + word_type = "auto" + + # Apply column width + success = set_column_width_by_position(table, col_index, word_width, word_type) + + if success: + doc.save(filename) + return f"Column width set successfully for table {table_index}, column {col_index} to {width} {width_type}." + else: + return f"Failed to set column width. Check that indices are valid." + except Exception as e: + return f"Failed to set column width: {str(e)}" + + +async def set_table_column_widths(filename: str, table_index: int, widths: list, + width_type: str = "points") -> str: + """Set the widths of multiple table columns. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + widths: List of width values for each column + width_type: Width type ("points", "inches", "cm", "percent", "auto") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + if width_type != "auto": + widths = [float(w) for w in widths] + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer, widths must be a list of numbers" + + # Validate width type + valid_width_types = ["points", "inches", "cm", "percent", "auto"] + if width_type.lower() not in valid_width_types: + return f"Invalid width type. Valid options: {', '.join(valid_width_types)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." 
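+ # Below, each width is converted for the OOXML table grid: "inches" and "cm"
+ # are turned into points, "points" and "percent" pass through unchanged, and
+ # width_type is mapped to the Word type string ("dxa", "pct" or "auto")
+ # expected by set_column_widths.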
+ + table = doc.tables[table_index] + + # Convert widths and type for Word format + word_widths = [] + for width in widths: + if width_type.lower() == "points": + word_widths.append(width) + elif width_type.lower() == "inches": + word_widths.append(width * 72) # 72 points per inch + elif width_type.lower() == "cm": + word_widths.append(width * 28.35) # ~28.35 points per cm + elif width_type.lower() == "percent": + word_widths.append(width) + else: # auto + word_widths.append(0) + + # Determine Word type + if width_type.lower() == "percent": + word_type = "pct" + elif width_type.lower() == "auto": + word_type = "auto" + else: + word_type = "dxa" + + # Apply column widths + success = set_column_widths(table, word_widths, word_type) + + if success: + doc.save(filename) + return f"Column widths set successfully for table {table_index} with {len(widths)} columns in {width_type}." + else: + return f"Failed to set column widths." + except Exception as e: + return f"Failed to set column widths: {str(e)}" + + +async def set_table_width(filename: str, table_index: int, width: float, + width_type: str = "points") -> str: + """Set the overall width of a table. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + width: Table width value + width_type: Width type ("points", "inches", "cm", "percent", "auto") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + if width_type != "auto": + width = float(width) + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer, width must be a number" + + # Validate width type + valid_width_types = ["points", "inches", "cm", "percent", "auto"] + if width_type.lower() not in valid_width_types: + return f"Invalid width type. Valid options: {', '.join(valid_width_types)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Convert width and type for Word format + if width_type.lower() == "points": + word_width = width + word_type = "dxa" + elif width_type.lower() == "inches": + word_width = width * 72 # 72 points per inch + word_type = "dxa" + elif width_type.lower() == "cm": + word_width = width * 28.35 # ~28.35 points per cm + word_type = "dxa" + elif width_type.lower() == "percent": + word_width = width + word_type = "pct" + else: # auto + word_width = 0 + word_type = "auto" + + # Apply table width + success = set_table_width_func(table, word_width, word_type) + + if success: + doc.save(filename) + return f"Table width set successfully for table {table_index} to {width} {width_type}." + else: + return f"Failed to set table width." + except Exception as e: + return f"Failed to set table width: {str(e)}" + + +async def auto_fit_table_columns(filename: str, table_index: int) -> str: + """Set table columns to auto-fit based on content. 
+ + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + except (ValueError, TypeError): + return "Invalid parameter: table_index must be an integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Apply auto-fit + success = auto_fit_table(table) + + if success: + doc.save(filename) + return f"Table {table_index} set to auto-fit columns based on content." + else: + return f"Failed to set table auto-fit." + except Exception as e: + return f"Failed to set table auto-fit: {str(e)}" + + +async def format_table_cell_text(filename: str, table_index: int, row_index: int, col_index: int, + text_content: Optional[str] = None, bold: Optional[bool] = None, italic: Optional[bool] = None, + underline: Optional[bool] = None, color: Optional[str] = None, font_size: Optional[int] = None, + font_name: Optional[str] = None) -> str: + """Format text within a specific table cell. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + row_index: Row index (0-based) + col_index: Column index (0-based) + text_content: Optional new text content for the cell + bold: Set text bold (True/False) + italic: Set text italic (True/False) + underline: Set text underlined (True/False) + color: Text color (hex string like "FF0000" or color name like "red") + font_size: Font size in points + font_name: Font name/family + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + row_index = int(row_index) + col_index = int(col_index) + if font_size is not None: + font_size = int(font_size) + except (ValueError, TypeError): + return "Invalid parameter: table_index, row_index, col_index must be integers, font_size must be integer" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Validate row and column indices + if row_index < 0 or row_index >= len(table.rows): + return f"Invalid row index. Table has {len(table.rows)} rows (0-{len(table.rows)-1})." + + if col_index < 0 or col_index >= len(table.rows[row_index].cells): + return f"Invalid column index. Row has {len(table.rows[row_index].cells)} cells (0-{len(table.rows[row_index].cells)-1})." 
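+ # format_cell_text_by_position handles both concerns below: it replaces the
+ # cell text when text_content is given and applies the requested character
+ # formatting (bold, italic, underline, color, size, font) to the cell; the
+ # success message then summarizes which attributes were changed.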
+ + # Apply cell text formatting + success = format_cell_text_by_position(table, row_index, col_index, text_content, + bold, italic, underline, color, font_size, font_name) + + if success: + doc.save(filename) + format_desc = [] + if text_content is not None: + format_desc.append(f"content='{text_content[:30]}{'...' if len(text_content) > 30 else ''}'") + if bold is not None: + format_desc.append(f"bold={bold}") + if italic is not None: + format_desc.append(f"italic={italic}") + if underline is not None: + format_desc.append(f"underline={underline}") + if color is not None: + format_desc.append(f"color={color}") + if font_size is not None: + format_desc.append(f"size={font_size}pt") + if font_name is not None: + format_desc.append(f"font={font_name}") + + format_str = ", ".join(format_desc) if format_desc else "no changes" + return f"Cell text formatted successfully in table {table_index}, cell ({row_index},{col_index}): {format_str}." + else: + return f"Failed to format cell text. Check that indices are valid." + except Exception as e: + return f"Failed to format cell text: {str(e)}" + + +async def set_table_cell_padding(filename: str, table_index: int, row_index: int, col_index: int, + top: Optional[float] = None, bottom: Optional[float] = None, left: Optional[float] = None, + right: Optional[float] = None, unit: str = "points") -> str: + """Set padding/margins for a specific table cell. + + Args: + filename: Path to the Word document + table_index: Index of the table (0-based) + row_index: Row index (0-based) + col_index: Column index (0-based) + top: Top padding in specified units + bottom: Bottom padding in specified units + left: Left padding in specified units + right: Right padding in specified units + unit: Unit type ("points" or "percent") + """ + filename = ensure_docx_extension(filename) + + # Ensure numeric parameters are the correct type + try: + table_index = int(table_index) + row_index = int(row_index) + col_index = int(col_index) + if top is not None: + top = float(top) + if bottom is not None: + bottom = float(bottom) + if left is not None: + left = float(left) + if right is not None: + right = float(right) + except (ValueError, TypeError): + return "Invalid parameter: indices must be integers, padding values must be numbers" + + # Validate unit + valid_units = ["points", "percent"] + if unit.lower() not in valid_units: + return f"Invalid unit. Valid options: {', '.join(valid_units)}" + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}. Consider creating a copy first." + + try: + doc = Document(filename) + + # Validate table index + if table_index < 0 or table_index >= len(doc.tables): + return f"Invalid table index. Document has {len(doc.tables)} tables (0-{len(doc.tables)-1})." + + table = doc.tables[table_index] + + # Validate row and column indices + if row_index < 0 or row_index >= len(table.rows): + return f"Invalid row index. Table has {len(table.rows)} rows (0-{len(table.rows)-1})." + + if col_index < 0 or col_index >= len(table.rows[row_index].cells): + return f"Invalid column index. Row has {len(table.rows[row_index].cells)} cells (0-{len(table.rows[row_index].cells)-1})." 
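+ # Below, the public unit name is mapped to the OOXML measurement type
+ # ("points" -> "dxa", "percent" -> "pct") before the per-side values are
+ # handed to set_cell_padding_by_position, and the result message lists only
+ # the sides that were actually set.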
+ + # Convert unit for Word format + word_unit = "dxa" if unit.lower() == "points" else "pct" + + # Apply cell padding + success = set_cell_padding_by_position(table, row_index, col_index, top, bottom, + left, right, word_unit) + + if success: + doc.save(filename) + padding_desc = [] + if top is not None: + padding_desc.append(f"top={top}") + if bottom is not None: + padding_desc.append(f"bottom={bottom}") + if left is not None: + padding_desc.append(f"left={left}") + if right is not None: + padding_desc.append(f"right={right}") + + padding_str = ", ".join(padding_desc) if padding_desc else "no padding" + return f"Cell padding set successfully for table {table_index}, cell ({row_index},{col_index}): {padding_str} {unit}." + else: + return f"Failed to set cell padding. Check that indices are valid." + except Exception as e: + return f"Failed to set cell padding: {str(e)}" diff --git a/backend/office_word_mcp/word_document_server/tools/protection_tools.py b/backend/office_word_mcp/word_document_server/tools/protection_tools.py new file mode 100644 index 0000000..e52fd34 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/tools/protection_tools.py @@ -0,0 +1,275 @@ +""" +Protection tools for Word Document Server. + +These tools handle document protection features such as +password protection, restricted editing, and digital signatures. +""" +import os +import hashlib +import datetime +import io +from typing import List, Optional, Dict, Any +from docx import Document +import msoffcrypto + +from word_document_server.utils.file_utils import check_file_writeable, ensure_docx_extension + + + +from word_document_server.core.protection import ( + add_protection_info, + verify_document_protection, + create_signature_info +) + + +async def protect_document(filename: str, password: str) -> str: + """Add password protection to a Word document. + + Args: + filename: Path to the Word document + password: Password to protect the document with + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot protect document: {error_message}" + + try: + # Read the original file content + with open(filename, "rb") as infile: + original_data = infile.read() + + # Create an msoffcrypto file object from the original data + file = msoffcrypto.OfficeFile(io.BytesIO(original_data)) + file.load_key(password=password) # Set the password for encryption + + # Encrypt the data into an in-memory buffer + encrypted_data_io = io.BytesIO() + + file.encrypt(password=password, outfile=encrypted_data_io) + + # Overwrite the original file with the encrypted data + with open(filename, "wb") as outfile: + outfile.write(encrypted_data_io.getvalue()) + + + base_path, _ = os.path.splitext(filename) + metadata_path = f"{base_path}.protection" + if os.path.exists(metadata_path): + os.remove(metadata_path) + + return f"Document {filename} encrypted successfully with password." + + except Exception as e: + # Attempt to restore original file content on failure + try: + if 'original_data' in locals(): + with open(filename, "wb") as outfile: + outfile.write(original_data) + return f"Failed to encrypt document {filename}: {str(e)}. Original file restored." + else: + return f"Failed to encrypt document {filename}: {str(e)}. Could not restore original file." 
+ except Exception as restore_e: + return f"Failed to encrypt document {filename}: {str(e)}. Also failed to restore original file: {str(restore_e)}" + + +async def add_restricted_editing(filename: str, password: str, editable_sections: List[str]) -> str: + """Add restricted editing to a Word document, allowing editing only in specified sections. + + Args: + filename: Path to the Word document + password: Password to protect the document with + editable_sections: List of section names that can be edited + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot protect document: {error_message}" + + try: + # Hash the password for security + password_hash = hashlib.sha256(password.encode()).hexdigest() + + # Add protection info to metadata + success = add_protection_info( + filename, + protection_type="restricted", + password_hash=password_hash, + sections=editable_sections + ) + + if not editable_sections: + return "No editable sections specified. Document will be fully protected." + + if success: + return f"Document {filename} protected with restricted editing. Editable sections: {', '.join(editable_sections)}" + else: + return f"Failed to protect document {filename} with restricted editing" + except Exception as e: + return f"Failed to add restricted editing: {str(e)}" + +async def add_digital_signature(filename: str, signer_name: str, reason: Optional[str] = None) -> str: + """Add a digital signature to a Word document. + + Args: + filename: Path to the Word document + signer_name: Name of the person signing the document + reason: Optional reason for signing + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot add signature to document: {error_message}" + + try: + doc = Document(filename) + + # Create signature info + signature_info = create_signature_info(doc, signer_name, reason) + + # Add protection info to metadata + success = add_protection_info( + filename, + protection_type="signature", + password_hash="", # No password for signature-only + signature_info=signature_info + ) + + if success: + # Add a visible signature block to the document + doc.add_paragraph("").add_run() # Add empty paragraph for spacing + signature_para = doc.add_paragraph() + signature_para.add_run(f"Digitally signed by: {signer_name}").bold = True + if reason: + signature_para.add_run(f"\nReason: {reason}") + signature_para.add_run(f"\nDate: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + signature_para.add_run(f"\nSignature ID: {signature_info['content_hash'][:8]}") + + # Save the document with the visible signature + doc.save(filename) + + return f"Digital signature added to document {filename}" + else: + return f"Failed to add digital signature to document {filename}" + except Exception as e: + return f"Failed to add digital signature: {str(e)}" + +async def verify_document(filename: str, password: Optional[str] = None) -> str: + """Verify document protection and/or digital signature. 
+ + Args: + filename: Path to the Word document + password: Optional password to verify + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + try: + # Verify document protection + is_verified, message = verify_document_protection(filename, password) + + if not is_verified and password: + return f"Document verification failed: {message}" + + # If document has a digital signature, verify content integrity + base_path, _ = os.path.splitext(filename) + metadata_path = f"{base_path}.protection" + + if os.path.exists(metadata_path): + try: + import json + with open(metadata_path, 'r') as f: + protection_data = json.load(f) + + if protection_data.get("type") == "signature": + # Get the original content hash + signature_info = protection_data.get("signature", {}) + original_hash = signature_info.get("content_hash") + + if original_hash: + # Calculate current content hash + doc = Document(filename) + text_content = "\n".join([p.text for p in doc.paragraphs]) + current_hash = hashlib.sha256(text_content.encode()).hexdigest() + + # Compare hashes + if current_hash != original_hash: + return f"Document has been modified since it was signed by {signature_info.get('signer')}" + else: + return f"Document signature is valid. Signed by {signature_info.get('signer')} on {signature_info.get('timestamp')}" + except Exception as e: + return f"Error verifying signature: {str(e)}" + + return message + except Exception as e: + return f"Failed to verify document: {str(e)}" + +async def unprotect_document(filename: str, password: str) -> str: + """Remove password protection from a Word document. + + Args: + filename: Path to the Word document + password: Password that was used to protect the document + """ + filename = ensure_docx_extension(filename) + + if not os.path.exists(filename): + return f"Document {filename} does not exist" + + # Check if file is writeable + is_writeable, error_message = check_file_writeable(filename) + if not is_writeable: + return f"Cannot modify document: {error_message}" + + try: + # Read the encrypted file content + with open(filename, "rb") as infile: + encrypted_data = infile.read() + + # Create an msoffcrypto file object from the encrypted data + file = msoffcrypto.OfficeFile(io.BytesIO(encrypted_data)) + file.load_key(password=password) # Set the password for decryption + + # Decrypt the data into an in-memory buffer + decrypted_data_io = io.BytesIO() + file.decrypt(outfile=decrypted_data_io) # Pass the buffer as the 'outfile' argument + + # Overwrite the original file with the decrypted data + with open(filename, "wb") as outfile: + outfile.write(decrypted_data_io.getvalue()) + + return f"Document {filename} decrypted successfully." + + except msoffcrypto.exceptions.InvalidKeyError: + return f"Failed to decrypt document {filename}: Incorrect password." + except msoffcrypto.exceptions.InvalidFormatError: + return f"Failed to decrypt document {filename}: File is not encrypted or is not a supported Office format." + except Exception as e: + # Attempt to restore encrypted file content on failure + try: + if 'encrypted_data' in locals(): + with open(filename, "wb") as outfile: + outfile.write(encrypted_data) + return f"Failed to decrypt document {filename}: {str(e)}. Encrypted file restored." + else: + return f"Failed to decrypt document {filename}: {str(e)}. Could not restore encrypted file." + except Exception as restore_e: + return f"Failed to decrypt document {filename}: {str(e)}. 
Also failed to restore encrypted file: {str(restore_e)}" diff --git a/backend/office_word_mcp/word_document_server/utils/__init__.py b/backend/office_word_mcp/word_document_server/utils/__init__.py new file mode 100644 index 0000000..1146a58 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/utils/__init__.py @@ -0,0 +1,8 @@ +""" +Utility functions for the Word Document Server. + +This package contains utility modules for file operations and document handling. +""" + +from word_document_server.utils.file_utils import check_file_writeable, create_document_copy, ensure_docx_extension +from word_document_server.utils.document_utils import get_document_properties, extract_document_text, get_document_structure, find_paragraph_by_text, find_and_replace_text diff --git a/backend/office_word_mcp/word_document_server/utils/document_utils.py b/backend/office_word_mcp/word_document_server/utils/document_utils.py new file mode 100644 index 0000000..c3681c0 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/utils/document_utils.py @@ -0,0 +1,618 @@ +""" +Document utility functions for Word Document Server. +""" +import json +from typing import Dict, List, Any +from docx import Document +from docx.oxml.table import CT_Tbl +from docx.oxml.text.paragraph import CT_P +from docx.oxml.ns import qn +from docx.oxml import OxmlElement + + +def get_document_properties(doc_path: str) -> Dict[str, Any]: + """Get properties of a Word document.""" + import os + if not os.path.exists(doc_path): + return {"error": f"Document {doc_path} does not exist"} + + try: + doc = Document(doc_path) + core_props = doc.core_properties + + return { + "title": core_props.title or "", + "author": core_props.author or "", + "subject": core_props.subject or "", + "keywords": core_props.keywords or "", + "created": str(core_props.created) if core_props.created else "", + "modified": str(core_props.modified) if core_props.modified else "", + "last_modified_by": core_props.last_modified_by or "", + "revision": core_props.revision or 0, + "page_count": len(doc.sections), + "word_count": sum(len(paragraph.text.split()) for paragraph in doc.paragraphs), + "paragraph_count": len(doc.paragraphs), + "table_count": len(doc.tables) + } + except Exception as e: + return {"error": f"Failed to get document properties: {str(e)}"} + + +def extract_document_text(doc_path: str) -> str: + """Extract all text from a Word document.""" + import os + if not os.path.exists(doc_path): + return f"Document {doc_path} does not exist" + + try: + doc = Document(doc_path) + text = [] + + for paragraph in doc.paragraphs: + text.append(paragraph.text) + + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for paragraph in cell.paragraphs: + text.append(paragraph.text) + + return "\n".join(text) + except Exception as e: + return f"Failed to extract text: {str(e)}" + + +def get_document_structure(doc_path: str) -> Dict[str, Any]: + """Get the structure of a Word document.""" + import os + if not os.path.exists(doc_path): + return {"error": f"Document {doc_path} does not exist"} + + try: + doc = Document(doc_path) + structure = { + "paragraphs": [], + "tables": [] + } + + # Get paragraphs + for i, para in enumerate(doc.paragraphs): + structure["paragraphs"].append({ + "index": i, + "text": para.text[:100] + ("..." 
if len(para.text) > 100 else ""), + "style": para.style.name if para.style else "Normal" + }) + + # Get tables + for i, table in enumerate(doc.tables): + table_data = { + "index": i, + "rows": len(table.rows), + "columns": len(table.columns), + "preview": [] + } + + # Get sample of table data + max_rows = min(3, len(table.rows)) + for row_idx in range(max_rows): + row_data = [] + max_cols = min(3, len(table.columns)) + for col_idx in range(max_cols): + try: + cell_text = table.cell(row_idx, col_idx).text + row_data.append(cell_text[:20] + ("..." if len(cell_text) > 20 else "")) + except IndexError: + row_data.append("N/A") + table_data["preview"].append(row_data) + + structure["tables"].append(table_data) + + return structure + except Exception as e: + return {"error": f"Failed to get document structure: {str(e)}"} + + +def find_paragraph_by_text(doc, text, partial_match=False): + """ + Find paragraphs containing specific text. + + Args: + doc: Document object + text: Text to search for + partial_match: If True, matches paragraphs containing the text; if False, matches exact text + + Returns: + List of paragraph indices that match the criteria + """ + matching_paragraphs = [] + + for i, para in enumerate(doc.paragraphs): + if partial_match and text in para.text: + matching_paragraphs.append(i) + elif not partial_match and para.text == text: + matching_paragraphs.append(i) + + return matching_paragraphs + + +def find_and_replace_text(doc, old_text, new_text): + """ + Find and replace text throughout the document, skipping Table of Contents (TOC) paragraphs. + + Args: + doc: Document object + old_text: Text to find + new_text: Text to replace with + + Returns: + Number of replacements made + """ + count = 0 + + # Search in paragraphs + for para in doc.paragraphs: + # Skip TOC paragraphs + if para.style and para.style.name.startswith("TOC"): + continue + if old_text in para.text: + for run in para.runs: + if old_text in run.text: + run.text = run.text.replace(old_text, new_text) + count += 1 + + # Search in tables + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + for para in cell.paragraphs: + # Skip TOC paragraphs in tables + if para.style and para.style.name.startswith("TOC"): + continue + if old_text in para.text: + for run in para.runs: + if old_text in run.text: + run.text = run.text.replace(old_text, new_text) + count += 1 + + return count + + +def get_document_xml(doc_path: str) -> str: + """Extract and return the raw XML structure of the Word document (word/document.xml).""" + import os + import zipfile + if not os.path.exists(doc_path): + return f"Document {doc_path} does not exist" + try: + with zipfile.ZipFile(doc_path) as docx_zip: + with docx_zip.open('word/document.xml') as xml_file: + return xml_file.read().decode('utf-8') + except Exception as e: + return f"Failed to extract XML: {str(e)}" + + +def insert_header_near_text(doc_path: str, target_text: str = None, header_title: str = "", position: str = 'after', header_style: str = 'Heading 1', target_paragraph_index: int = None) -> str: + """Insert a header (with specified style) before or after the target paragraph. Specify by text or paragraph index. 
Skips TOC paragraphs in text search.""" + import os + from docx import Document + if not os.path.exists(doc_path): + return f"Document {doc_path} does not exist" + try: + doc = Document(doc_path) + found = False + para = None + if target_paragraph_index is not None: + if target_paragraph_index < 0 or target_paragraph_index >= len(doc.paragraphs): + return f"Invalid target_paragraph_index: {target_paragraph_index}. Document has {len(doc.paragraphs)} paragraphs." + para = doc.paragraphs[target_paragraph_index] + found = True + else: + for i, p in enumerate(doc.paragraphs): + # Skip TOC paragraphs + if p.style and p.style.name.lower().startswith("toc"): + continue + if target_text and target_text in p.text: + para = p + found = True + break + if not found or para is None: + return f"Target paragraph not found (by index or text). (TOC paragraphs are skipped in text search)" + # Save anchor index before insertion + if target_paragraph_index is not None: + anchor_index = target_paragraph_index + else: + anchor_index = None + for i, p in enumerate(doc.paragraphs): + if p is para: + anchor_index = i + break + new_para = doc.add_paragraph(header_title, style=header_style) + if position == 'before': + para._element.addprevious(new_para._element) + else: + para._element.addnext(new_para._element) + doc.save(doc_path) + if anchor_index is not None: + return f"Header '{header_title}' (style: {header_style}) inserted {position} paragraph (index {anchor_index})." + else: + return f"Header '{header_title}' (style: {header_style}) inserted {position} the target paragraph." + except Exception as e: + return f"Failed to insert header: {str(e)}" + + +def insert_line_or_paragraph_near_text(doc_path: str, target_text: str = None, line_text: str = "", position: str = 'after', line_style: str = None, target_paragraph_index: int = None) -> str: + """ + Insert a new line or paragraph (with specified or matched style) before or after the target paragraph. + You can specify the target by text (first match) or by paragraph index. + Skips paragraphs whose style name starts with 'TOC' if using text search. + """ + import os + from docx import Document + if not os.path.exists(doc_path): + return f"Document {doc_path} does not exist" + try: + doc = Document(doc_path) + found = False + para = None + if target_paragraph_index is not None: + if target_paragraph_index < 0 or target_paragraph_index >= len(doc.paragraphs): + return f"Invalid target_paragraph_index: {target_paragraph_index}. Document has {len(doc.paragraphs)} paragraphs." + para = doc.paragraphs[target_paragraph_index] + found = True + else: + for i, p in enumerate(doc.paragraphs): + # Skip TOC paragraphs + if p.style and p.style.name.lower().startswith("toc"): + continue + if target_text and target_text in p.text: + para = p + found = True + break + if not found or para is None: + return f"Target paragraph not found (by index or text). 
(TOC paragraphs are skipped in text search)" + # Save anchor index before insertion + if target_paragraph_index is not None: + anchor_index = target_paragraph_index + else: + anchor_index = None + for i, p in enumerate(doc.paragraphs): + if p is para: + anchor_index = i + break + # Determine style: use provided or match target + style = line_style if line_style else para.style + new_para = doc.add_paragraph(line_text, style=style) + if position == 'before': + para._element.addprevious(new_para._element) + else: + para._element.addnext(new_para._element) + doc.save(doc_path) + if anchor_index is not None: + return f"Line/paragraph inserted {position} paragraph (index {anchor_index}) with style '{style}'." + else: + return f"Line/paragraph inserted {position} the target paragraph with style '{style}'." + except Exception as e: + return f"Failed to insert line/paragraph: {str(e)}" + + +def add_bullet_numbering(paragraph, num_id=1, level=0): + """ + Add bullet/numbering XML to a paragraph. + + Args: + paragraph: python-docx Paragraph object + num_id: Numbering definition ID (1=bullets, 2=numbers, etc.) + level: Indentation level (0=first level, 1=second level, etc.) + + Returns: + The modified paragraph + """ + # Get or create paragraph properties + pPr = paragraph._element.get_or_add_pPr() + + # Remove existing numPr if any (to avoid duplicates) + existing_numPr = pPr.find(qn('w:numPr')) + if existing_numPr is not None: + pPr.remove(existing_numPr) + + # Create numbering properties element + numPr = OxmlElement('w:numPr') + + # Set indentation level + ilvl = OxmlElement('w:ilvl') + ilvl.set(qn('w:val'), str(level)) + numPr.append(ilvl) + + # Set numbering definition ID + numId = OxmlElement('w:numId') + numId.set(qn('w:val'), str(num_id)) + numPr.append(numId) + + # Add to paragraph properties + pPr.append(numPr) + + return paragraph + + +def insert_numbered_list_near_text(doc_path: str, target_text: str = None, list_items: list = None, position: str = 'after', target_paragraph_index: int = None, bullet_type: str = 'bullet') -> str: + """ + Insert a bulleted or numbered list before or after the target paragraph. Specify by text or paragraph index. Skips TOC paragraphs in text search. + Args: + doc_path: Path to the Word document + target_text: Text to search for in paragraphs (optional if using index) + list_items: List of strings, each as a list item + position: 'before' or 'after' (default: 'after') + target_paragraph_index: Optional paragraph index to use as anchor + bullet_type: 'bullet' for bullets (•), 'number' for numbers (1,2,3) (default: 'bullet') + Returns: + Status message + """ + import os + from docx import Document + if not os.path.exists(doc_path): + return f"Document {doc_path} does not exist" + try: + doc = Document(doc_path) + found = False + para = None + if target_paragraph_index is not None: + if target_paragraph_index < 0 or target_paragraph_index >= len(doc.paragraphs): + return f"Invalid target_paragraph_index: {target_paragraph_index}. Document has {len(doc.paragraphs)} paragraphs." + para = doc.paragraphs[target_paragraph_index] + found = True + else: + for i, p in enumerate(doc.paragraphs): + # Skip TOC paragraphs + if p.style and p.style.name.lower().startswith("toc"): + continue + if target_text and target_text in p.text: + para = p + found = True + break + if not found or para is None: + return f"Target paragraph not found (by index or text). 
(TOC paragraphs are skipped in text search)"
+        # Save anchor index before insertion
+        if target_paragraph_index is not None:
+            anchor_index = target_paragraph_index
+        else:
+            anchor_index = None
+            for i, p in enumerate(doc.paragraphs):
+                if p is para:
+                    anchor_index = i
+                    break
+        # Determine numbering ID based on bullet_type
+        num_id = 1 if bullet_type == 'bullet' else 2
+
+        # Use ListParagraph style for proper list formatting
+        style_name = None
+        for candidate in ['List Paragraph', 'ListParagraph', 'Normal']:
+            try:
+                _ = doc.styles[candidate]
+                style_name = candidate
+                break
+            except KeyError:
+                continue
+        if not style_name:
+            style_name = None  # fallback to default
+
+        new_paras = []
+        for item in (list_items or []):
+            p = doc.add_paragraph(item, style=style_name)
+            # Add the bullet/numbering XML to the paragraph
+            add_bullet_numbering(p, num_id=num_id, level=0)
+            new_paras.append(p)
+        # Move the new paragraphs to the correct position, preserving their order
+        if position == 'before':
+            for p in new_paras:
+                para._element.addprevious(p._element)
+        else:
+            for p in reversed(new_paras):
+                para._element.addnext(p._element)
+        doc.save(doc_path)
+        list_type = "bulleted" if bullet_type == 'bullet' else "numbered"
+        if anchor_index is not None:
+            return f"{list_type.capitalize()} list with {len(new_paras)} items inserted {position} paragraph (index {anchor_index})."
+        else:
+            return f"{list_type.capitalize()} list with {len(new_paras)} items inserted {position} the target paragraph."
+    except Exception as e:
+        return f"Failed to insert numbered list: {str(e)}"
+
+
+def is_toc_paragraph(para):
+    """Return True if the paragraph uses a Table of Contents (TOC) style."""
+    return para.style and para.style.name.upper().startswith("TOC")
+
+
+def is_heading_paragraph(para):
+    """Return True if the paragraph uses a heading style (Heading 1, Heading 2, etc.)."""
+    return para.style and para.style.name.lower().startswith("heading")
+
+
+# --- Helper: Get style name from a paragraph (<w:p>) element ---
+def get_paragraph_style(el):
+    from docx.oxml.ns import qn
+    pPr = el.find(qn('w:pPr'))
+    if pPr is not None:
+        pStyle = pPr.find(qn('w:pStyle'))
+        if pStyle is not None:
+            # lxml stores attributes under their namespace-expanded names
+            return pStyle.get(qn('w:val'))
+    return None
+
+# --- Main: Delete everything under a header until next heading/TOC ---
+def delete_block_under_header(doc, header_text):
+    """
+    Remove all elements (paragraphs, tables, etc.) after the header (by text) and before the next heading/TOC (by style).
+    Returns: (header_element, elements_removed)
+    """
+    # Find the header paragraph by text (like delete_paragraph finds by index)
+    header_para = None
+    header_idx = None
+
+    for i, para in enumerate(doc.paragraphs):
+        if para.text.strip().lower() == header_text.strip().lower():
+            header_para = para
+            header_idx = i
+            break
+
+    if header_para is None:
+        return None, 0
+
+    # Find the next heading/TOC paragraph to determine the end of the block
+    end_idx = None
+    for i in range(header_idx + 1, len(doc.paragraphs)):
+        para = doc.paragraphs[i]
+        if para.style and para.style.name.lower().startswith(('heading', 'título', 'toc')):
+            end_idx = i
+            break
+
+    # If no next heading found, delete until end of document
+    if end_idx is None:
+        end_idx = len(doc.paragraphs)
+
+    # Remove paragraphs by index (like delete_paragraph does). The paragraph list
+    # shrinks after every removal, so always remove the first paragraph after the
+    # header until the required number of paragraphs has been deleted.
+    removed_count = 0
+    for _ in range(end_idx - header_idx - 1):
+        if header_idx + 1 >= len(doc.paragraphs):  # Safety check
+            break
+        para = doc.paragraphs[header_idx + 1]
+        p = para._p
+        p.getparent().remove(p)
+        removed_count += 1
+
+    return header_para._p, removed_count
+
+# --- Usage in replace_paragraph_block_below_header ---
+def replace_paragraph_block_below_header(
+    doc_path: str,
+    header_text: str,
+    new_paragraphs: list,
+    detect_block_end_fn=None,
+    new_paragraph_style: str = None
+) -> str:
+    """
+    Replace all content below a header (matched by text) up to the next heading/TOC paragraph (matched by style).
+    """
+    from docx import Document
+    import os
+    if not os.path.exists(doc_path):
+        return f"Document {doc_path} not found."
+
+    doc = Document(doc_path)
+
+    # Find the header paragraph first
+    header_para = None
+    header_idx = None
+    for i, para in enumerate(doc.paragraphs):
+        para_text = para.text.strip().lower()
+        is_toc = is_toc_paragraph(para)
+        if para_text == header_text.strip().lower() and not is_toc:
+            header_para = para
+            header_idx = i
+            break
+
+    if header_para is None:
+        return f"Header '{header_text}' not found in document."
+
+    # Delete everything under the header using the same document instance
+    header_el, removed_count = delete_block_under_header(doc, header_text)
+
+    # Now insert new paragraphs after the header (which should still be in the document)
+    style_to_use = new_paragraph_style or "Normal"
+
+    # Find the header again after deletion (it should still be there)
+    current_para = header_para
+    for text in new_paragraphs:
+        new_para = doc.add_paragraph(text, style=style_to_use)
+        current_para._element.addnext(new_para._element)
+        current_para = new_para
+
+    doc.save(doc_path)
+    return f"Replaced content under '{header_text}' with {len(new_paragraphs)} paragraph(s), style: {style_to_use}, removed {removed_count} elements."
+
+
+def replace_block_between_manual_anchors(
+    doc_path: str,
+    start_anchor_text: str,
+    new_paragraphs: list,
+    end_anchor_text: str = None,
+    match_fn=None,
+    new_paragraph_style: str = None
+) -> str:
+    """
+    Replace all content (paragraphs, tables, etc.) between start_anchor_text and end_anchor_text (or next logical header if not provided).
+    If end_anchor_text is None, deletes until next visually distinct paragraph (bold, all caps, or different font size), or end of document.
+    Inserts new_paragraphs after the start anchor.
+    """
+    from docx import Document
+    import os
+    if not os.path.exists(doc_path):
+        return f"Document {doc_path} not found."
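+    # Note: this function operates on the raw document body (doc.element.body), so the
+    # elements removed between the two anchors include tables and other block-level
+    # content, not just paragraphs. Anchor paragraphs are matched by their exact
+    # (stripped) text. A hypothetical call replacing everything between two headings:
+    #   replace_block_between_manual_anchors("paper.docx", "Introduction",
+    #                                        ["New introduction text."],
+    #                                        end_anchor_text="Methods")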
+ doc = Document(doc_path) + body = doc.element.body + elements = list(body) + start_idx = None + end_idx = None + # Find start anchor + for i, el in enumerate(elements): + if el.tag == CT_P.tag: + p_text = "".join([node.text or '' for node in el.iter() if node.tag.endswith('}t')]).strip() + if match_fn: + if match_fn(p_text, el): + start_idx = i + break + elif p_text == start_anchor_text.strip(): + start_idx = i + break + if start_idx is None: + return f"Start anchor '{start_anchor_text}' not found." + # Find end anchor + if end_anchor_text: + for i in range(start_idx + 1, len(elements)): + el = elements[i] + if el.tag == CT_P.tag: + p_text = "".join([node.text or '' for node in el.iter() if node.tag.endswith('}t')]).strip() + if match_fn: + if match_fn(p_text, el, is_end=True): + end_idx = i + break + elif p_text == end_anchor_text.strip(): + end_idx = i + break + else: + # Heuristic: next visually distinct paragraph (bold, all caps, or different font size), or end of document + for i in range(start_idx + 1, len(elements)): + el = elements[i] + if el.tag == CT_P.tag: + # Check for bold, all caps, or font size + runs = [node for node in el.iter() if node.tag.endswith('}r')] + for run in runs: + rpr = run.find(qn('w:rPr')) + if rpr is not None: + if rpr.find(qn('w:b')) is not None or rpr.find(qn('w:caps')) is not None or rpr.find(qn('w:sz')) is not None: + end_idx = i + break + if end_idx is not None: + break + # Mark elements for removal + to_remove = [] + for i in range(start_idx + 1, end_idx if end_idx is not None else len(elements)): + to_remove.append(elements[i]) + for el in to_remove: + body.remove(el) + doc.save(doc_path) + # Reload and find start anchor for insertion + doc = Document(doc_path) + paras = doc.paragraphs + anchor_idx = None + for i, para in enumerate(paras): + if para.text.strip() == start_anchor_text.strip(): + anchor_idx = i + break + if anchor_idx is None: + return f"Start anchor '{start_anchor_text}' not found after deletion (unexpected)." + anchor_para = paras[anchor_idx] + style_to_use = new_paragraph_style or "Normal" + for text in new_paragraphs: + new_para = doc.add_paragraph(text, style=style_to_use) + anchor_para._element.addnext(new_para._element) + anchor_para = new_para + doc.save(doc_path) + return f"Replaced content between '{start_anchor_text}' and '{end_anchor_text or 'next logical header'}' with {len(new_paragraphs)} paragraph(s), style: {style_to_use}, removed {len(to_remove)} elements." diff --git a/backend/office_word_mcp/word_document_server/utils/extended_document_utils.py b/backend/office_word_mcp/word_document_server/utils/extended_document_utils.py new file mode 100644 index 0000000..007d5ce --- /dev/null +++ b/backend/office_word_mcp/word_document_server/utils/extended_document_utils.py @@ -0,0 +1,165 @@ +""" +Extended document utilities for Word Document Server. +""" +from typing import Dict, List, Any, Tuple +from docx import Document + + +def get_paragraph_text(doc_path: str, paragraph_index: int) -> Dict[str, Any]: + """ + Get text from a specific paragraph in a Word document. 
+ + Args: + doc_path: Path to the Word document + paragraph_index: Index of the paragraph to extract (0-based) + + Returns: + Dictionary with paragraph text and metadata + """ + import os + if not os.path.exists(doc_path): + return {"error": f"Document {doc_path} does not exist"} + + try: + doc = Document(doc_path) + + # Check if paragraph index is valid + if paragraph_index < 0 or paragraph_index >= len(doc.paragraphs): + return {"error": f"Invalid paragraph index: {paragraph_index}. Document has {len(doc.paragraphs)} paragraphs."} + + paragraph = doc.paragraphs[paragraph_index] + + return { + "index": paragraph_index, + "text": paragraph.text, + "style": paragraph.style.name if paragraph.style else "Normal", + "is_heading": paragraph.style.name.startswith("Heading") if paragraph.style else False + } + except Exception as e: + return {"error": f"Failed to get paragraph text: {str(e)}"} + + +def find_text(doc_path: str, text_to_find: str, match_case: bool = True, whole_word: bool = False) -> Dict[str, Any]: + """ + Find all occurrences of specific text in a Word document. + + Args: + doc_path: Path to the Word document + text_to_find: Text to search for + match_case: Whether to perform case-sensitive search + whole_word: Whether to match whole words only + + Returns: + Dictionary with search results + """ + import os + if not os.path.exists(doc_path): + return {"error": f"Document {doc_path} does not exist"} + + if not text_to_find: + return {"error": "Search text cannot be empty"} + + try: + doc = Document(doc_path) + results = { + "query": text_to_find, + "match_case": match_case, + "whole_word": whole_word, + "occurrences": [], + "total_count": 0 + } + + # Search in paragraphs + for i, para in enumerate(doc.paragraphs): + # Prepare text for comparison + para_text = para.text + search_text = text_to_find + + if not match_case: + para_text = para_text.lower() + search_text = search_text.lower() + + # Find all occurrences (simple implementation) + start_pos = 0 + while True: + if whole_word: + # For whole word search, we need to check word boundaries + words = para_text.split() + found = False + for word_idx, word in enumerate(words): + if (word == search_text or + (not match_case and word.lower() == search_text.lower())): + results["occurrences"].append({ + "paragraph_index": i, + "position": word_idx, + "context": para.text[:100] + ("..." if len(para.text) > 100 else "") + }) + results["total_count"] += 1 + found = True + + # Break after checking all words + break + else: + # For substring search + pos = para_text.find(search_text, start_pos) + if pos == -1: + break + + results["occurrences"].append({ + "paragraph_index": i, + "position": pos, + "context": para.text[:100] + ("..." 
if len(para.text) > 100 else "") + }) + results["total_count"] += 1 + start_pos = pos + len(search_text) + + # Search in tables + for table_idx, table in enumerate(doc.tables): + for row_idx, row in enumerate(table.rows): + for col_idx, cell in enumerate(row.cells): + for para_idx, para in enumerate(cell.paragraphs): + # Prepare text for comparison + para_text = para.text + search_text = text_to_find + + if not match_case: + para_text = para_text.lower() + search_text = search_text.lower() + + # Find all occurrences (simple implementation) + start_pos = 0 + while True: + if whole_word: + # For whole word search, check word boundaries + words = para_text.split() + found = False + for word_idx, word in enumerate(words): + if (word == search_text or + (not match_case and word.lower() == search_text.lower())): + results["occurrences"].append({ + "location": f"Table {table_idx}, Row {row_idx}, Column {col_idx}", + "position": word_idx, + "context": para.text[:100] + ("..." if len(para.text) > 100 else "") + }) + results["total_count"] += 1 + found = True + + # Break after checking all words + break + else: + # For substring search + pos = para_text.find(search_text, start_pos) + if pos == -1: + break + + results["occurrences"].append({ + "location": f"Table {table_idx}, Row {row_idx}, Column {col_idx}", + "position": pos, + "context": para.text[:100] + ("..." if len(para.text) > 100 else "") + }) + results["total_count"] += 1 + start_pos = pos + len(search_text) + + return results + except Exception as e: + return {"error": f"Failed to search for text: {str(e)}"} diff --git a/backend/office_word_mcp/word_document_server/utils/file_utils.py b/backend/office_word_mcp/word_document_server/utils/file_utils.py new file mode 100644 index 0000000..7974707 --- /dev/null +++ b/backend/office_word_mcp/word_document_server/utils/file_utils.py @@ -0,0 +1,85 @@ +""" +File utility functions for Word Document Server. +""" +import os +from typing import Tuple, Optional +import shutil + + +def check_file_writeable(filepath: str) -> Tuple[bool, str]: + """ + Check if a file can be written to. + + Args: + filepath: Path to the file + + Returns: + Tuple of (is_writeable, error_message) + """ + # If file doesn't exist, check if directory is writeable + if not os.path.exists(filepath): + directory = os.path.dirname(filepath) + # If no directory is specified (empty string), use current directory + if directory == '': + directory = '.' + if not os.path.exists(directory): + return False, f"Directory {directory} does not exist" + if not os.access(directory, os.W_OK): + return False, f"Directory {directory} is not writeable" + return True, "" + + # If file exists, check if it's writeable + if not os.access(filepath, os.W_OK): + return False, f"File {filepath} is not writeable (permission denied)" + + # Try to open the file for writing to see if it's locked + try: + with open(filepath, 'a'): + pass + return True, "" + except IOError as e: + return False, f"File {filepath} is not writeable: {str(e)}" + except Exception as e: + return False, f"Unknown error checking file permissions: {str(e)}" + + +def create_document_copy(source_path: str, dest_path: Optional[str] = None) -> Tuple[bool, str, Optional[str]]: + """ + Create a copy of a document. + + Args: + source_path: Path to the source document + dest_path: Optional path for the new document. 
If not provided, will use source_path + '_copy.docx' + + Returns: + Tuple of (success, message, new_filepath) + """ + if not os.path.exists(source_path): + return False, f"Source document {source_path} does not exist", None + + if not dest_path: + # Generate a new filename if not provided + base, ext = os.path.splitext(source_path) + dest_path = f"{base}_copy{ext}" + + try: + # Simple file copy + shutil.copy2(source_path, dest_path) + return True, f"Document copied to {dest_path}", dest_path + except Exception as e: + return False, f"Failed to copy document: {str(e)}", None + + +def ensure_docx_extension(filename: str) -> str: + """ + Ensure filename has .docx extension. + + Args: + filename: The filename to check + + Returns: + Filename with .docx extension + """ + if not filename.endswith('.docx'): + return filename + '.docx' + return filename diff --git a/backend/office_word_mcp/word_mcp_server.py b/backend/office_word_mcp/word_mcp_server.py new file mode 100644 index 0000000..cd92472 --- /dev/null +++ b/backend/office_word_mcp/word_mcp_server.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +""" +Run script for the Word Document Server. + +This script provides a simple way to start the Word Document Server. +""" + +from word_document_server.main import run_server + +if __name__ == "__main__": + run_server() diff --git a/backend/pyproject.toml b/backend/pyproject.toml index b5de796..179d65f 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -36,4 +36,13 @@ dependencies = [ "ddgs>=9.6.1", "langchain-anthropic>=1.0.0", "langchain-google-genai>=3.0.0", + "office-word-mcp-server", ] + +[tool.uv.workspace] +members = [ + "office_word_mcp", +] + +[tool.uv.sources] +office-word-mcp-server = { workspace = true } diff --git a/backend/routers/__init__.py b/backend/routers/__init__.py index 44e74c2..19b126b 100644 --- a/backend/routers/__init__.py +++ b/backend/routers/__init__.py @@ -8,6 +8,7 @@ from .work_routes import work_router, workspace_router from .file_routes import file_router, template_router from .config_routes import model_config_router, context_router +from .mcp_routes import mcp_router # 所有路由列表,方便在主应用中注册 all_routers = [ @@ -18,7 +19,8 @@ file_router, template_router, model_config_router, - context_router + context_router, + mcp_router ] __all__ = [ @@ -30,5 +32,6 @@ 'template_router', 'model_config_router', 'context_router', + 'mcp_router', 'all_routers' ] diff --git a/backend/routers/chat_routes/chat.py b/backend/routers/chat_routes/chat.py index d80e2cc..b59506a 100644 --- a/backend/routers/chat_routes/chat.py +++ b/backend/routers/chat_routes/chat.py @@ -25,6 +25,18 @@ router = APIRouter(prefix="/api/chat", tags=["聊天系统"]) +# 全局变量用于存储app实例的引用(在WebSocket中使用) +_app_instance = None + +def set_app_instance(app): + """设置app实例引用,供WebSocket使用""" + global _app_instance + _app_instance = app + +def get_app_instance(): + """获取app实例引用""" + return _app_instance + # 简化的WebSocket连接管理器 class ConnectionManager: @@ -322,6 +334,7 @@ async def on_json_block(self, block: dict): workspace_dir = env_manager.get_workspace_dir() model_config = env_manager.config_manager.get_model_config("brain", user_id) codeagent_model_config = env_manager.config_manager.get_model_config("code", user_id) + writer_model_config = env_manager.config_manager.get_model_config("writing", user_id) # 创建流式回调和管理器 ws_callback = WebSocketStreamCallback( @@ -352,19 +365,47 @@ async def on_json_block(self, block: dict): logger.info("未提供codeagent配置,使用主LLM") codeagent_llm = llm_handler.get_llm_instance() - # 获取工作的模板ID + # 
获取writer的LLM实例(从"writing"配置加载) + writer_llm = None + if writer_model_config: + from ai_system.core_handlers.llm_providers import create_llm_from_model_config + try: + writer_llm = create_llm_from_model_config(writer_model_config) + logger.info(f"使用LangChain模型作为WriterAgent: {writer_llm}") + except Exception as e: + logger.error(f"创建WriterAgent专用LangChain模型失败: {e}") + writer_llm = None + else: + logger.info("未提供writer配置,WriterAgent将使用主LLM") + writer_llm = None + + # 获取工作的模板ID和输出模式 template_id = None + output_mode = "markdown" # 默认值 try: from services.data_services.crud import get_work work = get_work(db, work_id) - if work and hasattr(work, 'template_id') and work.template_id: - template_id = work.template_id - logger.info(f"工作 {work_id} 使用模板: {template_id}") + if work: + if hasattr(work, 'template_id') and work.template_id: + template_id = work.template_id + logger.info(f"工作 {work_id} 使用模板: {template_id}") + if hasattr(work, 'output_mode') and work.output_mode: + output_mode = work.output_mode + logger.info(f"工作 {work_id} 输出模式: {output_mode}") except Exception as e: - logger.warning(f"获取工作模板ID失败: {e}") - - # 创建MainAgent,传入workspace_dir、work_id、template_id和codeagent_llm - main_agent = MainAgent(llm_handler.get_llm_instance(), stream_manager, workspace_dir, work_id, template_id, codeagent_llm) + logger.warning(f"获取工作配置失败: {e}") + + # 创建MainAgent,传入workspace_dir、work_id、template_id、codeagent_llm、output_mode、writer_llm + main_agent = MainAgent( + llm_handler.get_llm_instance(), + stream_manager, + workspace_dir, + work_id, + template_id, + codeagent_llm, + output_mode=output_mode, + writer_llm=writer_llm + ) # 立即保存用户消息到持久化存储,确保历史记录顺序正确 await stream_manager.save_user_message(message_data['problem']) diff --git a/backend/routers/file_routes/template.py b/backend/routers/file_routes/template.py index 00f53c1..8eb321b 100644 --- a/backend/routers/file_routes/template.py +++ b/backend/routers/file_routes/template.py @@ -25,6 +25,7 @@ async def create_template( name=template.name, description=template.description, category=template.category, + output_format=template.output_format, file_path=template.file_path, is_public=template.is_public ) @@ -35,21 +36,23 @@ async def create_template( async def get_user_templates( skip: int = 0, limit: int = 100, + output_format: str = None, current_user: int = Depends(auth.get_current_user), db: Session = Depends(get_db) ): """获取当前用户的模板""" - return crud.get_user_templates(db, current_user, skip, limit) + return crud.get_user_templates(db, current_user, skip, limit, output_format) @router.get("/public", response_model=List[schemas.PaperTemplateResponse]) @route_guard async def get_public_templates( skip: int = 0, limit: int = 100, + output_format: str = None, db: Session = Depends(get_db) ): """获取公开模板""" - return crud.get_public_templates(db, skip, limit) + return crud.get_public_templates(db, skip, limit, output_format) @router.get("/{template_id}", response_model=schemas.PaperTemplateResponse) @route_guard diff --git a/backend/routers/mcp_routes/__init__.py b/backend/routers/mcp_routes/__init__.py new file mode 100644 index 0000000..b6c2bb3 --- /dev/null +++ b/backend/routers/mcp_routes/__init__.py @@ -0,0 +1,7 @@ +""" +MCP相关路由模块 +""" + +from .mcp_status import router as mcp_router + +__all__ = ['mcp_router'] diff --git a/backend/routers/mcp_routes/mcp_status.py b/backend/routers/mcp_routes/mcp_status.py new file mode 100644 index 0000000..166b742 --- /dev/null +++ b/backend/routers/mcp_routes/mcp_status.py @@ -0,0 +1,44 @@ +""" +MCP状态管理API路由 +提供MCP服务器状态查询功能 +""" 
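+# 说明:MCP 服务器进程已移除,Word 工具改为直接集成;
+# 本路由仅保留 GET /api/mcp/status 端点,供前端查询 Word 工具的可用状态。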
+ +from fastapi import APIRouter, Request +from typing import Dict, Any +import logging + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/mcp", tags=["MCP管理"]) + + +@router.get("/status") +async def get_mcp_status(request: Request) -> Dict[str, Any]: + """ + 获取Word工具状态(MCP已移除,现使用直接集成) + + 返回Word工具的可用性状态 + + Returns: + Dict包含: + - available: Word工具是否可用 + - status: 状态描述 + - message: 状态信息 + """ + try: + # MCP已移除,Word工具现在直接集成 + return { + "available": True, + "status": "direct_integration", + "message": "Word工具已直接集成,无需MCP", + "integration_type": "direct" + } + + except Exception as e: + logger.error(f"获取Word工具状态时出错: {e}", exc_info=True) + return { + "available": False, + "status": "error", + "message": "获取Word工具状态时发生错误", + "error": str(e) + } diff --git a/backend/schemas/schemas.py b/backend/schemas/schemas.py index 77586ec..defb65e 100644 --- a/backend/schemas/schemas.py +++ b/backend/schemas/schemas.py @@ -65,6 +65,7 @@ class PaperTemplateBase(BaseModel): name: str description: Optional[str] = None category: Optional[str] = None + output_format: str = "markdown" # 输出格式:md, word, latex file_path: str # 模板文件路径 is_public: bool = False @@ -76,6 +77,7 @@ class PaperTemplateCreateWithContent(BaseModel): name: str description: Optional[str] = None category: Optional[str] = None + output_format: str = "markdown" # 输出格式:md, word, latex file_path: str # 模板文件路径 is_public: bool = False content: str = "" # 模板文件内容 @@ -84,6 +86,7 @@ class PaperTemplateUpdate(BaseModel): name: Optional[str] = None description: Optional[str] = None category: Optional[str] = None + output_format: Optional[str] = None # 输出格式:md, word, latex file_path: Optional[str] = None # 允许更新文件路径 is_public: Optional[bool] = None @@ -104,7 +107,7 @@ class WorkBase(BaseModel): template_id: Optional[int] = None # 关联的论文模板ID class WorkCreate(WorkBase): - pass + output_mode: str = "markdown" # 输出模式:markdown, word, latex class WorkUpdate(BaseModel): title: Optional[str] = None @@ -113,6 +116,7 @@ class WorkUpdate(BaseModel): progress: Optional[int] = None tags: Optional[str] = None template_id: Optional[int] = None # 关联的论文模板ID + output_mode: Optional[str] = None # 输出模式:markdown, word, latex class WorkResponse(WorkBase): id: int @@ -120,6 +124,7 @@ class WorkResponse(WorkBase): status: str progress: int template_id: Optional[int] = None # 关联的论文模板ID + output_mode: str = "markdown" # 输出模式:markdown, word, latex created_at: datetime updated_at: datetime created_by: int diff --git a/backend/services/data_services/crud.py b/backend/services/data_services/crud.py index 570b07c..9b9833e 100644 --- a/backend/services/data_services/crud.py +++ b/backend/services/data_services/crud.py @@ -253,17 +253,19 @@ def get_paper_template(db: Session, template_id: int): """根据ID获取论文模板""" return db.query(models.PaperTemplate).filter(models.PaperTemplate.id == template_id).first() -def get_user_templates(db: Session, user_id: int, skip: int = 0, limit: int = 100): +def get_user_templates(db: Session, user_id: int, skip: int = 0, limit: int = 100, output_format: str = None): """获取指定用户的模板""" - return db.query(models.PaperTemplate).filter( - models.PaperTemplate.created_by == user_id - ).offset(skip).limit(limit).all() + query = db.query(models.PaperTemplate).filter(models.PaperTemplate.created_by == user_id) + if output_format: + query = query.filter(models.PaperTemplate.output_format == output_format) + return query.offset(skip).limit(limit).all() -def get_public_templates(db: Session, skip: int = 0, limit: int = 100): +def get_public_templates(db: 
Session, skip: int = 0, limit: int = 100, output_format: str = None): """获取公开模板""" - return db.query(models.PaperTemplate).filter( - models.PaperTemplate.is_public == True - ).offset(skip).limit(limit).all() + query = db.query(models.PaperTemplate).filter(models.PaperTemplate.is_public == True) + if output_format: + query = query.filter(models.PaperTemplate.output_format == output_format) + return query.offset(skip).limit(limit).all() def update_paper_template(db: Session, template_id: int, template_update: schemas.PaperTemplateUpdate, user_id: int): """更新论文模板""" @@ -394,6 +396,7 @@ def create_work(db: Session, work: schemas.WorkCreate, user_id: int): description=work.description, tags=work.tags, template_id=work.template_id, # 添加模板ID + output_mode=work.output_mode, # 添加输出模式 created_by=user_id ) @@ -404,7 +407,12 @@ def create_work(db: Session, work: schemas.WorkCreate, user_id: int): # 创建工作空间目录结构和初始文件 from ..file_services.workspace_structure import WorkspaceStructureManager base_path = get_workspace_path(work_id) - WorkspaceStructureManager.create_workspace_structure(base_path, work_id, template_id=db_work.template_id) + WorkspaceStructureManager.create_workspace_structure( + base_path, + work_id, + template_id=db_work.template_id, + output_mode=db_work.output_mode # 传递输出模式 + ) return db_work except Exception as e: diff --git a/backend/services/file_services/workspace_files.py b/backend/services/file_services/workspace_files.py index 7d344b6..259d731 100644 --- a/backend/services/file_services/workspace_files.py +++ b/backend/services/file_services/workspace_files.py @@ -112,7 +112,7 @@ def list_files_by_category(self, work_id: str) -> Dict[str, List[Dict[str, Any]] files.sort(key=lambda x: x["name"].lower()) result[category_name] = files - # 特殊处理papers分类:扫描根目录的paper.md文件 + # 特殊处理papers分类:扫描根目录的paper.md和paper.docx文件 papers_files = [] paper_md = workspace_path / 'paper.md' if paper_md.exists(): @@ -126,6 +126,20 @@ def list_files_by_category(self, work_id: str) -> Dict[str, List[Dict[str, Any]] "category": "papers" } papers_files.append(file_info) + + # 添加paper.docx文件(如果存在) + paper_docx = workspace_path / 'paper.docx' + if paper_docx.exists(): + file_info = { + "name": paper_docx.name, + "type": "file", + "size": paper_docx.stat().st_size, + "modified": paper_docx.stat().st_mtime, + "path": "paper.docx", + "category_path": "paper.docx", + "category": "papers" + } + papers_files.append(file_info) result["papers"] = papers_files @@ -252,7 +266,7 @@ def detect_file_type(self, file_path: str) -> str: '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.rar', '.7z', '.tar', '.gz', '.bz2', '.xz', '.exe', '.msi', '.dmg', '.pkg', '.deb', '.rpm', '.apk', - '.mp3', '.mp4', '.avi', '.mov', 'wmv', '.flv', '.mkv', + '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv', '.ttf', '.otf', '.woff', '.woff2', '.eot', '.psd', '.ai', '.eps', '.sketch', '.fig' } diff --git a/backend/services/file_services/workspace_structure.py b/backend/services/file_services/workspace_structure.py index 48f7578..ea41134 100644 --- a/backend/services/file_services/workspace_structure.py +++ b/backend/services/file_services/workspace_structure.py @@ -23,13 +23,14 @@ class WorkspaceStructureManager: ] @classmethod - def create_workspace_structure(cls, workspace_path: Path, work_id: str, template_id: Optional[int] = None) -> None: + def create_workspace_structure(cls, workspace_path: Path, work_id: str, template_id: Optional[int] = None, output_mode: str = "markdown") -> None: """创建统一的工作空间目录结构和初始文件 Args: 
workspace_path: 工作空间路径 work_id: 工作ID template_id: 可选的模板ID,如果提供则使用模板内容初始化 paper.md + output_mode: 输出模式,可选值:markdown, word, latex """ try: # 创建目录结构 @@ -38,21 +39,22 @@ def create_workspace_structure(cls, workspace_path: Path, work_id: str, template dir_path.mkdir(parents=True, exist_ok=True) # 创建初始文件 - cls._create_workspace_files(workspace_path, work_id, template_id) + cls._create_workspace_files(workspace_path, work_id, template_id, output_mode) - logger.info(f"工作空间目录结构和初始文件创建完成: {workspace_path}") + logger.info(f"工作空间目录结构和初始文件创建完成: {workspace_path}, 输出模式: {output_mode}") except Exception as e: logger.error(f"创建工作空间目录和文件失败: {e}") raise Exception(f"创建工作空间目录和文件失败: {e}") @classmethod - def _create_workspace_files(cls, workspace_path: Path, work_id: str, template_id: Optional[int] = None) -> None: + def _create_workspace_files(cls, workspace_path: Path, work_id: str, template_id: Optional[int] = None, output_mode: str = "markdown") -> None: """创建工作空间初始文件 Args: workspace_path: 工作空间路径 work_id: 工作ID template_id: 可选的模板ID + output_mode: 输出模式,可选值:markdown, word, latex """ # 创建初始元数据文件 metadata = { @@ -83,8 +85,56 @@ def _create_workspace_files(cls, workspace_path: Path, work_id: str, template_id with open(chat_file, 'w', encoding='utf-8') as f: json.dump(chat_history, f, ensure_ascii=False, indent=2) - # 创建 paper.md 文件 - cls._create_paper_md(workspace_path, template_id) + # 根据输出模式创建相应的初始文件 + if output_mode == "markdown": + # Markdown 模式:创建 paper.md 文件 + cls._create_paper_md(workspace_path, template_id) + logger.info(f"Markdown 模式:已创建 paper.md") + elif output_mode == "word": + # Word 模式:创建空的 paper.docx 文件 + cls._create_paper_docx(workspace_path) + logger.info(f"Word 模式:已创建 paper.docx") + elif output_mode == "latex": + # LaTeX 模式:暂时回退到 Markdown + cls._create_paper_md(workspace_path, template_id) + logger.info(f"LaTeX 模式暂未实现,回退到 Markdown 模式:已创建 paper.md") + else: + # 未知模式:默认创建 paper.md + logger.warning(f"未知的输出模式 '{output_mode}',默认创建 paper.md") + cls._create_paper_md(workspace_path, template_id) + + @classmethod + def _create_paper_docx(cls, workspace_path: Path) -> None: + """创建初始的 paper.docx 文件 + + Args: + workspace_path: 工作空间路径 + + Note: + 创建一个空的 Word 文档,供 AI 后续添加内容 + """ + paper_docx_path = workspace_path / "paper.docx" + + # 如果文件已存在,不覆盖 + if paper_docx_path.exists(): + logger.info(f"paper.docx 已存在,跳过创建: {paper_docx_path}") + return + + try: + from docx import Document + + # 创建空文档 + doc = Document() + + # 保存文档 + doc.save(str(paper_docx_path)) + + logger.info(f"成功创建空的 paper.docx: {paper_docx_path}") + + except Exception as e: + logger.error(f"创建 paper.docx 失败: {e}") + # Word 文档创建失败不应该阻止工作空间创建 + # 只记录错误,让 AI 后续通过工具创建 @classmethod def _create_paper_md(cls, workspace_path: Path, template_id: Optional[int] = None) -> None: diff --git a/backend/uv.lock b/backend/uv.lock index 286dfad..99b8b79 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -2,6 +2,12 @@ version = 1 revision = 3 requires-python = ">=3.14" +[manifest] +members = [ + "backend", + "office-word-mcp-server", +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -150,6 +156,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, ] +[[package]] +name = "appscript" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { 
url = "https://files.pythonhosted.org/packages/ca/52/2fa70edfd98f0058219ecc2e365a3ba7aabd42db14ff9d7f44bbdcc5400d/appscript-1.4.0.tar.gz", hash = "sha256:b2c6fc770bf822ea45529c7084bc0ee340e67ab260016b01d28e0449ec8723be", size = 295279, upload-time = "2025-10-08T07:56:39.126Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/15/748adf302d8f1f8f975bb26c1b918d84f1d39bb6c4730e0b91f551297984/appscript-1.4.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:1a5747425d2a5732e2854cab0d0dc893ef077cabd7d57f7ec4caea2ac313e19f", size = 99700, upload-time = "2025-10-08T07:56:37.313Z" }, + { url = "https://files.pythonhosted.org/packages/dd/e3/03dc0f97eab839f72061342d69bd34424e89876ce4026509aab3d74d4f23/appscript-1.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5efce3302c00674b769b79938cc5f66f7791ef45c6419e850a5f1c8f9fcefcc1", size = 85610, upload-time = "2025-10-08T07:56:38.103Z" }, +] + [[package]] name = "asyncpg" version = "0.30.0" @@ -165,6 +184,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, ] +[[package]] +name = "authlib" +version = "1.6.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/3f/1d3bbd0bf23bdd99276d4def22f29c27a914067b4cf66f753ff9b8bbd0f3/authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b", size = 164553, upload-time = "2025-10-02T13:36:09.489Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608, upload-time = "2025-10-02T13:36:07.637Z" }, +] + [[package]] name = "backend" version = "0.1.0" @@ -184,6 +215,7 @@ dependencies = [ { name = "markdown" }, { name = "matplotlib" }, { name = "numpy" }, + { name = "office-word-mcp-server" }, { name = "openpyxl" }, { name = "pandas" }, { name = "pdfplumber" }, @@ -219,6 +251,7 @@ requires-dist = [ { name = "markdown", specifier = ">=3.6.0" }, { name = "matplotlib", specifier = ">=3.10.5" }, { name = "numpy", specifier = ">=2.2.6" }, + { name = "office-word-mcp-server", editable = "office_word_mcp" }, { name = "openpyxl", specifier = ">=3.0.0" }, { name = "pandas", specifier = ">=2.3.1" }, { name = "pdfplumber", specifier = ">=0.7.0" }, @@ -289,6 +322,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, ] +[[package]] +name = "beartype" +version = "0.22.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/49/e28a77f8a3868b1c9ff6a030678e84de24c4783bae4c12cec9443cf8fb54/beartype-0.22.7.tar.gz", hash = "sha256:c7269855b71e32b7c9f0fc662baade752eb525107266e053338c2f6e8873826b", size = 1599627, upload-time = "2025-11-29T06:49:56.751Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f6/0c/a764253610513295b7f57904b91fae1d99c7afd1b16b6eaae06fdfb71fb5/beartype-0.22.7-py3-none-any.whl", hash = "sha256:e13430ac07c61fa4bc54d375970438aeb9aa47a482c529a6f438ce52e18e6f50", size = 1330771, upload-time = "2025-11-29T06:49:54.545Z" }, +] + [[package]] name = "brotli" version = "1.1.0" @@ -507,6 +549,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "cyclopts" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "docstring-parser" }, + { name = "rich" }, + { name = "rich-rst" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/0f/fe026df2ab8301e30a2b0bd425ff1462ad858fd4f991c1ac0389c2059c24/cyclopts-4.3.0.tar.gz", hash = "sha256:e95179cd0a959ce250ecfb2f0262a5996a92c1f9467bccad2f3d829e6833cef5", size = 151411, upload-time = "2025-11-25T02:59:33.572Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/e8/77a231ae531cf38765b75ddf27dae28bb5f70b41d8bb4f15ce1650e93f57/cyclopts-4.3.0-py3-none-any.whl", hash = "sha256:91a30b69faf128ada7cfeaefd7d9649dc222e8b2a8697f1fc99e4ee7b7ca44f3", size = 187184, upload-time = "2025-11-25T02:59:32.21Z" }, +] + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -535,6 +592,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/58/4b35f13d21a44681e71ee8b1bca5755db0f84017cb29593eb0375aaa01e0/ddgs-9.6.1-py3-none-any.whl", hash = "sha256:e7d7e0c4dbae3f287627b9f6e411278256d7859d017bbad45b8229c230bf5270", size = 41577, upload-time = "2025-10-12T18:36:32.505Z" }, ] +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" }, +] + [[package]] name = "distro" version = "1.9.0" @@ -562,6 +628,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, ] +[[package]] +name = "docutils" +version = "0.22.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/02/111134bfeb6e6c7ac4c74594e39a59f6c0195dc4846afbeac3cba60f1927/docutils-0.22.3.tar.gz", hash = "sha256:21486ae730e4ca9f622677b1412b879af1791efcfba517e4c6f60be543fc8cdd", size = 2290153, upload-time = "2025-11-06T02:35:55.655Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/a8/c6a4b901d17399c77cd81fb001ce8961e9f5e04d3daf27e8925cb012e163/docutils-0.22.3-py3-none-any.whl", hash = 
"sha256:bd772e4aca73aff037958d44f2be5229ded4c09927fcf8690c577b66234d6ceb", size = 633032, upload-time = "2025-11-06T02:35:52.391Z" }, +] + +[[package]] +name = "docx2pdf" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appscript", marker = "sys_platform == 'darwin'" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ab/5d/112531fff53cf60513e14fa1707755c874d47880ec4de7b2235302ad19a0/docx2pdf-0.1.8.tar.gz", hash = "sha256:6d2c20f9ad36eec75f4da017dc7a97622946954a6124ca0b11772875fa86fbed", size = 6483, upload-time = "2021-12-11T16:56:36.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/4f/1155781308281e67f80b829738a29e5354e03664c62311f753056afc873b/docx2pdf-0.1.8-py3-none-any.whl", hash = "sha256:00be1401fd486640314e993423a0a1cbdbc21142186f68549d962d505b2e8a12", size = 6741, upload-time = "2021-12-11T16:56:35.163Z" }, +] + [[package]] name = "ecdsa" version = "0.19.1" @@ -596,6 +685,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, ] +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + [[package]] name = "fastapi" version = "0.120.0" @@ -611,6 +709,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/60/7a639ceaba54aec4e1d5676498c568abc654b95762d456095b6cb529b1ca/fastapi-0.120.0-py3-none-any.whl", hash = "sha256:84009182e530c47648da2f07eb380b44b69889a4acfd9e9035ee4605c5cfc469", size = 108243, upload-time = "2025-10-23T20:56:33.281Z" }, ] +[[package]] +name = "fastmcp" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "authlib" }, + { name = "cyclopts" }, + { name = "exceptiongroup" }, + { name = "httpx" }, + { name = "jsonschema-path" }, + { name = "mcp" }, + { name = "openapi-pydantic" }, + { name = "platformdirs" }, + { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"] }, + { name = "pydantic", extra = ["email"] }, + { name = "pyperclip" }, + { name = "python-dotenv" }, + { name = "rich" }, + { name = "uvicorn" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d4/a3/c9eb28b5f0b979b0dd8aa9ba56e69298cdb2d72c15592165d042ccb20194/fastmcp-2.13.1.tar.gz", hash = "sha256:b9c664c51f1ff47c698225e7304267ae29a51913f681bd49e442b8682f9a5f90", size = 8170226, upload-time = "2025-11-15T19:02:17.693Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/4b/7e36db0a90044be181319ff025be7cc57089ddb6ba8f3712dea543b9cf97/fastmcp-2.13.1-py3-none-any.whl", hash = 
"sha256:7a78b19785c4ec04a758d920c312769a497e3f6ab4c80feed504df1ed7de9f3c", size = 376750, upload-time = "2025-11-15T19:02:15.748Z" }, +] + [[package]] name = "filelock" version = "3.20.0" @@ -983,6 +1107,48 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = "jaraco-context" +version = "6.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/ad/f3777b81bf0b6e7bc7514a1656d3e637b2e8e15fab2ce3235730b3e7a4e6/jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3", size = 13912, upload-time = "2024-08-20T03:39:27.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/db/0c52c4cf5e4bd9f5d7135ec7669a3a767af21b3a308e1ed3674881e52b62/jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4", size = 6825, upload-time = "2024-08-20T03:39:25.966Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/ed/1aa2d585304ec07262e1a83a9889880701079dde796ac7b1d1826f40c63d/jaraco_functools-4.3.0.tar.gz", hash = "sha256:cfd13ad0dd2c47a3600b439ef72d8615d482cedcff1632930d6f28924d92f294", size = 19755, upload-time = "2025-08-18T20:05:09.91Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/09/726f168acad366b11e420df31bf1c702a54d373a83f968d94141a8c3fde0/jaraco_functools-4.3.0-py3-none-any.whl", hash = "sha256:227ff8ed6f7b8f62c56deff101545fa7543cf2c8e7b82a7c2116e672f29c26e8", size = 10408, upload-time = "2025-08-18T20:05:08.69Z" }, +] + +[[package]] +name = "jeepney" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1049,6 +1215,65 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" }, ] +[[package]] +name = "jsonschema" +version = "4.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, +] + +[[package]] +name = "jsonschema-path" +version = "0.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pathable" }, + { name = "pyyaml" }, + { name = "referencing" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/45/41ebc679c2a4fced6a722f624c18d658dee42612b83ea24c1caf7c0eb3a8/jsonschema_path-0.3.4.tar.gz", hash = "sha256:8365356039f16cc65fddffafda5f58766e34bebab7d6d105616ab52bc4297001", size = 11159, upload-time = "2025-01-24T14:33:16.547Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/58/3485da8cb93d2f393bce453adeef16896751f14ba3e2024bc21dc9597646/jsonschema_path-0.3.4-py3-none-any.whl", hash = "sha256:f502191fdc2b22050f9a81c9237be9d27145b9001c55842bece5e94e382e52f8", size = 14810, upload-time = "2025-01-24T14:33:14.652Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jaraco-classes" }, + { name = "jaraco-context" }, + { name = "jaraco-functools" }, + { name = "jeepney", marker = "sys_platform == 'linux'" }, + { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, + { name = "secretstorage", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + [[package]] name = "kiwisolver" version = "1.4.9" @@ -1437,6 +1662,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/5f/e22e08da14bc1a0894184640d47819d2338b792732e20d292bf86e5ab785/matplotlib-3.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:cb783436e47fcf82064baca52ce748af71725d0352e1d31564cbe9c95df92b9c", size = 8172585, upload-time = "2025-10-09T00:27:47.185Z" }, ] +[[package]] +name = "mcp" +version = "1.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/a2/c5ec0ab38b35ade2ae49a90fada718fbc76811dc5aa1760414c6aaa6b08a/mcp-1.22.0.tar.gz", hash = "sha256:769b9ac90ed42134375b19e777a2858ca300f95f2e800982b3e2be62dfc0ba01", size = 471788, upload-time = "2025-11-20T20:11:28.095Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/bb/711099f9c6bb52770f56e56401cdfb10da5b67029f701e0df29362df4c8e/mcp-1.22.0-py3-none-any.whl", hash = "sha256:bed758e24df1ed6846989c909ba4e3df339a27b4f30f1b8b627862a4bade4e98", size = 175489, upload-time = "2025-11-20T20:11:26.542Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -1446,6 +1696,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "more-itertools" +version = "10.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, +] + +[[package]] +name = "msoffcrypto-tool" +version = "5.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "olefile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370", size = 41183, upload-time = "2024-08-08T15:50:28.462Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e", size = 48713, upload-time = "2024-08-08T15:50:27.093Z" }, +] + [[package]] name = "multidict" version = "6.7.0" @@ -1530,6 +1802,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/23/08c002201a8e7e1f9afba93b97deceb813252d9cfd0d3351caed123dcf97/numpy-2.3.4-cp314-cp314t-win_arm64.whl", hash = "sha256:8b5a9a39c45d852b62693d9b3f3e0fe052541f804296ff401a72a1b60edafb29", size = 10547532, upload-time = "2025-10-15T16:17:53.48Z" }, ] +[[package]] +name = "office-word-mcp-server" +version = "1.1.10" +source = { editable = "office_word_mcp" } +dependencies = [ + { name = "docx2pdf" }, + { name = "fastmcp" }, + { name = "msoffcrypto-tool" }, + { name = "pytest" }, + { name = "python-docx" }, +] + +[package.metadata] +requires-dist = [ + { name = "docx2pdf", specifier = ">=0.1.8" }, + { name = "fastmcp", specifier = ">=2.8.1" }, + { name = "msoffcrypto-tool", specifier = ">=5.4.2" }, + { name = "pytest", specifier = ">=8.4.2" }, + { name = "python-docx", specifier = ">=1.1.2" }, +] + +[[package]] +name = "olefile" +version = "0.47" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240, upload-time = "2023-12-01T16:22:53.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565, upload-time = "2023-12-01T16:22:51.518Z" }, +] + [[package]] name = "openai" version = "2.6.1" @@ -1549,6 +1851,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/0e/331df43df633e6105ff9cf45e0ce57762bd126a45ac16b25a43f6738d8a2/openai-2.6.1-py3-none-any.whl", hash = "sha256:904e4b5254a8416746a2f05649594fa41b19d799843cd134dac86167e094edef", size = 1005551, upload-time = "2025-10-24T13:29:50.973Z" }, ] +[[package]] +name = "openapi-pydantic" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -1640,6 +1954,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" }, ] +[[package]] +name = "pathable" +version = "0.4.4" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124, upload-time = "2025-01-10T18:43:13.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592, upload-time = "2025-01-10T18:43:11.88Z" }, +] + +[[package]] +name = "pathvalidate" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" }, +] + [[package]] name = "pdfminer-six" version = "20250506" @@ -1700,6 +2032,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, ] +[[package]] +name = "platformdirs" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -1824,6 +2165,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/53/cf/10c3e95827a3ca8af332dfc471befec86e15a14dc83cee893c49a4910dad/psycopg_binary-3.2.12-cp314-cp314-win_amd64.whl", hash = "sha256:48a8e29f3e38fcf8d393b8fe460d83e39c107ad7e5e61cd3858a7569e0554a39", size = 3005787, upload-time = "2025-10-26T00:36:06.783Z" }, ] +[[package]] +name = "py-key-value-aio" +version = "0.2.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype" }, + { name = "py-key-value-shared" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/35/65310a4818acec0f87a46e5565e341c5a96fc062a9a03495ad28828ff4d7/py_key_value_aio-0.2.8.tar.gz", hash = "sha256:c0cfbb0bd4e962a3fa1a9fa6db9ba9df812899bd9312fa6368aaea7b26008b36", size = 32853, upload-time = "2025-10-24T13:31:04.688Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/5a/e56747d87a97ad2aff0f3700d77f186f0704c90c2da03bfed9e113dae284/py_key_value_aio-0.2.8-py3-none-any.whl", hash = 
"sha256:561565547ce8162128fd2bd0b9d70ce04a5f4586da8500cce79a54dfac78c46a", size = 69200, upload-time = "2025-10-24T13:31:03.81Z" }, +] + +[package.optional-dependencies] +disk = [ + { name = "diskcache" }, + { name = "pathvalidate" }, +] +keyring = [ + { name = "keyring" }, +] +memory = [ + { name = "cachetools" }, +] + +[[package]] +name = "py-key-value-shared" +version = "0.2.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/79/05a1f9280cfa0709479319cbfd2b1c5beb23d5034624f548c83fb65b0b61/py_key_value_shared-0.2.8.tar.gz", hash = "sha256:703b4d3c61af124f0d528ba85995c3c8d78f8bd3d2b217377bd3278598070cc1", size = 8216, upload-time = "2025-10-24T13:31:03.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/7a/1726ceaa3343874f322dd83c9ec376ad81f533df8422b8b1e1233a59f8ce/py_key_value_shared-0.2.8-py3-none-any.whl", hash = "sha256:aff1bbfd46d065b2d67897d298642e80e5349eae588c6d11b48452b46b8d46ba", size = 14586, upload-time = "2025-10-24T13:31:02.838Z" }, +] + [[package]] name = "pyasn1" version = "0.6.1" @@ -1927,6 +2306,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pyparsing" version = "3.2.5" @@ -1965,6 +2358,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118, upload-time = "2024-05-09T18:33:15.489Z" }, ] +[[package]] +name = "pyperclip" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" }, +] + [[package]] name = "pytest" version = "8.4.2" @@ -2052,6 +2454,25 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -2078,6 +2499,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "referencing" +version = "0.36.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, +] + [[package]] name = "regex" version = "2025.10.23" @@ -2154,6 +2588,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] 
+[[package]] +name = "rich-rst" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = "sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, +] + +[[package]] +name = "rpds-py" +version = "0.29.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/33/23b3b3419b6a3e0f559c7c0d2ca8fc1b9448382b25245033788785921332/rpds_py-0.29.0.tar.gz", hash = "sha256:fe55fe686908f50154d1dc599232016e50c243b438c3b7432f24e2895b0e5359", size = 69359, upload-time = "2025-11-16T14:50:39.532Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/b1/0b1474e7899371d9540d3bbb2a499a3427ae1fc39c998563fe9035a1073b/rpds_py-0.29.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:394d27e4453d3b4d82bb85665dc1fcf4b0badc30fc84282defed71643b50e1a1", size = 363731, upload-time = "2025-11-16T14:49:26.683Z" }, + { url = "https://files.pythonhosted.org/packages/28/12/3b7cf2068d0a334ed1d7b385a9c3c8509f4c2bcba3d4648ea71369de0881/rpds_py-0.29.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:55d827b2ae95425d3be9bc9a5838b6c29d664924f98146557f7715e331d06df8", size = 354343, upload-time = "2025-11-16T14:49:28.24Z" }, + { url = "https://files.pythonhosted.org/packages/eb/73/5afcf8924bc02a749416eda64e17ac9c9b28f825f4737385295a0e99b0c1/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc31a07ed352e5462d3ee1b22e89285f4ce97d5266f6d1169da1142e78045626", size = 385406, upload-time = "2025-11-16T14:49:29.943Z" }, + { url = "https://files.pythonhosted.org/packages/c8/37/5db736730662508535221737a21563591b6f43c77f2e388951c42f143242/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c4695dd224212f6105db7ea62197144230b808d6b2bba52238906a2762f1d1e7", size = 396162, upload-time = "2025-11-16T14:49:31.833Z" }, + { url = "https://files.pythonhosted.org/packages/70/0d/491c1017d14f62ce7bac07c32768d209a50ec567d76d9f383b4cfad19b80/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcae1770b401167f8b9e1e3f566562e6966ffa9ce63639916248a9e25fa8a244", size = 517719, upload-time = "2025-11-16T14:49:33.804Z" }, + { url = "https://files.pythonhosted.org/packages/d7/25/b11132afcb17cd5d82db173f0c8dab270ffdfaba43e5ce7a591837ae9649/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90f30d15f45048448b8da21c41703b31c61119c06c216a1bf8c245812a0f0c17", size = 409498, upload-time = "2025-11-16T14:49:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/0f/7d/e6543cedfb2e6403a1845710a5ab0e0ccf8fc288e0b5af9a70bfe2c12053/rpds_py-0.29.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44a91e0ab77bdc0004b43261a4b8cd6d6b451e8d443754cfda830002b5745b32", size = 382743, upload-time = "2025-11-16T14:49:36.704Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/11/a4ebc9f654293ae9fefb83b2b6be7f3253e85ea42a5db2f77d50ad19aaeb/rpds_py-0.29.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:4aa195e5804d32c682e453b34474f411ca108e4291c6a0f824ebdc30a91c973c", size = 400317, upload-time = "2025-11-16T14:49:39.132Z" }, + { url = "https://files.pythonhosted.org/packages/52/18/97677a60a81c7f0e5f64e51fb3f8271c5c8fcabf3a2df18e97af53d7c2bf/rpds_py-0.29.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7971bdb7bf4ee0f7e6f67fa4c7fbc6019d9850cc977d126904392d363f6f8318", size = 416979, upload-time = "2025-11-16T14:49:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/f0/69/28ab391a9968f6c746b2a2db181eaa4d16afaa859fedc9c2f682d19f7e18/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8ae33ad9ce580c7a47452c3b3f7d8a9095ef6208e0a0c7e4e2384f9fc5bf8212", size = 567288, upload-time = "2025-11-16T14:49:42.24Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d3/0c7afdcdb830eee94f5611b64e71354ffe6ac8df82d00c2faf2bfffd1d4e/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c661132ab2fb4eeede2ef69670fd60da5235209874d001a98f1542f31f2a8a94", size = 593157, upload-time = "2025-11-16T14:49:43.782Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ac/a0fcbc2feed4241cf26d32268c195eb88ddd4bd862adfc9d4b25edfba535/rpds_py-0.29.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb78b3a0d31ac1bde132c67015a809948db751cb4e92cdb3f0b242e430b6ed0d", size = 554741, upload-time = "2025-11-16T14:49:45.557Z" }, + { url = "https://files.pythonhosted.org/packages/0f/f1/fcc24137c470df8588674a677f33719d5800ec053aaacd1de8a5d5d84d9e/rpds_py-0.29.0-cp314-cp314-win32.whl", hash = "sha256:f475f103488312e9bd4000bc890a95955a07b2d0b6e8884aef4be56132adbbf1", size = 215508, upload-time = "2025-11-16T14:49:47.562Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/1d169b2045512eac019918fc1021ea07c30e84a4343f9f344e3e0aa8c788/rpds_py-0.29.0-cp314-cp314-win_amd64.whl", hash = "sha256:b9cf2359a4fca87cfb6801fae83a76aedf66ee1254a7a151f1341632acf67f1b", size = 228125, upload-time = "2025-11-16T14:49:49.064Z" }, + { url = "https://files.pythonhosted.org/packages/be/36/0cec88aaba70ec4a6e381c444b0d916738497d27f0c30406e3d9fcbd3bc2/rpds_py-0.29.0-cp314-cp314-win_arm64.whl", hash = "sha256:9ba8028597e824854f0f1733d8b964e914ae3003b22a10c2c664cb6927e0feb9", size = 221992, upload-time = "2025-11-16T14:49:50.777Z" }, + { url = "https://files.pythonhosted.org/packages/b1/fa/a2e524631717c9c0eb5d90d30f648cfba6b731047821c994acacb618406c/rpds_py-0.29.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:e71136fd0612556b35c575dc2726ae04a1669e6a6c378f2240312cf5d1a2ab10", size = 366425, upload-time = "2025-11-16T14:49:52.691Z" }, + { url = "https://files.pythonhosted.org/packages/a2/a4/6d43ebe0746ff694a30233f63f454aed1677bd50ab7a59ff6b2bb5ac61f2/rpds_py-0.29.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:76fe96632d53f3bf0ea31ede2f53bbe3540cc2736d4aec3b3801b0458499ef3a", size = 355282, upload-time = "2025-11-16T14:49:54.292Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a7/52fd8270e0320b09eaf295766ae81dd175f65394687906709b3e75c71d06/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9459a33f077130dbb2c7c3cea72ee9932271fb3126404ba2a2661e4fe9eb7b79", size = 384968, upload-time = "2025-11-16T14:49:55.857Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/7d/e6bc526b7a14e1ef80579a52c1d4ad39260a058a51d66c6039035d14db9d/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9546cfdd5d45e562cc0444b6dddc191e625c62e866bf567a2c69487c7ad28a", size = 394714, upload-time = "2025-11-16T14:49:57.343Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3f/f0ade3954e7db95c791e7eaf978aa7e08a756d2046e8bdd04d08146ed188/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12597d11d97b8f7e376c88929a6e17acb980e234547c92992f9f7c058f1a7310", size = 520136, upload-time = "2025-11-16T14:49:59.162Z" }, + { url = "https://files.pythonhosted.org/packages/87/b3/07122ead1b97009715ab9d4082be6d9bd9546099b2b03fae37c3116f72be/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28de03cf48b8a9e6ec10318f2197b83946ed91e2891f651a109611be4106ac4b", size = 409250, upload-time = "2025-11-16T14:50:00.698Z" }, + { url = "https://files.pythonhosted.org/packages/c9/c6/dcbee61fd1dc892aedcb1b489ba661313101aa82ec84b1a015d4c63ebfda/rpds_py-0.29.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd7951c964069039acc9d67a8ff1f0a7f34845ae180ca542b17dc1456b1f1808", size = 384940, upload-time = "2025-11-16T14:50:02.312Z" }, + { url = "https://files.pythonhosted.org/packages/47/11/914ecb6f3574cf9bf8b38aced4063e0f787d6e1eb30b181a7efbc6c1da9a/rpds_py-0.29.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:c07d107b7316088f1ac0177a7661ca0c6670d443f6fe72e836069025e6266761", size = 399392, upload-time = "2025-11-16T14:50:03.829Z" }, + { url = "https://files.pythonhosted.org/packages/f5/fd/2f4bd9433f58f816434bb934313584caa47dbc6f03ce5484df8ac8980561/rpds_py-0.29.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de2345af363d25696969befc0c1688a6cb5e8b1d32b515ef84fc245c6cddba3", size = 416796, upload-time = "2025-11-16T14:50:05.558Z" }, + { url = "https://files.pythonhosted.org/packages/79/a5/449f0281af33efa29d5c71014399d74842342ae908d8cd38260320167692/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:00e56b12d2199ca96068057e1ae7f9998ab6e99cda82431afafd32f3ec98cca9", size = 566843, upload-time = "2025-11-16T14:50:07.243Z" }, + { url = "https://files.pythonhosted.org/packages/ab/32/0a6a1ccee2e37fcb1b7ba9afde762b77182dbb57937352a729c6cd3cf2bb/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3919a3bbecee589300ed25000b6944174e07cd20db70552159207b3f4bbb45b8", size = 593956, upload-time = "2025-11-16T14:50:09.029Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3d/eb820f95dce4306f07a495ede02fb61bef36ea201d9137d4fcd5ab94ec1e/rpds_py-0.29.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7fa2ccc312bbd91e43aa5e0869e46bc03278a3dddb8d58833150a18b0f0283a", size = 557288, upload-time = "2025-11-16T14:50:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/e9/f8/b8ff786f40470462a252918e0836e0db903c28e88e3eec66bc4a7856ee5d/rpds_py-0.29.0-cp314-cp314t-win32.whl", hash = "sha256:97c817863ffc397f1e6a6e9d2d89fe5408c0a9922dac0329672fb0f35c867ea5", size = 211382, upload-time = "2025-11-16T14:50:12.827Z" }, + { url = "https://files.pythonhosted.org/packages/c9/7f/1a65ae870bc9d0576aebb0c501ea5dccf1ae2178fe2821042150ebd2e707/rpds_py-0.29.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2023473f444752f0f82a58dfcbee040d0a1b3d1b3c2ec40e884bd25db6d117d2", size = 225919, upload-time = "2025-11-16T14:50:14.734Z" }, +] + [[package]] name = "rsa" 
version = "4.9.1" @@ -2211,6 +2695,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" }, ] +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "jeepney" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -2268,6 +2765,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/5e/6a29fa884d9fb7ddadf6b69490a9d45fded3b38541713010dad16b77d015/sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05", size = 1928718, upload-time = "2025-10-10T15:29:45.32Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/db/3c/fa6517610dc641262b77cc7bf994ecd17465812c1b0585fe33e11be758ab/sse_starlette-3.0.3.tar.gz", hash = "sha256:88cfb08747e16200ea990c8ca876b03910a23b547ab3bd764c0d8eb81019b971", size = 21943, upload-time = "2025-10-30T18:44:20.117Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/23/a0/984525d19ca5c8a6c33911a0c164b11490dd0f90ff7fd689f704f84e9a11/sse_starlette-3.0.3-py3-none-any.whl", hash = "sha256:af5bf5a6f3933df1d9c7f8539633dc8444ca6a97ab2e2a7cd3b6e431ac03a431", size = 11765, upload-time = "2025-10-30T18:44:18.834Z" }, +] + [[package]] name = "starlette" version = "0.48.0" diff --git a/frontend/src/api/template.ts b/frontend/src/api/template.ts index f1d8f92..843da1a 100644 --- a/frontend/src/api/template.ts +++ b/frontend/src/api/template.ts @@ -6,6 +6,7 @@ export interface PaperTemplate { name: string description?: string category?: string + output_format: string // 添加输出格式字段 file_path: string // 添加文件路径字段 created_at: string updated_at: string @@ -17,6 +18,7 @@ export interface PaperTemplateCreate { name: string description?: string category?: string + output_format: string // 添加输出格式字段 file_path: string // 添加文件路径字段 is_public: boolean } @@ -25,6 +27,7 @@ export interface PaperTemplateCreateWithContent { name: string description?: string category?: string + output_format: string // 添加输出格式字段 file_path: string is_public: boolean content: string // 添加文件内容字段 @@ -34,6 +37,7 @@ export interface PaperTemplateUpdate { name?: string description?: string category?: string + output_format?: string // 添加输出格式字段 file_path?: string // 允许更新文件路径 is_public?: boolean } @@ -48,8 +52,13 @@ class TemplateAPI { token: string, skip: number = 0, limit: number = 100, + outputFormat?: string, ): Promise { - return this.request(`/templates?skip=${skip}&limit=${limit}`, { + let url = 
`/templates?skip=${skip}&limit=${limit}` + if (outputFormat) { + url += `&output_format=${outputFormat}` + } + return this.request(url, { headers: { Authorization: `Bearer ${token}`, }, @@ -57,8 +66,16 @@ class TemplateAPI { } // 获取公开模板列表 - async getPublicTemplates(skip: number = 0, limit: number = 100): Promise { - return this.request(`/templates/public?skip=${skip}&limit=${limit}`) + async getPublicTemplates( + skip: number = 0, + limit: number = 100, + outputFormat?: string, + ): Promise { + let url = `/templates/public?skip=${skip}&limit=${limit}` + if (outputFormat) { + url += `&output_format=${outputFormat}` + } + return this.request(url) } // 获取指定模板 diff --git a/frontend/src/api/workspace.ts b/frontend/src/api/workspace.ts index 1125ff0..7ecfecd 100644 --- a/frontend/src/api/workspace.ts +++ b/frontend/src/api/workspace.ts @@ -5,6 +5,7 @@ export interface WorkCreate { description?: string tags?: string template_id?: number // 关联的论文模板ID + output_mode?: 'markdown' | 'word' | 'latex' // 输出模式 } export interface WorkUpdate { @@ -14,6 +15,7 @@ export interface WorkUpdate { progress?: number tags?: string template_id?: number // 关联的论文模板ID + output_mode?: 'markdown' | 'word' | 'latex' // 输出模式 } export interface Work { @@ -25,6 +27,7 @@ export interface Work { progress: number tags?: string template_id?: number // 关联的论文模板ID + output_mode?: 'markdown' | 'word' | 'latex' // 输出模式 created_at: string updated_at: string created_by: number diff --git a/frontend/src/components/BinaryFileViewer.vue b/frontend/src/components/BinaryFileViewer.vue index e2c4b4e..53f07d9 100644 --- a/frontend/src/components/BinaryFileViewer.vue +++ b/frontend/src/components/BinaryFileViewer.vue @@ -111,27 +111,35 @@ const downloadFile = async () => { background: #f5f7fa; border-radius: 8px; border: 1px solid #e0e6ed; + overflow: hidden; + box-sizing: border-box; } .file-info { display: flex; - align-items: center; + align-items: flex-start; justify-content: space-between; + gap: 16px; } .file-header { display: flex; - align-items: center; - gap: 16px; + align-items: flex-start; + gap: 12px; + flex: 1; + min-width: 0; + overflow: hidden; } .file-icon { color: #0052d9; flex-shrink: 0; + margin-top: 2px; } .file-details { flex: 1; + min-width: 0; } .file-name { @@ -155,6 +163,7 @@ const downloadFile = async () => { .file-actions { flex-shrink: 0; + align-self: flex-start; } .download-btn { diff --git a/frontend/src/components/JsonChatRenderer.vue b/frontend/src/components/JsonChatRenderer.vue index 9868fb5..998b2b8 100644 --- a/frontend/src/components/JsonChatRenderer.vue +++ b/frontend/src/components/JsonChatRenderer.vue @@ -22,8 +22,24 @@