We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
我之前用的是旧版本:要建立下级目录,就得想出精巧的正则表达式,这有点 overkill 了。我希望能用 tab 缩进来指示层级,不知道现在的版本是否仍然忽略行首的 tab。
#!/usr/bin/env python3
"""Add a tab-indented table of contents to a PDF as outline items (bookmarks).

Usage::

    pdfbookmark.py FILE.pdf TOC GAP

``TOC`` is a text file with one entry per line, written as
``<tabs><title><TAB><page>``; the number of leading tabs is the nesting
depth of the entry.  ``GAP`` is an integer added to every page number read
from ``TOC``.  The result is written next to the input file as
``<name>_new<ext>``.
"""
import os
import re
import sys
from collections import defaultdict  # kept: part of the original import block

from pypdf import PdfWriter, PdfReader


# One TOC line: leading tabs (depth), a title without tabs, a tab, a page number.
_TOC_LINE = re.compile(r'^(\t*)([^\t]+)\t(\d+)$')


class TocFormatError(ValueError, AssertionError):
    """Raised when a TOC line cannot be parsed or is out of order.

    It also derives from ``AssertionError`` because these problems used to be
    reported with ``assert`` statements; code that catches ``AssertionError``
    keeps working, while the checks are no longer stripped by ``python -O``.
    """


class Pdf:
    """A PDF loaded into a ``PdfWriter`` so that outline items can be added."""

    def __init__(self, path):
        """Copy the PDF at *path* into a new writer.

        Args:
            path: Path of the source PDF.  It is also used to derive the
                output path (see ``_new_path``).
        """
        self.path = path
        # Hand PdfReader the path instead of an ``open()`` result: given a
        # path it opens and closes the file itself, so no handle is leaked.
        reader = PdfReader(path, strict=False)
        self.writer = PdfWriter()
        self.writer.append(reader)
        # Drop the "/Outlines" entry from the writer's root object —
        # presumably so outlines copied from the source do not mix with the
        # ones added afterwards.
        self.writer._root_object.pop("/Outlines", None)

    @property
    def _new_path(self):
        """Output path: the source path with ``_new`` inserted before the extension."""
        name, ext = os.path.splitext(self.path)
        return name + '_new' + ext

    def add_bookmark(self, title, pagenum, parent=None):
        """Add one outline item and return what ``add_outline_item`` returns.

        Args:
            title: Text of the outline item.
            pagenum: Page argument forwarded unchanged to
                ``PdfWriter.add_outline_item``; callers in this file pass the
                1-based TOC page minus one.
            parent: Return value of an earlier ``add_bookmark`` call to nest
                under, or ``None`` for a top-level item.
        """
        return self.writer.add_outline_item(title, pagenum, parent=parent)

    def save_pdf(self):
        """Write the PDF to ``_new_path``, replacing an existing file, and return that path."""
        target = self._new_path
        if os.path.exists(target):
            os.remove(target)
        with open(target, 'wb') as out:
            self.writer.write(out)
        return target


def _add_bookmark(pdf, index_dict):
    """Add every entry of *index_dict* to *pdf* in ascending key order.

    Args:
        pdf: Object with an ``add_bookmark(title, pagenum, parent)`` method.
        index_dict: ``{index: {'title': str, 'pagenum': int, 'parent': index}}``
            where ``'parent'`` is optional and ``'pagenum'`` is 1-based.

    Keys are visited in sorted order, so they do not need to be contiguous;
    an entry whose ``'parent'`` has not been added yet becomes top-level.
    """
    if not index_dict:
        return None
    created = {}  # {entry index: object returned by pdf.add_bookmark}
    for index in sorted(index_dict):
        entry = index_dict[index]
        parent = created.get(entry.get('parent'))
        # TOC pages are 1-based; the page passed on is one less.
        created[index] = pdf.add_bookmark(entry['title'], entry['pagenum'] - 1, parent)
    return None


def add_bookmark(path, index_dict):
    """Load the PDF at *path*, add the entries of *index_dict* and save it.

    Returns:
        The path of the written file (``<name>_new<ext>``).
    """
    pdf = Pdf(path)
    _add_bookmark(pdf, index_dict)
    return pdf.save_pdf()


def toc_reader(path, gap):
    """Parse a tab-indented TOC file into the dict used by ``add_bookmark``.

    Each non-blank line must look like ``<tabs><title><TAB><page>``.  The
    number of leading tabs is the depth; an entry at depth ``d > 0`` becomes a
    child of the closest preceding entry at depth ``d - 1``.

    Args:
        path: Path of the TOC text file.
        gap: Integer (or integer string) added to every page number.

    Returns:
        ``{index: {'title': str, 'pagenum': int, 'parent': index}}`` keyed by
        the 0-based position of the entry among the non-blank lines;
        ``'parent'`` is present only for nested entries.

    Raises:
        TocFormatError: If a line is malformed, a page number (after adding
            *gap*) is below 1 or lower than the previous one, or the
            indentation grows by more than one level at once.
        ValueError: If *gap* is not an integer.
    """
    offset = int(gap)
    tocdict = {}
    # ancestors[d] is the index of the entry at depth d on the path leading to
    # the most recent entry; deeper levels are discarded whenever depth drops.
    ancestors = []
    last_page = 1  # pages are 1-based, so nothing may resolve below page 1
    with open(path, 'r') as toc:
        for lineno, raw in enumerate(toc, start=1):
            text = raw.rstrip('\r\n')
            if not text.strip():
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            found = _TOC_LINE.search(text)
            if found is None:
                raise TocFormatError(
                    f"line {lineno}: {text!r} line ill-formatted "
                    f"(expected '<tabs><title><TAB><page>')")
            indent, title, page = found.group(1, 2, 3)
            pagenum = int(page) + offset
            if pagenum < last_page:
                raise TocFormatError(
                    f"line {lineno}: {text!r} pagenum wrong "
                    f"(resolves to {pagenum}, expected at least {last_page})")
            depth = len(indent)
            if depth > len(ancestors):
                raise TocFormatError(
                    f"line {lineno}: {text!r} is indented {depth} level(s) but "
                    f"the deepest level available here is {len(ancestors)}")
            index = len(tocdict)
            entry = {'title': title, 'pagenum': pagenum}
            if depth:
                entry['parent'] = ancestors[depth - 1]
            # This entry replaces whatever was open at its depth and deeper.
            del ancestors[depth:]
            ancestors.append(index)
            tocdict[index] = entry
            last_page = pagenum
    return tocdict


def main(argv=None):
    """Command-line entry point: ``pdfbookmark.py FILE.pdf TOC GAP``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit(f"usage: {os.path.basename(sys.argv[0])} FILE.pdf TOC GAP")
    pdf_path, toc_path, gap = args
    index_dict = toc_reader(toc_path, gap)
    add_bookmark(pdf_path, index_dict)


if __name__ == '__main__':
    main()
使用 `pdfbookmark.py xxx.pdf toc 10` 来运行它:`toc` 是目录文件,每行的格式为"标题<TAB>页码",用行首的 tab 缩进来分级;`10` 是加到每个页码上的偏移量(gap)。
The text was updated successfully, but these errors were encountered:
新版本已经支持用空格分层了,不过这个脚本写得挺好的。
Sorry, something went wrong.
No branches or pull requests
我之前用的是旧版本:要建立下级目录,就得想出精巧的正则表达式,这有点 overkill 了。我希望能用 tab 缩进来指示层级,不知道现在的版本是否仍然忽略行首的 tab。
使用 `pdfbookmark.py xxx.pdf toc 10` 来运行它:`toc` 是目录文件,每行的格式为"标题<TAB>页码",用行首的 tab 缩进来分级;`10` 是加到每个页码上的偏移量(gap)。
The text was updated successfully, but these errors were encountered: