diff --git a/PyPDFForm/template.py b/PyPDFForm/template.py index d1240b1d..eb0a64cb 100644 --- a/PyPDFForm/template.py +++ b/PyPDFForm/template.py @@ -408,39 +408,41 @@ def get_paragraph_auto_wrap_length(widget_middleware: Text) -> int: return result -def update_widget_key( +def update_widget_keys( template: bytes, widgets: Dict[str, WIDGET_TYPES], - old_key: str, - new_key: str, - index: int, + old_keys: List[str], + new_keys: List[str], + indices: List[int], ) -> bytes: - """Updates the key of a widget.""" + """Updates a list of keys of widgets.""" # pylint: disable=R0801 pdf = PdfReader(stream_to_io(template)) out = PdfWriter() out.append(pdf) - tracker = -1 - - for page in out.pages: - for annot in page.get(Annots, []): # noqa - annot = cast(DictionaryObject, annot.get_object()) - key = get_widget_key(annot.get_object()) - - widget = widgets.get(key) - if widget is None: - continue + for i, old_key in enumerate(old_keys): + index = indices[i] + new_key = new_keys[i] + tracker = -1 + for page in out.pages: + for annot in page.get(Annots, []): # noqa + annot = cast(DictionaryObject, annot.get_object()) + key = get_widget_key(annot.get_object()) + + widget = widgets.get(key) + if widget is None: + continue - if old_key != key: - continue + if old_key != key: + continue - tracker += 1 - if not isinstance(widget, Radio) and tracker != index: - continue + tracker += 1 + if not isinstance(widget, Radio) and tracker != index: + continue - update_annotation_name(annot, new_key) + update_annotation_name(annot, new_key) with BytesIO() as f: out.write(f) diff --git a/PyPDFForm/wrapper.py b/PyPDFForm/wrapper.py index a1fcb72b..6cb10dba 100644 --- a/PyPDFForm/wrapper.py +++ b/PyPDFForm/wrapper.py @@ -18,7 +18,7 @@ from .middleware.text import Text from .template import (build_widgets, dropdown_to_text, set_character_x_paddings, update_text_field_attributes, - update_widget_key, widget_rect_watermarks) + update_widget_keys, widget_rect_watermarks) from .utils import 
(get_page_streams, merge_two_pdfs, preview_widget_to_draw, remove_all_widgets) from .watermark import create_watermarks_and_draw, merge_watermarks_with_pdf @@ -80,6 +80,7 @@ def __init__( super().__init__(template) self.widgets = build_widgets(self.stream) if self.stream else {} + self._keys_to_update = [] self.global_font = kwargs.get("global_font") self.global_font_size = kwargs.get("global_font_size") @@ -236,13 +237,41 @@ def create_widget( return self def update_widget_key( - self, old_key: str, new_key: str, index: int = 0 + self, old_key: str, new_key: str, index: int = 0, defer: bool = False ) -> PdfWrapper: """Updates the key of an existed widget on a PDF form.""" + if defer: + self._keys_to_update.append( + ( + old_key, + new_key, + index + ) + ) + return self + + self.__init__( + template=update_widget_keys( + self.read(), self.widgets, [old_key], [new_key], [index] + ), + global_font=self.global_font, + global_font_size=self.global_font_size, + global_font_color=self.global_font_color, + ) + + return self + + def commit_widget_key_updates(self) -> PdfWrapper: + """Commits all deferred widget key updates on a PDF form.""" + + old_keys = [each[0] for each in self._keys_to_update] + new_keys = [each[1] for each in self._keys_to_update] + indices = [each[2] for each in self._keys_to_update] + self.__init__( - template=update_widget_key( - self.read(), self.widgets, old_key, new_key, index + template=update_widget_keys( + self.read(), self.widgets, old_keys, new_keys, indices ), global_font=self.global_font, global_font_size=self.global_font_size, diff --git a/docs/prepare.md b/docs/prepare.md index 1f6aa8ef..5a1a358a 100644 --- a/docs/prepare.md +++ b/docs/prepare.md @@ -129,7 +129,7 @@ with open("output.pdf", "wb+") as output: output.write(new_form.read()) ``` -If there is more than one widget with the same key, the third `index` parameter can be used to pick which one +If there is more than one widget with the same key, the optional parameter `index` can 
be used to pick which one to update. Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/scenario/issues/733.pdf), the below snippet will change the key of the second row's text field with the key `Description[0]` to `Description[1]`: @@ -143,3 +143,27 @@ new_form = PdfWrapper("733.pdf").update_widget_key( with open("output.pdf", "wb+") as output: output.write(new_form.read()) ``` + +Finally, if there are multiple widgets that need to be bulk updated, the most performant way to do it is to set +the optional parameter `defer` to `True` when updating each key and at the very end call `commit_widget_key_updates()` +to commit all the updates. + +Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/scenario/issues/733.pdf), +the below snippet will change the key of each row's text field with the key `Description[0]` to `Description[i]` +where `i` is the index of each row: + +```python +from PyPDFForm import PdfWrapper + +new_form = PdfWrapper("733.pdf") + +for i in range(1, 10): + new_form.update_widget_key( + "Description[0]", f"Description[{i}]", index=1, defer=True + ) + +new_form.commit_widget_key_updates() + +with open("output.pdf", "wb+") as output: + output.write(new_form.read()) +``` diff --git a/tests/scenario/test_issues.py b/tests/scenario/test_issues.py index 19a61e81..43b44cf0 100644 --- a/tests/scenario/test_issues.py +++ b/tests/scenario/test_issues.py @@ -249,3 +249,26 @@ def test_update_key(issue_pdf_directory, request): expected = f.read() assert len(obj.preview) == len(expected) assert obj.preview == expected + + +def test_bulk_update_key(issue_pdf_directory, request): + obj = PdfWrapper(os.path.join(issue_pdf_directory, "733.pdf")) + + for i in range(1, 10): + obj.update_widget_key("Description[0]", f"Description[{i}]", 1, defer=True) + obj.update_widget_key("symbol[0]", f"symbol[{i}]", 1, defer=True) + obj.update_widget_key("tradedate[0]", f"tradedate[{i}]", 1, defer=True) + 
obj.update_widget_key("settlementdate[0]", f"settlementdate[{i}]", 1, defer=True) + obj.update_widget_key("quantity[0]", f"quantity[{i}]", 1, defer=True) + obj.update_widget_key("costperunit[0]", f"costperunit[{i}]", 1, defer=True) + obj.update_widget_key("costabasis[0]", f"costabasis[{i}]", 1, defer=True) + + obj.commit_widget_key_updates() + + expected_path = os.path.join(issue_pdf_directory, "733_expected.pdf") + request.config.results["expected_path"] = expected_path + request.config.results["stream"] = obj.preview + with open(expected_path, "rb+") as f: + expected = f.read() + assert len(obj.preview) == len(expected) + assert obj.preview == expected