Skip to content

Commit

Permalink
Merge pull request #761 from chinapandaman/PPF-760
Browse files Browse the repository at this point in the history
PPF-760: implement performance optimized bulk update widget keys
  • Loading branch information
chinapandaman authored Dec 6, 2024
2 parents 11bc3b0 + 905bc77 commit f16f548
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 26 deletions.
44 changes: 23 additions & 21 deletions PyPDFForm/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,39 +408,41 @@ def get_paragraph_auto_wrap_length(widget_middleware: Text) -> int:
return result


def update_widget_key(
def update_widget_keys(
template: bytes,
widgets: Dict[str, WIDGET_TYPES],
old_key: str,
new_key: str,
index: int,
old_keys: List[str],
new_keys: List[str],
indices: List[int],
) -> bytes:
"""Updates the key of a widget."""
"""Updates a list of keys of widgets."""
# pylint: disable=R0801

pdf = PdfReader(stream_to_io(template))
out = PdfWriter()
out.append(pdf)

tracker = -1

for page in out.pages:
for annot in page.get(Annots, []): # noqa
annot = cast(DictionaryObject, annot.get_object())
key = get_widget_key(annot.get_object())

widget = widgets.get(key)
if widget is None:
continue
for i, old_key in enumerate(old_keys):
index = indices[i]
new_key = new_keys[i]
tracker = -1
for page in out.pages:
for annot in page.get(Annots, []): # noqa
annot = cast(DictionaryObject, annot.get_object())
key = get_widget_key(annot.get_object())

widget = widgets.get(key)
if widget is None:
continue

if old_key != key:
continue
if old_key != key:
continue

tracker += 1
if not isinstance(widget, Radio) and tracker != index:
continue
tracker += 1
if not isinstance(widget, Radio) and tracker != index:
continue

update_annotation_name(annot, new_key)
update_annotation_name(annot, new_key)

with BytesIO() as f:
out.write(f)
Expand Down
37 changes: 33 additions & 4 deletions PyPDFForm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .middleware.text import Text
from .template import (build_widgets, dropdown_to_text,
set_character_x_paddings, update_text_field_attributes,
update_widget_key, widget_rect_watermarks)
update_widget_keys, widget_rect_watermarks)
from .utils import (get_page_streams, merge_two_pdfs, preview_widget_to_draw,
remove_all_widgets)
from .watermark import create_watermarks_and_draw, merge_watermarks_with_pdf
Expand Down Expand Up @@ -80,6 +80,7 @@ def __init__(

super().__init__(template)
self.widgets = build_widgets(self.stream) if self.stream else {}
self._keys_to_update = []

self.global_font = kwargs.get("global_font")
self.global_font_size = kwargs.get("global_font_size")
Expand Down Expand Up @@ -236,13 +237,41 @@ def create_widget(
return self

def update_widget_key(
    self, old_key: str, new_key: str, index: int = 0, defer: bool = False
) -> PdfWrapper:
    """Updates the key of an existing widget on a PDF form.

    Args:
        old_key: Current key of the widget to rename.
        new_key: Key the widget should have afterwards.
        index: Which widget to rename when several share ``old_key``.
        defer: When ``True``, only queue the rename; queued renames are
            applied later by ``commit_widget_key_updates()``.

    Returns:
        This wrapper, so calls can be chained.
    """

    if defer:
        self._keys_to_update.append((old_key, new_key, index))
        return self

    # Re-running __init__ resets self._keys_to_update, so keep hold of any
    # renames that were deferred earlier and put them back afterwards;
    # otherwise an immediate rename would silently discard them.
    pending = self._keys_to_update

    self.__init__(
        template=update_widget_keys(
            self.read(), self.widgets, [old_key], [new_key], [index]
        ),
        global_font=self.global_font,
        global_font_size=self.global_font_size,
        global_font_color=self.global_font_color,
    )
    self._keys_to_update = pending

    return self

def commit_widget_key_updates(self) -> PdfWrapper:
"""Commits all deferred widget key updates on a PDF form."""

old_keys = [each[0] for each in self._keys_to_update]
new_keys = [each[1] for each in self._keys_to_update]
indices = [each[2] for each in self._keys_to_update]

self.__init__(
template=update_widget_key(
self.read(), self.widgets, old_key, new_key, index
template=update_widget_keys(
self.read(), self.widgets, old_keys, new_keys, indices
),
global_font=self.global_font,
global_font_size=self.global_font_size,
Expand Down
26 changes: 25 additions & 1 deletion docs/prepare.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ with open("output.pdf", "wb+") as output:
output.write(new_form.read())
```

If there is more than one widget with the same key, the third `index` parameter can be used to pick which one
If there is more than one widget with the same key, the optional parameter `index` can be used to pick which one
to update. Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/scenario/issues/733.pdf).
The snippet below changes the key of the second row's text field from `Description[0]` to `Description[1]`:

Expand All @@ -143,3 +143,27 @@ new_form = PdfWrapper("733.pdf").update_widget_key(
with open("output.pdf", "wb+") as output:
output.write(new_form.read())
```

Finally, if multiple widgets need to be updated in bulk, the most efficient approach is to set
the optional parameter `defer` to `True` when updating each key and then call `commit_widget_key_updates()`
at the very end to commit all the updates at once.

Consider [this PDF](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/scenario/issues/733.pdf).
The snippet below changes the key of each row's text field from `Description[0]` to `Description[i]`,
where `i` is the index of each row:

```python
from PyPDFForm import PdfWrapper

new_form = PdfWrapper("733.pdf")

# Queue one rename per row; nothing is applied until the commit below.
for i in range(1, 10):
    new_form.update_widget_key(
        "Description[0]", f"Description[{i}]", index=1, defer=True
    )

# Apply every queued rename in one call.
new_form.commit_widget_key_updates()

with open("output.pdf", "wb+") as output:
    output.write(new_form.read())
```
23 changes: 23 additions & 0 deletions tests/scenario/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,26 @@ def test_update_key(issue_pdf_directory, request):
expected = f.read()
assert len(obj.preview) == len(expected)
assert obj.preview == expected


def test_bulk_update_key(issue_pdf_directory, request):
    """Deferred bulk key updates on 733.pdf must match 733_expected.pdf.

    For rows 1 through 9, each column's ``<name>[0]`` key is queued to be
    renamed to ``<name>[row]``, then all queued renames are committed at once
    and the preview is compared against the stored expected file.
    """
    column_names = (
        "Description",
        "symbol",
        "tradedate",
        "settlementdate",
        "quantity",
        "costperunit",
        "costabasis",
    )
    obj = PdfWrapper(os.path.join(issue_pdf_directory, "733.pdf"))

    for row in range(1, 10):
        for column in column_names:
            # Index 1 is used for every row, as in the documented example.
            obj.update_widget_key(
                f"{column}[0]", f"{column}[{row}]", 1, defer=True
            )

    obj.commit_widget_key_updates()

    expected_path = os.path.join(issue_pdf_directory, "733_expected.pdf")
    request.config.results["expected_path"] = expected_path
    request.config.results["stream"] = obj.preview
    with open(expected_path, "rb+") as f:
        expected = f.read()
    assert len(obj.preview) == len(expected)
    assert obj.preview == expected

0 comments on commit f16f548

Please sign in to comment.