Skip to content

Commit

Permalink
Merge pull request #523 from chinapandaman/PPF-522
Browse files Browse the repository at this point in the history
PPF-522: bring back simple fill
  • Loading branch information
chinapandaman authored Mar 16, 2024
2 parents 3c4a549 + 4435b55 commit 3e173b4
Show file tree
Hide file tree
Showing 42 changed files with 817 additions and 12 deletions.
2 changes: 1 addition & 1 deletion PyPDFForm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

__version__ = "1.4.12"

from .wrapper import PdfWrapper, PyPDFForm
from .wrapper import FormWrapper, PdfWrapper, PyPDFForm
6 changes: 6 additions & 0 deletions PyPDFForm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

DEPRECATION_NOTICE = "{} will be deprecated soon. Use {} instead."

ANNOTATION_KEY = "/Annots"
ANNOTATION_FIELD_KEY = "/T"
ANNOTATION_RECTANGLE_KEY = "/Rect"
SUBTYPE_KEY = "/Subtype"
Expand All @@ -34,6 +35,8 @@
PARENT_KEY = "/Parent"
FIELD_FLAG_KEY = "/Ff"
TEXT_FIELD_IDENTIFIER = "/Tx"
TEXT_VALUE_IDENTIFIER = "/V"
TEXT_VALUE_SHOW_UP_IDENTIFIER = "/AP"
SIGNATURE_FIELD_IDENTIFIER = "/Sig"
TEXT_FIELD_APPEARANCE_IDENTIFIER = "/DA"
SELECTABLE_IDENTIFIER = "/Btn"
Expand All @@ -43,6 +46,7 @@
CHOICES_IDENTIFIER = "/Opt"
BUTTON_IDENTIFIER = "/MK"
BUTTON_STYLE_IDENTIFIER = "/CA"
SELECTED_IDENTIFIER = "/AS"

# Field flag bits
MULTILINE = 1 << 12
Expand All @@ -65,4 +69,6 @@
"l": "\u25CF", # circle
}

CHECKBOX_SELECT = "/Yes"

COORDINATE_GRID_FONT_SIZE_MARGIN_RATIO = DEFAULT_FONT_SIZE / 100
58 changes: 55 additions & 3 deletions PyPDFForm/filler.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
# -*- coding: utf-8 -*-
"""Contains helpers for filling a PDF form."""

from typing import Dict
from io import BytesIO
from typing import cast, Dict

from .constants import WIDGET_TYPES
from pypdf import PdfReader, PdfWriter
from pypdf.generic import DictionaryObject, NameObject, TextStringObject

from .constants import (CHECKBOX_SELECT, WIDGET_TYPES,
ANNOTATION_KEY, SELECTED_IDENTIFIER, TEXT_VALUE_IDENTIFIER,
TEXT_VALUE_SHOW_UP_IDENTIFIER)
from .coordinate import (get_draw_checkbox_radio_coordinates,
get_draw_sig_coordinates_resolutions,
get_draw_text_coordinates,
get_text_line_x_coordinates)
from .font import checkbox_radio_font_size
from .image import any_image_to_jpg
from .middleware.checkbox import Checkbox
from .middleware.dropdown import Dropdown
from .middleware.radio import Radio
from .middleware.signature import Signature
from .middleware.text import Text
from .template import get_widget_key, get_widgets_by_page
from .utils import checkbox_radio_to_draw
from .utils import checkbox_radio_to_draw, stream_to_io
from .watermark import create_watermarks_and_draw, merge_watermarks_with_pdf


Expand Down Expand Up @@ -117,3 +125,47 @@ def fill(
result = merge_watermarks_with_pdf(result, image_watermarks)

return result


def simple_fill(
    template: bytes,
    widgets: Dict[str, WIDGET_TYPES],
) -> bytes:
    """Fills a PDF form in place.

    Copies the template into a new writer, then walks every page annotation
    and, when ``widgets`` has an entry for the annotation's key, stores the
    widget's value directly in the annotation dictionary:

    * checked checkbox -> ``SELECTED_IDENTIFIER`` set to ``CHECKBOX_SELECT``
    * radio            -> ``SELECTED_IDENTIFIER`` set to ``/<value>`` on the
      annotation whose per-key visiting position equals the widget value
    * dropdown         -> selected choice text stored under both
      ``TEXT_VALUE_IDENTIFIER`` and ``TEXT_VALUE_SHOW_UP_IDENTIFIER``
    * text             -> non-empty value stored under the same two keys

    Returns the bytes of the written PDF.
    """

    reader = PdfReader(stream_to_io(template))
    writer = PdfWriter()
    writer.append(reader)

    # How many annotations have been visited so far for each radio key.
    radio_seen = {}

    for page in writer.pages:
        for raw_annotation in page.get(ANNOTATION_KEY, []):  # noqa
            annotation = cast(DictionaryObject, raw_annotation.get_object())
            key = get_widget_key(annotation.get_object())

            widget = widgets.get(key)
            if widget is None:
                # No middleware entry for this annotation: leave it untouched.
                continue

            if isinstance(widget, Checkbox) and widget.value is True:
                annotation[NameObject(SELECTED_IDENTIFIER)] = NameObject(
                    CHECKBOX_SELECT
                )
            elif isinstance(widget, Radio):
                # Zero-based position of this annotation among those sharing
                # the key; the counter advances for every one of them.
                position = radio_seen.get(key, 0)
                radio_seen[key] = position + 1
                if widget.value == position:
                    annotation[NameObject(SELECTED_IDENTIFIER)] = NameObject(
                        f"/{widget.value}"
                    )
            elif isinstance(widget, Dropdown) and widget.value is not None:
                chosen = widget.choices[widget.value]
                annotation[NameObject(TEXT_VALUE_IDENTIFIER)] = TextStringObject(
                    chosen
                )
                # NOTE(review): a text string is stored under the "/AP" key
                # here — confirm this is the intended way to make it show up.
                annotation[NameObject(TEXT_VALUE_SHOW_UP_IDENTIFIER)] = (
                    TextStringObject(chosen)
                )
            elif isinstance(widget, Text) and widget.value:
                annotation[NameObject(TEXT_VALUE_IDENTIFIER)] = TextStringObject(
                    widget.value
                )
                annotation[NameObject(TEXT_VALUE_SHOW_UP_IDENTIFIER)] = (
                    TextStringObject(widget.value)
                )

    with BytesIO() as buffer:
        writer.write(buffer)
        return buffer.getvalue()
44 changes: 36 additions & 8 deletions PyPDFForm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
DEPRECATION_NOTICE, VERSION_IDENTIFIER_PREFIX,
VERSION_IDENTIFIERS)
from .coordinate import generate_coordinate_grid
from .filler import fill
from .filler import fill, simple_fill
from .font import register_font
from .image import any_image_to_jpg, rotate_image
from .middleware.dropdown import Dropdown
Expand All @@ -26,7 +26,40 @@
from .widgets.text import TextWidget


class PdfWrapper:
class FormWrapper:
    """Minimal wrapper whose only job is filling a PDF form."""

    def __init__(
        self,
        template: Union[bytes, str, BinaryIO] = b"",
    ) -> None:
        """Normalizes the given template into a stream kept on the object."""

        self.stream = fp_or_f_obj_or_stream_to_stream(template)

    def read(self) -> bytes:
        """Returns the current stream of the PDF form."""

        return self.stream

    def fill(
        self,
        data: Dict[str, Union[str, bool, int]],
    ) -> FormWrapper:
        """Applies ``data`` to the form's widgets and returns the wrapper itself."""

        # Widgets are built from the current stream on every call; an empty
        # stream yields no widgets.
        if self.stream:
            widgets = build_widgets(self.stream)
        else:
            widgets = {}

        # Keys in ``data`` that match no widget are ignored.
        matching = ((name, new_value) for name, new_value in data.items() if name in widgets)
        for name, new_value in matching:
            widgets[name].value = new_value

        self.stream = simple_fill(self.read(), widgets)

        return self


class PdfWrapper(FormWrapper):
"""A class to represent a PDF form."""

def __init__(
Expand All @@ -36,7 +69,7 @@ def __init__(
) -> None:
"""Constructs all attributes for the object."""

self.stream = fp_or_f_obj_or_stream_to_stream(template)
super().__init__(template)
self.widgets = build_widgets(self.stream) if self.stream else {}

self.global_font = kwargs.get("global_font")
Expand All @@ -49,11 +82,6 @@ def __init__(
each.font_size = self.global_font_size
each.font_color = self.global_font_color

def read(self) -> bytes:
"""Reads the file stream of a PDF form."""

return self.stream

@property
def elements(self) -> dict:
"""ToDo: deprecate this."""
Expand Down
Binary file added pdf_samples/scenario/issues/521.pdf
Binary file not shown.
Binary file not shown.
Binary file added pdf_samples/simple/dropdown/dropdown_four.pdf
Binary file not shown.
Binary file added pdf_samples/simple/dropdown/dropdown_one.pdf
Binary file not shown.
Binary file added pdf_samples/simple/dropdown/dropdown_three.pdf
Binary file not shown.
Binary file added pdf_samples/simple/dropdown/dropdown_two.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added pdf_samples/simple/sample_filled.pdf
Binary file not shown.
Binary file added pdf_samples/simple/sample_filled_radiobutton.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added pdf_samples/simple/test_fill_complex_fonts.pdf
Binary file not shown.
Binary file added pdf_samples/simple/test_fill_font_color.pdf
Binary file not shown.
112 changes: 112 additions & 0 deletions tests/scenario/test_existed_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-

import os

from PyPDFForm import FormWrapper


def test_ds82(existed_pdf_directory, pdf_samples, request):
    """Fills ``LastName`` on DS82.pdf and compares with the stored expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "existed", "DS82_expected.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(existed_pdf_directory, "DS82.pdf")).fill(
            {
                "LastName": "Smith",
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_ds82_all_chars_lowercase(existed_pdf_directory, pdf_samples, request):
    """Fills ``LastName`` with 30 lowercase characters and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "existed", "DS82_expected_all_chars_lowercase.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(existed_pdf_directory, "DS82.pdf")).fill(
            {
                "LastName": "x" * 30,
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_ds82_all_chars_uppercase(existed_pdf_directory, pdf_samples, request):
    """Fills ``LastName`` with 30 uppercase characters and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "existed", "DS82_expected_all_chars_uppercase.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(existed_pdf_directory, "DS82.pdf")).fill(
            {
                "LastName": "X" * 30,
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_ds82_mixed_case(existed_pdf_directory, pdf_samples, request):
    """Fills ``LastName`` with alternating-case characters and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "existed", "DS82_expected_mixed_case.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(existed_pdf_directory, "DS82.pdf")).fill(
            {
                "LastName": "xX" * 10,
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_illinois_real_estate_power_of_attorney_form(existed_pdf_directory, pdf_samples, request):
    """Fills the Illinois power-of-attorney form and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "existed",
                                 "illinois-real-estate-power-of-attorney-form_expected.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(existed_pdf_directory, "illinois-real-estate-power-of-attorney-form.pdf")).fill(
            {
                "undefined": "John Doe",
                "State of": "Chicago",
                "undefined_2": "Illinois",
                "of": "Michael Smith",
                "Illinois as my Attorneyin": "Chicago",
                "with full power and": "Random",
                "is as": "Not Random",
                "Address of Principal": "1 N Central, Chicago, IL 60000",
                "Phone number where Principal can be contacted": "(000)000-0000",
                # NOTE(review): this value looks like an e-mail obfuscation
                # placeholder — confirm it matches the expected fixture.
                "Email address of Principal": "[email protected]",
                "Text3": "Someone",
                "Dated": "2018-01-01",
                "Text4": "Sometwo",
                "Text5": "Somethree",
                "Text6": "Somefour",
                "Dated 1": "2019-01-01",
                "My commission expires": "NOW",
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected
64 changes: 64 additions & 0 deletions tests/scenario/test_issues_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
# pylint: disable=line-too-long

import os

from PyPDFForm import FormWrapper


def test_pdf_form_with_central_aligned_text_fields(issue_pdf_directory, pdf_samples, request):
    """Fills three text fields on PPF-285.pdf and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "issues", "PPF-285-expected.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(issue_pdf_directory, "PPF-285.pdf")).fill(
            {
                "name": "Hans Mustermann",
                "fulladdress": "Musterstr. 12, 82903 Musterdorf, Musterland",
                "advisorname": "Karl Test",
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_pdf_form_with_paragraph_fields_new_line_symbol_text_overflow(issue_pdf_directory, pdf_samples, request):
    """Fills a long multi-paragraph value on PPF-415-2.pdf and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "issues", "PPF-415-2-expected.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(issue_pdf_directory, "PPF-415-2.pdf")).fill(
            {
                "multiline-text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Malesuada proin libero nunc consequat interdum varius sit amet mattis. Nec tincidunt praesent semper feugiat nibh sed.\nSed libero enim sed faucibus turpis. Cursus in hac habitasse platea dictumst quisque sagittis. Placerat in egestas erat imperdiet sed euismod. Id aliquet risus feugiat in ante metus dictum at. Proin fermentum leo vel orci porta non pulvinar. Consequat semper viverra nam libero justo.\nPellentesque massa placerat duis ultricies lacus sed. Amet est placerat in egestas erat imperdiet sed euismod nisi. Id cursus metus aliquam eleifend mi. Massa massa ultricies mi quis. Volutpat consequat mauris nunc congue nisi vitae suscipit tellus. Ut tellus elementum sagittis vitae.\n\nEtiam sit amet nisl purus in mollis nunc. Vel turpis nunc eget lorem dolor sed. Ultrices dui sapien eget mi proin sed libero enim. Condimentum id venenatis a condimentum vitae sapien pellentesque habitant. Libero volutpat sed cras ornare arcu. Commodo quis imperdiet massa tincidunt nunc pulvinar sapien et ligula. Nisi est sit amet facilisis magna etiam. In iaculis nunc sed augue.\nSapien pellentesque habitant morbi tristique.\nCondimentum mattis pellentesque id nibh tortor id aliquet. Porttitor massa id neque aliquam vestibulum. Feugiat in fermentum posuere urna nec tincidunt praesent semper. Malesuada fames ac turpis egestas integer. Aenean vel elit scelerisque mauris pellentesque. Vel turpis nunc eget lorem dolor sed viverra. Nec feugiat nisl pretium fusce id velit ut tortor."  # noqa
            }
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected


def test_521(issue_pdf_directory, pdf_samples, request):
    """Fills three long text fields on 521.pdf and compares with the expected PDF."""

    expected_path = os.path.join(pdf_samples, "simple", "scenario", "issues", "521-expected.pdf")
    # The expected file is only read, so open it read-only ("rb") rather
    # than read/write; this avoids requiring write permission on fixtures.
    with open(expected_path, "rb") as f:
        obj = FormWrapper(os.path.join(issue_pdf_directory, "521.pdf")).fill(
            {
                "Text1": "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?",  # noqa
                "Text2": "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. NEMO ENIM IPSAM VOLUPTATEM QUIA VOLUPTAS SIT ASPERNATUR AUT ODIT AUT FUGIT, SED QUIA CONSEQUUNTUR MAGNI DOLORES EOS QUI RATIONE VOLUPTATEM SEQUI NESCIUNT. NEQUE PORRO QUISQUAM EST, QUI DOLOREM IPSUM QUIA DOLOR SIT AMET, CONSECTETUR, ADIPISCI VELIT, SED QUIA NON NUMQUAM EIUS MODI TEMPORA INCIDUNT UT LABORE ET DOLORE MAGNAM ALIQUAM QUAERAT VOLUPTATEM. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?",  # noqa
                "Text3": "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?",  # noqa
            },
        )

        # Recorded on the pytest config; the consumer is outside this file.
        request.config.results["expected_path"] = expected_path
        request.config.results["stream"] = obj.read()

        expected = f.read()

        assert len(obj.read()) == len(expected)
        assert obj.stream == expected
Loading

0 comments on commit 3e173b4

Please sign in to comment.