Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,9 @@ def build_font_width_map(
m = 0
cpt = 0
for xx in w:
xx = xx.get_object()
if xx > 0:
m += xx
xx_value = xx.get_object()
if xx_value > 0:
m += xx_value
cpt += 1
font_width_map["default"] = m / max(1, cpt)
st = cast(int, ft["/FirstChar"])
Expand Down
28 changes: 14 additions & 14 deletions pypdf/_doc_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,8 @@ def recursive_call(
return top, -1
return None, mi + ma
for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])):
kid = cast(DictionaryObject, kid.get_object())
n, i = recursive_call(kid, mi)
kid_object = cast(DictionaryObject, kid.get_object())
n, i = recursive_call(kid_object, mi)
if n is not None: # page has just been found ...
if i < 0: # ... just below!
return node, idx
Expand Down Expand Up @@ -612,8 +612,8 @@ def _build_field(
states: list[str] = []
retval[key][NameObject("/_States_")] = ArrayObject(states)
for k in obj.get(FA.Kids, {}):
k = k.get_object()
for s in list(k["/AP"]["/N"].keys()):
k_object = k.get_object()
for s in list(k_object["/AP"]["/N"].keys()):
if s not in states:
states.append(s)
retval[key][NameObject("/_States_")] = ArrayObject(states)
Expand Down Expand Up @@ -641,8 +641,8 @@ def _check_kids(
if PagesAttributes.KIDS in tree:
# recurse down the tree
for kid in tree[PagesAttributes.KIDS]: # type: ignore
kid = kid.get_object()
self.get_fields(kid, retval, fileobj, stack)
kid_object = kid.get_object()
self.get_fields(kid_object, retval, fileobj, stack)

def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
field_attributes_tuple = FA.attributes()
Expand Down Expand Up @@ -771,16 +771,16 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any:
else:
kids = field.get("/Kids", ())
for k in kids:
k = k.get_object()
if (k.get("/Subtype", "") == "/Widget") and ("/T" not in k):
k_object = k.get_object()
if (k_object.get("/Subtype", "") == "/Widget") and ("/T" not in k_object):
# Kid that is just a widget, not a field:
if "/P" in k:
ret += [k["/P"].get_object()]
if "/P" in k_object:
ret += [k_object["/P"].get_object()]
else:
ret += [
p
for p in self.pages
if k.indirect_reference in p.get("/Annots", "")
if k_object.indirect_reference in p.get("/Annots", "")
]
return [
x
Expand Down Expand Up @@ -1317,9 +1317,9 @@ def xfa(self) -> Optional[dict[str, Any]]:
i = iter(fields)
for f in i:
tag = f
f = next(i)
if isinstance(f, IndirectObject):
field = cast(Optional[EncodedStreamObject], f.get_object())
field_value = next(i)
if isinstance(field_value, IndirectObject):
field = cast(Optional[EncodedStreamObject], field_value.get_object())
if field:
es = zlib.decompress(field._data)
retval[tag] = es
Expand Down
20 changes: 10 additions & 10 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,14 +743,14 @@ def _get_inline_images(self) -> dict[str, ImageFile]:
if k in {"/Length", "/L"}: # no length is expected
continue
if isinstance(v, list):
v = ArrayObject(
value_for_init: PdfObject = ArrayObject(
[self._translate_value_inline_image(k, x) for x in v]
)
else:
v = self._translate_value_inline_image(k, v)
k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k])
if k not in init:
init[k] = v
value_for_init = self._translate_value_inline_image(k, v)
mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k])
if mapped_k not in init:
init[mapped_k] = value_for_init
ii["object"] = EncodedStreamObject.initialize_from_dictionary(init)
extension, byte_stream, img = _xobj_to_image(ii["object"])
files[f"~{num}~"] = ImageFile(
Expand Down Expand Up @@ -1236,13 +1236,13 @@ def _merge_page_writer(
else:
trsf = Transformation(ctm)
for a in cast(ArrayObject, page2[PG.ANNOTS]):
a = a.get_object()
aa = a.clone(
annotation_object = a.get_object()
aa = annotation_object.clone(
pdf,
ignore_fields=("/P", "/StructParent", "/Parent"),
force_duplicate=True,
)
r = cast(ArrayObject, a["/Rect"])
r = cast(ArrayObject, annotation_object["/Rect"])
pt1 = trsf.apply_on((r[0], r[1]), True)
pt2 = trsf.apply_on((r[2], r[3]), True)
aa[NameObject("/Rect")] = ArrayObject(
Expand All @@ -1253,8 +1253,8 @@ def _merge_page_writer(
max(pt1[1], pt2[1]),
)
)
if "/QuadPoints" in a:
q = cast(ArrayObject, a["/QuadPoints"])
if "/QuadPoints" in annotation_object:
q = cast(ArrayObject, annotation_object["/QuadPoints"])
aa[NameObject("/QuadPoints")] = ArrayObject(
trsf.apply_on((q[0], q[1]), True)
+ trsf.apply_on((q[2], q[3]), True)
Expand Down
5 changes: 3 additions & 2 deletions pypdf/_text_extraction/_layout_mode/_fixed_width_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,14 @@ def recurs_to_target_op(
tj_ops.append(text_state_mgr.text_state_params(operands[2]))
elif op in (b"Td", b"Tm", b"TD", b"T*"):
text_state_mgr.reset_trm()
operands_for_tm = operands
if op == b"Tm":
text_state_mgr.reset_tm()
elif op == b"TD":
text_state_mgr.set_state_param(b"TL", -operands[1])
elif op == b"T*":
operands = [0, -text_state_mgr.TL]
text_state_mgr.add_tm(operands)
operands_for_tm = [0, -text_state_mgr.TL]
text_state_mgr.add_tm(operands_for_tm)
elif op == b"Tf":
text_state_mgr.set_font(fonts[operands[0]], operands[1])
else: # handle Tc, Tw, Tz, TL, and Ts operators
Expand Down
18 changes: 8 additions & 10 deletions pypdf/_text_extraction/_layout_mode/_font.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from ..._codecs import adobe_glyphs
from ...errors import ParseError
from ...generic import IndirectObject
from ._font_widths import STANDARD_WIDTHS


Expand Down Expand Up @@ -60,13 +59,12 @@ def __post_init__(self) -> None:

# CID fonts have a /W array mapping character codes to widths stashed in /DescendantFonts
if "/DescendantFonts" in self.font_dictionary:
d_font: dict[Any, Any]
d_font: Any
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this change?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy flagged this with: "dict[Any, Any]" has no attribute "get_object".

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then you should probably make this a DictionaryObject or PdfObject.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I added PdfObject for that.

for d_font_idx, d_font in enumerate(
self.font_dictionary["/DescendantFonts"]
):
while isinstance(d_font, IndirectObject):
d_font = d_font.get_object()
self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font
d_font_object = d_font.get_object()
self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object
ord_map = {
ord(_target): _surrogate
for _target, _surrogate in self.char_map.items()
Expand All @@ -78,20 +76,20 @@ def __post_init__(self) -> None:
# (2) A character start index, a character stop index, and a width, e.g.
# `45 65 500` applies width 500 to characters 45-65.
skip_count = 0
_w = d_font.get("/W", [])
_w = d_font_object.get("/W", [])
for idx, w_entry in enumerate(_w):
w_entry = w_entry.get_object()
w_value = w_entry.get_object()
if skip_count:
skip_count -= 1
continue
if not isinstance(w_entry, (int, float)): # pragma: no cover
if not isinstance(w_value, (int, float)): # pragma: no cover
# We should never get here due to skip_count above. Add a
# warning and or use reader's "strict" to force an ex???
continue
# check for format (1): `int [int int int int ...]`
w_next_entry = _w[idx + 1].get_object()
if isinstance(w_next_entry, Sequence):
start_idx, width_list = w_entry, w_next_entry
start_idx, width_list = w_value, w_next_entry
self.width_map.update(
{
ord_map[_cidx]: _width
Expand All @@ -112,7 +110,7 @@ def __post_init__(self) -> None:
_w[idx + 2].get_object(), (int, float)
):
start_idx, stop_idx, const_width = (
w_entry,
w_value,
w_next_entry,
_w[idx + 2].get_object(),
)
Expand Down
25 changes: 12 additions & 13 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,8 +982,8 @@ def update_page_form_field_values(
if PG.ANNOTS not in page:
logger_warning("No fields to update on this page", __name__)
return
for annotation in page[PG.ANNOTS]: # type: ignore
annotation = cast(DictionaryObject, annotation.get_object())
for annotation_ref in page[PG.ANNOTS]: # type: ignore
annotation = cast(DictionaryObject, annotation_ref.get_object())
if annotation.get("/Subtype", "") != "/Widget":
continue
if "/FT" in annotation and "/T" in annotation:
Expand All @@ -1007,7 +1007,6 @@ def update_page_form_field_values(
del parent_annotation["/I"]
if flags:
annotation[NameObject(FA.Ff)] = NumberObject(flags)
# Set the field value
if not (value is None and flatten): # Only change values if given by user and not flattening.
if isinstance(value, list):
lst = ArrayObject(TextStringObject(v) for v in value)
Expand All @@ -1029,7 +1028,7 @@ def update_page_form_field_values(
if v not in normal_ap:
v = NameObject("/Off")
appearance_stream_obj = normal_ap.get(v)
# Other cases will be updated through the for loop
# other cases will be updated through the for loop
annotation[NameObject(AA.AS)] = v
annotation[NameObject(FA.V)] = v
elif (
Expand All @@ -1050,7 +1049,7 @@ def update_page_form_field_values(
annotation[NameObject(AA.AP)] = DictionaryObject(
{NameObject("/N"): self._add_object(appearance_stream_obj)}
)
elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])):
elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])):
cast(DictionaryObject, annotation[NameObject(AA.AP)])[
NameObject("/N")
] = self._add_object(appearance_stream_obj)
Expand Down Expand Up @@ -1100,9 +1099,9 @@ def reattach_fields(
if "/Annots" not in page:
return lst
annotations = cast(ArrayObject, page["/Annots"])
for idx, annotation in enumerate(annotations):
is_indirect = isinstance(annotation, IndirectObject)
annotation = cast(DictionaryObject, annotation.get_object())
for idx, annotation_ref in enumerate(annotations):
is_indirect = isinstance(annotation_ref, IndirectObject)
annotation = cast(DictionaryObject, annotation_ref.get_object())
if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation:
if (
"indirect_reference" in annotation.__dict__
Expand Down Expand Up @@ -1472,9 +1471,10 @@ def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]
if obj is not None:
object_positions.append(stream.tell())
stream.write(f"{idnum} 0 obj\n".encode())
object_to_write = obj
if self._encryption and obj != self._encrypt_entry:
obj = self._encryption.encrypt_object(obj, idnum, 0)
obj.write_to_stream(stream)
object_to_write = self._encryption.encrypt_object(obj, idnum, 0)
object_to_write.write_to_stream(stream)
stream.write(b"\nendobj\n")
else:
object_positions.append(-1)
Expand Down Expand Up @@ -1563,9 +1563,8 @@ def add_metadata(self, infos: dict[str, Any]) -> None:
if isinstance(infos, PdfObject):
infos = cast(DictionaryObject, infos.get_object())
for key, value in list(infos.items()):
if isinstance(value, PdfObject):
value = value.get_object()
args[NameObject(key)] = create_string_object(str(value))
value_object = value.get_object() if isinstance(value, PdfObject) else value
args[NameObject(key)] = create_string_object(str(value_object))
if self._info is None:
self._info = DictionaryObject()
self._info.update(args)
Expand Down
13 changes: 7 additions & 6 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,27 +760,28 @@ def decode_stream_data(stream: Any) -> bytes:
if not data:
return data
for filter_name, params in zip(filters, decode_parms):
if isinstance(params, NullObject):
params = {}
params_typed: Optional[DictionaryObject] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use params directly to reduce the diff?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like you misunderstood me here: I meant to use params_untyped as the loop variable and params as the name for the typed one.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, got it. This is fixed now.

if not isinstance(params, NullObject):
params_typed = cast(Optional[DictionaryObject], params)
if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
data = ASCIIHexDecode.decode(data)
elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
data = ASCII85Decode.decode(data)
elif filter_name in (FT.LZW_DECODE, FTA.LZW):
data = LZWDecode.decode(data, params)
data = LZWDecode.decode(data, params_typed)
elif filter_name in (FT.FLATE_DECODE, FTA.FL):
data = FlateDecode.decode(data, params)
data = FlateDecode.decode(data, params_typed)
elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
data = RunLengthDecode.decode(data)
elif filter_name == FT.CCITT_FAX_DECODE:
height = stream.get(IA.HEIGHT, ())
data = CCITTFaxDecode.decode(data, params, height)
data = CCITTFaxDecode.decode(data, params_typed, height)
elif filter_name == FT.DCT_DECODE:
data = DCTDecode.decode(data)
elif filter_name == FT.JPX_DECODE:
data = JPXDecode.decode(data)
elif filter_name == FT.JBIG2_DECODE:
data = JBIG2Decode.decode(data, params)
data = JBIG2Decode.decode(data, params_typed)
elif filter_name == "/Crypt":
if "/Name" in params or "/Type" in params:
raise NotImplementedError(
Expand Down
6 changes: 3 additions & 3 deletions pypdf/generic/_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,8 @@ def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
for kid in cast(ArrayObject, container["/Kids"].get_object()):
# There might be further (nested) kids here.
# Wait for an example before evaluating an implementation.
kid = kid.get_object()
if "/Names" in kid:
yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
kid_object = kid.get_object()
if "/Names" in kid_object:
yield from cls._load_from_names(cast(ArrayObject, kid_object["/Names"]))
if "/Names" in container:
yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ ignore = [
"PERF203", # `try`-`except` within a loop incurs performance overhead
"PGH003", # Use specific rule codes when ignoring type issues
"PLW1510", # `subprocess.run` without explicit `check` argument
"PLW2901", # `with` statement variable `img` overwritten by assignment target
"PT011", # `pytest.raises(ValueError)` is too broad, set the `match`
"PT012", # `pytest.raises()` block should contain a single simple statement
"PT014", # Ruff bug: Duplicate of test case at index 1 in `@pytest_mark.parametrize`
Expand Down
8 changes: 4 additions & 4 deletions tests/scripts/test_make_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_strip_header(data, expected):
def test_get_git_commits_since_tag():
make_release = pytest.importorskip("make_release")

with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits
with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits_fh
), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1):
commits = make_release.get_git_commits_since_tag("4.0.1")
assert commits == [
Expand Down Expand Up @@ -87,8 +87,8 @@ def test_get_git_commits_since_tag():
def test_get_formatted_changes():
make_release = pytest.importorskip("make_release")

with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits
with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch(
"urllib.request.urlopen", side_effect=lambda _: commits_fh
), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1):
output, output_with_user = make_release.get_formatted_changes("4.0.1")

Expand Down
4 changes: 2 additions & 2 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ def test_flatedecode_unsupported_predictor():
predictors = (-10, -1, 0, 9, 16, 20, 100)

for predictor, s in cartesian_product(predictors, filter_inputs):
s = s.encode()
s_bytes = s.encode()
with pytest.raises(PdfReadError):
codec.decode(codec.encode(s), DictionaryObject({"/Predictor": predictor}))
codec.decode(codec.encode(s_bytes), DictionaryObject({"/Predictor": predictor}))


@pytest.mark.parametrize(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image:
else:
if isinstance(path, Path):
assert path.exists()
with Image.open(path) as img:
with Image.open(path) as opened_img:
img = (
img.copy()
opened_img.copy()
) # Opened image should be copied to avoid issues with file closing
return img

Expand Down
6 changes: 3 additions & 3 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2422,10 +2422,10 @@ def test_no_resource_for_14_std_fonts(caplog):
writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)))
p = writer.pages[0]
for a in p["/Annots"]:
a = a.get_object()
if a["/FT"] == "/Tx":
a_obj = a.get_object()
if a_obj["/FT"] == "/Tx":
writer.update_page_form_field_values(
p, {a["/T"]: "Brooks"}, auto_regenerate=False
p, {a_obj["/T"]: "Brooks"}, auto_regenerate=False
)
assert "Font dictionary for /Helvetica not found." in caplog.text

Expand Down
Loading