Skip to content

Commit

Permalink
Fix a couple of issues with parsing PDF files produced by Microsoft Reporting Services (Issue #46)
Browse files Browse the repository at this point in the history

- Odd cross-reference stream containing a 3-byte generation number field for what is
  a 16-bit value
- Odd empty hex strings
  • Loading branch information
michaelrsweet committed Oct 6, 2023
1 parent 87ca4db commit 7f6ffcd
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ v1.1.2 (TBD)

- Fixed an issue with broken PDF files containing extra CR and/or LF separators
after the object stream token (Issue #40)
- Fixed an issue with PDF files produced by Microsoft Reporting Services
(Issue #46)


v1.1.1 (March 20, 2023)
Expand Down
19 changes: 17 additions & 2 deletions pdfio-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1759,9 +1759,9 @@ load_xref(
w_2 = w[0];
w_3 = w[0] + w[1];

if (w[1] == 0 || w[2] > 2 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
if (w[1] == 0 || w[2] > 4 || w[0] > sizeof(buffer) || w[1] > sizeof(buffer) || w[2] > sizeof(buffer) || w_total > sizeof(buffer))
{
_pdfioFileError(pdf, "Cross-reference stream has invalid W key.");
_pdfioFileError(pdf, "Cross-reference stream has invalid W key [%u %u %u].", (unsigned)w[0], (unsigned)w[1], (unsigned)w[2]);
return (false);
}

Expand Down Expand Up @@ -1801,9 +1801,11 @@ load_xref(
}
}

// Offset
for (i = 1, offset = buffer[w_2]; i < w[1]; i ++)
offset = (offset << 8) | buffer[w_2 + i];

// Generation number
switch (w[2])
{
default :
Expand All @@ -1815,6 +1817,19 @@ load_xref(
case 2 :
generation = (buffer[w_3] << 8) | buffer[w_3 + 1];
break;
case 3 :
// Issue #46: Stupid Microsoft PDF generator using 3 bytes to
// encode 16-bit generation numbers == 0 (probably a lazy coder
// stuffing things into an array of 64-bit unsigned integers)
generation = (buffer[w_3] << 16) | (buffer[w_3 + 1] << 8) | buffer[w_3 + 2];
if (generation > 65535)
generation = 65535;
break;
case 4 : // Even stupider :)
generation = (buffer[w_3] << 24) | (buffer[w_3 + 1] << 16) | (buffer[w_3 + 2] << 8) | buffer[w_3 + 3];
if (generation > 65535)
generation = 65535;
break;
}

// Create a placeholder for the object in memory...
Expand Down
7 changes: 7 additions & 0 deletions pdfio-token.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch;
break;
}
else if (ch == '>')
{
// Issue #46: Empty hex string from Microsoft PDF generator; treat as
// empty literal string...
*buffer = '(';
break;
}
else if (!isspace(ch & 255) && !isxdigit(ch & 255))
{
_pdfioFileError(tb->pdf, "Syntax error: '<%c'", ch);
Expand Down

0 comments on commit 7f6ffcd

Please sign in to comment.