Skip to content

Commit

Permalink
Add parameter for maximum horizontal stretching for croppdf
Browse files Browse the repository at this point in the history
* This allows better filling for an 8.5 x 11 inch printed page.
  • Loading branch information
DanBloomberg committed Sep 6, 2023
1 parent f3aa483 commit 2a541fd
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 15 deletions.
24 changes: 18 additions & 6 deletions prog/croppdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
* is encoded with tiffg4.
*
* Syntax:
* croppdf basedir lrclear tbclear edgeclean lradd tbadd title fileout
* croppdf basedir lrclear tbclear edgeclean lradd tbadd maxwiden
* title fileout
*
* The %basedir is a directory where the input pdf files are located.
* The program will operate on every file in this directory with
Expand All @@ -50,6 +51,15 @@
* The %edgeclean parameter is used to remove edge noise, going from
* 0 (default, no removal) to 15 (maximally aggressive removal).
*
* The suggested value for %lradd and %tbadd is 50. Laser printers do not
* print foreground pixels very close to the page edges, and using a
* margin of 50 pixels (1/6" at 300 ppi) should allow all foreground
* pixels to be printed.
*
* The %maxwiden parameter allows the foreground to better fill an
* 8.5 x 11 inch printed page. It gives the maximum fractional horizontal
* stretching allowed. Suggested values are between 1.0 and 1.15.
*
* The %title is the title given to the pdf. Use %title == "none"
* to omit the title.
*
Expand Down Expand Up @@ -91,20 +101,22 @@ char buf[256];
char *basedir, *fname, *tail, *basename, *imagedir, *title, *fileout;
l_int32 lrclear, tbclear, edgeclean, lradd, tbadd;
l_int32 render_res, i, n, ret;
l_float32 maxwiden;
SARRAY *sa;

if (argc != 9)
if (argc != 10)
return ERROR_INT(
"Syntax: croppdf basedir lrclear tbclear edgeclean "
"lradd tbadd title fileout", __func__, 1);
"lradd tbadd maxwiden title fileout", __func__, 1);
basedir = argv[1];
lrclear = atoi(argv[2]);
tbclear = atoi(argv[3]);
edgeclean = atoi(argv[4]);
lradd = atoi(argv[5]);
tbadd = atoi(argv[6]);
title = argv[7];
fileout = argv[8];
maxwiden = atof(argv[7]);
title = argv[8];
fileout = argv[9];
setLeptDebugOK(1);

/* Set up a directory for temp images */
Expand Down Expand Up @@ -156,7 +168,7 @@ SARRAY *sa;
lept_free(imagedir);
sarrayWriteStderr(sa);
lept_stderr("cropping ...\n");
cropFilesToPdf(sa, lrclear, tbclear, edgeclean, lradd, tbadd,
cropFilesToPdf(sa, lrclear, tbclear, edgeclean, lradd, tbadd, maxwiden,
title, fileout);

return 0;
Expand Down
2 changes: 1 addition & 1 deletion prog/misctest1.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ PIXCMAP *cmap, *cmapg;

/* Page cropping */
pix1 = pixRead("tel_3.tif");
pix2 = pixCropImage(pix1, 30, 30, 4, 25, 25,
pix2 = pixCropImage(pix1, 30, 30, 4, 25, 25, 1.15,
"/tmp/lept/misc/cropdebug.pdf", NULL);
pixDestroy(&pix1);
pixDestroy(&pix2);
Expand Down
4 changes: 2 additions & 2 deletions src/allheaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -1375,7 +1375,7 @@ LEPT_DLL extern PIX * pixGenHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *p
LEPT_DLL extern PIX * pixGenerateHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb );
LEPT_DLL extern PIX * pixGenTextlineMask ( PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb );
LEPT_DLL extern PIX * pixGenTextblockMask ( PIX *pixs, PIX *pixvws, PIXA *pixadb );
LEPT_DLL extern PIX * pixCropImage ( PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *debugfile, BOX **pcropbox );
LEPT_DLL extern PIX * pixCropImage ( PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, l_float32 maxwiden, const char *debugfile, BOX **pcropbox );
LEPT_DLL extern PIX * pixCleanImage ( PIX *pixs, l_int32 contrast, l_int32 rotation, l_int32 scale, l_int32 opensize );
LEPT_DLL extern BOX * pixFindPageForeground ( PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac );
LEPT_DLL extern l_ok pixSplitIntoCharacters ( PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug );
Expand Down Expand Up @@ -1405,7 +1405,7 @@ LEPT_DLL extern l_ok partifyPixac ( PIXAC *pixac, l_int32 nparts, const char *ou
LEPT_DLL extern BOXA * boxaGetWhiteblocks ( BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops );
LEPT_DLL extern BOXA * boxaPruneSortedOnOverlap ( BOXA *boxas, l_float32 maxoverlap );
LEPT_DLL extern l_ok compressFilesToPdf ( SARRAY *sa, l_int32 onebit, l_int32 savecolor, l_float32 scalefactor, l_int32 quality, const char *title, const char *fileout );
LEPT_DLL extern l_ok cropFilesToPdf ( SARRAY *sa, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *title, const char *fileout );
LEPT_DLL extern l_ok cropFilesToPdf ( SARRAY *sa, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, l_float32 maxwiden, const char *title, const char *fileout );
LEPT_DLL extern l_ok cleanTo1bppFilesToPdf ( SARRAY *sa, l_int32 res, l_int32 contrast, l_int32 rotation, l_int32 opensize, const char *title, const char *fileout );
LEPT_DLL extern l_ok convertFilesToPdf ( const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout );
LEPT_DLL extern l_ok saConvertFilesToPdf ( SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout );
Expand Down
16 changes: 11 additions & 5 deletions src/pageseg.c
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ PIX *pix1, *pix2, *pix3, *pixd;
* 15 is maximally aggressive
* \param[in] lr_add full res expansion of crop box on left and right
* \param[in] tb_add full res expansion of crop box on top and bottom
* \param[in] maxwiden max fractional horizontal stretch allowed
* \param[in] *debugfile [optional] usually is NULL
* \param[out] *pcropbox [optional] crop box at full resolution
* \return cropped pix, or NULL on error
Expand All @@ -555,8 +556,9 @@ PIX *pix1, *pix2, *pix3, *pixd;
* (e) 2x expansion of the bounding box to full resolution.
* (f) Crops the binarized image to the bounding box.
* (g) Slightly thickens long horizontal lines.
* (h) Does anamorphic horizontal upscaling to better fill
* an 8.5 x 11 inch printed page.
* (h) Does anamorphic horizontal upscaling, by a factor of at
* most %maxwiden, to better fill an 8.5 x 11 inch printed page.
* It is suggested that %maxwiden not exceed 1.15.
* Note that input parameters are given at full resolution and
* (a) - (c) are done at 2x reduction for efficiency.
* (2) The side clearing must not exceed 1/6 of the dimension on that side.
Expand All @@ -574,6 +576,7 @@ pixCropImage(PIX *pixs,
l_int32 edgeclean,
l_int32 lr_add,
l_int32 tb_add,
l_float32 maxwiden,
const char *debugfile,
BOX **pcropbox)
{
Expand Down Expand Up @@ -607,6 +610,9 @@ BOX *box1, *box2;
__func__, w / 6, h / 6);
return NULL;
}
if (maxwiden > 1.2)
L_WARNING("maxwiden = %f > 1.2; suggest between 1.0 and 1.15\n",
__func__, maxwiden);
pixa1 = (debugfile) ? pixaCreate(5) : NULL;
if (pixa1) pixaAddPix(pixa1, pixs, L_COPY);

Expand Down Expand Up @@ -664,11 +670,11 @@ BOX *box1, *box2;
pixDestroy(&pix3);

/* Widen the result to fit the standard page shape (8.5 x 11 inch).
* Do not stretch horizontally by more than 15%. */
* Do not stretch horizontally by more than %maxwiden. */
pixGetDimensions(pix2, &w, &h, NULL);
hscale = (l_float32)h / (1.2941f * (l_float32)w);
if (hscale > 1.0) {
hscale = L_MIN(hscale, 1.15);
if (hscale > 1.0 && maxwiden > 1.0) {
hscale = L_MIN(hscale, maxwiden);
pix3 = pixScale(pix2, hscale, 1.0);
} else {
pix3 = pixClone(pix2);
Expand Down
4 changes: 3 additions & 1 deletion src/pdfapp.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ PIXAC *pixac1 = NULL;
* default = 0 (no removal);
* \param[in] lr_add full res expansion of crop box on left and right
* \param[in] tb_add full res expansion of crop box on top and bottom
* \param[in] maxwiden max fractional horizontal stretch allowed
* \param[in] title [optional] pdf title; can be null
* \param[in] fileout pdf file of all images
* \return 0 if OK, 1 on error
Expand All @@ -251,6 +252,7 @@ cropFilesToPdf(SARRAY *sa,
l_int32 edgeclean,
l_int32 lr_add,
l_int32 tb_add,
l_float32 maxwiden,
const char *title,
const char *fileout)
{
Expand Down Expand Up @@ -280,7 +282,7 @@ PIXAC *pixac1 = NULL;
fname = sarrayGetString(sa, i, L_NOCOPY);
pixs = pixRead(fname);
pix1 = pixCropImage(pixs, lr_clear, tb_clear, edgeclean,
lr_add, tb_add, NULL, NULL);
lr_add, tb_add, maxwiden, NULL, NULL);
if (n <= maxsmallset)
pixaAddPix(pixa1, pix1, L_INSERT);
else
Expand Down

0 comments on commit 2a541fd

Please sign in to comment.