From 01aebf21f6a7f9a5f16771ce8ed299b6178a1450 Mon Sep 17 00:00:00 2001 From: danblooomberg Date: Fri, 22 Sep 2023 16:26:33 -0700 Subject: [PATCH] Add parameter in compresspdf to control the resolution of the wrapped image * The page image is assumed to have a maximum dimension of either about 1650 or 3300 pixels, depending on whether this parameter is 150 ppi (default) or 300 ppi. --- prog/compresspdf.c | 65 ++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/prog/compresspdf.c b/prog/compresspdf.c index 3647ae59b..5557dea96 100644 --- a/prog/compresspdf.c +++ b/prog/compresspdf.c @@ -49,12 +49,20 @@ * in high resolution, 1 bpp tiffg4 encoded images in the pdf. * * Syntax: - * compresspdf basedir scalefactor onebit savecolor quality title fileout + * compresspdf basedir imres scalefactor onebit savecolor + * quality title fileout * * The %basedir is a directory where the input pdf files are located. * The program will operate on every file in this directory with * the ".pdf" extension. * + * The %imres is the desired resolution of the rasterization from the + * pdf page to a page image. Two choices are allowed: 150 and 300 ppi. + * Use 0 for default (150 ppi). The actual resolution used by the + * renderer depends on the page image size and is computed internally. + * We limit the maximum resolution to 300 ppi because these images are + * RGB uncompressed and are large: 6.3 MB for 150 ppi and 25 MB for 300 ppi. + * * The %scalefactor is typically used to downscale the image to * reduce the size of the generated pdf. It should not affect the * pdf display otherwise. For normal text on images scanned at 300 ppi, @@ -122,29 +130,38 @@ l_int32 main(int argc, { char buf[256]; char *basedir, *fname, *tail, *basename, *imagedir, *title, *fileout; -l_int32 render_res, onebit, savecolor, quality, i, n, ret; +l_int32 imres, render_res, onebit, savecolor, quality, i, n, ret; l_float32 scalefactor; SARRAY *sa; - if (argc != 8) + if (argc != 9) return ERROR_INT( - "Syntax: compresspdf basedir scalefactor onebit savecolor quality " - "title fileout", __func__, 1); + "Syntax: compresspdf basedir imres scalefactor " + "onebit savecolor quality title fileout", __func__, 1); basedir = argv[1]; - scalefactor = atof(argv[2]); - onebit = atoi(argv[3]); /* set to 1 to enforce 1 bpp tiffg4 encoding */ - savecolor = atoi(argv[4]); /* if onebit == 1, set to 1 to save color */ - quality = atoi(argv[5]); /* jpeg quality */ - title = argv[6]; - fileout = argv[7]; + imres = atoi(argv[2]); + scalefactor = atof(argv[3]); + onebit = atoi(argv[4]); /* set to 1 to enforce 1 bpp tiffg4 encoding */ + savecolor = atoi(argv[5]); /* if onebit == 1, set to 1 to save color */ + quality = atoi(argv[6]); /* jpeg quality */ + title = argv[7]; + fileout = argv[8]; setLeptDebugOK(1); + if (imres == 0) imres = 150; /* default value */ + if (imres != 150 && imres != 300) { + L_WARNING("imres = %d must be 150 or 300; setting to 150\n", + __func__, imres); + imres = 150; + } if (quality <= 0) quality = 50; /* default value */ if (quality < 25) { - L_WARNING("quality %d too low; setting to 25\n", __func__, quality); + L_WARNING("quality = %d is too low; setting to 25\n", + __func__, quality); quality = 25; } if (quality > 95) { - L_WARNING("quality %d too high; setting to 95\n", __func__, quality); + L_WARNING("quality = %d is too high; setting to 95\n", + __func__, quality); quality = 95; } @@ -162,22 +179,24 @@ SARRAY *sa; sarrayWriteStderr(sa); n = sarrayGetCount(sa); - /* Figure out the resolution to use with the image renderer to - * generate page images with a resolution of not more than 150 ppi. - * These would have a maximum dimension of about 1650 pixels. - * Use the first pdf file in the directory. */ + /* Use the first pdf file in the directory to estimate the + * resolution to use with the image renderer that will generate + * page images with a resolution of either about 150 ppi + * (which is the default) or about 300 ppi for special cases. + * At 150 and 300 ppi, the page images have maximum dimensions + * of about 1650 and 3300 pixels, respectively. These are the + * uncompressed images, written to file, from which the compressed + * images will be generated. */ fname = sarrayGetString(sa, 0, L_NOCOPY); getPdfRendererResolution(fname, imagedir, &render_res); /* for 300 ppi */ - render_res /= 2; /* for 150 ppi */ + if (imres == 150) render_res /= 2; /* Rasterize: - * pdftoppm -r 150 fname outroot + * pdftoppm -r 150 fname outroot [max dimension about 1650 pixels] + * pdftoppm -r 300 fname outroot [max dimension about 3300 pixels] * Use of pdftoppm: * This works on all pdf pages, both wrapped images and pages that - * were made orthographically. We generate images that are no - * larger than about 1650 pixels in the maximum direction. This - * makes uncompressed 6 MB files and is very fast. If you want - * higher resolution 1 bpp output, use cleanpdf.c. */ + * were made orthographically. */ for (i = 0; i < n; i++) { fname = sarrayGetString(sa, i, L_NOCOPY); splitPathAtDirectory(fname, NULL, &tail);