Skip to content

Commit

Permalink
Add an arg for the output filename to cleanpdf.c
Browse files Browse the repository at this point in the history
  • Loading branch information
DanBloomberg committed Jun 23, 2020
1 parent 5ba34b1 commit 8466a46
Showing 1 changed file with 23 additions and 11 deletions.
34 changes: 23 additions & 11 deletions prog/cleanpdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* It will also take as input clean, orthographically-generated pdfs,
* and concatenate them into a single pdf file of images.
*
* Syntax: cleanpdf basedir threshold resolution [rotation]
* Syntax: cleanpdf basedir threshold resolution outfile [rotation]
*
* The basedir is a directory where the input pdf files are located.
* The program will operate on every file in this directory with
Expand All @@ -50,6 +50,8 @@
* The resolution should be the scanned resolution. This is typically
* 300 ppi, which for an 8.5 x 11 page would be 2550 x 3300 pixels.
*
* The pdf output is written to outfile; a '.pdf' extension is recommended.
*
* The optional rotation is an integer:
* 0 no rotation
* 1 90 degrees cw
Expand All @@ -58,6 +60,14 @@
*
* Whenever possible, the images have been deskewed.
*
* The file-handling functions in leptonica do not support filenames
* that have spaces. To use cleanpdf in linux with such filenames,
* first substitute an ascii character (e.g., '^') for each space:
*     char *newstr = stringReplaceEachSubstr(str, " ", "^", NULL);
* Then run cleanpdf on the file(s). Note that the output filename
* may contain spaces if it is enclosed in single quotes; e.g.,
*     cleanpdf dir thresh res 'filename with spaces'
*
* N.B. This requires pdfimages. For non-unix systems, this requires
* installation of the cygwin Poppler package:
* https://cygwin.com/cgi-bin2/package-cat.cgi?file=x86/poppler/
Expand Down Expand Up @@ -85,21 +95,22 @@ l_int32 main(int argc,
char **argv)
{
char buf[256];
char *basedir, *fname, *tail, *basename, *imagedir;
char *basedir, *fname, *tail, *basename, *imagedir, *outfile;
l_int32 thresh, res, rotation, i, n, ret;
PIX *pixs, *pix1, *pix2, *pix3, *pix4, *pix5;
SARRAY *sa;
static char mainName[] = "cleanpdf";

if (argc != 4 && argc != 5)
if (argc != 5 && argc != 6)
return ERROR_INT(
"Syntax: cleanpdf basedir threshold resolution [rotation]",
"Syntax: cleanpdf basedir threshold resolution outfile [rotation]",
mainName, 1);
basedir = argv[1];
thresh = atoi(argv[2]);
res = atoi(argv[3]);
if (argc == 5)
rotation = atoi(argv[4]);
outfile = argv[4];
if (argc == 6)
rotation = atoi(argv[5]);
else
rotation = 0;
if (rotation < 0 || rotation > 3) {
Expand Down Expand Up @@ -132,7 +143,7 @@ static char mainName[] = "cleanpdf";
fname, imagedir, basename);
lept_free(tail);
lept_free(basename);
lept_stderr("%s\n", buf);
fprintf(stderr, "%s\n", buf);
ret = system(buf); /* pdfimages -j */
}
sarrayDestroy(&sa);
Expand All @@ -158,7 +169,7 @@ static char mainName[] = "cleanpdf";
splitPathAtDirectory(fname, NULL, &tail);
splitPathAtExtension(tail, &basename, NULL);
snprintf(buf, sizeof(buf), "%s/%s.tif", imagedir, basename);
lept_stderr("%s\n", buf);
fprintf(stderr, "%s\n", buf);
pixWrite(buf, pix5, IFF_TIFF_G4);
pixDestroy(&pixs);
pixDestroy(&pix1);
Expand All @@ -174,10 +185,11 @@ static char mainName[] = "cleanpdf";

#if 1
/* Generate the pdf */
lept_stderr("Write output to /tmp/output.pdf\n");
convertFilesToPdf(imagedir, "tif", res, 1.0, L_G4_ENCODE, 0, NULL,
"/tmp/output.pdf");
fprintf(stderr, "Write output to %s\n", outfile);
convertFilesToPdf(imagedir, "tif", res, 1.0, L_G4_ENCODE, 0, NULL, outfile);
#endif

return 0;
}


0 comments on commit 8466a46

Please sign in to comment.