From 4ddef37cef61f28e32d1cf5bb45027a6217f1aef Mon Sep 17 00:00:00 2001 From: Taka Date: Sun, 25 Apr 2021 06:12:58 +1000 Subject: [PATCH] Allow PDF's to be compressed. --- asl_rulebook2/webapp/static/SearchResult.js | 2 +- bin/{add_pdf_dests.py => prepare_pdf.py} | 49 ++++++++++++++++----- 2 files changed, 40 insertions(+), 11 deletions(-) rename bin/{add_pdf_dests.py => prepare_pdf.py} (57%) diff --git a/asl_rulebook2/webapp/static/SearchResult.js b/asl_rulebook2/webapp/static/SearchResult.js index 865bd54..6cf9758 100644 --- a/asl_rulebook2/webapp/static/SearchResult.js +++ b/asl_rulebook2/webapp/static/SearchResult.js @@ -215,7 +215,7 @@ gMainApp.component( "ruleid", { } ; }, // NOTE: This bit of HTML is sensitive to spaces :-/ - // NOTE: Unfortunately, PDF destinations can't have spaces (see add_pdf_dests.py), so we need to + // NOTE: Unfortunately, PDF destinations can't have spaces (see prepare_pdf.py), so we need to // do a bit of hackery to simulate them :-( We could allow spaces in the ruleid's, and transparently // convert them to underscores when we create the PDF destinations, but it's probably better // to force them to be defined with underscores (and convert them to spaces in the UI), so that diff --git a/bin/add_pdf_dests.py b/bin/prepare_pdf.py similarity index 57% rename from bin/add_pdf_dests.py rename to bin/prepare_pdf.py index 2bcb8f8..313ab47 100755 --- a/bin/add_pdf_dests.py +++ b/bin/prepare_pdf.py @@ -10,6 +10,17 @@ import click from asl_rulebook2.utils import TempFile +# NOTE: "screen" gives significant savings (~65%) but scanned PDF's become very blurry. The main MMP eASLRB +# is not too bad, but some images are also a bit unclear. "ebook" gives no savings for scanned PDF's, but +# the main MMP eASLRB is quite usable, and only slightly larger (43 MB vs. 35 MB), so we use that. +_COMPRESSION_CHOICES = [ + "screen", # 72 dpi + "ebook", # 150 dpi + "printer", # 300 dpi + "prepress", # 300 dpi, color preserving + "none" +] + # --------------------------------------------------------------------- @click.command() @@ -22,24 +33,42 @@ from asl_rulebook2.utils import TempFile @click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False), help="Output PDF file." ) +@click.option( "--compression", type=click.Choice(_COMPRESSION_CHOICES), default="ebook", + help="Level of compression." +) @click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." ) -def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ): +def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path ): """Add named destinations to a PDF file.""" # load the targets with open( targets_fname, "r" ) as fp: targets = json.load( fp ) - with TempFile( mode="w" ) as temp_file: + with TempFile(mode="w") as compressed_file, TempFile(mode="w") as pdfmarks_file: + + # compress the PDF + if compression and compression != "none": + print( "Compressing the PDF ({})...".format( compression ) ) + compressed_file.close( delete=False ) + args = [ gs_path, "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dQUIET", "-dBATCH", + "-dPDFSETTINGS=/{}".format( compression ), + "-sOutputFile={}".format( compressed_file.name ), + pdf_file + ] + start_time = time.time() + subprocess.run( args, check=True ) + elapsed_time = time.time() - start_time + print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) ) + pdf_file = compressed_file.name # generate the pdfmarks print( "Generating the pdfmarks..." ) if title: - print( "[ /Title ({})".format( title ), file=temp_file ) + print( "[ /Title ({})".format( title ), file=pdfmarks_file ) else: - print( "[", file=temp_file ) - print( " /DOCINFO pdfmark", file=temp_file ) - print( file=temp_file ) + print( "[", file=pdfmarks_file ) + print( " /DOCINFO pdfmark", file=pdfmarks_file ) + print( file=pdfmarks_file ) for ruleid, target in targets.items(): xpos, ypos = target.get( "pos", ["null","null"] ) if isinstance( ypos, int ): @@ -50,9 +79,9 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ): raise RuntimeError( "PDF destinations cannot have spaces." ) print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format( ruleid, target["page_no"], xpos, ypos - ), file=temp_file ) - print( file=temp_file ) - temp_file.close( delete=False ) + ), file=pdfmarks_file ) + print( file=pdfmarks_file ) + pdfmarks_file.close( delete=False ) # generate the pdfmark'ed document print( "Generating the pdfmark'ed document..." ) @@ -60,7 +89,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ): args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ] args.extend( [ "-o", output_fname ] ) args.extend( [ "-f", pdf_file ] ) - args.append( temp_file.name ) + args.append( pdfmarks_file.name ) start_time = time.time() subprocess.run( args, check=True ) elapsed_time = time.time() - start_time