diff --git a/bin/dump_pdf.py b/asl_rulebook2/bin/dump_pdf.py similarity index 100% rename from bin/dump_pdf.py rename to asl_rulebook2/bin/dump_pdf.py diff --git a/bin/extract_pages.py b/asl_rulebook2/bin/extract_pages.py similarity index 100% rename from bin/extract_pages.py rename to asl_rulebook2/bin/extract_pages.py diff --git a/bin/fixup_mmp_pdf.py b/asl_rulebook2/bin/fixup_mmp_pdf.py similarity index 68% rename from bin/fixup_mmp_pdf.py rename to asl_rulebook2/bin/fixup_mmp_pdf.py index e23e5c9..091178f 100755 --- a/bin/fixup_mmp_pdf.py +++ b/asl_rulebook2/bin/fixup_mmp_pdf.py @@ -2,29 +2,26 @@ """ Fixup issues in the MMP eASLRB. """ import os -import math from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination import click +from asl_rulebook2.utils import log_msg_stderr + # --------------------------------------------------------------------- -def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): - """Fixup the eASLRB.""" +def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ): + """Fixup the MMP eASLRB PDF.""" def log_msg( msg_type, msg, *args, **kwargs ): if not log: return if isinstance( msg, list ): msg = "\n".join( msg ) - data = kwargs.pop( "data", None ) msg = msg.format( *args, **kwargs ) - log( msg_type, msg, data=data ) + log( msg_type, msg ) - def percentage( curr, total ): - return math.floor( 100 * float(curr) / float(total) ) - - # NOTE: It would be nice to use the targetes file to get the TOC entries and annotations + # NOTE: It would be nice to use the targets file to get the TOC entries and annotations # to point to the exact point on the page, but figuring out the text associated with each # annotiation is extremely messy (annotations are simply a rectangle on a page, so we need # to figure out which elements lie within that rectangle, and since things are not always @@ -32,24 +29,23 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): with Pdf.open( fname ) 
as pdf: - log_msg( "start", "Loaded PDF: {}".format( fname ), data=[ - ( "PDF version", pdf.pdf_version ), - ( "# pages", len(pdf.pages) ), - ] ) + log_msg( "start", "Loaded PDF: {}\n- PDF version = {}\n- #pages = {}".format( + fname, pdf.pdf_version, len(pdf.pages) ) + ) log_msg( None, "" ) # fixup bookmarks in the TOC - log_msg( "toc", "Fixing up the TOC..." ) + log_msg( "progress", "Fixing up the TOC..." ) def walk_toc( items, depth ): for item_no,item in enumerate(items): if item.destination[0].Type != "/Page" or item.destination[1] != "/Fit" \ or item.page_location is not None or item.page_location_kwargs != {}: - log_msg( "toc:warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) ) + log_msg( "warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) ) continue page = Page( item.destination[0] ) page_height = page.mediabox[3] bullet = "#" if depth <= 1 else "-" - log_msg( "toc:detail", " {}{} {} => p{}", + log_msg( "verbose", " {}{} {} => p{}", depth*" ", bullet, item.title, 1+page.index ) walk_toc( item.children, depth+1 ) @@ -60,16 +56,13 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): with pdf.open_outline() as outline: walk_toc( outline.root, 0 ) # NOTE: The TOC will be updated when we exit the context manager, and can take some time. - log_msg( "toc", "Installing the new TOC..." ) + log_msg( "progress", "Installing the new TOC..." ) log_msg( None, "" ) # fixup up each page - log_msg( "annoations", "Fixing up the content..." ) + log_msg( "progress", "Fixing up the content..." ) for page_no, raw_page in enumerate(pdf.pages): - log_msg( "annotations:progress", "- page {}", - 1+page_no, - data = { "percentage": percentage( page_no, len(pdf.pages) ) } - ) + log_msg( "verbose", "- page {}", 1+page_no ) if rotate: # force pages to be landscape (so that we don't get an h-scrollbar in Firefox # when we set the zoom to "fit width"). 
@@ -83,21 +76,20 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): dest = annot.get( "/Dest" ) if dest: page_no = Page( dest[0] ).index - log_msg( "annotations:detail", " - {} => p{}", + log_msg( "verbose", " - {} => p{}", repr(annot.Rect), 1+page_no ) annot.Dest = make_page_destination( pdf, page_no, "XYZ", top=page_height ) log_msg( None, "" ) # save the updated PDF - log_msg( "save", "Saving updated PDF: {}", output_fname ) + log_msg( "progress", "Saving the fixed-up PDF..." ) # NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password # when opening the file (but it will be marked as "SECURE" in the UI). enc = Encryption( owner="", user="" ) def save_progress( pct ): - log_msg( "save:progress", "- Saved {}%...", pct, - data = { "percentage": pct } - ) + if pct > 0 and pct % 10 == 0: + log_msg( "verbose", "- Saved {}%.", pct ) pdf.save( output_fname, encryption=enc, linearize=optimize_web, progress = save_progress ) @@ -107,9 +99,9 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): new_size = os.path.getsize( output_fname ) ratio = round( 100 * float(new_size) / float(old_size) ) - 100 if ratio == 0: - log_msg( "save", "The updated PDF file is about the same size as the original file." ) + log_msg( "verbose", "The updated PDF file is about the same size as the original file." ) else: - log_msg( "save", "The updated PDF file is about {}% {} than the original file.", + log_msg( "verbose", "The updated PDF file is about {}% {} than the original file.", abs(ratio), "larger" if ratio > 0 else "smaller" ) @@ -120,23 +112,18 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ): @click.option( "--output","-o", required=True, type=click.Path(dir_okay=False), help="Where to save the fixed-up PDF." ) @click.option( "--optimize-web", is_flag=True, default=False, help="Optimize for use in a browser (larger file)." 
) @click.option( "--rotate", is_flag=True, default=False, help="Rotate landscape pages." ) -@click.option( "--verbose","-v", is_flag=True, default=False, help="Verbose output." ) @click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." ) -def main( pdf_file, output, optimize_web, rotate, verbose, progress ): +@click.option( "--verbose","-v", is_flag=True, default=False, help="Verbose output." ) +def main( pdf_file, output, optimize_web, rotate, progress, verbose ): """Fixup the eASLRB.""" - def log_msg( msg_type, msg, data=None ): - if not msg_type: - msg_type = "" - if msg_type.endswith( ":detail" ) and not verbose: + def log_msg( msg_type, msg ): + if msg_type in ("progress", "start", None) and not progress: return - if msg_type.endswith( ":progress" ) and not progress: + if msg_type == "verbose" and not verbose: return - print( msg ) - if msg_type == "start": - for k, v in data: - print( "- {:<12} {}".format( k+":", v ) ) - fixup_easlrb( pdf_file, output, optimize_web, rotate, log=log_msg ) + log_msg_stderr( msg_type, msg ) + fixup_mmp_pdf( pdf_file, output, optimize_web, rotate, log=log_msg ) if __name__ == "__main__": main() #pylint: disable=no-value-for-parameter diff --git a/bin/prepare_pdf.py b/asl_rulebook2/bin/prepare_pdf.py similarity index 74% rename from bin/prepare_pdf.py rename to asl_rulebook2/bin/prepare_pdf.py index 313ab47..071767d 100755 --- a/bin/prepare_pdf.py +++ b/asl_rulebook2/bin/prepare_pdf.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -""" Add named destinations to a PDF file. """ +""" Prepare the MMP eASLRB PDF. """ import subprocess import json @@ -8,7 +8,7 @@ import datetime import click -from asl_rulebook2.utils import TempFile +from asl_rulebook2.utils import TempFile, log_msg_stderr # NOTE: "screen" gives significant savings (~65%) but scanned PDF's become very blurry. The main MMP eASLRB # is not too bad, but some images are also a bit unclear. 
"ebook" gives no savings for scanned PDF's, but @@ -23,22 +23,8 @@ _COMPRESSION_CHOICES = [ # --------------------------------------------------------------------- -@click.command() -@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) ) -@click.option( "--title", help="Document title." ) -@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False), - help="Target definition file." -) -@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." ) -@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False), - help="Output PDF file." -) -@click.option( "--compression", type=click.Choice(_COMPRESSION_CHOICES), default="ebook", - help="Level of compression." -) -@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." ) -def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path ): - """Add named destinations to a PDF file.""" +def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, log_msg ): + """Prepare the MMP eASLRB PDF.""" # load the targets with open( targets_fname, "r" ) as fp: @@ -48,7 +34,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs # compress the PDF if compression and compression != "none": - print( "Compressing the PDF ({})...".format( compression ) ) + log_msg( "progress", "Compressing the PDF ({})...".format( compression ) ) compressed_file.close( delete=False ) args = [ gs_path, "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-dPDFSETTINGS=/{}".format( compression ), @@ -58,11 +44,13 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs start_time = time.time() subprocess.run( args, check=True ) elapsed_time = time.time() - start_time - print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) ) + log_msg( "timestamp", "- Elapsed time: 
{}".format( + datetime.timedelta( seconds=int(elapsed_time) ) ) + ) pdf_file = compressed_file.name # generate the pdfmarks - print( "Generating the pdfmarks..." ) + log_msg( "progress", "Generating the pdfmarks..." ) if title: print( "[ /Title ({})".format( title ), file=pdfmarks_file ) else: @@ -84,8 +72,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs pdfmarks_file.close( delete=False ) # generate the pdfmark'ed document - print( "Generating the pdfmark'ed document..." ) - print( "- {} => {}".format( pdf_file, output_fname ) ) + log_msg( "progress", "Adding targets to the PDF..." ) args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ] args.extend( [ "-o", output_fname ] ) args.extend( [ "-f", pdf_file ] ) @@ -93,9 +80,44 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs start_time = time.time() subprocess.run( args, check=True ) elapsed_time = time.time() - start_time - print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) ) + log_msg( "timestamp", "- Elapsed time: {}".format( + datetime.timedelta( seconds=int(elapsed_time) ) ) + ) # --------------------------------------------------------------------- +@click.command() +@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) ) +@click.option( "--title", help="Document title." ) +@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False), + help="Target definition file." +) +@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." ) +@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False), + help="Output PDF file." +) +@click.option( "--compression", type=click.Choice(_COMPRESSION_CHOICES), default="ebook", + help="Level of compression." +) +@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." 
) +@click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." ) +def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, progress ): + """Prepare the MMP eASLRB PDF.""" + + # initialize + def log_msg( msg_type, msg ): + if msg_type in ("progress", "start", "timestamp", None) and not progress: + return + log_msg_stderr( msg_type, msg ) + + # prepare the PDF + prepare_pdf( + pdf_file, title, + targets_fname, yoffset, + output_fname, compression, + gs_path, + log_msg + ) + if __name__ == "__main__": main() #pylint: disable=no-value-for-parameter diff --git a/asl_rulebook2/extract/all.py b/asl_rulebook2/extract/all.py index 581c905..5cb4ab1 100755 --- a/asl_rulebook2/extract/all.py +++ b/asl_rulebook2/extract/all.py @@ -8,10 +8,11 @@ import importlib import click -from asl_rulebook2.pdf import PdfDoc -from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr +from asl_rulebook2.extract.base import ExtractBase from asl_rulebook2.extract.index import ExtractIndex from asl_rulebook2.extract.content import ExtractContent +from asl_rulebook2.pdf import PdfDoc +from asl_rulebook2.utils import log_msg_stderr # --------------------------------------------------------------------- @@ -34,13 +35,13 @@ class ExtractAll( ExtractBase ): default_args.update( getattr( mod, "_DEFAULT_ARGS" ) ) # extract the index - self.log_msg( "progress", "\nExtracting the index..." ) + self.log_msg( "status", "\nExtracting the index..." ) args = ExtractBase.parse_args( self._args, default_args ) self.extract_index = ExtractIndex( args, self._log ) self.extract_index.extract_index( pdf ) # extract the content - self.log_msg( "progress", "\nExtracting the content..." ) + self.log_msg( "status", "\nExtracting the content..." 
) args = ExtractBase.parse_args( self._args, default_args ) self.extract_content = ExtractContent( args, self._log ) self.extract_content.extract_content( pdf ) @@ -125,13 +126,16 @@ class ExtractAll( ExtractBase ): ) @click.option( "--save-index","save_index_fname", required=True, help="Where to save the extracted index." ) @click.option( "--save-targets","save_targets_fname", required=True, help="Where to save the extracted targets." ) +@click.option( "--save-chapters","save_chapters_fname", required=True, help="Where to save the extracted chaopters." ) @click.option( "--save-footnotes","save_footnotes_fname", required=True, help="Where to save the extracted footnotes." ) -def main( pdf_file, args, progress, output_fmt, save_index_fname, save_targets_fname, save_footnotes_fname ): +def main( pdf_file, args, progress, output_fmt, + save_index_fname, save_targets_fname, save_chapters_fname, save_footnotes_fname +): """Extract everything we need from the MMP eASLRB.""" # extract everything def log_msg( msg_type, msg ): - if msg_type == "progress" and not progress: + if msg_type in ("status", "progress") and not progress: return log_msg_stderr( msg_type, msg ) extract = ExtractAll( args, log_msg ) @@ -142,9 +146,10 @@ def main( pdf_file, args, progress, output_fmt, save_index_fname, save_targets_f # save the results with open( save_index_fname, "w", encoding="utf-8" ) as index_out, \ open( save_targets_fname, "w", encoding="utf-8" ) as targets_out, \ + open( save_chapters_fname, "w", encoding="utf-8" ) as chapters_out, \ open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out: getattr( extract.extract_index, "save_as_"+output_fmt )( index_out ) - getattr( extract.extract_content, "save_as_"+output_fmt )( targets_out, footnotes_out ) + getattr( extract.extract_content, "save_as_"+output_fmt )( targets_out, chapters_out, footnotes_out ) if __name__ == "__main__": main() #pylint: disable=no-value-for-parameter diff --git a/asl_rulebook2/extract/base.py 
b/asl_rulebook2/extract/base.py index a09ce0c..be5a19b 100644 --- a/asl_rulebook2/extract/base.py +++ b/asl_rulebook2/extract/base.py @@ -1,9 +1,5 @@ """ Base class for the extraction classes. """ -import sys - -import click - # --------------------------------------------------------------------- class ExtractBase: @@ -50,11 +46,3 @@ class ExtractBase: return msg = msg.format( *args, **kwargs ) self._log( msg_type, msg ) - -# --------------------------------------------------------------------- - -def log_msg_stderr( msg_type, msg ): - """Log a message to stderr.""" - if msg_type == "warning": - msg = click.style( "WARNING: {}".format( msg ), fg="yellow" ) - click.echo( msg, file=sys.stderr ) diff --git a/asl_rulebook2/extract/content.py b/asl_rulebook2/extract/content.py index 4305264..a809314 100755 --- a/asl_rulebook2/extract/content.py +++ b/asl_rulebook2/extract/content.py @@ -9,9 +9,9 @@ import math import click from pdfminer.layout import LTChar -from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr +from asl_rulebook2.extract.base import ExtractBase from asl_rulebook2.pdf import PdfDoc, PageIterator, PageElemIterator -from asl_rulebook2.utils import parse_page_numbers, fixup_text, append_text, remove_trailing, jsonval +from asl_rulebook2.utils import parse_page_numbers, fixup_text, append_text, remove_trailing, jsonval, log_msg_stderr # NOTE: Characters are laid out individually on the page, and we generally want to process them top-to-bottom, # left-to-right, but in some cases, alignment is messed up (e.g. the bounding boxes don't line up properly @@ -104,7 +104,7 @@ class ExtractContent( ExtractBase ): self._curr_pageid = "{}{}".format( # nb: this is the ASL page# (e.g. 
"A42"), not the PDF page# self._curr_chapter, curr_chapter_pageno ) - self.log_msg( "progress", "- Processing page {} ({})...", page_no, self._curr_pageid ) + self.log_msg( "progress", "- Analyzing page {} ({}).", page_no, self._curr_pageid ) # process each element on the page curr_caption = None diff --git a/asl_rulebook2/extract/index.py b/asl_rulebook2/extract/index.py index d301a2e..e605fda 100755 --- a/asl_rulebook2/extract/index.py +++ b/asl_rulebook2/extract/index.py @@ -8,9 +8,9 @@ import re import click from pdfminer.layout import LTChar -from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr +from asl_rulebook2.extract.base import ExtractBase from asl_rulebook2.pdf import PdfDoc, PageIterator, PageElemIterator -from asl_rulebook2.utils import parse_page_numbers, fixup_text, extract_parens_content, jsonval +from asl_rulebook2.utils import parse_page_numbers, fixup_text, extract_parens_content, jsonval, log_msg_stderr # --------------------------------------------------------------------- @@ -49,7 +49,7 @@ class ExtractIndex( ExtractBase ): if page_no not in page_nos: self.log_msg( "progress", "- Skipping page {}.", page_no ) continue - self.log_msg( "progress", "- Processing page {}...", page_no ) + self.log_msg( "progress", "- Analyzing page {}.", page_no ) # process each element on the page self._prev_y0 = 99999 diff --git a/asl_rulebook2/tests/test_extract.py b/asl_rulebook2/tests/test_extract.py index c17a897..808a1c0 100644 --- a/asl_rulebook2/tests/test_extract.py +++ b/asl_rulebook2/tests/test_extract.py @@ -10,6 +10,7 @@ from asl_rulebook2.extract.index import ExtractIndex from asl_rulebook2.extract.content import ExtractContent from asl_rulebook2.extract.all import ExtractAll from asl_rulebook2.tests import pytest_options +from asl_rulebook2.tests.utils import for_each_easlrb_version # --------------------------------------------------------------------- @@ -34,7 +35,7 @@ def test_extract_index(): assert open( fname, "r", 
encoding="utf-8" ).read() == buf # run the test - _for_each_version( do_test ) + for_each_easlrb_version( do_test ) # --------------------------------------------------------------------- @@ -65,7 +66,7 @@ def test_extract_content(): assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf # run the test - _for_each_version( do_test ) + for_each_easlrb_version( do_test ) # --------------------------------------------------------------------- @@ -101,21 +102,10 @@ def test_extract_all(): assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf # run the test - _for_each_version( do_test ) + for_each_easlrb_version( do_test ) # --------------------------------------------------------------------- -def _for_each_version( func ): - """Run tests for each version of the eASLRB.""" - base_dir = pytest_options.easlrb_path - ncalls = 0 - for name in os.listdir( base_dir ): - dname = os.path.join( base_dir, name ) - if os.path.isfile( os.path.join( dname, "eASLRB.pdf" ) ): - func( dname ) - ncalls += 1 - assert ncalls > 0 - def _check_log_msg( msg_type, msg ): """Check a log message.""" assert msg_type not in ( "warning", "error" ), \ diff --git a/asl_rulebook2/tests/utils.py b/asl_rulebook2/tests/utils.py new file mode 100644 index 0000000..2ec40b6 --- /dev/null +++ b/asl_rulebook2/tests/utils.py @@ -0,0 +1,19 @@ +""" Helper utilities. 
""" + +import os + +from asl_rulebook2.tests import pytest_options + +# --------------------------------------------------------------------- + +def for_each_easlrb_version( func ): + """Run tests for each version of the eASLRB.""" + assert pytest_options.easlrb_path + base_dir = pytest_options.easlrb_path + ncalls = 0 + for name in os.listdir( base_dir ): + dname = os.path.join( base_dir, name ) + if os.path.isfile( os.path.join( dname, "eASLRB.pdf" ) ): + func( dname ) + ncalls += 1 + assert ncalls > 0 diff --git a/asl_rulebook2/utils.py b/asl_rulebook2/utils.py index 5c9db95..d946605 100644 --- a/asl_rulebook2/utils.py +++ b/asl_rulebook2/utils.py @@ -1,5 +1,6 @@ """ Miscellaneous utilities. """ +import sys import os import pathlib import tempfile @@ -8,6 +9,8 @@ import math from io import StringIO from html.parser import HTMLParser +import click + # --------------------------------------------------------------------- class TempFile: @@ -160,6 +163,14 @@ def jsonval( val ): assert False, "Unknown JSON data type: {}".format( type(val) ) return '"???"' +def log_msg_stderr( msg_type, msg ): + """Log a message to stderr.""" + if msg_type == "warning": + msg = click.style( "WARNING: {}".format( msg ), fg="yellow" ) + elif msg_type == "error": + msg = click.style( "ERROR: {}".format( msg ), fg="red" ) + click.echo( msg, file=sys.stderr ) + def change_extn( fname, extn ): """Change a filename's extension.""" return pathlib.Path( fname ).with_suffix( extn ) diff --git a/asl_rulebook2/webapp/__init__.py b/asl_rulebook2/webapp/__init__.py index 40004ab..09054ed 100644 --- a/asl_rulebook2/webapp/__init__.py +++ b/asl_rulebook2/webapp/__init__.py @@ -76,6 +76,7 @@ import asl_rulebook2.webapp.startup #pylint: disable=wrong-import-position,cycli import asl_rulebook2.webapp.content #pylint: disable=wrong-import-position,cyclic-import import asl_rulebook2.webapp.search #pylint: disable=wrong-import-position,cyclic-import import asl_rulebook2.webapp.rule_info #pylint: 
disable=wrong-import-position,cyclic-import +import asl_rulebook2.webapp.prepare #pylint: disable=wrong-import-position,cyclic-import from asl_rulebook2.webapp import globvars #pylint: disable=wrong-import-position,cyclic-import app.before_request( globvars.on_request ) diff --git a/asl_rulebook2/webapp/globvars.py b/asl_rulebook2/webapp/globvars.py index 68717ee..5aa8c13 100644 --- a/asl_rulebook2/webapp/globvars.py +++ b/asl_rulebook2/webapp/globvars.py @@ -9,6 +9,8 @@ from asl_rulebook2.webapp.config.constants import APP_NAME, APP_VERSION cleanup_handlers = [] +socketio_server = None + # --------------------------------------------------------------------- _init_lock = threading.Lock() diff --git a/asl_rulebook2/webapp/main.py b/asl_rulebook2/webapp/main.py index 4914187..cdffe88 100644 --- a/asl_rulebook2/webapp/main.py +++ b/asl_rulebook2/webapp/main.py @@ -8,17 +8,27 @@ import logging from flask import render_template, jsonify, abort from asl_rulebook2.webapp import app, globvars, shutdown_event -from asl_rulebook2.webapp.utils import parse_int +from asl_rulebook2.webapp.utils import parse_int, get_gs_path # --------------------------------------------------------------------- @app.route( "/" ) def main(): """Return the main page.""" - from asl_rulebook2.webapp.asop import user_css_url - return render_template( "index.html", - ASOP_CSS_URL = user_css_url - ) + if app.config.get( "DATA_DIR" ): + # return the main page + from asl_rulebook2.webapp.asop import user_css_url + return render_template( "index.html", + ASOP_CSS_URL = user_css_url + ) + else: + # NOTE: If a data directory has not been configured, this is probably the first time the user + # has run the application, so we show the page that explains how to set things up. + # NOTE: Check for Ghostscript before we start. 
+ args = {} + if get_gs_path(): + args["HAVE_GHOSTSCRIPT"] = 1 + return render_template( "prepare.html", **args ) # --------------------------------------------------------------------- diff --git a/asl_rulebook2/webapp/prepare.py b/asl_rulebook2/webapp/prepare.py new file mode 100644 index 0000000..222df8f --- /dev/null +++ b/asl_rulebook2/webapp/prepare.py @@ -0,0 +1,214 @@ +""" Analyze the MMP eASLRB PDF and prepare the data files. """ + +import threading +import zipfile +import io +import time +import base64 +import traceback +import logging + +from flask import request, send_file, abort, url_for + +from asl_rulebook2.extract.all import ExtractAll +from asl_rulebook2.bin.prepare_pdf import prepare_pdf +from asl_rulebook2.bin.fixup_mmp_pdf import fixup_mmp_pdf +from asl_rulebook2.pdf import PdfDoc +from asl_rulebook2.utils import TempFile +from asl_rulebook2.webapp import app, globvars +from asl_rulebook2.webapp.utils import get_gs_path + +_zip_data_download = None + +_logger = logging.getLogger( "prepare" ) + +# --------------------------------------------------------------------- + +@app.route( "/prepare", methods=["POST"] ) +def prepare_data_files(): + """Prepare the data files.""" + + # initialize + args = dict( request.json ) + download_url = url_for( "download_prepared_data" ) + + # initialize the socketio server + sio = globvars.socketio_server + if not sio: + raise RuntimeError( "The socketio server has not been started." ) + @sio.on( "start" ) + def on_start( data ): #pylint: disable=unused-variable,unused-argument + # start the worker thread that prepares the data files + # NOTE: We don't do this when the POST request comes in, but wait until the client + # tells us it's ready (otherwise, it might miss the first event or two). 
+ def worker(): + try: + _do_prepare_data_files( args, download_url ) + except Exception as ex: #pylint: disable=broad-except + _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() ) + globvars.socketio_server.emit( "error", str(ex) ) + threading.Thread( target=worker, daemon=True ).start() + + return "ok" + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +def _do_prepare_data_files( args, download_url ): + + # initialize + sio = globvars.socketio_server + pdf_data = args.get( "pdfData" ) + if not pdf_data: + # no data was sent - this is a test of logging progress messages. + del args["pdfData"] + _test_progress( **args ) + return + pdf_data = base64.b64decode( pdf_data ) + + def on_done( zip_data ): + global _zip_data_download + _zip_data_download = zip_data + sio.emit( "done", download_url ) + + # check if we should just return a pre-prepared ZIP file (for testing porpoises) + fname = app.config.get( "PREPARED_ZIP" ) + if fname: + with open( fname, "rb" ) as fp: + on_done( fp.read() ) + return + + with TempFile() as input_file, TempFile() as prepared_file: + + # save the PDF file data + input_file.write( pdf_data ) + input_file.close( delete=False ) + _logger.info( "Saved PDF file (#bytes=%d): %s", len(pdf_data), input_file.name ) + + # initialize logging + msg_types = set() + def log_msg( msg_type, msg ): + msg = msg.lstrip() + if msg_type == "status": + _logger.info( "[STATUS]: %s", msg ) + elif msg_type == "warning": + _logger.warning( "[WARNING]: %s", msg ) + elif msg_type == "error": + _logger.error( "[ERROR]: %s", msg ) + else: + _logger.debug( "[%s] %s", msg_type, msg ) + if msg.startswith( "- " ): + msg = msg[2:] + sio.emit( msg_type, msg ) + msg_types.add( msg_type ) + + # NOTE: The plan was to allow the user to change the default parameters in the UI, + # but this can be done (ahem) later. For now, if they really need to change something, + # they can prepare the data files from the command-line. 
+ args = [] + + # extract everything we need from the PDF + log_msg( "status", "Opening the PDF..." ) + extract = ExtractAll( args, log_msg ) + with PdfDoc( input_file.name ) as pdf: + extract.extract_all( pdf ) + index_buf = io.StringIO() + extract.extract_index.save_as_json( index_buf ) + targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO() + extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf ) + file_data = { + "index": index_buf.getvalue(), + "targets": targets_buf.getvalue(), + "chapters": chapters_buf.getvalue(), + "footnotes": footnotes_buf.getvalue(), + } + + # prepare the PDF + gs_path = get_gs_path() + if not gs_path: + raise RuntimeError( "Ghostscript is not available." ) + with TempFile( mode="w", encoding="utf-8" ) as targets_file: + log_msg( "status", "Preparing the final PDF..." ) + # save the extracted targets + targets_file.temp_file.write( file_data["targets"] ) + targets_file.close( delete=False ) + # prepare the PDF + prepared_file.close( delete=False ) + prepare_pdf( input_file.name, + "ASL Rulebook", + targets_file.name, 5, + prepared_file.name, "ebook", + gs_path, + log_msg + ) + + # fixup the PDF + with TempFile() as fixedup_file: + log_msg( "status", "Fixing up the final PDF..." ) + fixedup_file.close( delete=False ) + fixup_mmp_pdf( prepared_file.name, + fixedup_file.name, + True, True, + log_msg + ) + # read the final PDF data + with open( fixedup_file.name, "rb" ) as fp: + pdf_data = fp.read() + + # prepare the ZIP for the user to download + log_msg( "status", "Preparing the download ZIP..." 
) + zip_data = io.BytesIO() + with zipfile.ZipFile( zip_data, "w", zipfile.ZIP_DEFLATED ) as zip_file: + fname_stem = "ASL Rulebook" + zip_file.writestr( fname_stem+".pdf", pdf_data ) + for key in file_data: + fname = "{}.{}".format( fname_stem, key ) + zip_file.writestr( fname, file_data[key] ) + zip_data = zip_data.getvalue() + + # notify the front-end that we're done + on_done( zip_data ) + _logger.debug( "Message types seen: %s", + " ; ".join( sorted( str(mt) for mt in msg_types ) ) + ) + + # NOTE: We don't bother shutting down the socketio server, since the user + # has to restart the server, using the newly-prepared data files. + +# --------------------------------------------------------------------- + +@app.route( "/prepare/download" ) +def download_prepared_data(): + """Download the prepared data ZIP file.""" + if not _zip_data_download: + abort( 404 ) + return send_file( + io.BytesIO( _zip_data_download ), + as_attachment=True, attachment_filename="asl-rulebook2.zip" + ) + +# --------------------------------------------------------------------- + +def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.1 ): + """Test progress messages.""" + + # initialize + warnings = [ int(w) for w in warnings.split(",") ] if warnings else [] + errors = [ int(e) for e in errors.split(",") ] if errors else [] + + # generate progress messages + sio = globvars.socketio_server + status_no = 0 + for i in range( int(npasses) ): + # check if we should start a new status block + if i % status == 0: + status_no += 1 + sio.emit( "status", "Status #{}".format( status_no ) ) + # issue the next progress message + if 1+i in warnings: + sio.emit( "warning", "Progress {}: warning".format( 1+i ) ) + if 1+i in errors: + sio.emit( "error", "Progress {}: error".format( 1+i ) ) + else: + sio.emit( "progress", "Progress {}.".format( 1+i ) ) + time.sleep( float( delay ) ) + sio.emit( "done" ) diff --git a/asl_rulebook2/webapp/run_server.py 
def init_prepare_socketio( flask_app ):
    """Set up the socketio server that is used while preparing the data files.

    The Flask app's WSGI handler is wrapped by the socketio middleware, and the new
    socketio server is recorded in globvars, so that other code can emit events.
    """
    # NOTE: This is only set up when it's actually needed (i.e. there is no data directory,
    # and the user has to prepare their data files), instead of having it always running
    # just in case the user might want it :-/
    # NOTE: socketio and threads don't get along very well, and getting events sent
    # to the client is tricky if something like eventlet is being used:
    #   https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread
    #   https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads
    # Native threads are less-performant, but that's not a problem for us, and they work :-/
    import socketio
    server = socketio.Server( async_mode="threading" )
    # wrap the existing WSGI app in the socketio middleware
    wrapped_app = socketio.WSGIApp( server, flask_app.wsgi_app )
    flask_app.wsgi_app = wrapped_app
    # make the server available to the rest of the webapp
    globvars.socketio_server = server
img { margin-top: 3px ; height: 60px ; } + +#progress-panel { + flex-grow: 1 ; overflow-y: auto ; + border: 1px solid black ; border-radius: 5px ; padding: 10px ; + font-family: monospace ; font-size: 90% ; +} +#progress-panel .progress { font-style: italic ; } +#progress-panel .status { margin: 5px 0 ; } +#progress-panel .status:first-of-type { margin-top: 0 ; } +#progress-panel .status table { margin-left: 2px ; } +#progress-panel .status table td { vertical-align: top ; } +#progress-panel .status img.icon { height: 15px ; margin: 1px 3px 0 0 ; } + +#download-panel { + position: fixed ; bottom: 18px ; right: 18px ; width: 75% ; + border: 1px solid black ; border-radius: 5px ; background: white ; + padding: 10px ; +} +#download-panel button { height: 40px ; margin-right: 10px ; padding: 5px ; } +#download-panel button img { height: 30px ; } diff --git a/asl_rulebook2/webapp/static/images/download.png b/asl_rulebook2/webapp/static/images/download.png new file mode 100644 index 0000000..9e7ea54 Binary files /dev/null and b/asl_rulebook2/webapp/static/images/download.png differ diff --git a/asl_rulebook2/webapp/static/images/eASLRB.png b/asl_rulebook2/webapp/static/images/eASLRB.png new file mode 100644 index 0000000..24604ab Binary files /dev/null and b/asl_rulebook2/webapp/static/images/eASLRB.png differ diff --git a/asl_rulebook2/webapp/static/images/error.png b/asl_rulebook2/webapp/static/images/error.png new file mode 100644 index 0000000..f161e9e Binary files /dev/null and b/asl_rulebook2/webapp/static/images/error.png differ diff --git a/asl_rulebook2/webapp/static/images/warning.png b/asl_rulebook2/webapp/static/images/warning.png new file mode 100644 index 0000000..26f754e Binary files /dev/null and b/asl_rulebook2/webapp/static/images/warning.png differ diff --git a/asl_rulebook2/webapp/static/prepare.js b/asl_rulebook2/webapp/static/prepare.js new file mode 100644 index 0000000..e3076a9 --- /dev/null +++ b/asl_rulebook2/webapp/static/prepare.js @@ -0,0 
+1,364 @@ +// create the main application +export const gPrepareApp = Vue.createApp( { //eslint-disable-line no-undef + template: "", +} ) ; +$(document).ready( () => { + gPrepareApp.mount( "#prepare-app" ) ; +} ) ; + +// parse any URL parameters +let gUrlParams = new URLSearchParams( window.location.search.substring(1) ) ; + +let gProgressPanel = null ; + +// -------------------------------------------------------------------- + +gPrepareApp.component( "prepare-app", { + + data() { return { + isLoaded: false, + isProcessing: false, + downloadUrl: null, + fatalErrorMsg: gHaveGhostscript ? null : "Ghostscript is not available.", //eslint-disable-line no-undef + fatalErrorIconUrl: makeImageUrl( "error.png" ), + } ; }, + + template: ` +
+ +
+ + {{fatalErrorMsg}} +
+ + + +