diff --git a/asl_rulebook2/bin/fixup_mmp_pdf.py b/asl_rulebook2/bin/fixup_mmp_pdf.py index 867574f..24f5250 100755 --- a/asl_rulebook2/bin/fixup_mmp_pdf.py +++ b/asl_rulebook2/bin/fixup_mmp_pdf.py @@ -2,6 +2,8 @@ """ Fixup issues in the MMP eASLRB. """ import os +import threading +import time from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination import click @@ -10,7 +12,7 @@ from asl_rulebook2.utils import log_msg_stderr # --------------------------------------------------------------------- -def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None ): +def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None, relinq=None ): """Fixup the MMP eASLRB PDF.""" # NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ, @@ -91,15 +93,28 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None # save the updated PDF log_msg( "progress", "Saving the fixed-up PDF..." ) - # NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password - # when opening the file (but it will be marked as "SECURE" in the UI). + # NOTE: Setting a blank password will encrypt the file, but doesn't require the user + # to enter a password when opening the file (but it will be marked as "SECURE" in the UI). enc = Encryption( owner="", user="" ) - def save_progress( pct ): - if pct > 0 and pct % 10 == 0: - log_msg( "verbose", "- Saved {}%.", pct ) - pdf.save( output_fname, encryption=enc, linearize=optimize_web, - progress = save_progress + # NOTE: We can't log progress messages if we're being run from the webapp, since log_msg() + # will try to relinquish the CPU, but it will be in the wrong thread. We could disable this, + # but it's more trouble than it's worth. + thread = SavePdfThread( pdf, + output_fname, enc, optimize_web, + log_msg = None if relinq else log_msg ) + thread.start() + pass_no = 0 + while True: + if thread.done: + break + pass_no += 1 + if relinq: + relinq( "Saving PDF: {}".format( pass_no ), delay=1 ) + else: + time.sleep( 1 ) + if thread.exc: + raise thread.exc # compare the file sizes old_size = os.path.getsize( fname ) @@ -112,6 +127,40 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None abs(ratio), "larger" if ratio > 0 else "smaller" ) +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +class SavePdfThread( threading.Thread ): + """Save the PDF in a background thread.""" + + def __init__( self, pdf, fname, enc, optimize_web, log_msg ): + # initialize + super().__init__( daemon=True ) + self.pdf = pdf + self.fname = fname + self.enc = enc + self.optimize_web = optimize_web + self._log_msg = log_msg + # initialize + self.done = False + self.exc = None + + def run( self ): + """Run the worker thread.""" + try: + self.pdf.save( self.fname, + encryption=self.enc, linearize=self.optimize_web, + progress=self._log_progress + ) + except Exception as ex: #pylint: disable=broad-except + self.exc = ex + finally: + self.done = True + + def _log_progress( self, pct ): + """Log progress.""" + if self._log_msg and pct > 0 and pct % 10 == 0: + self._log_msg( "verbose", "- Saved {}%.", pct ) + # --------------------------------------------------------------------- @click.command() diff --git a/asl_rulebook2/bin/prepare_pdf.py b/asl_rulebook2/bin/prepare_pdf.py index 965f9ee..b90617c 100755 --- a/asl_rulebook2/bin/prepare_pdf.py +++ b/asl_rulebook2/bin/prepare_pdf.py @@ -23,7 +23,12 @@ _COMPRESSION_CHOICES = [ # --------------------------------------------------------------------- -def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, log_msg ): +def prepare_pdf( pdf_file, + title, targets_fname, vo_notes_fname, yoffset, + output_fname, compression, + gs_path, + log_msg, relinq=None +): """Prepare the MMP eASLRB PDF.""" # load the targets @@ -47,7 +52,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output pdf_file ] start_time = time.time() - subprocess.run( args, check=True ) + _run_subprocess( args, "compression", relinq ) elapsed_time = time.time() - start_time log_msg( "timestamp", "- Elapsed time: {}".format( datetime.timedelta( seconds=int(elapsed_time) ) ) @@ -101,7 +106,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output args.extend( [ "-f", pdf_file ] ) args.append( pdfmarks_file.name ) start_time = time.time() - subprocess.run( args, check=True ) + _run_subprocess( args, "pdfmarks", relinq ) elapsed_time = time.time() - start_time log_msg( "timestamp", "- Elapsed time: {}".format( datetime.timedelta( seconds=int(elapsed_time) ) ) @@ -109,6 +114,33 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output # --------------------------------------------------------------------- +def _run_subprocess( args, caption, relinq ): + """Run an external process.""" + proc = subprocess.Popen( args ) + try: + pass_no = 0 + while True: + pass_no += 1 + # check if the external process has finished + rc = proc.poll() + if rc is not None: + # yup - check its exit code + if rc != 0: + raise RuntimeError( "Sub-process \"{}\" failed: rc={}".format( caption, rc ) ) + break + # delay for a bit before checking again + if relinq: + relinq( "Waiting for {}: {}".format( caption, pass_no ), delay=1 ) + else: + time.sleep( 1 ) + except ( Exception, KeyboardInterrupt ): + # NOTE: We want to kill the child process if something goes wrong, and while it's not + # 100%-guaranteed that we will get here (e.g. if we get killed), it's good enuf. + proc.terminate() + raise + +# --------------------------------------------------------------------- + @click.command() @click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) ) @click.option( "--title", help="Document title." ) diff --git a/asl_rulebook2/webapp/__init__.py b/asl_rulebook2/webapp/__init__.py index 102946f..53b081f 100644 --- a/asl_rulebook2/webapp/__init__.py +++ b/asl_rulebook2/webapp/__init__.py @@ -50,9 +50,6 @@ def _on_sigint( signum, stack ): #pylint: disable=unused-argument # --------------------------------------------------------------------- -# disable the Flask startup banner -flask.cli.show_server_banner = lambda *args: None - # initialize Flask app = Flask( __name__ ) diff --git a/asl_rulebook2/webapp/prepare.py b/asl_rulebook2/webapp/prepare.py index b8a8d2c..c5b61d7 100644 --- a/asl_rulebook2/webapp/prepare.py +++ b/asl_rulebook2/webapp/prepare.py @@ -1,9 +1,7 @@ """ Analyze the MMP eASLRB PDF and prepare the data files. """ -import threading import zipfile import io -import time import base64 import traceback import logging @@ -33,21 +31,20 @@ def prepare_data_files(): download_url = url_for( "download_prepared_data" ) # initialize the socketio server + # NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives), + # since it might not be ready to receive events, and miss the first few. sio = globvars.socketio_server - if not sio: - raise RuntimeError( "The socketio server has not been started." ) @sio.on( "start" ) - def on_start( data ): #pylint: disable=unused-variable,unused-argument - # start the worker thread that prepares the data files - # NOTE: We don't do this when the POST request comes in, but wait until the client - # tells us it's ready (otherwise, it might miss the first event or two). - def worker(): - try: - _do_prepare_data_files( args, download_url ) - except Exception as ex: #pylint: disable=broad-except - _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() ) - globvars.socketio_server.emit( "error", str(ex) ) - threading.Thread( target=worker, daemon=True ).start() + def on_start(): #pylint: disable=unused-variable + # NOTE: We used to do this in a background thread (when we were using the Flask development server), + # but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously, + # and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings timeout, + # and it disconnects). + try: + _do_prepare_data_files( args, download_url ) + except Exception as ex: #pylint: disable=broad-except + _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() ) + globvars.socketio_server.emit( "error", str(ex) ) return "ok" @@ -100,6 +97,11 @@ def _do_prepare_data_files( args, download_url ): msg = msg[2:] sio.emit( msg_type, msg ) msg_types.add( msg_type ) + # NOTE: There's no particular significance in relinquishing the CPU here, but this function + # is called regularly during processing, so it's a convenient place to do it. + # This function also gets passed into the low-level extract code (as a logging handler), + # which results in that code also relinquishing at regular intervals. + _relinq( msg ) # NOTE: The plan was to allow the user to change the default parameters in the UI, # but this can be done (ahem) later. For now, if they really need to change something, @@ -143,7 +145,8 @@ def _do_prepare_data_files( args, download_url ): targets_file.name, vo_notes_file.name, 5, prepared_file.name, "ebook", gs_path, - log_msg + log_msg, + relinq = _relinq ) # fixup the PDF @@ -153,7 +156,8 @@ def _do_prepare_data_files( args, download_url ): fixup_mmp_pdf( prepared_file.name, fixedup_file.name, False, True, True, - log_msg + log_msg, + relinq = _relinq ) # read the final PDF data with open( fixedup_file.name, "rb" ) as fp: @@ -179,6 +183,10 @@ def _do_prepare_data_files( args, download_url ): # NOTE: We don't bother shutting down the socketio server, since the user # has to restart the server, using the newly-prepared data files. +def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument + """Relinquish the CPU (to keep the webapp server responsive).""" + globvars.socketio_server.sleep( delay ) + # --------------------------------------------------------------------- @app.route( "/prepare/download" ) @@ -215,5 +223,5 @@ def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0. sio.emit( "error", "Progress {}: error".format( 1+i ) ) else: sio.emit( "progress", "Progress {}.".format( 1+i ) ) - time.sleep( float( delay ) ) + _relinq( delay=float(delay) ) sio.emit( "done" ) diff --git a/asl_rulebook2/webapp/run_server.py b/asl_rulebook2/webapp/run_server.py index 7f9a977..03840d3 100755 --- a/asl_rulebook2/webapp/run_server.py +++ b/asl_rulebook2/webapp/run_server.py @@ -7,6 +7,7 @@ import urllib.request import time import glob +import flask_socketio import click from asl_rulebook2.webapp import app, globvars @@ -22,16 +23,16 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ): """Run the webapp server.""" # initialize - port = None + flask_port = None if bind_addr: words = bind_addr.split( ":" ) - host = words[0] + flask_host = words[0] if len(words) > 1: - port = words[1] + flask_port = words[1] else: - host = app.config.get( "FLASK_HOST", "localhost" ) - if not port: - port = app.config.get( "FLASK_PORT_NO" ) + flask_host = app.config.get( "FLASK_HOST", "localhost" ) + if not flask_port: + flask_port = app.config.get( "FLASK_PORT_NO" ) if not flask_debug: flask_debug = app.config.get( "FLASK_DEBUG", False ) @@ -42,9 +43,9 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ): app.config["DATA_DIR"] = data_dir # validate the configuration - if not host: + if not flask_host: raise RuntimeError( "The server host was not set." ) - if not port: + if not flask_port: raise RuntimeError( "The server port was not set." ) # monitor extra files for changes @@ -75,36 +76,47 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ): if force_init_delay > 0: def _start_server(): time.sleep( force_init_delay ) - url = "http://{}:{}".format( host, port ) + url = "http://{}:{}".format( flask_host, flask_port ) _ = urllib.request.urlopen( url ) threading.Thread( target=_start_server, daemon=True ).start() - # check if the user needs to prepare their data files - if not app.config.get( "DATA_DIR" ): - # yup - initialize the socketio server - init_prepare_socketio( app ) - # run the server - app.run( host=host, port=port, debug=flask_debug, - extra_files = extra_files - ) + run_server( flask_host, flask_port, flask_debug, extra_files ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -def init_prepare_socketio( flask_app ): - """Initialize the socketio server needed to prepare the data files.""" - # NOTE: We only set this up if it's needed (i.e. because there is no data directory, - # and the user needs to prepare their data files), rather than always having it running - # on the off-chance that the user might need it :-/ - # NOTE: socketio doesn't really work well with threads, and it's tricky to get it to - # send events to the client if we're using e.g. eventlet: - # https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread - # https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads - # Using native threads is less-performant, but it's not an issue for us, and it works :-/ - import socketio - sio = socketio.Server( async_mode="threading" ) - flask_app.wsgi_app = socketio.WSGIApp( sio, flask_app.wsgi_app ) +def run_server( host, port, debug, extra_files=None ): + """Run the webapp server.""" + + # NOTE: flask-socketio + eventlet handles concurrency differently to the Flask development server, + # and we need to remain responsive, otherwise pings from the socketio client will timeout, and it will + # disconnect (and show a big warning in the UI that the server has gone away). To avoid this, + # we relinquish the CPU regularly, but just in case, we increase the ping timeout (and allow the user + # to increase it even further, if necessary). This should only be an issue when preparing the data files, + # since the main program doesn't use socketio. + # NOTE: Setting the timeout high shouldn't be a problem, since if the server really does go away, + # the connection will be dropped, and the front-end Javascript will detect that immediately. + ping_timeout = app.config.get( "SOCKETIO_PING_TIMEOUT", 30 ) + + # run the server + sio = flask_socketio.SocketIO( app, + async_mode = "eventlet", + ping_timeout = ping_timeout + ) globvars.socketio_server = sio + args = { + "debug": debug, + "log_output": False + } + if extra_files: + args.update( { + "use_reloader": True, + "reloader_options": { "extra_files": extra_files }, + } ) + sio.run( app, + host=host, port=port, + **args + ) # --------------------------------------------------------------------- diff --git a/asl_rulebook2/webapp/static/prepare.js b/asl_rulebook2/webapp/static/prepare.js index 8b89ec8..0e536be 100644 --- a/asl_rulebook2/webapp/static/prepare.js +++ b/asl_rulebook2/webapp/static/prepare.js @@ -308,7 +308,7 @@ gPrepareApp.component( "download-panel", { ./run-container.sh --data ...
- You can edit these files directly, if you want to make changes. + You can edit the generated data files directly, if you want to make changes.

If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in $/asl_rulebook2/extract/data/.

`, diff --git a/conftest.py b/conftest.py index 37703f8..aab14af 100644 --- a/conftest.py +++ b/conftest.py @@ -13,6 +13,7 @@ import pytest from flask import url_for from asl_rulebook2.webapp import app +from asl_rulebook2.webapp.run_server import run_server from asl_rulebook2.webapp.tests.control_tests import ControlTests from asl_rulebook2.webapp.tests.utils import wait_for @@ -140,11 +141,6 @@ def _make_webapp(): else: app.config.pop( "FORCE_CACHED_SEARCHDB", None ) app.config[ "IGNORE_MISSING_DATA_FILES" ] = True - # check if we will be running the prepare tests - if _pytest_options.enable_prepare: - # yup - initialize the socketio server - from asl_rulebook2.webapp.run_server import init_prepare_socketio - init_prepare_socketio( app ) # NOTE: We run the server thread as a daemon so that it won't prevent the tests from finishing # when they're done. However, this makes it difficult to know when to shut the server down, # and, in particular, clean up the gRPC service. We send an EndTests message at the end of each test, @@ -152,7 +148,7 @@ def _make_webapp(): # or otherwise finish eearly before they get a chance to send the EndTests message), but we can # live with it. thread = threading.Thread( - target = lambda: app.run( host="0.0.0.0", port=_FLASK_WEBAPP_PORT, use_reloader=False ), + target = lambda: run_server( "0.0.0.0", _FLASK_WEBAPP_PORT, False ), daemon = True ) thread.start() diff --git a/requirements.txt b/requirements.txt index 46c87d3..76f28c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,11 @@ # python 3.8.7 flask==1.1.2 -python-socketio==5.2.1 +flask-socketio==5.1.1 +eventlet==0.33.0 pyyaml==5.4.1 lxml==4.6.2 click==7.1.2 pdfminer.six==20201018 pikepdf==2.5.2 - -# NOTE: This is needed for socketio when using the "threading" async mode. -simple-websocket==0.2.0