diff --git a/asl_rulebook2/bin/fixup_mmp_pdf.py b/asl_rulebook2/bin/fixup_mmp_pdf.py
index 867574f..24f5250 100755
--- a/asl_rulebook2/bin/fixup_mmp_pdf.py
+++ b/asl_rulebook2/bin/fixup_mmp_pdf.py
@@ -2,6 +2,8 @@
""" Fixup issues in the MMP eASLRB. """
import os
+import threading
+import time
from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination
import click
@@ -10,7 +12,7 @@ from asl_rulebook2.utils import log_msg_stderr
# ---------------------------------------------------------------------
-def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None ):
+def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None, relinq=None ):
"""Fixup the MMP eASLRB PDF."""
# NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ,
@@ -91,15 +93,28 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
# save the updated PDF
log_msg( "progress", "Saving the fixed-up PDF..." )
- # NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password
- # when opening the file (but it will be marked as "SECURE" in the UI).
+ # NOTE: Setting a blank password will encrypt the file, but doesn't require the user
+ # to enter a password when opening the file (but it will be marked as "SECURE" in the UI).
enc = Encryption( owner="", user="" )
- def save_progress( pct ):
- if pct > 0 and pct % 10 == 0:
- log_msg( "verbose", "- Saved {}%.", pct )
- pdf.save( output_fname, encryption=enc, linearize=optimize_web,
- progress = save_progress
+ # NOTE: We can't log progress messages if we're being run from the webapp, since log_msg()
+ # will try to relinquish the CPU, but it will be in the wrong thread. We could disable this,
+ # but it's more trouble than it's worth.
+ thread = SavePdfThread( pdf,
+ output_fname, enc, optimize_web,
+ log_msg = None if relinq else log_msg
)
+ thread.start()
+ pass_no = 0
+ while True:
+ if thread.done:
+ break
+ pass_no += 1
+ if relinq:
+ relinq( "Saving PDF: {}".format( pass_no ), delay=1 )
+ else:
+ time.sleep( 1 )
+ if thread.exc:
+ raise thread.exc
# compare the file sizes
old_size = os.path.getsize( fname )
@@ -112,6 +127,40 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
abs(ratio), "larger" if ratio > 0 else "smaller"
)
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+class SavePdfThread( threading.Thread ):
+ """Save the PDF in a background thread."""
+
+ def __init__( self, pdf, fname, enc, optimize_web, log_msg ):
+ # initialize
+ super().__init__( daemon=True )
+ self.pdf = pdf
+ self.fname = fname
+ self.enc = enc
+ self.optimize_web = optimize_web
+ self._log_msg = log_msg
+ # initialize the completion state (polled by the thread that started us)
+ self.done = False
+ self.exc = None
+
+ def run( self ):
+ """Run the worker thread."""
+ try:
+ self.pdf.save( self.fname,
+ encryption=self.enc, linearize=self.optimize_web,
+ progress=self._log_progress
+ )
+ except Exception as ex: #pylint: disable=broad-except
+ self.exc = ex
+ finally:
+ self.done = True
+
+ def _log_progress( self, pct ):
+ """Log progress."""
+ if self._log_msg and pct > 0 and pct % 10 == 0:
+ self._log_msg( "verbose", "- Saved {}%.", pct )
+
# ---------------------------------------------------------------------
@click.command()
diff --git a/asl_rulebook2/bin/prepare_pdf.py b/asl_rulebook2/bin/prepare_pdf.py
index 965f9ee..b90617c 100755
--- a/asl_rulebook2/bin/prepare_pdf.py
+++ b/asl_rulebook2/bin/prepare_pdf.py
@@ -23,7 +23,12 @@ _COMPRESSION_CHOICES = [
# ---------------------------------------------------------------------
-def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, log_msg ):
+def prepare_pdf( pdf_file,
+ title, targets_fname, vo_notes_fname, yoffset,
+ output_fname, compression,
+ gs_path,
+ log_msg, relinq=None
+):
"""Prepare the MMP eASLRB PDF."""
# load the targets
@@ -47,7 +52,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
pdf_file
]
start_time = time.time()
- subprocess.run( args, check=True )
+ _run_subprocess( args, "compression", relinq )
elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
@@ -101,7 +106,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
args.extend( [ "-f", pdf_file ] )
args.append( pdfmarks_file.name )
start_time = time.time()
- subprocess.run( args, check=True )
+ _run_subprocess( args, "pdfmarks", relinq )
elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
@@ -109,6 +114,33 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
# ---------------------------------------------------------------------
+def _run_subprocess( args, caption, relinq ):
+ """Run an external process."""
+ proc = subprocess.Popen( args )
+ try:
+ pass_no = 0
+ while True:
+ pass_no += 1
+ # check if the external process has finished
+ rc = proc.poll()
+ if rc is not None:
+ # yup - check its exit code
+ if rc != 0:
+ raise RuntimeError( "Sub-process \"{}\" failed: rc={}".format( caption, rc ) )
+ break
+ # delay for a bit before checking again
+ if relinq:
+ relinq( "Waiting for {}: {}".format( caption, pass_no ), delay=1 )
+ else:
+ time.sleep( 1 )
+ except ( Exception, KeyboardInterrupt ):
+ # NOTE: We want to kill the child process if something goes wrong, and while it's not
+ # 100%-guaranteed that we will get here (e.g. if we get killed), it's good enough.
+ proc.terminate()
+ raise
+
+# ---------------------------------------------------------------------
+
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
diff --git a/asl_rulebook2/webapp/__init__.py b/asl_rulebook2/webapp/__init__.py
index 102946f..53b081f 100644
--- a/asl_rulebook2/webapp/__init__.py
+++ b/asl_rulebook2/webapp/__init__.py
@@ -50,9 +50,6 @@ def _on_sigint( signum, stack ): #pylint: disable=unused-argument
# ---------------------------------------------------------------------
-# disable the Flask startup banner
-flask.cli.show_server_banner = lambda *args: None
-
# initialize Flask
app = Flask( __name__ )
diff --git a/asl_rulebook2/webapp/prepare.py b/asl_rulebook2/webapp/prepare.py
index b8a8d2c..c5b61d7 100644
--- a/asl_rulebook2/webapp/prepare.py
+++ b/asl_rulebook2/webapp/prepare.py
@@ -1,9 +1,7 @@
""" Analyze the MMP eASLRB PDF and prepare the data files. """
-import threading
import zipfile
import io
-import time
import base64
import traceback
import logging
@@ -33,21 +31,20 @@ def prepare_data_files():
download_url = url_for( "download_prepared_data" )
# initialize the socketio server
+ # NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives),
+ # since it might not be ready to receive events yet, and could miss the first few.
sio = globvars.socketio_server
- if not sio:
- raise RuntimeError( "The socketio server has not been started." )
@sio.on( "start" )
- def on_start( data ): #pylint: disable=unused-variable,unused-argument
- # start the worker thread that prepares the data files
- # NOTE: We don't do this when the POST request comes in, but wait until the client
- # tells us it's ready (otherwise, it might miss the first event or two).
- def worker():
- try:
- _do_prepare_data_files( args, download_url )
- except Exception as ex: #pylint: disable=broad-except
- _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
- globvars.socketio_server.emit( "error", str(ex) )
- threading.Thread( target=worker, daemon=True ).start()
+ def on_start(): #pylint: disable=unused-variable
+ # NOTE: We used to do this in a background thread (when we were using the Flask development server),
+ # but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously,
+ # and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings time out,
+ # and it disconnects).
+ try:
+ _do_prepare_data_files( args, download_url )
+ except Exception as ex: #pylint: disable=broad-except
+ _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
+ globvars.socketio_server.emit( "error", str(ex) )
return "ok"
@@ -100,6 +97,11 @@ def _do_prepare_data_files( args, download_url ):
msg = msg[2:]
sio.emit( msg_type, msg )
msg_types.add( msg_type )
+ # NOTE: There's no particular significance in relinquishing the CPU here, but this function
+ # is called regularly during processing, so it's a convenient place to do it.
+ # This function also gets passed into the low-level extract code (as a logging handler),
+ # which results in that code also relinquishing at regular intervals.
+ _relinq( msg )
# NOTE: The plan was to allow the user to change the default parameters in the UI,
# but this can be done (ahem) later. For now, if they really need to change something,
@@ -143,7 +145,8 @@ def _do_prepare_data_files( args, download_url ):
targets_file.name, vo_notes_file.name, 5,
prepared_file.name, "ebook",
gs_path,
- log_msg
+ log_msg,
+ relinq = _relinq
)
# fixup the PDF
@@ -153,7 +156,8 @@ def _do_prepare_data_files( args, download_url ):
fixup_mmp_pdf( prepared_file.name,
fixedup_file.name,
False, True, True,
- log_msg
+ log_msg,
+ relinq = _relinq
)
# read the final PDF data
with open( fixedup_file.name, "rb" ) as fp:
@@ -179,6 +183,10 @@ def _do_prepare_data_files( args, download_url ):
# NOTE: We don't bother shutting down the socketio server, since the user
# has to restart the server, using the newly-prepared data files.
+def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument
+ """Relinquish the CPU (to keep the webapp server responsive)."""
+ globvars.socketio_server.sleep( delay )
+
# ---------------------------------------------------------------------
@app.route( "/prepare/download" )
@@ -215,5 +223,5 @@ def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.
sio.emit( "error", "Progress {}: error".format( 1+i ) )
else:
sio.emit( "progress", "Progress {}.".format( 1+i ) )
- time.sleep( float( delay ) )
+ _relinq( delay=float(delay) )
sio.emit( "done" )
diff --git a/asl_rulebook2/webapp/run_server.py b/asl_rulebook2/webapp/run_server.py
index 7f9a977..03840d3 100755
--- a/asl_rulebook2/webapp/run_server.py
+++ b/asl_rulebook2/webapp/run_server.py
@@ -7,6 +7,7 @@ import urllib.request
import time
import glob
+import flask_socketio
import click
from asl_rulebook2.webapp import app, globvars
@@ -22,16 +23,16 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
"""Run the webapp server."""
# initialize
- port = None
+ flask_port = None
if bind_addr:
words = bind_addr.split( ":" )
- host = words[0]
+ flask_host = words[0]
if len(words) > 1:
- port = words[1]
+ flask_port = words[1]
else:
- host = app.config.get( "FLASK_HOST", "localhost" )
- if not port:
- port = app.config.get( "FLASK_PORT_NO" )
+ flask_host = app.config.get( "FLASK_HOST", "localhost" )
+ if not flask_port:
+ flask_port = app.config.get( "FLASK_PORT_NO" )
if not flask_debug:
flask_debug = app.config.get( "FLASK_DEBUG", False )
@@ -42,9 +43,9 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
app.config["DATA_DIR"] = data_dir
# validate the configuration
- if not host:
+ if not flask_host:
raise RuntimeError( "The server host was not set." )
- if not port:
+ if not flask_port:
raise RuntimeError( "The server port was not set." )
# monitor extra files for changes
@@ -75,36 +76,47 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
if force_init_delay > 0:
def _start_server():
time.sleep( force_init_delay )
- url = "http://{}:{}".format( host, port )
+ url = "http://{}:{}".format( flask_host, flask_port )
_ = urllib.request.urlopen( url )
threading.Thread( target=_start_server, daemon=True ).start()
- # check if the user needs to prepare their data files
- if not app.config.get( "DATA_DIR" ):
- # yup - initialize the socketio server
- init_prepare_socketio( app )
-
# run the server
- app.run( host=host, port=port, debug=flask_debug,
- extra_files = extra_files
- )
+ run_server( flask_host, flask_port, flask_debug, extra_files )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-def init_prepare_socketio( flask_app ):
- """Initialize the socketio server needed to prepare the data files."""
- # NOTE: We only set this up if it's needed (i.e. because there is no data directory,
- # and the user needs to prepare their data files), rather than always having it running
- # on the off-chance that the user might need it :-/
- # NOTE: socketio doesn't really work well with threads, and it's tricky to get it to
- # send events to the client if we're using e.g. eventlet:
- # https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread
- # https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads
- # Using native threads is less-performant, but it's not an issue for us, and it works :-/
- import socketio
- sio = socketio.Server( async_mode="threading" )
- flask_app.wsgi_app = socketio.WSGIApp( sio, flask_app.wsgi_app )
+def run_server( host, port, debug, extra_files=None ):
+ """Run the webapp server."""
+
+ # NOTE: flask-socketio + eventlet handles concurrency differently to the Flask development server,
+ # and we need to remain responsive, otherwise pings from the socketio client will time out, and it will
+ # disconnect (and show a big warning in the UI that the server has gone away). To avoid this,
+ # we relinquish the CPU regularly, but just in case, we increase the ping timeout (and allow the user
+ # to increase it even further, if necessary). This should only be an issue when preparing the data files,
+ # since the main program doesn't use socketio.
+ # NOTE: Setting the timeout high shouldn't be a problem, since if the server really does go away,
+ # the connection will be dropped, and the front-end Javascript will detect that immediately.
+ ping_timeout = app.config.get( "SOCKETIO_PING_TIMEOUT", 30 )
+
+ # run the server
+ sio = flask_socketio.SocketIO( app,
+ async_mode = "eventlet",
+ ping_timeout = ping_timeout
+ )
globvars.socketio_server = sio
+ args = {
+ "debug": debug,
+ "log_output": False
+ }
+ if extra_files:
+ args.update( {
+ "use_reloader": True,
+ "reloader_options": { "extra_files": extra_files },
+ } )
+ sio.run( app,
+ host=host, port=port,
+ **args
+ )
# ---------------------------------------------------------------------
diff --git a/asl_rulebook2/webapp/static/prepare.js b/asl_rulebook2/webapp/static/prepare.js
index 8b89ec8..0e536be 100644
--- a/asl_rulebook2/webapp/static/prepare.js
+++ b/asl_rulebook2/webapp/static/prepare.js
@@ -308,7 +308,7 @@ gPrepareApp.component( "download-panel", {
./run-container.sh --data ...
If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in $/asl_rulebook2/extract/data/.