Changed how the webapp is served.

master
Pacman Ghost 3 years ago
parent d1297d8db8
commit 52b0d34a51
  1. 65
      asl_rulebook2/bin/fixup_mmp_pdf.py
  2. 38
      asl_rulebook2/bin/prepare_pdf.py
  3. 3
      asl_rulebook2/webapp/__init__.py
  4. 44
      asl_rulebook2/webapp/prepare.py
  5. 72
      asl_rulebook2/webapp/run_server.py
  6. 2
      asl_rulebook2/webapp/static/prepare.js
  7. 8
      conftest.py
  8. 6
      requirements.txt

@ -2,6 +2,8 @@
""" Fixup issues in the MMP eASLRB. """ """ Fixup issues in the MMP eASLRB. """
import os import os
import threading
import time
from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination
import click import click
@ -10,7 +12,7 @@ from asl_rulebook2.utils import log_msg_stderr
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None ): def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None, relinq=None ):
"""Fixup the MMP eASLRB PDF.""" """Fixup the MMP eASLRB PDF."""
# NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ, # NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ,
@ -91,15 +93,28 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
# save the updated PDF # save the updated PDF
log_msg( "progress", "Saving the fixed-up PDF..." ) log_msg( "progress", "Saving the fixed-up PDF..." )
# NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password # NOTE: Setting a blank password will encrypt the file, but doesn't require the user
# when opening the file (but it will be marked as "SECURE" in the UI). # to enter a password when opening the file (but it will be marked as "SECURE" in the UI).
enc = Encryption( owner="", user="" ) enc = Encryption( owner="", user="" )
def save_progress( pct ): # NOTE: We can't log progress messages if we're being run from the webapp, since log_msg()
if pct > 0 and pct % 10 == 0: # will try to relinquish the CPU, but it will be in the wrong thread. We could disable this,
log_msg( "verbose", "- Saved {}%.", pct ) # but it's more trouble than it's worth.
pdf.save( output_fname, encryption=enc, linearize=optimize_web, thread = SavePdfThread( pdf,
progress = save_progress output_fname, enc, optimize_web,
log_msg = None if relinq else log_msg
) )
thread.start()
pass_no = 0
while True:
if thread.done:
break
pass_no += 1
if relinq:
relinq( "Saving PDF: {}".format( pass_no ), delay=1 )
else:
time.sleep( 1 )
if thread.exc:
raise thread.exc
# compare the file sizes # compare the file sizes
old_size = os.path.getsize( fname ) old_size = os.path.getsize( fname )
@ -112,6 +127,40 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
abs(ratio), "larger" if ratio > 0 else "smaller" abs(ratio), "larger" if ratio > 0 else "smaller"
) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class SavePdfThread( threading.Thread ):
    """Worker thread that writes a PDF out to disk.

    The thread is created as a daemon. Once run() has finished (successfully or not),
    "done" is set to True; if the save raised an exception, it is stored in "exc"
    (otherwise "exc" stays None), so the caller can poll "done" and then check "exc".
    """

    def __init__( self, pdf, fname, enc, optimize_web, log_msg ):
        """Remember what needs to be saved, and how.

        pdf:          object whose save() method will be called.
        fname:        passed to save() as the output file.
        enc:          passed to save() as "encryption".
        optimize_web: passed to save() as "linearize".
        log_msg:      callable used to report progress, or None to stay silent.
        """
        super().__init__( daemon=True )
        # the save parameters
        self.pdf, self.fname = pdf, fname
        self.enc, self.optimize_web = enc, optimize_web
        self._log_msg = log_msg
        # the outcome of the save (updated by run())
        self.exc = None
        self.done = False

    def run( self ):
        """Save the PDF, recording any error instead of letting it escape the thread."""
        try:
            save_args = {
                "encryption": self.enc,
                "linearize": self.optimize_web,
                "progress": self._log_progress,
            }
            self.pdf.save( self.fname, **save_args )
        except Exception as ex: #pylint: disable=broad-except
            # hand the error over to whoever is waiting for us
            self.exc = ex
        finally:
            # NOTE: This is always set, so the caller never waits forever.
            self.done = True

    def _log_progress( self, pct ):
        """Report save progress (only at non-zero multiples of 10%, and only if we have a logger)."""
        if not self._log_msg:
            return
        if not ( pct > 0 and pct % 10 == 0 ):
            return
        self._log_msg( "verbose", "- Saved {}%.", pct )
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
@click.command() @click.command()

@ -23,7 +23,12 @@ _COMPRESSION_CHOICES = [
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, log_msg ): def prepare_pdf( pdf_file,
title, targets_fname, vo_notes_fname, yoffset,
output_fname, compression,
gs_path,
log_msg, relinq=None
):
"""Prepare the MMP eASLRB PDF.""" """Prepare the MMP eASLRB PDF."""
# load the targets # load the targets
@ -47,7 +52,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
pdf_file pdf_file
] ]
start_time = time.time() start_time = time.time()
subprocess.run( args, check=True ) _run_subprocess( args, "compression", relinq )
elapsed_time = time.time() - start_time elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format( log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) ) datetime.timedelta( seconds=int(elapsed_time) ) )
@ -101,7 +106,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
args.extend( [ "-f", pdf_file ] ) args.extend( [ "-f", pdf_file ] )
args.append( pdfmarks_file.name ) args.append( pdfmarks_file.name )
start_time = time.time() start_time = time.time()
subprocess.run( args, check=True ) _run_subprocess( args, "pdfmarks", relinq )
elapsed_time = time.time() - start_time elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format( log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) ) datetime.timedelta( seconds=int(elapsed_time) ) )
@ -109,6 +114,33 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def _run_subprocess( args, caption, relinq ):
    """Run an external process, and wait for it to finish.

    args:    the command line to run (passed to subprocess.Popen).
    caption: short name for the process, used in status/error messages.
    relinq:  optional callable, invoked as relinq( msg, delay=1 ) between polls of the child
             process; if not set, we just sleep for a second between polls.

    Raises RuntimeError if the process exits with a non-zero exit code. If anything goes wrong
    while we are waiting (including a Ctrl-C), the child process is stopped and reaped,
    and the original exception is re-raised.
    """
    proc = subprocess.Popen( args )
    try:
        pass_no = 0
        while True:
            pass_no += 1
            # check if the external process has finished
            rc = proc.poll()
            if rc is not None:
                break
            # delay for a bit before checking again
            if relinq:
                relinq( "Waiting for {}: {}".format( caption, pass_no ), delay=1 )
            else:
                time.sleep( 1 )
    except BaseException:
        # NOTE: We want to stop the child process if something goes wrong, and while it's not
        # 100%-guaranteed that we will get here (e.g. if we get killed), it's good enough.
        # We catch BaseException so that this also happens for things like KeyboardInterrupt.
        proc.terminate()
        # NOTE: We wait for the child process to actually exit, otherwise it never gets reaped
        # (and if it ignores the terminate request, it would keep running in the background).
        try:
            proc.wait( timeout=5 )
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise
    # the external process has finished - check its exit code
    # NOTE: We do this outside the try block, since there is nothing to clean up at this point.
    if rc != 0:
        raise RuntimeError( "Sub-process \"{}\" failed: rc={}".format( caption, rc ) )
# ---------------------------------------------------------------------
@click.command() @click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) ) @click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." ) @click.option( "--title", help="Document title." )

@ -50,9 +50,6 @@ def _on_sigint( signum, stack ): #pylint: disable=unused-argument
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
# disable the Flask startup banner
flask.cli.show_server_banner = lambda *args: None
# initialize Flask # initialize Flask
app = Flask( __name__ ) app = Flask( __name__ )

@ -1,9 +1,7 @@
""" Analyze the MMP eASLRB PDF and prepare the data files. """ """ Analyze the MMP eASLRB PDF and prepare the data files. """
import threading
import zipfile import zipfile
import io import io
import time
import base64 import base64
import traceback import traceback
import logging import logging
@ -33,21 +31,20 @@ def prepare_data_files():
download_url = url_for( "download_prepared_data" ) download_url = url_for( "download_prepared_data" )
# initialize the socketio server # initialize the socketio server
# NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives),
# since it might not be ready to receive events, and miss the first few.
sio = globvars.socketio_server sio = globvars.socketio_server
if not sio:
raise RuntimeError( "The socketio server has not been started." )
@sio.on( "start" ) @sio.on( "start" )
def on_start( data ): #pylint: disable=unused-variable,unused-argument def on_start(): #pylint: disable=unused-variable
# start the worker thread that prepares the data files # NOTE: We used to do this in a background thread (when we were using the Flask development server),
# NOTE: We don't do this when the POST request comes in, but wait until the client # but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously,
# tells us it's ready (otherwise, it might miss the first event or two). # and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings timeout,
def worker(): # and it disconnects).
try: try:
_do_prepare_data_files( args, download_url ) _do_prepare_data_files( args, download_url )
except Exception as ex: #pylint: disable=broad-except except Exception as ex: #pylint: disable=broad-except
_logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() ) _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
globvars.socketio_server.emit( "error", str(ex) ) globvars.socketio_server.emit( "error", str(ex) )
threading.Thread( target=worker, daemon=True ).start()
return "ok" return "ok"
@ -100,6 +97,11 @@ def _do_prepare_data_files( args, download_url ):
msg = msg[2:] msg = msg[2:]
sio.emit( msg_type, msg ) sio.emit( msg_type, msg )
msg_types.add( msg_type ) msg_types.add( msg_type )
# NOTE: There's no particular significance in relinquishing the CPU here, but this function
# is called regularly during processing, so it's a convenient place to do it.
# This function also gets passed into the low-level extract code (as a logging handler),
# which results in that code also relinquishing at regular intervals.
_relinq( msg )
# NOTE: The plan was to allow the user to change the default parameters in the UI, # NOTE: The plan was to allow the user to change the default parameters in the UI,
# but this can be done (ahem) later. For now, if they really need to change something, # but this can be done (ahem) later. For now, if they really need to change something,
@ -143,7 +145,8 @@ def _do_prepare_data_files( args, download_url ):
targets_file.name, vo_notes_file.name, 5, targets_file.name, vo_notes_file.name, 5,
prepared_file.name, "ebook", prepared_file.name, "ebook",
gs_path, gs_path,
log_msg log_msg,
relinq = _relinq
) )
# fixup the PDF # fixup the PDF
@ -153,7 +156,8 @@ def _do_prepare_data_files( args, download_url ):
fixup_mmp_pdf( prepared_file.name, fixup_mmp_pdf( prepared_file.name,
fixedup_file.name, fixedup_file.name,
False, True, True, False, True, True,
log_msg log_msg,
relinq = _relinq
) )
# read the final PDF data # read the final PDF data
with open( fixedup_file.name, "rb" ) as fp: with open( fixedup_file.name, "rb" ) as fp:
@ -179,6 +183,10 @@ def _do_prepare_data_files( args, download_url ):
# NOTE: We don't bother shutting down the socketio server, since the user # NOTE: We don't bother shutting down the socketio server, since the user
# has to restart the server, using the newly-prepared data files. # has to restart the server, using the newly-prepared data files.
def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument
    """Give up the CPU for a while, so that the webapp server stays responsive.

    msg:   accepted but ignored (it lets this function be passed around as a callback
           that receives a status message).
    delay: passed to the socketio server's sleep().
    """
    sio = globvars.socketio_server
    sio.sleep( delay )
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
@app.route( "/prepare/download" ) @app.route( "/prepare/download" )
@ -215,5 +223,5 @@ def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.
sio.emit( "error", "Progress {}: error".format( 1+i ) ) sio.emit( "error", "Progress {}: error".format( 1+i ) )
else: else:
sio.emit( "progress", "Progress {}.".format( 1+i ) ) sio.emit( "progress", "Progress {}.".format( 1+i ) )
time.sleep( float( delay ) ) _relinq( delay=float(delay) )
sio.emit( "done" ) sio.emit( "done" )

@ -7,6 +7,7 @@ import urllib.request
import time import time
import glob import glob
import flask_socketio
import click import click
from asl_rulebook2.webapp import app, globvars from asl_rulebook2.webapp import app, globvars
@ -22,16 +23,16 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
"""Run the webapp server.""" """Run the webapp server."""
# initialize # initialize
port = None flask_port = None
if bind_addr: if bind_addr:
words = bind_addr.split( ":" ) words = bind_addr.split( ":" )
host = words[0] flask_host = words[0]
if len(words) > 1: if len(words) > 1:
port = words[1] flask_port = words[1]
else: else:
host = app.config.get( "FLASK_HOST", "localhost" ) flask_host = app.config.get( "FLASK_HOST", "localhost" )
if not port: if not flask_port:
port = app.config.get( "FLASK_PORT_NO" ) flask_port = app.config.get( "FLASK_PORT_NO" )
if not flask_debug: if not flask_debug:
flask_debug = app.config.get( "FLASK_DEBUG", False ) flask_debug = app.config.get( "FLASK_DEBUG", False )
@ -42,9 +43,9 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
app.config["DATA_DIR"] = data_dir app.config["DATA_DIR"] = data_dir
# validate the configuration # validate the configuration
if not host: if not flask_host:
raise RuntimeError( "The server host was not set." ) raise RuntimeError( "The server host was not set." )
if not port: if not flask_port:
raise RuntimeError( "The server port was not set." ) raise RuntimeError( "The server port was not set." )
# monitor extra files for changes # monitor extra files for changes
@ -75,36 +76,47 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
if force_init_delay > 0: if force_init_delay > 0:
def _start_server(): def _start_server():
time.sleep( force_init_delay ) time.sleep( force_init_delay )
url = "http://{}:{}".format( host, port ) url = "http://{}:{}".format( flask_host, flask_port )
_ = urllib.request.urlopen( url ) _ = urllib.request.urlopen( url )
threading.Thread( target=_start_server, daemon=True ).start() threading.Thread( target=_start_server, daemon=True ).start()
# check if the user needs to prepare their data files
if not app.config.get( "DATA_DIR" ):
# yup - initialize the socketio server
init_prepare_socketio( app )
# run the server # run the server
app.run( host=host, port=port, debug=flask_debug, run_server( flask_host, flask_port, flask_debug, extra_files )
extra_files = extra_files
)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def init_prepare_socketio( flask_app ): def run_server( host, port, debug, extra_files=None ):
"""Initialize the socketio server needed to prepare the data files.""" """Run the webapp server."""
# NOTE: We only set this up if it's needed (i.e. because there is no data directory,
# and the user needs to prepare their data files), rather than always having it running # NOTE: flask-socketio + eventlet handles concurrency differently to the Flask development server,
# on the off-chance that the user might need it :-/ # and we need to remain responsive, otherwise pings from the socketio client will timeout, and it will
# NOTE: socketio doesn't really work well with threads, and it's tricky to get it to # disconnect (and show a big warning in the UI that the server has gone away). To avoid this,
# send events to the client if we're using e.g. eventlet: # we relinquish the CPU regularly, but just in case, we increase the ping timeout (and allow the user
# https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread # to increase it even further, if necessary). This should only be an issue when preparing the data files,
# https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads # since the main program doesn't use socketio.
# Using native threads is less-performant, but it's not an issue for us, and it works :-/ # NOTE: Setting the timeout high shouldn't be a problem, since if the server really does go away,
import socketio # the connection will be dropped, and the front-end Javascript will detect that immediately.
sio = socketio.Server( async_mode="threading" ) ping_timeout = app.config.get( "SOCKETIO_PING_TIMEOUT", 30 )
flask_app.wsgi_app = socketio.WSGIApp( sio, flask_app.wsgi_app )
# run the server
sio = flask_socketio.SocketIO( app,
async_mode = "eventlet",
ping_timeout = ping_timeout
)
globvars.socketio_server = sio globvars.socketio_server = sio
args = {
"debug": debug,
"log_output": False
}
if extra_files:
args.update( {
"use_reloader": True,
"reloader_options": { "extra_files": extra_files },
} )
sio.run( app,
host=host, port=port,
**args
)
# --------------------------------------------------------------------- # ---------------------------------------------------------------------

@ -308,7 +308,7 @@ gPrepareApp.component( "download-panel", {
<code> ./run-container.sh --data ... </code> <code> ./run-container.sh --data ... </code>
</div> </div>
<div class="info"> <div class="info">
You can edit these files directly, if you want to make changes. You can edit the generated data files directly, if you want to make changes.
<p> If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in <span class="pre">$/asl_rulebook2/extract/data/</span>. </p> <p> If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in <span class="pre">$/asl_rulebook2/extract/data/</span>. </p>
</div> </div>
</div>`, </div>`,

@ -13,6 +13,7 @@ import pytest
from flask import url_for from flask import url_for
from asl_rulebook2.webapp import app from asl_rulebook2.webapp import app
from asl_rulebook2.webapp.run_server import run_server
from asl_rulebook2.webapp.tests.control_tests import ControlTests from asl_rulebook2.webapp.tests.control_tests import ControlTests
from asl_rulebook2.webapp.tests.utils import wait_for from asl_rulebook2.webapp.tests.utils import wait_for
@ -140,11 +141,6 @@ def _make_webapp():
else: else:
app.config.pop( "FORCE_CACHED_SEARCHDB", None ) app.config.pop( "FORCE_CACHED_SEARCHDB", None )
app.config[ "IGNORE_MISSING_DATA_FILES" ] = True app.config[ "IGNORE_MISSING_DATA_FILES" ] = True
# check if we will be running the prepare tests
if _pytest_options.enable_prepare:
# yup - initialize the socketio server
from asl_rulebook2.webapp.run_server import init_prepare_socketio
init_prepare_socketio( app )
# NOTE: We run the server thread as a daemon so that it won't prevent the tests from finishing # NOTE: We run the server thread as a daemon so that it won't prevent the tests from finishing
# when they're done. However, this makes it difficult to know when to shut the server down, # when they're done. However, this makes it difficult to know when to shut the server down,
# and, in particular, clean up the gRPC service. We send an EndTests message at the end of each test, # and, in particular, clean up the gRPC service. We send an EndTests message at the end of each test,
@ -152,7 +148,7 @@ def _make_webapp():
# or otherwise finish early before they get a chance to send the EndTests message), but we can # or otherwise finish early before they get a chance to send the EndTests message), but we can
# live with it. # live with it.
thread = threading.Thread( thread = threading.Thread(
target = lambda: app.run( host="0.0.0.0", port=_FLASK_WEBAPP_PORT, use_reloader=False ), target = lambda: run_server( "0.0.0.0", _FLASK_WEBAPP_PORT, False ),
daemon = True daemon = True
) )
thread.start() thread.start()

@ -1,13 +1,11 @@
# python 3.8.7 # python 3.8.7
flask==1.1.2 flask==1.1.2
python-socketio==5.2.1 flask-socketio==5.1.1
eventlet==0.33.0
pyyaml==5.4.1 pyyaml==5.4.1
lxml==4.6.2 lxml==4.6.2
click==7.1.2 click==7.1.2
pdfminer.six==20201018 pdfminer.six==20201018
pikepdf==2.5.2 pikepdf==2.5.2
# NOTE: This is needed for socketio when using the "threading" async mode.
simple-websocket==0.2.0

Loading…
Cancel
Save