Changed how the webapp is served.

master
Pacman Ghost 2 years ago
parent d1297d8db8
commit 52b0d34a51
  1. 65
      asl_rulebook2/bin/fixup_mmp_pdf.py
  2. 38
      asl_rulebook2/bin/prepare_pdf.py
  3. 3
      asl_rulebook2/webapp/__init__.py
  4. 44
      asl_rulebook2/webapp/prepare.py
  5. 72
      asl_rulebook2/webapp/run_server.py
  6. 2
      asl_rulebook2/webapp/static/prepare.js
  7. 8
      conftest.py
  8. 6
      requirements.txt

@ -2,6 +2,8 @@
""" Fixup issues in the MMP eASLRB. """
import os
import threading
import time
from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination
import click
@ -10,7 +12,7 @@ from asl_rulebook2.utils import log_msg_stderr
# ---------------------------------------------------------------------
def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None ):
def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None, relinq=None ):
"""Fixup the MMP eASLRB PDF."""
# NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ,
@ -91,15 +93,28 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
# save the updated PDF
log_msg( "progress", "Saving the fixed-up PDF..." )
# NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password
# when opening the file (but it will be marked as "SECURE" in the UI).
# NOTE: Setting a blank password will encrypt the file, but doesn't require the user
# to enter a password when opening the file (but it will be marked as "SECURE" in the UI).
enc = Encryption( owner="", user="" )
def save_progress( pct ):
if pct > 0 and pct % 10 == 0:
log_msg( "verbose", "- Saved {}%.", pct )
pdf.save( output_fname, encryption=enc, linearize=optimize_web,
progress = save_progress
# NOTE: We can't log progress messages if we're being run from the webapp, since log_msg()
# will try to relinquish the CPU, but it will be in the wrong thread. We could disable this,
# but it's more trouble than it's worth.
thread = SavePdfThread( pdf,
output_fname, enc, optimize_web,
log_msg = None if relinq else log_msg
)
thread.start()
pass_no = 0
while True:
if thread.done:
break
pass_no += 1
if relinq:
relinq( "Saving PDF: {}".format( pass_no ), delay=1 )
else:
time.sleep( 1 )
if thread.exc:
raise thread.exc
# compare the file sizes
old_size = os.path.getsize( fname )
@ -112,6 +127,40 @@ def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None
abs(ratio), "larger" if ratio > 0 else "smaller"
)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class SavePdfThread( threading.Thread ):
    """Background worker that saves a PDF, so that the caller can keep doing other things.

    The caller starts the thread, then polls "done" until it becomes True. If "exc" has been set
    by then, the save failed, and "exc" holds the exception that was raised.
    """

    def __init__( self, pdf, fname, enc, optimize_web, log_msg ):
        """Remember what has to be saved, and how.

        pdf: the object whose save() method will be called.
        fname: passed to save() as the output filename.
        enc: passed to save() as the "encryption" argument.
        optimize_web: passed to save() as the "linearize" argument.
        log_msg: optional callback for reporting progress (None = don't report progress).
        """
        super().__init__( daemon=True )
        # the save parameters
        self.pdf, self.fname = pdf, fname
        self.enc, self.optimize_web = enc, optimize_web
        self._log_msg = log_msg
        # the outcome of the save (polled by whoever started us)
        self.done, self.exc = False, None

    def run( self ):
        """Save the PDF, recording any failure instead of letting it escape the thread."""
        try:
            self.pdf.save( self.fname,
                encryption = self.enc,
                linearize = self.optimize_web,
                progress = self._log_progress
            )
        except Exception as ex: #pylint: disable=broad-except
            # hold on to the error, so that the caller can re-raise it in its own thread
            self.exc = ex
        finally:
            # NOTE: We always flag completion, even on failure, otherwise the caller would wait forever.
            self.done = True

    def _log_progress( self, pct ):
        """Report save progress (but only if we have a logger, and only at each multiple of 10%)."""
        if not self._log_msg:
            return
        if pct <= 0 or pct % 10 != 0:
            return
        self._log_msg( "verbose", "- Saved {}%.", pct )
# ---------------------------------------------------------------------
@click.command()

@ -23,7 +23,12 @@ _COMPRESSION_CHOICES = [
# ---------------------------------------------------------------------
def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, log_msg ):
def prepare_pdf( pdf_file,
title, targets_fname, vo_notes_fname, yoffset,
output_fname, compression,
gs_path,
log_msg, relinq=None
):
"""Prepare the MMP eASLRB PDF."""
# load the targets
@ -47,7 +52,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
pdf_file
]
start_time = time.time()
subprocess.run( args, check=True )
_run_subprocess( args, "compression", relinq )
elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
@ -101,7 +106,7 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
args.extend( [ "-f", pdf_file ] )
args.append( pdfmarks_file.name )
start_time = time.time()
subprocess.run( args, check=True )
_run_subprocess( args, "pdfmarks", relinq )
elapsed_time = time.time() - start_time
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
@ -109,6 +114,33 @@ def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output
# ---------------------------------------------------------------------
def _run_subprocess( args, caption, relinq ):
"""Run an external process."""
proc = subprocess.Popen( args )
try:
pass_no = 0
while True:
pass_no += 1
# check if the external process has finished
rc = proc.poll()
if rc is not None:
# yup - check its exit code
if rc != 0:
raise RuntimeError( "Sub-process \"{}\" failed: rc={}".format( caption, rc ) )
break
# delay for a bit before checking again
if relinq:
relinq( "Waiting for {}: {}".format( caption, pass_no ), delay=1 )
else:
time.sleep( 1 )
except ( Exception, KeyboardInterrupt ):
# NOTE: We want to kill the child process if something goes wrong, and while it's not
# 100%-guaranteed that we will get here (e.g. if we get killed), it's good enuf.
proc.terminate()
raise
# ---------------------------------------------------------------------
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )

@ -50,9 +50,6 @@ def _on_sigint( signum, stack ): #pylint: disable=unused-argument
# ---------------------------------------------------------------------
# disable the Flask startup banner
flask.cli.show_server_banner = lambda *args: None
# initialize Flask
app = Flask( __name__ )

@ -1,9 +1,7 @@
""" Analyze the MMP eASLRB PDF and prepare the data files. """
import threading
import zipfile
import io
import time
import base64
import traceback
import logging
@ -33,21 +31,20 @@ def prepare_data_files():
download_url = url_for( "download_prepared_data" )
# initialize the socketio server
# NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives),
# since it might not be ready to receive events, and miss the first few.
sio = globvars.socketio_server
if not sio:
raise RuntimeError( "The socketio server has not been started." )
@sio.on( "start" )
def on_start( data ): #pylint: disable=unused-variable,unused-argument
# start the worker thread that prepares the data files
# NOTE: We don't do this when the POST request comes in, but wait until the client
# tells us it's ready (otherwise, it might miss the first event or two).
def worker():
try:
_do_prepare_data_files( args, download_url )
except Exception as ex: #pylint: disable=broad-except
_logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
globvars.socketio_server.emit( "error", str(ex) )
threading.Thread( target=worker, daemon=True ).start()
def on_start(): #pylint: disable=unused-variable
# NOTE: We used to do this in a background thread (when we were using the Flask development server),
# but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously,
# and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings time out,
# and it disconnects).
try:
_do_prepare_data_files( args, download_url )
except Exception as ex: #pylint: disable=broad-except
_logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
globvars.socketio_server.emit( "error", str(ex) )
return "ok"
@ -100,6 +97,11 @@ def _do_prepare_data_files( args, download_url ):
msg = msg[2:]
sio.emit( msg_type, msg )
msg_types.add( msg_type )
# NOTE: There's no particular significance in relinquishing the CPU here, but this function
# is called regularly during processing, so it's a convenient place to do it.
# This function also gets passed into the low-level extract code (as a logging handler),
# which results in that code also relinquishing at regular intervals.
_relinq( msg )
# NOTE: The plan was to allow the user to change the default parameters in the UI,
# but this can be done (ahem) later. For now, if they really need to change something,
@ -143,7 +145,8 @@ def _do_prepare_data_files( args, download_url ):
targets_file.name, vo_notes_file.name, 5,
prepared_file.name, "ebook",
gs_path,
log_msg
log_msg,
relinq = _relinq
)
# fixup the PDF
@ -153,7 +156,8 @@ def _do_prepare_data_files( args, download_url ):
fixup_mmp_pdf( prepared_file.name,
fixedup_file.name,
False, True, True,
log_msg
log_msg,
relinq = _relinq
)
# read the final PDF data
with open( fixedup_file.name, "rb" ) as fp:
@ -179,6 +183,10 @@ def _do_prepare_data_files( args, download_url ):
# NOTE: We don't bother shutting down the socketio server, since the user
# has to restart the server, using the newly-prepared data files.
def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument
    """Give up the CPU for a while, so that the webapp server stays responsive.

    msg: accepted but ignored.
    delay: passed through to the socketio server's sleep() method.
    """
    sio = globvars.socketio_server
    sio.sleep( delay )
# ---------------------------------------------------------------------
@app.route( "/prepare/download" )
@ -215,5 +223,5 @@ def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.
sio.emit( "error", "Progress {}: error".format( 1+i ) )
else:
sio.emit( "progress", "Progress {}.".format( 1+i ) )
time.sleep( float( delay ) )
_relinq( delay=float(delay) )
sio.emit( "done" )

@ -7,6 +7,7 @@ import urllib.request
import time
import glob
import flask_socketio
import click
from asl_rulebook2.webapp import app, globvars
@ -22,16 +23,16 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
"""Run the webapp server."""
# initialize
port = None
flask_port = None
if bind_addr:
words = bind_addr.split( ":" )
host = words[0]
flask_host = words[0]
if len(words) > 1:
port = words[1]
flask_port = words[1]
else:
host = app.config.get( "FLASK_HOST", "localhost" )
if not port:
port = app.config.get( "FLASK_PORT_NO" )
flask_host = app.config.get( "FLASK_HOST", "localhost" )
if not flask_port:
flask_port = app.config.get( "FLASK_PORT_NO" )
if not flask_debug:
flask_debug = app.config.get( "FLASK_DEBUG", False )
@ -42,9 +43,9 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
app.config["DATA_DIR"] = data_dir
# validate the configuration
if not host:
if not flask_host:
raise RuntimeError( "The server host was not set." )
if not port:
if not flask_port:
raise RuntimeError( "The server port was not set." )
# monitor extra files for changes
@ -75,36 +76,47 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
if force_init_delay > 0:
def _start_server():
time.sleep( force_init_delay )
url = "http://{}:{}".format( host, port )
url = "http://{}:{}".format( flask_host, flask_port )
_ = urllib.request.urlopen( url )
threading.Thread( target=_start_server, daemon=True ).start()
# check if the user needs to prepare their data files
if not app.config.get( "DATA_DIR" ):
# yup - initialize the socketio server
init_prepare_socketio( app )
# run the server
app.run( host=host, port=port, debug=flask_debug,
extra_files = extra_files
)
run_server( flask_host, flask_port, flask_debug, extra_files )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def init_prepare_socketio( flask_app ):
    """Set up the socketio server that is used while preparing the data files.

    The Flask app's WSGI entry point is wrapped, so that socketio sees incoming requests first.
    """
    # NOTE: This is only set up when it's actually needed (i.e. there is no data directory,
    # so the user has to prepare their data files), instead of always having it running
    # on the off-chance that the user might need it :-/
    # NOTE: socketio and threads don't get along very well, and getting events sent to the client
    # is tricky if we're using e.g. eventlet:
    #   https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread
    #   https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads
    # Native threads are less-performant, but that's not an issue for us, and it works :-/
    import socketio
    server = socketio.Server( async_mode="threading" )
    wrapped_app = socketio.WSGIApp( server, flask_app.wsgi_app )
    flask_app.wsgi_app = wrapped_app
def run_server( host, port, debug, extra_files=None ):
    """Run the webapp server (using flask-socketio, in eventlet mode).

    host, port: the address to serve on.
    debug: passed through to the socketio server as its "debug" setting.
    extra_files: optional list of files; if set, the reloader is enabled and told to watch them.
    """

    # NOTE: flask-socketio + eventlet handles concurrency differently to the Flask development server,
    # and we have to stay responsive, otherwise pings from the socketio client will time out, and it
    # will disconnect (and show a big warning in the UI that the server has gone away). We avoid this
    # by relinquishing the CPU regularly but, just in case, we also raise the ping timeout (and let
    # the user raise it even further, if necessary). This should only matter when preparing
    # the data files, since the main program doesn't use socketio.
    # NOTE: A high timeout shouldn't be a problem, since if the server really does go away,
    # the connection will be dropped, and the front-end Javascript will detect that immediately.
    sio = flask_socketio.SocketIO( app,
        async_mode = "eventlet",
        ping_timeout = app.config.get( "SOCKETIO_PING_TIMEOUT", 30 )
    )
    globvars.socketio_server = sio

    # figure out how the server should be run
    run_args = dict( host=host, port=port, debug=debug, log_output=False )
    if extra_files:
        run_args[ "use_reloader" ] = True
        run_args[ "reloader_options" ] = { "extra_files": extra_files }

    # run the server
    sio.run( app, **run_args )
# ---------------------------------------------------------------------

@ -308,7 +308,7 @@ gPrepareApp.component( "download-panel", {
<code> ./run-container.sh --data ... </code>
</div>
<div class="info">
You can edit these files directly, if you want to make changes.
You can edit the generated data files directly, if you want to make changes.
<p> If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in <span class="pre">$/asl_rulebook2/extract/data/</span>. </p>
</div>
</div>`,

@ -13,6 +13,7 @@ import pytest
from flask import url_for
from asl_rulebook2.webapp import app
from asl_rulebook2.webapp.run_server import run_server
from asl_rulebook2.webapp.tests.control_tests import ControlTests
from asl_rulebook2.webapp.tests.utils import wait_for
@ -140,11 +141,6 @@ def _make_webapp():
else:
app.config.pop( "FORCE_CACHED_SEARCHDB", None )
app.config[ "IGNORE_MISSING_DATA_FILES" ] = True
# check if we will be running the prepare tests
if _pytest_options.enable_prepare:
# yup - initialize the socketio server
from asl_rulebook2.webapp.run_server import init_prepare_socketio
init_prepare_socketio( app )
# NOTE: We run the server thread as a daemon so that it won't prevent the tests from finishing
# when they're done. However, this makes it difficult to know when to shut the server down,
# and, in particular, clean up the gRPC service. We send an EndTests message at the end of each test,
@ -152,7 +148,7 @@ def _make_webapp():
# or otherwise finish early before they get a chance to send the EndTests message), but we can
# live with it.
thread = threading.Thread(
target = lambda: app.run( host="0.0.0.0", port=_FLASK_WEBAPP_PORT, use_reloader=False ),
target = lambda: run_server( "0.0.0.0", _FLASK_WEBAPP_PORT, False ),
daemon = True
)
thread.start()

@ -1,13 +1,11 @@
# python 3.8.7
flask==1.1.2
python-socketio==5.2.1
flask-socketio==5.1.1
eventlet==0.33.0
pyyaml==5.4.1
lxml==4.6.2
click==7.1.2
pdfminer.six==20201018
pikepdf==2.5.2
# NOTE: This is needed for socketio when using the "threading" async mode.
simple-websocket==0.2.0

Loading…
Cancel
Save