You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
227 lines
8.7 KiB
227 lines
8.7 KiB
""" Analyze the MMP eASLRB PDF and prepare the data files. """
|
|
|
|
import zipfile
|
|
import io
|
|
import base64
|
|
import traceback
|
|
import logging
|
|
|
|
from flask import request, send_file, abort, url_for
|
|
|
|
from asl_rulebook2.extract.all import ExtractAll
|
|
from asl_rulebook2.bin.prepare_pdf import prepare_pdf
|
|
from asl_rulebook2.bin.fixup_mmp_pdf import fixup_mmp_pdf
|
|
from asl_rulebook2.pdf import PdfDoc
|
|
from asl_rulebook2.utils import TempFile
|
|
from asl_rulebook2.webapp import app, globvars
|
|
from asl_rulebook2.webapp.utils import get_gs_path
|
|
|
|
_zip_data_download = None
|
|
|
|
_logger = logging.getLogger( "prepare" )
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
@app.route( "/prepare", methods=["POST"] )
def prepare_data_files():
    """Prepare the data files.

    The POST'ed JSON object holds the arguments for the run. No processing happens
    in this request; it is kicked off later, when the client sends a "start" socketio event.

    Returns "ok" once the "start" handler has been registered, or aborts with a 400
    if the request body is not a JSON object.
    """

    # initialize
    # NOTE: We validate the payload here, since a missing/non-JSON body would otherwise
    # blow up inside dict() with an opaque TypeError (and a 500 response).
    payload = request.get_json( silent=True )
    if not isinstance( payload, dict ):
        abort( 400 )
    args = dict( payload )
    download_url = url_for( "download_prepared_data" )

    # initialize the socketio server
    # NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives),
    # since it might not be ready to receive events, and miss the first few.
    sio = globvars.socketio_server
    @sio.on( "start" )
    def on_start(): #pylint: disable=unused-variable
        # NOTE: We used to do this in a background thread (when we were using the Flask development server),
        # but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously,
        # and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings timeout,
        # and it disconnects).
        try:
            _do_prepare_data_files( args, download_url )
        except Exception as ex: #pylint: disable=broad-except
            # log the full traceback server-side, but only send the message to the client
            _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
            globvars.socketio_server.emit( "error", str(ex) )

    return "ok"
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
def _do_prepare_data_files( args, download_url ):
    """Do the work of preparing the data files.

    args:         The arguments sent by the client. "pdfData" holds the base64-encoded PDF;
                  if it is missing or empty, the remaining arguments are passed
                  to _test_progress() instead (NOTE: "args" is modified in this case).
    download_url: The URL sent to the client (in the "done" event) when the ZIP is ready.

    Progress is reported to the client via socketio events. Raises a RuntimeError
    if Ghostscript is not available.
    """

    # initialize
    sio = globvars.socketio_server
    pdf_data = args.get( "pdfData" )
    if not pdf_data:
        # no data was sent - this is a test of logging progress messages.
        # NOTE: We use pop() (not del), since the key might not be there at all.
        args.pop( "pdfData", None )
        _test_progress( **args )
        return
    pdf_data = base64.b64decode( pdf_data )

    def on_done( zip_data ):
        """Save the ZIP data for download, and notify the front-end."""
        global _zip_data_download
        _zip_data_download = zip_data
        sio.emit( "done", download_url )

    # check if we should just return a pre-prepared ZIP file (for testing purposes)
    fname = app.config.get( "PREPARED_ZIP" )
    if fname:
        with open( fname, "rb" ) as fp:
            on_done( fp.read() )
        return

    with TempFile() as input_file, TempFile() as prepared_file:

        # save the PDF file data
        input_file.write( pdf_data )
        input_file.close( delete=False )
        _logger.info( "Saved PDF file (#bytes=%d): %s", len(pdf_data), input_file.name )

        # initialize logging
        msg_types = set()
        def log_msg( msg_type, msg ):
            """Log a message, and forward it to the front-end."""
            msg = msg.lstrip()
            if msg_type == "status":
                _logger.info( "[STATUS]: %s", msg )
            elif msg_type == "warning":
                _logger.warning( "[WARNING]: %s", msg )
            elif msg_type == "error":
                _logger.error( "[ERROR]: %s", msg )
            else:
                _logger.debug( "[%s] %s", msg_type, msg )
            # the front-end doesn't get the leading bullet
            if msg.startswith( "- " ):
                msg = msg[2:]
            sio.emit( msg_type, msg )
            msg_types.add( msg_type )
            # NOTE: There's no particular significance in relinquishing the CPU here, but this function
            # is called regularly during processing, so it's a convenient place to do it.
            # This function also gets passed into the low-level extract code (as a logging handler),
            # which results in that code also relinquishing at regular intervals.
            _relinq( msg )

        # NOTE: The plan was to allow the user to change the default parameters in the UI,
        # but this can be done (ahem) later. For now, if they really need to change something,
        # they can prepare the data files from the command-line.
        # NOTE: This is kept separate from the "args" passed in to us (which it used to shadow).
        extract_args = []

        # extract everything we need from the PDF
        log_msg( "status", "Opening the PDF..." )
        extract = ExtractAll( extract_args, log_msg )
        with PdfDoc( input_file.name ) as pdf:
            extract.extract_all( pdf )
        index_buf = io.StringIO()
        extract.extract_index.save_as_json( index_buf )
        targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
        vo_notes_buf = io.StringIO()
        extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf, vo_notes_buf )
        # NOTE: The keys are used as the file extensions in the download ZIP.
        file_data = {
            "index": index_buf.getvalue(),
            "targets": targets_buf.getvalue(),
            "chapters": chapters_buf.getvalue(),
            "footnotes": footnotes_buf.getvalue(),
            "vo-notes": vo_notes_buf.getvalue(),
        }

        # prepare the PDF
        gs_path = get_gs_path()
        if not gs_path:
            raise RuntimeError( "Ghostscript is not available." )
        with TempFile( mode="w", encoding="utf-8" ) as targets_file, \
             TempFile( mode="w", encoding="utf-8" ) as vo_notes_file:
            log_msg( "status", "Preparing the final PDF..." )
            # save the extracted targets
            targets_file.temp_file.write( file_data["targets"] )
            targets_file.close( delete=False )
            vo_notes_file.temp_file.write( file_data["vo-notes"] )
            vo_notes_file.close( delete=False )
            # prepare the PDF
            prepared_file.close( delete=False )
            prepare_pdf( input_file.name,
                "ASL Rulebook",
                targets_file.name, vo_notes_file.name, 5,
                prepared_file.name, "ebook",
                gs_path,
                log_msg,
                relinq = _relinq
            )

        # fixup the PDF
        with TempFile() as fixedup_file:
            log_msg( "status", "Fixing up the final PDF..." )
            fixedup_file.close( delete=False )
            fixup_mmp_pdf( prepared_file.name,
                fixedup_file.name,
                False, True, True,
                log_msg,
                relinq = _relinq
            )
            # read the final PDF data
            with open( fixedup_file.name, "rb" ) as fp:
                pdf_data = fp.read()

    # prepare the ZIP for the user to download
    log_msg( "status", "Preparing the download ZIP..." )
    zip_buf = io.BytesIO()
    with zipfile.ZipFile( zip_buf, "w", zipfile.ZIP_DEFLATED ) as zip_file:
        fname_stem = "ASL Rulebook"
        zip_file.writestr( fname_stem+".pdf", pdf_data )
        for key, fdata in file_data.items():
            entry_name = "{}.{}".format( fname_stem, key )
            zip_file.writestr( entry_name, fdata )
    zip_data = zip_buf.getvalue()

    # notify the front-end that we're done
    on_done( zip_data )
    _logger.debug( "Message types seen: %s",
        " ; ".join( sorted( str(mt) for mt in msg_types ) )
    )

    # NOTE: We don't bother shutting down the socketio server, since the user
    # has to restart the server, using the newly-prepared data files.
|
|
|
|
def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument
    """Yield the CPU for a moment, so that the webapp server stays responsive.

    msg:   Ignored (accepted so that callers can pass a message through).
    delay: Passed straight through to the socketio server's sleep().
    """
    server = globvars.socketio_server
    server.sleep( delay )
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
@app.route( "/prepare/download" )
def download_prepared_data():
    """Download the prepared data ZIP file.

    Aborts with a 404 if no ZIP data has been saved yet.
    """
    if not _zip_data_download:
        abort( 404 )
    fname = "asl-rulebook2.zip"
    # NOTE: Flask 2.0 renamed the "attachment_filename" parameter to "download_name" (and 2.2 removed
    # the old name), so we try the new name first, then fall back to the old one for older versions.
    # An unknown keyword is rejected with a TypeError before send_file() does anything, and we give
    # each attempt its own buffer anyway.
    try:
        return send_file(
            io.BytesIO( _zip_data_download ),
            as_attachment=True, download_name=fname
        )
    except TypeError:
        return send_file(
            io.BytesIO( _zip_data_download ),
            as_attachment=True, attachment_filename=fname
        )
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.1 ):
    """Test progress messages.

    Sends a stream of fake events to the front-end, then a "done" event.

    npasses:  Number of passes to run (1-based in the messages).
    status:   Start a new status block every this many passes.
    warnings: Comma-separated pass numbers that should generate a warning.
    errors:   Comma-separated pass numbers that should generate an error.
    delay:    How long to sleep (relinquish the CPU) after each pass.

    Numeric arguments may be passed in as numbers or strings. Raises a ValueError
    if "status" is zero, or if a value can't be converted to a number.
    """

    # initialize
    # NOTE: We convert everything up-front ("status" used to be used as-is, which failed if it was
    # passed in as a string, even though "npasses" and "delay" were accepted that way).
    npasses, status, delay = int(npasses), int(status), float(delay)
    if status == 0:
        raise ValueError( "The status block size can't be zero." )
    warnings = { int(w) for w in warnings.split(",") } if warnings else set()
    errors = { int(e) for e in errors.split(",") } if errors else set()

    # generate progress messages
    sio = globvars.socketio_server
    status_no = 0
    for i in range( npasses ):
        pass_no = 1 + i
        # check if we should start a new status block
        if i % status == 0:
            status_no += 1
            sio.emit( "status", "Status #{}".format( status_no ) )
        # issue the next progress message
        # NOTE: A warning doesn't replace the normal progress (or error) message for the pass, only an error does.
        if pass_no in warnings:
            sio.emit( "warning", "Progress {}: warning".format( pass_no ) )
        if pass_no in errors:
            sio.emit( "error", "Progress {}: error".format( pass_no ) )
        else:
            sio.emit( "progress", "Progress {}.".format( pass_no ) )
        _relinq( delay=delay )
    sio.emit( "done" )
|
|
|