""" Analyze the MMP eASLRB PDF and prepare the data files. """

import zipfile
import io
import base64
import traceback
import logging

from flask import request, send_file, abort, url_for

from asl_rulebook2.extract.all import ExtractAll
from asl_rulebook2.bin.prepare_pdf import prepare_pdf
from asl_rulebook2.bin.fixup_mmp_pdf import fixup_mmp_pdf
from asl_rulebook2.pdf import PdfDoc
from asl_rulebook2.utils import TempFile
from asl_rulebook2.webapp import app, globvars
from asl_rulebook2.webapp.utils import get_gs_path

# the most recently prepared ZIP (as bytes), served by download_prepared_data()
_zip_data_download = None

_logger = logging.getLogger( "prepare" )

# ---------------------------------------------------------------------

@app.route( "/prepare", methods=["POST"] )
def prepare_data_files():
    """Prepare the data files.

    The POST'ed JSON is remembered, and a Socket.IO "start" handler is registered
    that does the actual work when the client says it's ready. Always returns "ok".
    """

    # initialize
    args = dict( request.json )
    download_url = url_for( "download_prepared_data" )

    # initialize the socketio server
    # NOTE: We wait until the client tells us to start processing (instead of when the POST data arrives),
    # since it might not be ready to receive events, and miss the first few.
    sio = globvars.socketio_server
    @sio.on( "start" )
    def on_start(): #pylint: disable=unused-variable
        # NOTE: We used to do this in a background thread (when we were using the Flask development server),
        # but flask-socketio + eventlet handles concurrency differently, and we now do it synchronously,
        # and periodically relinquish the CPU, so that we remain responsive (otherwise the client pings timeout,
        # and it disconnects).
        try:
            _do_prepare_data_files( args, download_url )
        except Exception as ex: #pylint: disable=broad-except
            # report the failure to the client, rather than letting the handler die silently
            _logger.error( "PREPARE ERROR: %s\n%s", ex, traceback.format_exc() )
            globvars.socketio_server.emit( "error", str(ex) )

    return "ok"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def _do_prepare_data_files( args, download_url ):
    """Do the work of preparing the data files, reporting progress via Socket.IO events.

    args:           The request parameters (a dict). "pdfData" holds the base64-encoded PDF;
                    if it is missing or empty, the remaining entries are passed as keyword
                    arguments to _test_progress() instead.
    download_url:   The URL sent to the client with the "done" event.

    Raises RuntimeError if Ghostscript is not available.
    """

    # initialize
    sio = globvars.socketio_server
    pdf_data = args.get( "pdfData" )
    if not pdf_data:
        # no data was sent - this is a test of logging progress messages.
        # NOTE: The key might not be present at all (or might have been removed by a previous
        # "start" event, since the dict is shared), so we can't unconditionally "del" it.
        args.pop( "pdfData", None )
        _test_progress( **args )
        return
    pdf_data = base64.b64decode( pdf_data )

    def on_done( zip_data ):
        # save the ZIP data for download_prepared_data(), then tell the client where to get it
        global _zip_data_download
        _zip_data_download = zip_data
        sio.emit( "done", download_url )

    # check if we should just return a pre-prepared ZIP file (for testing porpoises)
    fname = app.config.get( "PREPARED_ZIP" )
    if fname:
        with open( fname, "rb" ) as fp:
            on_done( fp.read() )
        return

    with TempFile() as input_file, TempFile() as prepared_file:

        # save the PDF file data
        input_file.write( pdf_data )
        input_file.close( delete=False )
        _logger.info( "Saved PDF file (#bytes=%d): %s", len(pdf_data), input_file.name )

        # initialize logging
        msg_types = set()
        def log_msg( msg_type, msg ):
            msg = msg.lstrip()
            if msg_type == "status":
                _logger.info( "[STATUS]: %s", msg )
            elif msg_type == "warning":
                _logger.warning( "[WARNING]: %s", msg )
            elif msg_type == "error":
                _logger.error( "[ERROR]: %s", msg )
            else:
                _logger.debug( "[%s] %s", msg_type, msg )
            # remove any leading bullet before sending the message to the client
            if msg.startswith( "- " ):
                msg = msg[2:]
            sio.emit( msg_type, msg )
            msg_types.add( msg_type )
            # NOTE: There's no particular significance in relinquishing the CPU here, but this function
            # is called regularly during processing, so it's a convenient place to do it.
            # This function also gets passed into the low-level extract code (as a logging handler),
            # which results in that code also relinquishing at regular intervals.
            _relinq( msg )

        # NOTE: The plan was to allow the user to change the default parameters in the UI,
        # but this can be done (ahem) later. For now, if they really need to change something,
        # they can prepare the data files from the command-line.
        # NOTE: This is kept separate from the "args" parameter (the request parameters).
        extract_args = []

        # extract everything we need from the PDF
        log_msg( "status", "Opening the PDF..." )
        extract = ExtractAll( extract_args, log_msg )
        with PdfDoc( input_file.name ) as pdf:
            extract.extract_all( pdf )
            index_buf = io.StringIO()
            extract.extract_index.save_as_json( index_buf )
            targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
            vo_notes_buf = io.StringIO()
            extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf, vo_notes_buf )
        # NOTE: The keys are used as the file extensions in the download ZIP.
        file_data = {
            "index": index_buf.getvalue(),
            "targets": targets_buf.getvalue(),
            "chapters": chapters_buf.getvalue(),
            "footnotes": footnotes_buf.getvalue(),
            "vo-notes": vo_notes_buf.getvalue(),
        }

        # prepare the PDF
        gs_path = get_gs_path()
        if not gs_path:
            raise RuntimeError( "Ghostscript is not available." )
        with TempFile( mode="w", encoding="utf-8" ) as targets_file, \
             TempFile( mode="w", encoding="utf-8" ) as vo_notes_file:
            log_msg( "status", "Preparing the final PDF..." )
            # save the extracted targets
            targets_file.temp_file.write( file_data["targets"] )
            targets_file.close( delete=False )
            vo_notes_file.temp_file.write( file_data["vo-notes"] )
            vo_notes_file.close( delete=False )
            # prepare the PDF
            prepared_file.close( delete=False )
            prepare_pdf( input_file.name,
                "ASL Rulebook",
                targets_file.name, vo_notes_file.name, 5,
                prepared_file.name, "ebook",
                gs_path,
                log_msg, relinq = _relinq
            )

        # fixup the PDF
        with TempFile() as fixedup_file:
            log_msg( "status", "Fixing up the final PDF..." )
            fixedup_file.close( delete=False )
            fixup_mmp_pdf( prepared_file.name,
                fixedup_file.name,
                False, True, True,
                log_msg, relinq = _relinq
            )
            # read the final PDF data
            with open( fixedup_file.name, "rb" ) as fp:
                pdf_data = fp.read()

        # prepare the ZIP for the user to download
        log_msg( "status", "Preparing the download ZIP..." )
        zip_data = io.BytesIO()
        with zipfile.ZipFile( zip_data, "w", zipfile.ZIP_DEFLATED ) as zip_file:
            fname_stem = "ASL Rulebook"
            zip_file.writestr( fname_stem+".pdf", pdf_data )
            for key, fdata in file_data.items():
                fname = "{}.{}".format( fname_stem, key )
                zip_file.writestr( fname, fdata )
        zip_data = zip_data.getvalue()

        # notify the front-end that we're done
        on_done( zip_data )
        _logger.debug( "Message types seen: %s",
            " ; ".join( sorted( str(mt) for mt in msg_types ) )
        )

        # NOTE: We don't bother shutting down the socketio server, since the user
        # has to restart the server, using the newly-prepared data files.

def _relinq( msg=None, delay=0 ): #pylint: disable=unused-argument
    """Relinquish the CPU (to keep the webapp server responsive)."""
    globvars.socketio_server.sleep( delay )

# ---------------------------------------------------------------------

@app.route( "/prepare/download" )
def download_prepared_data():
    """Download the prepared data ZIP file.

    Responds with a 404 if no ZIP file has been prepared yet.
    """
    if not _zip_data_download:
        abort( 404 )
    fname = "asl-rulebook2.zip"
    # NOTE: Flask 2.0 renamed "attachment_filename" to "download_name" (and 2.2 removed the old name),
    # so we try the new name first, and fall back to the old one for older versions of Flask.
    # We create a fresh buffer for each attempt, so that a failed call can't leave it half-consumed.
    try:
        return send_file( io.BytesIO( _zip_data_download ),
            as_attachment=True, download_name=fname
        )
    except TypeError:
        return send_file( io.BytesIO( _zip_data_download ),
            as_attachment=True, attachment_filename=fname
        )

# ---------------------------------------------------------------------

def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.1 ):
    """Test progress messages.

    npasses:    The number of progress passes to run.
    status:     Start a new status block every this many passes (0 = never).
    warnings:   Comma-separated list of (1-based) pass numbers that should emit a warning.
    errors:     Comma-separated list of (1-based) pass numbers that should emit an error.
    delay:      The delay passed to _relinq() after each pass.

    NOTE: The numeric parameters may arrive as strings, so they are all converted here.
    """

    # initialize
    npasses = int( npasses )
    status = int( status )
    delay = float( delay )
    warnings = { int(w) for w in warnings.split(",") } if warnings else set()
    errors = { int(e) for e in errors.split(",") } if errors else set()

    # generate progress messages
    sio = globvars.socketio_server
    status_no = 0
    for i in range( npasses ):
        pass_no = 1 + i
        # check if we should start a new status block
        if status and i % status == 0:
            status_no += 1
            sio.emit( "status", "Status #{}".format( status_no ) )
        # issue the next progress message
        # NOTE: A pass that has a warning (but no error) also gets a normal progress message.
        if pass_no in warnings:
            sio.emit( "warning", "Progress {}: warning".format( pass_no ) )
        if pass_no in errors:
            sio.emit( "error", "Progress {}: error".format( pass_no ) )
        else:
            sio.emit( "progress", "Progress {}.".format( pass_no ) )
        _relinq( delay=delay )

    sio.emit( "done" )