Added a web interface for preparing data files.

master
Pacman Ghost 3 years ago
parent fdd027cb2b
commit fee9f49f1c
  1. 0
      asl_rulebook2/bin/dump_pdf.py
  2. 0
      asl_rulebook2/bin/extract_pages.py
  3. 69
      asl_rulebook2/bin/fixup_mmp_pdf.py
  4. 70
      asl_rulebook2/bin/prepare_pdf.py
  5. 19
      asl_rulebook2/extract/all.py
  6. 12
      asl_rulebook2/extract/base.py
  7. 6
      asl_rulebook2/extract/content.py
  8. 6
      asl_rulebook2/extract/index.py
  9. 18
      asl_rulebook2/tests/test_extract.py
  10. 19
      asl_rulebook2/tests/utils.py
  11. 11
      asl_rulebook2/utils.py
  12. 1
      asl_rulebook2/webapp/__init__.py
  13. 2
      asl_rulebook2/webapp/globvars.py
  14. 20
      asl_rulebook2/webapp/main.py
  15. 214
      asl_rulebook2/webapp/prepare.py
  16. 24
      asl_rulebook2/webapp/run_server.py
  17. 1
      asl_rulebook2/webapp/static/css/global.css
  18. 37
      asl_rulebook2/webapp/static/css/prepare.css
  19. BIN
      asl_rulebook2/webapp/static/images/download.png
  20. BIN
      asl_rulebook2/webapp/static/images/eASLRB.png
  21. BIN
      asl_rulebook2/webapp/static/images/error.png
  22. BIN
      asl_rulebook2/webapp/static/images/warning.png
  23. 364
      asl_rulebook2/webapp/static/prepare.js
  24. 6046
      asl_rulebook2/webapp/static/socketio/socket.io.js
  25. 7
      asl_rulebook2/webapp/static/socketio/socket.io.min.js
  26. 39
      asl_rulebook2/webapp/templates/prepare.html
  27. 126
      asl_rulebook2/webapp/tests/test_prepare.py
  28. 20
      asl_rulebook2/webapp/tests/test_startup.py
  29. 19
      asl_rulebook2/webapp/tests/utils.py
  30. 5
      asl_rulebook2/webapp/utils.py
  31. 12
      conftest.py
  32. 1
      requirements.txt
  33. 6
      setup.py

@ -2,29 +2,26 @@
""" Fixup issues in the MMP eASLRB. """
import os
import math
from pikepdf import Pdf, Page, OutlineItem, Encryption, make_page_destination
import click
from asl_rulebook2.utils import log_msg_stderr
# ---------------------------------------------------------------------
def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
"""Fixup the eASLRB."""
def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ):
"""Fixup the MMP eASLRB PDF."""
def log_msg( msg_type, msg, *args, **kwargs ):
if not log:
return
if isinstance( msg, list ):
msg = "\n".join( msg )
data = kwargs.pop( "data", None )
msg = msg.format( *args, **kwargs )
log( msg_type, msg, data=data )
log( msg_type, msg )
def percentage( curr, total ):
return math.floor( 100 * float(curr) / float(total) )
# NOTE: It would be nice to use the targetes file to get the TOC entries and annotations
# NOTE: It would be nice to use the targets file to get the TOC entries and annotations
# to point to the exact point on the page, but figuring out the text associated with each
# annotation is extremely messy (annotations are simply a rectangle on a page, so we need
# to figure out which elements lie within that rectangle, and since things are not always
@ -32,24 +29,23 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
with Pdf.open( fname ) as pdf:
log_msg( "start", "Loaded PDF: {}".format( fname ), data=[
( "PDF version", pdf.pdf_version ),
( "# pages", len(pdf.pages) ),
] )
log_msg( "start", "Loaded PDF: {}\n- PDF version = {}\n- #pages = {}".format(
fname, pdf.pdf_version, len(pdf.pages) )
)
log_msg( None, "" )
# fixup bookmarks in the TOC
log_msg( "toc", "Fixing up the TOC..." )
log_msg( "progress", "Fixing up the TOC..." )
def walk_toc( items, depth ):
for item_no,item in enumerate(items):
if item.destination[0].Type != "/Page" or item.destination[1] != "/Fit" \
or item.page_location is not None or item.page_location_kwargs != {}:
log_msg( "toc:warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) )
log_msg( "warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) )
continue
page = Page( item.destination[0] )
page_height = page.mediabox[3]
bullet = "#" if depth <= 1 else "-"
log_msg( "toc:detail", " {}{} {} => p{}",
log_msg( "verbose", " {}{} {} => p{}",
depth*" ", bullet, item.title, 1+page.index
)
walk_toc( item.children, depth+1 )
@ -60,16 +56,13 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
with pdf.open_outline() as outline:
walk_toc( outline.root, 0 )
# NOTE: The TOC will be updated when we exit the context manager, and can take some time.
log_msg( "toc", "Installing the new TOC..." )
log_msg( "progress", "Installing the new TOC..." )
log_msg( None, "" )
# fix up each page
log_msg( "annoations", "Fixing up the content..." )
log_msg( "progress", "Fixing up the content..." )
for page_no, raw_page in enumerate(pdf.pages):
log_msg( "annotations:progress", "- page {}",
1+page_no,
data = { "percentage": percentage( page_no, len(pdf.pages) ) }
)
log_msg( "verbose", "- page {}", 1+page_no )
if rotate:
# force pages to be landscape (so that we don't get an h-scrollbar in Firefox
# when we set the zoom to "fit width").
@ -83,21 +76,20 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
dest = annot.get( "/Dest" )
if dest:
page_no = Page( dest[0] ).index
log_msg( "annotations:detail", " - {} => p{}",
log_msg( "verbose", " - {} => p{}",
repr(annot.Rect), 1+page_no
)
annot.Dest = make_page_destination( pdf, page_no, "XYZ", top=page_height )
log_msg( None, "" )
# save the updated PDF
log_msg( "save", "Saving updated PDF: {}", output_fname )
log_msg( "progress", "Saving the fixed-up PDF..." )
# NOTE: Setting a blank password will encrypt the file, but doesn't require the user to enter a password
# when opening the file (but it will be marked as "SECURE" in the UI).
enc = Encryption( owner="", user="" )
def save_progress( pct ):
log_msg( "save:progress", "- Saved {}%...", pct,
data = { "percentage": pct }
)
if pct > 0 and pct % 10 == 0:
log_msg( "verbose", "- Saved {}%.", pct )
pdf.save( output_fname, encryption=enc, linearize=optimize_web,
progress = save_progress
)
@ -107,9 +99,9 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
new_size = os.path.getsize( output_fname )
ratio = round( 100 * float(new_size) / float(old_size) ) - 100
if ratio == 0:
log_msg( "save", "The updated PDF file is about the same size as the original file." )
log_msg( "verbose", "The updated PDF file is about the same size as the original file." )
else:
log_msg( "save", "The updated PDF file is about {}% {} than the original file.",
log_msg( "verbose", "The updated PDF file is about {}% {} than the original file.",
abs(ratio), "larger" if ratio > 0 else "smaller"
)
@ -120,23 +112,18 @@ def fixup_easlrb( fname, output_fname, optimize_web, rotate, log=None ):
@click.option( "--output","-o", required=True, type=click.Path(dir_okay=False), help="Where to save the fixed-up PDF." )
@click.option( "--optimize-web", is_flag=True, default=False, help="Optimize for use in a browser (larger file)." )
@click.option( "--rotate", is_flag=True, default=False, help="Rotate landscape pages." )
@click.option( "--verbose","-v", is_flag=True, default=False, help="Verbose output." )
@click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." )
def main( pdf_file, output, optimize_web, rotate, verbose, progress ):
@click.option( "--verbose","-v", is_flag=True, default=False, help="Verbose output." )
def main( pdf_file, output, optimize_web, rotate, progress, verbose ):
"""Fixup the eASLRB."""
def log_msg( msg_type, msg, data=None ):
if not msg_type:
msg_type = ""
if msg_type.endswith( ":detail" ) and not verbose:
def log_msg( msg_type, msg ):
if msg_type in ("progress", "start", None) and not progress:
return
if msg_type.endswith( ":progress" ) and not progress:
if msg_type == "verbose" and not verbose:
return
print( msg )
if msg_type == "start":
for k, v in data:
print( "- {:<12} {}".format( k+":", v ) )
fixup_easlrb( pdf_file, output, optimize_web, rotate, log=log_msg )
log_msg_stderr( msg_type, msg )
fixup_mmp_pdf( pdf_file, output, optimize_web, rotate, log=log_msg )
if __name__ == "__main__":
main() #pylint: disable=no-value-for-parameter

@ -1,5 +1,5 @@
#!/usr/bin/env python3
""" Add named destinations to a PDF file. """
""" Prepare the MMP eASLRB PDF. """
import subprocess
import json
@ -8,7 +8,7 @@ import datetime
import click
from asl_rulebook2.utils import TempFile
from asl_rulebook2.utils import TempFile, log_msg_stderr
# NOTE: "screen" gives significant savings (~65%) but scanned PDF's become very blurry. The main MMP eASLRB
# is not too bad, but some images are also a bit unclear. "ebook" gives no savings for scanned PDF's, but
@ -23,22 +23,8 @@ _COMPRESSION_CHOICES = [
# ---------------------------------------------------------------------
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
help="Output PDF file."
)
@click.option( "--compression", type=click.Choice(_COMPRESSION_CHOICES), default="ebook",
help="Level of compression."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path ):
"""Add named destinations to a PDF file."""
def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, log_msg ):
"""Prepare the MMP eASLRB PDF."""
# load the targets
with open( targets_fname, "r" ) as fp:
@ -48,7 +34,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs
# compress the PDF
if compression and compression != "none":
print( "Compressing the PDF ({})...".format( compression ) )
log_msg( "progress", "Compressing the PDF ({})...".format( compression ) )
compressed_file.close( delete=False )
args = [ gs_path, "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dQUIET", "-dBATCH",
"-dPDFSETTINGS=/{}".format( compression ),
@ -58,11 +44,13 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs
start_time = time.time()
subprocess.run( args, check=True )
elapsed_time = time.time() - start_time
print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
)
pdf_file = compressed_file.name
# generate the pdfmarks
print( "Generating the pdfmarks..." )
log_msg( "progress", "Generating the pdfmarks..." )
if title:
print( "[ /Title ({})".format( title ), file=pdfmarks_file )
else:
@ -84,8 +72,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs
pdfmarks_file.close( delete=False )
# generate the pdfmark'ed document
print( "Generating the pdfmark'ed document..." )
print( "- {} => {}".format( pdf_file, output_fname ) )
log_msg( "progress", "Adding targets to the PDF..." )
args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ]
args.extend( [ "-o", output_fname ] )
args.extend( [ "-f", pdf_file ] )
@ -93,9 +80,44 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs
start_time = time.time()
subprocess.run( args, check=True )
elapsed_time = time.time() - start_time
print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
log_msg( "timestamp", "- Elapsed time: {}".format(
datetime.timedelta( seconds=int(elapsed_time) ) )
)
# ---------------------------------------------------------------------
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--compression", type=click.Choice(_COMPRESSION_CHOICES), default="ebook",
    help="Level of compression."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
@click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, progress ):
    """Prepare the MMP eASLRB PDF."""

    # these message types are only shown if the user asked for progress messages;
    # anything else (e.g. warnings and errors) is always shown
    progress_only_msg_types = ( "progress", "start", "timestamp", None )

    def log_msg( msg_type, msg ):
        """Forward a message to stderr, unless it has been silenced."""
        if progress or msg_type not in progress_only_msg_types:
            log_msg_stderr( msg_type, msg )

    # hand everything over to the function that does the real work
    prepare_pdf(
        pdf_file = pdf_file,
        title = title,
        targets_fname = targets_fname,
        yoffset = yoffset,
        output_fname = output_fname,
        compression = compression,
        gs_path = gs_path,
        log_msg = log_msg
    )
if __name__ == "__main__":
main() #pylint: disable=no-value-for-parameter

@ -8,10 +8,11 @@ import importlib
import click
from asl_rulebook2.pdf import PdfDoc
from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr
from asl_rulebook2.extract.base import ExtractBase
from asl_rulebook2.extract.index import ExtractIndex
from asl_rulebook2.extract.content import ExtractContent
from asl_rulebook2.pdf import PdfDoc
from asl_rulebook2.utils import log_msg_stderr
# ---------------------------------------------------------------------
@ -34,13 +35,13 @@ class ExtractAll( ExtractBase ):
default_args.update( getattr( mod, "_DEFAULT_ARGS" ) )
# extract the index
self.log_msg( "progress", "\nExtracting the index..." )
self.log_msg( "status", "\nExtracting the index..." )
args = ExtractBase.parse_args( self._args, default_args )
self.extract_index = ExtractIndex( args, self._log )
self.extract_index.extract_index( pdf )
# extract the content
self.log_msg( "progress", "\nExtracting the content..." )
self.log_msg( "status", "\nExtracting the content..." )
args = ExtractBase.parse_args( self._args, default_args )
self.extract_content = ExtractContent( args, self._log )
self.extract_content.extract_content( pdf )
@ -125,13 +126,16 @@ class ExtractAll( ExtractBase ):
)
@click.option( "--save-index","save_index_fname", required=True, help="Where to save the extracted index." )
@click.option( "--save-targets","save_targets_fname", required=True, help="Where to save the extracted targets." )
@click.option( "--save-chapters","save_chapters_fname", required=True, help="Where to save the extracted chaopters." )
@click.option( "--save-footnotes","save_footnotes_fname", required=True, help="Where to save the extracted footnotes." )
def main( pdf_file, args, progress, output_fmt, save_index_fname, save_targets_fname, save_footnotes_fname ):
def main( pdf_file, args, progress, output_fmt,
save_index_fname, save_targets_fname, save_chapters_fname, save_footnotes_fname
):
"""Extract everything we need from the MMP eASLRB."""
# extract everything
def log_msg( msg_type, msg ):
if msg_type == "progress" and not progress:
if msg_type in ("status", "progress") and not progress:
return
log_msg_stderr( msg_type, msg )
extract = ExtractAll( args, log_msg )
@ -142,9 +146,10 @@ def main( pdf_file, args, progress, output_fmt, save_index_fname, save_targets_f
# save the results
with open( save_index_fname, "w", encoding="utf-8" ) as index_out, \
open( save_targets_fname, "w", encoding="utf-8" ) as targets_out, \
open( save_chapters_fname, "w", encoding="utf-8" ) as chapters_out, \
open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out:
getattr( extract.extract_index, "save_as_"+output_fmt )( index_out )
getattr( extract.extract_content, "save_as_"+output_fmt )( targets_out, footnotes_out )
getattr( extract.extract_content, "save_as_"+output_fmt )( targets_out, chapters_out, footnotes_out )
if __name__ == "__main__":
main() #pylint: disable=no-value-for-parameter

@ -1,9 +1,5 @@
""" Base class for the extraction classes. """
import sys
import click
# ---------------------------------------------------------------------
class ExtractBase:
@ -50,11 +46,3 @@ class ExtractBase:
return
msg = msg.format( *args, **kwargs )
self._log( msg_type, msg )
# ---------------------------------------------------------------------
def log_msg_stderr( msg_type, msg ):
"""Log a message to stderr."""
if msg_type == "warning":
msg = click.style( "WARNING: {}".format( msg ), fg="yellow" )
click.echo( msg, file=sys.stderr )

@ -9,9 +9,9 @@ import math
import click
from pdfminer.layout import LTChar
from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr
from asl_rulebook2.extract.base import ExtractBase
from asl_rulebook2.pdf import PdfDoc, PageIterator, PageElemIterator
from asl_rulebook2.utils import parse_page_numbers, fixup_text, append_text, remove_trailing, jsonval
from asl_rulebook2.utils import parse_page_numbers, fixup_text, append_text, remove_trailing, jsonval, log_msg_stderr
# NOTE: Characters are laid out individually on the page, and we generally want to process them top-to-bottom,
# left-to-right, but in some cases, alignment is messed up (e.g. the bounding boxes don't line up properly
@ -104,7 +104,7 @@ class ExtractContent( ExtractBase ):
self._curr_pageid = "{}{}".format( # nb: this is the ASL page# (e.g. "A42"), not the PDF page#
self._curr_chapter, curr_chapter_pageno
)
self.log_msg( "progress", "- Processing page {} ({})...", page_no, self._curr_pageid )
self.log_msg( "progress", "- Analyzing page {} ({}).", page_no, self._curr_pageid )
# process each element on the page
curr_caption = None

@ -8,9 +8,9 @@ import re
import click
from pdfminer.layout import LTChar
from asl_rulebook2.extract.base import ExtractBase, log_msg_stderr
from asl_rulebook2.extract.base import ExtractBase
from asl_rulebook2.pdf import PdfDoc, PageIterator, PageElemIterator
from asl_rulebook2.utils import parse_page_numbers, fixup_text, extract_parens_content, jsonval
from asl_rulebook2.utils import parse_page_numbers, fixup_text, extract_parens_content, jsonval, log_msg_stderr
# ---------------------------------------------------------------------
@ -49,7 +49,7 @@ class ExtractIndex( ExtractBase ):
if page_no not in page_nos:
self.log_msg( "progress", "- Skipping page {}.", page_no )
continue
self.log_msg( "progress", "- Processing page {}...", page_no )
self.log_msg( "progress", "- Analyzing page {}.", page_no )
# process each element on the page
self._prev_y0 = 99999

@ -10,6 +10,7 @@ from asl_rulebook2.extract.index import ExtractIndex
from asl_rulebook2.extract.content import ExtractContent
from asl_rulebook2.extract.all import ExtractAll
from asl_rulebook2.tests import pytest_options
from asl_rulebook2.tests.utils import for_each_easlrb_version
# ---------------------------------------------------------------------
@ -34,7 +35,7 @@ def test_extract_index():
assert open( fname, "r", encoding="utf-8" ).read() == buf
# run the test
_for_each_version( do_test )
for_each_easlrb_version( do_test )
# ---------------------------------------------------------------------
@ -65,7 +66,7 @@ def test_extract_content():
assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf
# run the test
_for_each_version( do_test )
for_each_easlrb_version( do_test )
# ---------------------------------------------------------------------
@ -101,21 +102,10 @@ def test_extract_all():
assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf
# run the test
_for_each_version( do_test )
for_each_easlrb_version( do_test )
# ---------------------------------------------------------------------
def _for_each_version( func ):
"""Run tests for each version of the eASLRB."""
base_dir = pytest_options.easlrb_path
ncalls = 0
for name in os.listdir( base_dir ):
dname = os.path.join( base_dir, name )
if os.path.isfile( os.path.join( dname, "eASLRB.pdf" ) ):
func( dname )
ncalls += 1
assert ncalls > 0
def _check_log_msg( msg_type, msg ):
"""Check a log message."""
assert msg_type not in ( "warning", "error" ), \

@ -0,0 +1,19 @@
""" Helper utilities. """
import os
from asl_rulebook2.tests import pytest_options
# ---------------------------------------------------------------------
def for_each_easlrb_version( func ):
    """Call a test function once for each version of the eASLRB.

    Every sub-directory of the configured eASLRB path that contains an "eASLRB.pdf" file
    is passed to the given function. It is an error if the path has not been configured,
    or if no such directory was found.
    """

    # locate the base directory
    root_dir = pytest_options.easlrb_path
    assert root_dir

    # run the function for each directory that holds a copy of the eASLRB
    # NOTE: The candidates are generated lazily, so each directory is checked just before it is used.
    candidates = ( os.path.join( root_dir, entry ) for entry in os.listdir( root_dir ) )
    nversions = 0
    for version_dir in candidates:
        if not os.path.isfile( os.path.join( version_dir, "eASLRB.pdf" ) ):
            continue
        func( version_dir )
        nversions += 1

    # make sure we actually tested something
    assert nversions > 0

@ -1,5 +1,6 @@
""" Miscellaneous utilities. """
import sys
import os
import pathlib
import tempfile
@ -8,6 +9,8 @@ import math
from io import StringIO
from html.parser import HTMLParser
import click
# ---------------------------------------------------------------------
class TempFile:
@ -160,6 +163,14 @@ def jsonval( val ):
assert False, "Unknown JSON data type: {}".format( type(val) )
return '"???"'
def log_msg_stderr( msg_type, msg ):
    """Write a message to stderr, highlighting warnings and errors.

    Warnings are prefixed and shown in yellow, errors are prefixed and shown in red,
    and every other message type is written out unchanged.
    """
    # each entry is: ( message type, message template, colour )
    highlights = (
        ( "warning", "WARNING: {}", "yellow" ),
        ( "error", "ERROR: {}", "red" ),
    )
    for highlight_type, template, colour in highlights:
        if msg_type == highlight_type:
            msg = click.style( template.format( msg ), fg=colour )
            break
    click.echo( msg, file=sys.stderr )
def change_extn( fname, extn ):
    """Return the filename with its extension replaced (as a pathlib.Path).

    The new extension must include the leading dot (e.g. ".json"), or be an empty string
    to remove the existing extension.
    """
    path = pathlib.Path( fname )
    return path.with_suffix( extn )

@ -76,6 +76,7 @@ import asl_rulebook2.webapp.startup #pylint: disable=wrong-import-position,cycli
import asl_rulebook2.webapp.content #pylint: disable=wrong-import-position,cyclic-import
import asl_rulebook2.webapp.search #pylint: disable=wrong-import-position,cyclic-import
import asl_rulebook2.webapp.rule_info #pylint: disable=wrong-import-position,cyclic-import
import asl_rulebook2.webapp.prepare #pylint: disable=wrong-import-position,cyclic-import
from asl_rulebook2.webapp import globvars #pylint: disable=wrong-import-position,cyclic-import
app.before_request( globvars.on_request )

@ -9,6 +9,8 @@ from asl_rulebook2.webapp.config.constants import APP_NAME, APP_VERSION
cleanup_handlers = []
socketio_server = None
# ---------------------------------------------------------------------
_init_lock = threading.Lock()

@ -8,17 +8,27 @@ import logging
from flask import render_template, jsonify, abort
from asl_rulebook2.webapp import app, globvars, shutdown_event
from asl_rulebook2.webapp.utils import parse_int
from asl_rulebook2.webapp.utils import parse_int, get_gs_path
# ---------------------------------------------------------------------
@app.route( "/" )
def main():
"""Return the main page."""
from asl_rulebook2.webapp.asop import user_css_url
return render_template( "index.html",
ASOP_CSS_URL = user_css_url
)
if app.config.get( "DATA_DIR" ):
# return the main page
from asl_rulebook2.webapp.asop import user_css_url
return render_template( "index.html",
ASOP_CSS_URL = user_css_url
)
else:
# NOTE: If a data directory has not been configured, this is probably the first time the user
# has run the application, so we show the page that explains how to set things up.
# NOTE: Check for Ghostscript before we start.
args = {}
if get_gs_path():
args["HAVE_GHOSTSCRIPT"] = 1
return render_template( "prepare.html", **args )
# ---------------------------------------------------------------------

@ -0,0 +1,214 @@
""" Analyze the MMP eASLRB PDF and prepare the data files. """
import threading
import zipfile
import io
import time
import base64
import traceback
import logging
from flask import request, send_file, abort, url_for
from asl_rulebook2.extract.all import ExtractAll
from asl_rulebook2.bin.prepare_pdf import prepare_pdf
from asl_rulebook2.bin.fixup_mmp_pdf import fixup_mmp_pdf
from asl_rulebook2.pdf import PdfDoc
from asl_rulebook2.utils import TempFile
from asl_rulebook2.webapp import app, globvars
from asl_rulebook2.webapp.utils import get_gs_path
_zip_data_download = None
_logger = logging.getLogger( "prepare" )
# ---------------------------------------------------------------------
@app.route( "/prepare", methods=["POST"] )
def prepare_data_files():
    """Accept a request to prepare the data files.

    The request's JSON payload is captured, and a socketio "start" handler is installed
    that kicks off the preparation in a background thread. Raises a RuntimeError if the
    socketio server has not been set up.
    """

    # capture everything the worker thread will need
    # NOTE(review): the download URL is presumably generated here because the worker thread
    # runs outside the request - confirm.
    request_args = dict( request.json )
    zip_url = url_for( "download_prepared_data" )

    # make sure the socketio server is available
    server = globvars.socketio_server
    if not server:
        raise RuntimeError( "The socketio server has not been started." )

    def run_prepare():
        """Prepare the data files, reporting any failure back to the client."""
        try:
            _do_prepare_data_files( request_args, zip_url )
        except Exception as exc: #pylint: disable=broad-except
            _logger.error( "PREPARE ERROR: %s\n%s", exc, traceback.format_exc() )
            globvars.socketio_server.emit( "error", str(exc) )

    @server.on( "start" )
    def on_start( data ): #pylint: disable=unused-variable,unused-argument
        # NOTE: The worker thread is not started when the POST request comes in. We wait until
        # the client tells us it's ready (otherwise, it might miss the first event or two).
        worker_thread = threading.Thread( target=run_prepare, daemon=True )
        worker_thread.start()

    return "ok"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _do_prepare_data_files( args, download_url ):
    """Prepare the data files from an uploaded PDF, reporting progress via socketio.

    args:         The request parameters. "pdfData" holds the base64-encoded PDF; if it is
                  missing or empty, the other parameters are passed to _test_progress().
    download_url: The URL sent to the client with the "done" event.

    The extracted index/targets/chapters/footnotes and the final PDF are bundled into
    a ZIP, which is kept in memory for the client to download.
    Raises a RuntimeError if Ghostscript is not available.
    """

    # initialize
    sio = globvars.socketio_server
    pdf_data = args.get( "pdfData" )
    if not pdf_data:
        # no data was sent - this is a test of logging progress messages.
        # NOTE: We build a new dict instead of deleting the key in-place, since the key might
        # not be present at all (which would raise a KeyError), and so that the caller's dict
        # is left untouched.
        test_args = { key: val for key, val in args.items() if key != "pdfData" }
        _test_progress( **test_args )
        return
    pdf_data = base64.b64decode( pdf_data )

    def on_done( zip_data ):
        """Save the ZIP data for download, and tell the client where to get it."""
        global _zip_data_download
        _zip_data_download = zip_data
        sio.emit( "done", download_url )

    # check if we should just return a pre-prepared ZIP file (for testing porpoises)
    fname = app.config.get( "PREPARED_ZIP" )
    if fname:
        with open( fname, "rb" ) as fp:
            on_done( fp.read() )
        return

    with TempFile() as input_file, TempFile() as prepared_file:

        # save the PDF file data
        input_file.write( pdf_data )
        input_file.close( delete=False )
        _logger.info( "Saved PDF file (#bytes=%d): %s", len(pdf_data), input_file.name )

        # initialize logging
        msg_types = set()
        def log_msg( msg_type, msg ):
            """Log a message on the server, then forward it to the client."""
            msg = msg.lstrip()
            if msg_type == "status":
                _logger.info( "[STATUS]: %s", msg )
            elif msg_type == "warning":
                _logger.warning( "[WARNING]: %s", msg )
            elif msg_type == "error":
                _logger.error( "[ERROR]: %s", msg )
            else:
                _logger.debug( "[%s] %s", msg_type, msg )
            # the client doesn't get the leading bullet
            if msg.startswith( "- " ):
                msg = msg[2:]
            sio.emit( msg_type, msg )
            msg_types.add( msg_type )

        # NOTE: The plan was to allow the user to change the default parameters in the UI,
        # but this can be done (ahem) later. For now, if they really need to change something,
        # they can prepare the data files from the command-line.
        # NOTE: This used to re-use the name "args", hiding the function's own parameter.
        extract_args = []

        # extract everything we need from the PDF
        log_msg( "status", "Opening the PDF..." )
        extract = ExtractAll( extract_args, log_msg )
        with PdfDoc( input_file.name ) as pdf:
            extract.extract_all( pdf )
            index_buf = io.StringIO()
            extract.extract_index.save_as_json( index_buf )
            targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
            extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf )
        file_data = {
            "index": index_buf.getvalue(),
            "targets": targets_buf.getvalue(),
            "chapters": chapters_buf.getvalue(),
            "footnotes": footnotes_buf.getvalue(),
        }

        # prepare the PDF
        gs_path = get_gs_path()
        if not gs_path:
            raise RuntimeError( "Ghostscript is not available." )
        with TempFile( mode="w", encoding="utf-8" ) as targets_file:
            log_msg( "status", "Preparing the final PDF..." )
            # save the extracted targets
            targets_file.temp_file.write( file_data["targets"] )
            targets_file.close( delete=False )
            # prepare the PDF
            prepared_file.close( delete=False )
            prepare_pdf( input_file.name,
                "ASL Rulebook",
                targets_file.name, 5,
                prepared_file.name, "ebook",
                gs_path,
                log_msg
            )

        # fixup the PDF
        with TempFile() as fixedup_file:
            log_msg( "status", "Fixing up the final PDF..." )
            fixedup_file.close( delete=False )
            fixup_mmp_pdf( prepared_file.name,
                fixedup_file.name,
                True, True,
                log_msg
            )
            # read the final PDF data
            with open( fixedup_file.name, "rb" ) as fp:
                pdf_data = fp.read()

    # prepare the ZIP for the user to download
    log_msg( "status", "Preparing the download ZIP..." )
    zip_data = io.BytesIO()
    with zipfile.ZipFile( zip_data, "w", zipfile.ZIP_DEFLATED ) as zip_file:
        fname_stem = "ASL Rulebook"
        zip_file.writestr( fname_stem+".pdf", pdf_data )
        for key, val in file_data.items():
            fname = "{}.{}".format( fname_stem, key )
            zip_file.writestr( fname, val )
    zip_data = zip_data.getvalue()

    # notify the front-end that we're done
    on_done( zip_data )
    # NOTE: Message types are converted to strings, since they can't be sorted if one is None.
    _logger.debug( "Message types seen: %s",
        " ; ".join( sorted( str(mt) for mt in msg_types ) )
    )

    # NOTE: We don't bother shutting down the socketio server, since the user
    # has to restart the server, using the newly-prepared data files.
# ---------------------------------------------------------------------
@app.route( "/prepare/download" )
def download_prepared_data():
    """Send the prepared data ZIP file to the client (or a 404 if nothing has been prepared)."""
    zip_data = _zip_data_download
    if not zip_data:
        abort( 404 )
    # the ZIP is held in memory, so we wrap it in a file-like object for Flask
    zip_buf = io.BytesIO( zip_data )
    return send_file( zip_buf, as_attachment=True, attachment_filename="asl-rulebook2.zip" )
# ---------------------------------------------------------------------
def _test_progress( npasses=100, status=10, warnings=None, errors=None, delay=0.1 ):
    """Test progress messages.

    Sends a stream of dummy events to the client, followed by a "done" event.

    npasses:  Number of passes to run.
    status:   A new status block is started every this many passes.
    warnings: Comma-separated pass numbers (1-based) that also issue a warning.
    errors:   Comma-separated pass numbers (1-based) that issue an error instead of
              a progress message.
    delay:    Number of seconds to wait after each pass.
    """

    # initialize
    # NOTE: The parameters are converted up-front, since they may be passed in as strings.
    # The status interval was previously used as-is, which fails if it is a string.
    npasses, status, delay = int( npasses ), int( status ), float( delay )
    warnings = { int(w) for w in warnings.split(",") } if warnings else set()
    errors = { int(e) for e in errors.split(",") } if errors else set()

    # generate progress messages
    sio = globvars.socketio_server
    status_no = 0
    for i in range( npasses ):
        pass_no = 1 + i
        # check if we should start a new status block
        if i % status == 0:
            status_no += 1
            sio.emit( "status", "Status #{}".format( status_no ) )
        # issue the next progress message
        # NOTE: A warning is sent in addition to the error/progress message for the pass.
        if pass_no in warnings:
            sio.emit( "warning", "Progress {}: warning".format( pass_no ) )
        if pass_no in errors:
            sio.emit( "error", "Progress {}: error".format( pass_no ) )
        else:
            sio.emit( "progress", "Progress {}.".format( pass_no ) )
        time.sleep( delay )

    sio.emit( "done" )

@ -9,7 +9,7 @@ import glob
import click
from asl_rulebook2.webapp import app
from asl_rulebook2.webapp import app, globvars
# ---------------------------------------------------------------------
@ -79,11 +79,33 @@ def main( bind_addr, data_dir, force_init_delay, flask_debug ):
_ = urllib.request.urlopen( url )
threading.Thread( target=_start_server, daemon=True ).start()
# check if the user needs to prepare their data files
if not app.config.get( "DATA_DIR" ):
# yup - initialize the socketio server
init_prepare_socketio( app )
# run the server
app.run( host=host, port=port, debug=flask_debug,
extra_files = extra_files
)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def init_prepare_socketio( flask_app ):
    """Set up the socketio server that is used while preparing the data files.

    The Flask application's WSGI app is wrapped so that socketio traffic is handled,
    and the server is saved in the global variables.
    """
    # NOTE: This is only set up when it's needed (i.e. there is no data directory, and
    # the user has to prepare their data files), instead of having it always running
    # on the off-chance that the user might need it :-/
    import socketio

    # NOTE: socketio doesn't really work well with threads, and it's tricky to get it to
    # send events to the client if we're using e.g. eventlet:
    #   https://stackoverflow.com/questions/43801884/how-to-run-python-socketio-in-thread
    #   https://python-socketio.readthedocs.io/en/latest/server.html#standard-threads
    # Native threads are less-performant, but that's not an issue for us, and it works :-/
    server = socketio.Server( async_mode="threading" )
    globvars.socketio_server = server
    flask_app.wsgi_app = socketio.WSGIApp( server, flask_app.wsgi_app )
# ---------------------------------------------------------------------
if __name__ == "__main__":

@ -19,6 +19,7 @@ ul ul ul { list-style-image: url(../images/bullet3.png) ; }
.exc .auto-ruleid { color: #555 ; }
.auto-ruleid { color: red ; }
.auto-ruleid:hover { background: #ffffcc ; }
span.pre { font-family: monospace ; }
/* notification balloons */
#growls-br { bottom: 22px ; right: 0 ; max-height: 40% ; }

@ -0,0 +1,37 @@
/* general elements */
p { margin: 5px 0 ; }
code { display: block ; margin: 5px 0 5px 20px ; }
/* informational notes (shown with an icon on the left) */
.info {
margin-top: 10px ; min-height: 25px ;
padding-left: 30px ; background: no-repeat url(../images/info.png) ;
font-size: 80% ; font-style: italic ; color: #444 ;
}
/* page layout */
#prepare-app { height: 100% ; display: flex ; }
#header { margin-bottom: 5px ; }
#main { width: 100% ; margin: 10px ; display: flex ; flex-direction: column ; }
#fatal-error { margin-bottom: 10px ; font-size: 120% ; font-weight: bold ; }
/* upload panel */
#upload-panel { align-self: start ; border: 1px solid black ; border-radius: 5px ; padding: 10px ; }
#upload-panel button { height: 70px ; margin-right: 10px ; }
#upload-panel button img { margin-top: 3px ; height: 60px ; }
/* progress panel (grows to fill the remaining space, and scrolls vertically) */
#progress-panel {
flex-grow: 1 ; overflow-y: auto ;
border: 1px solid black ; border-radius: 5px ; padding: 10px ;
font-family: monospace ; font-size: 90% ;
}
#progress-panel .progress { font-style: italic ; }
#progress-panel .status { margin: 5px 0 ; }
#progress-panel .status:first-of-type { margin-top: 0 ; }
#progress-panel .status table { margin-left: 2px ; }
#progress-panel .status table td { vertical-align: top ; }
#progress-panel .status img.icon { height: 15px ; margin: 1px 3px 0 0 ; }
/* download panel (pinned to the bottom-right corner of the window) */
#download-panel {
position: fixed ; bottom: 18px ; right: 18px ; width: 75% ;
border: 1px solid black ; border-radius: 5px ; background: white ;
padding: 10px ;
}
#download-panel button { height: 40px ; margin-right: 10px ; padding: 5px ; }
#download-panel button img { height: 30px ; }

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -0,0 +1,364 @@
// The Vue application for this page (its root just renders the <prepare-app> component).
export const gPrepareApp = Vue.createApp( { //eslint-disable-line no-undef
    template: "<prepare-app />",
} ) ;

// mount the application once the DOM is ready
$( () => {
    gPrepareApp.mount( "#prepare-app" ) ;
} ) ;

// the parameters passed in via the page's URL
let gUrlParams = new URLSearchParams( window.location.search.substring(1) ) ;

// the <progress-panel> component instance (it registers itself here when it is created)
let gProgressPanel = null ;
// --------------------------------------------------------------------
gPrepareApp.component( "prepare-app", {

    // The root component. It shows the upload panel, then the progress panel while
    // the backend is working, and finally the download panel once the results are ready.

    data() {
        // NOTE: The page template injects gHaveGhostscript as a quoted string, so a false value
        // rendered by the server (e.g. "False" or "None") would be a non-empty string, which is
        // truthy in JavaScript. We interpret such strings explicitly, so the error gets shown.
        let haveGhostscript = gHaveGhostscript ; //eslint-disable-line no-undef
        if ( typeof haveGhostscript == "string" ) {
            haveGhostscript = ! [ "", "false", "none", "null", "0" ].includes(
                haveGhostscript.trim().toLowerCase()
            ) ;
        }
        return {
            isLoaded: false, // nb: set once we have been mounted (used by the test suite)
            isProcessing: false, // nb: set once the user has selected a file
            downloadUrl: null, // nb: set when the backend has finished
            fatalErrorMsg: haveGhostscript ? null : "Ghostscript is not available.",
            fatalErrorIconUrl: makeImageUrl( "error.png" ),
        } ;
    },

    template: `
<div id="main">
    <div id="header">
        No data directory has been configured.
        <p> If you haven't used this program before, a few things need to be prepared first.
        It will take around 10-15 minutes. </p>
    </div>
    <div v-show=fatalErrorMsg id="fatal-error" >
        <img :src=fatalErrorIconUrl style="float:left;margin-right:5px;" />
        {{fatalErrorMsg}}
    </div>
    <upload-panel v-show="!fatalErrorMsg &&!isProcessing" @file-selected=onFileSelected />
    <progress-panel v-show=isProcessing @done=onDone @fatal=onFatalError ref=progressPanel />
    <download-panel v-show=downloadUrl :downloadUrl=downloadUrl ref=downloadPanel />
    <textarea id="testing-zip-data" style="display:none;" />
    <div v-if=isLoaded id="_prepareapp-loaded_" />
</div>`,

    mounted() {
        // initialize the UI
        $( "button" ).button() ;
        this.isLoaded = true ;
    },

    methods: {

        // Handle the "file-selected" event from the upload panel. The argument is null (test
        // of progress logging), a string (PDF data provided by the test suite), or a File.
        onFileSelected( file ) {
            this.isProcessing = true ;
            if ( ! file ) {
                // this is a test of progress logging
                this.uploadPdfData( null ) ;
                return ;
            }
            if ( typeof file == "string" ) {
                // this is PDF file data given to us by the test suite - just return it as is
                this.uploadPdfData( file ) ;
                return ;
            }
            // nb: we wait for the next tick so that the progress panel is shown first
            this.$nextTick( () => {
                gProgressPanel.addStatusBlock( "Uploading the PDF..." ) ;
                // read the selected file
                let fileReader = new FileReader() ;
                fileReader.onload = () => {
                    let pdfData = fileReader.result ;
                    pdfData = removeBase64Prefix( pdfData ) ;
                    this.uploadPdfData( pdfData ) ;
                } ;
                fileReader.onerror = () => {
                    // nb: without this, a failed read would leave us stuck on "Uploading the PDF..."
                    let errorMsg = fileReader.error ? fileReader.error.message : "Unknown error." ;
                    this.fatalErrorMsg = "Couldn't read the PDF file: " + errorMsg ;
                } ;
                fileReader.readAsDataURL( file ) ;
            } ) ;
        },

        // Send the (base64-encoded) PDF data to the backend, then tell it to start processing.
        uploadPdfData( pdfData ) {
            // upload the PDF file to the backend
            let data = { pdfData: pdfData } ;
            if ( gUrlParams.get( "test" ) ) {
                // nb: in test mode, we pass the test parameters in the URL through to the backend
                [ "npasses", "status", "warnings", "errors", "delay" ].forEach( (arg) => {
                    let val = gUrlParams.get( arg ) ;
                    if ( val )
                        data[arg] = val ;
                } ) ;
            }
            $.ajax( {
                url: gPrepareDataFilesUrl, //eslint-disable-line no-undef
                type: "POST",
                data: JSON.stringify( data ),
                contentType: "application/json",
            } ).done( () => {
                // tell the backend to start processing
                gProgressPanel.socketIOClient.emit( "start" ) ;
            } ).fail( (xhr, status, errorMsg) => {
                this.fatalErrorMsg = "Couldn't start processing: " + errorMsg ;
            } ) ;
        },

        // Handle the "done" event from the progress panel.
        onDone( downloadUrl ) {
            // make the download available to the user
            // nb: we grey out the progress panel, to show that it's finished
            $( this.$refs.progressPanel.$el ).css( {
                background: "#f0f0f0", color: "#444",
                "border-color": "#aaa",
            } ) ;
            this.downloadUrl = downloadUrl ;
        },

        // Handle the "fatal" event from the progress panel.
        onFatalError( msg ) {
            this.fatalErrorMsg = msg ;
        },

    },

} ) ;
// --------------------------------------------------------------------
gPrepareApp.component( "upload-panel", {

    // Lets the user select their PDF (or, in test mode, start a test run). The selection is
    // reported to the parent via a "file-selected" event, whose argument is a File object,
    // a string (PDF data left by the test suite), or null (test run).

    data() { return {
        isTestMode: gUrlParams.get( "test" ),
        uploadIconUrl: makeImageUrl( "eASLRB.png" ),
    } ; },

    template: `
<div id="upload-panel">
    <div v-if=isTestMode>
        <button @click=startTest style="height:28px;" > Go </button>
        Click on the button to start a test run.
    </div>
    <div v-else style="display:flex;">
        <input type="file" @change=onFileSelected accept=".pdf" style="display:none;" ref="selectFile" >
        <button @click=onUploadProxy id="upload-proxy" ref="uploadProxy"> <img :src=uploadIconUrl /> </button>
        <div> Click on the button, and select your copy <br> of MMP's electronic ASLRB.
            <div class="info"> You <u>must</u> use the <a href="https://www.wargamevault.com/product/344879/Electronic-Advanced-Squad-Leader-Rulebook" target="_blank">official MMP eASLRB</a>. <br>
                A scanned copy of a printed RB <u>will not work</u>!
            </div>
        </div>
    </div>
</div>`,

    methods: {

        onUploadProxy() {
            // check if the test suite has left us some PDF file data to use
            let $elem = $( "#testing-zip-data" ) ;
            if ( $elem.length > 0 && $elem.val().length > 0 ) {
                // yup - just return that
                this.$emit( "file-selected", $elem.val() ) ;
                $elem.val( "" ) ;
                return ;
            }
            $elem.remove() ; // nb: this tells download-panel we are not being run by the test suite
            // NOTE: It's difficult to style a file <input> element, so we make it hidden, and present
            // a <button> element to the user, that clicks on the real file <input> when it is clicked.
            this.$refs.selectFile.click() ;
        },

        onFileSelected( evt ) {
            // NOTE: We would normally read the file here, but it takes some time because of its size,
            // so we return the file object to the parent, so it can close us and open the progress panel,
            // showing the "Uploading PDF" message, *then* we read the file and upload it.
            let file = evt.target.files[0] ;
            if ( ! file ) {
                // nb: nothing was selected - we must not report this, since the parent treats
                // an empty value as a request to start a test run
                return ;
            }
            this.$emit( "file-selected", file ) ;
        },

        startTest() {
            // nb: null tells the parent that this is a test of progress logging
            this.$emit( "file-selected", null ) ;
        },

    },

} ) ;
// --------------------------------------------------------------------
gPrepareApp.component( "progress-panel", {

    // Shows the messages sent by the backend (via socketio) as it prepares the data files.
    // Messages are grouped into status blocks, each with a caption and a list of
    // [ message type, message ] progress entries. Emits "done" (with the download URL)
    // when the backend has finished, and "fatal" if the connection is lost before then.

    data() { return {
        socketIOClient: null,
        statusBlocks: [],
    } ; },

    // NOTE: Vue expects v-for keys to be primitive values, not objects. Status blocks are only
    // ever appended, so a block's index identifies it for as long as it exists.
    template: `
<div id="progress-panel">
    <status-block v-for="(sb,sbNo) in statusBlocks" :statusBlock=sb :key=sbNo />
</div>`,

    created() {
        // initialize
        gProgressPanel = this ;
        this.initSocketIOClient() ;
    },

    methods: {

        initSocketIOClient() {
            // initialize the socketio client
            let done = false ;
            this.socketIOClient = io.connect() ; //eslint-disable-line no-undef
            this.socketIOClient.on( "disconnect", () => {
                // nb: we disconnect ourself when processing has finished, which is not an error
                if ( ! done )
                    this.$emit( "fatal", "The server has gone away. Please restart it, then reload this page." ) ;
            } ) ;
            this.socketIOClient.on( "status", (msg) => { this.addStatusBlock( msg ) ; } ) ;
            this.socketIOClient.on( "progress", (msg) => { this.addProgressMsg( "info", msg ) ; } ) ;
            this.socketIOClient.on( "warning", (msg) => { this.addProgressMsg( "warning", msg ) ; } ) ;
            this.socketIOClient.on( "error", (msg) => { this.addProgressMsg( "error", msg ) ; } ) ;
            this.socketIOClient.on( "done", (downloadUrl) => {
                done = true ;
                gProgressPanel.addStatusBlock( "All done." ) ;
                this.socketIOClient.disconnect() ;
                this.socketIOClient = null ;
                this.$emit( "done", downloadUrl ) ;
            } ) ;
        },

        addStatusBlock( statusMsg ) {
            // de-activate the previous status block
            if ( this.statusBlocks.length > 0 )
                this.statusBlocks[ this.statusBlocks.length-1 ].isActive = false ;
            // start a new status block
            this.statusBlocks.push( {
                status: statusMsg, progress: [],
                isActive: true
            } ) ;
        },

        addProgressMsg( msgType, msg ) {
            // add a progress message to the current status block
            // nb: if there isn't one yet, we start one with an empty caption
            if ( this.statusBlocks.length == 0 )
                this.addStatusBlock( "" ) ;
            this.statusBlocks[ this.statusBlocks.length-1 ].progress.push( [ msgType, msg ] ) ;
        },

    },

} ) ;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gPrepareApp.component( "status-block", {

    // Renders a single status block: its caption, followed by a table of its progress
    // messages (each one is a [ message type, message ] pair).

    props: [ "statusBlock" ],

    template: `
<div class="status">
    <div class="caption"> {{statusBlock.status}} </div>
    <table v-if="statusBlock.progress.length > 0" >
        <tr v-for="(p,pno) in statusBlock.progress" v-show="showProgress(p,pno)" >
            <td> <img :src=makeIconUrl(p) :style=makeIconCss(p) class="icon" /> </td>
            <td v-html=p[1] />
        </tr>
    </table>
</div>`,

    methods: {

        // Decide whether a progress message should be visible.
        showProgress( progress, progressNo ) {
            // warnings and errors are always shown
            const isInfo = ( progress[0] == "info" ) ;
            if ( ! isInfo )
                return true ;
            // info messages are only shown while they are the latest message in the active status block
            const isLatest = ( progressNo == this.statusBlock.progress.length-1 ) ;
            return Boolean( this.statusBlock.isActive && isLatest ) ;
        },

        // Return the URL of the icon for a progress message.
        makeIconUrl( progress ) {
            const msgType = progress[0] ;
            const fname = ( msgType == "info" ) ? "bullet2.png" : msgType + ".png" ;
            return makeImageUrl( fname ) ;
        },

        // Return extra CSS for a progress message's icon (only info messages have any).
        makeIconCss( progress ) {
            return ( progress[0] == "info" ) ? "height: 8px ; padding-left: 4px ;" : undefined ;
        },

    },

} ) ;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gPrepareApp.component( "download-panel", {

    // Offers the prepared data files to the user as a download, and explains what to do
    // with them. When run by the test suite, the download is instead stored (base64-encoded)
    // in the #testing-zip-data element.

    props: [ "downloadUrl" ],

    data() { return {
        downloadIconUrl: makeImageUrl( "download.png" ),
    } ; },

    template: `
<div id="download-panel">
    <div style="display:flex;margin-bottom:10px;">
        <button @click=onDownload id="download"> <img :src=downloadIconUrl /> </button>
        <div> Your data files are ready.
            <p> Click on the button to download them, and unpack them into a directory somewhere. </p>
        </div>
    </div>
    <div> Then restart the server with a <span class="pre">--data</span> parameter pointing to that directory e.g.
        <code> ./run-server.py --data ... </code>
        or
        <code> ./run-container.sh --data ... </code>
    </div>
    <div class="info">
        You can edit these files directly, if you want to make changes.
        <p> If you want to make changes permanent (so they happen if you redo this preparation process), check out the files in <span class="pre">$/asl_rulebook2/extract//data/</span>. </p>
    </div>
</div>`,

    methods: {

        onDownload() {

            // nb: we should never get here without a download URL!
            if ( ! this.downloadUrl ) {
                alert( "The download is not ready." ) ;
                return ;
            }

            // if the test suite's element is not there, this is a normal run,
            // so we just send the browser to the download
            let $testData = $( "#testing-zip-data" ) ;
            if ( $testData.length == 0 ) {
                window.location.href = this.downloadUrl ;
                return ;
            }

            // otherwise, we fetch the ZIP file ourself, and hand it over to the test suite
            // FUDGE! Setting the response type in a jQuery Ajax request:
            //   $.ajax( { type: "GET", url: ...,
            //       xhrFields: { responseType: "arraybuffer" }
            //   } ) ;
            // should work, but doesn't :-/ Instead, we supply our own XHR object to manage
            // the download. This is slow, but it's only used by the test suite.
            let xhrOverride = new XMLHttpRequest() ;
            xhrOverride.responseType = "blob" ;
            $.ajax( {
                type: "GET", url: this.downloadUrl,
                xhr: () => xhrOverride,
            } ).done( () => {
                // convert the response to base64, and store it where the test suite can find it
                let fileReader = new FileReader() ;
                fileReader.onload = (evt) => {
                    let zipData = evt.target.result ;
                    $( "#testing-zip-data" ).val( removeBase64Prefix( zipData ) ) ;
                } ;
                fileReader.readAsDataURL( xhrOverride.response ) ;
            } ).fail( (xhr, status, errorMsg) => {
                alert( "Download failed: " + errorMsg ) ;
            } ) ;

        },

    },

} ) ;
// --------------------------------------------------------------------
function makeImageUrl( fname ) {
    // Return the URL for an image file.
    // NOTE: The page template builds the base URL from a path that ends with a slash, so we
    // strip any trailing slashes before joining, to avoid generating ".../images//fname".
    let baseUrl = String( gImagesBaseUrl ).replace( /\/+$/, "" ) ; //eslint-disable-line no-undef
    return baseUrl + "/" + fname ;
}
function removeBase64Prefix( val )
{
    // Strip the header from the start of a base64 data URL, leaving just the encoded data:
    //   data: MIME-TYPE ; base64 , ...
    // Strings that don't start with such a header are returned unchanged.
    const dataUrlHeader = /^data:.*?;base64,/ ;
    return val.replace( dataUrlHeader, "" ) ;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

@ -0,0 +1,39 @@
<!DOCTYPE html>
{# Page for preparing the data files; it loads the Vue application defined in prepare.js. #}
<html lang="en">
<head>
<meta charset="utf-8">
<title> Prepare {{APP_NAME}} data </title>
<link rel="shortcut icon" href="{{url_for('static', filename='images/favicon.ico')}}">
<link rel="stylesheet" type="text/css" href="{{ url_for( 'static',
filename = 'jquery-ui/jquery-ui' + WEB_DEBUG_MIN + '.css'
) }}" />
<link rel="stylesheet" type="text/css" href="{{ url_for( 'static', filename='css/global.css' ) }}" />
<link rel="stylesheet" type="text/css" href="{{ url_for( 'static', filename='css/prepare.css' ) }}" />
</head>
<body>
{# prepare.js mounts the Vue application on this element #}
<div id="prepare-app"></div>
</body>
{# load the development build of Vue when debugging, otherwise the production build #}
{%if WEB_DEBUG%}
<script src="{{ url_for( 'static', filename='vue/vue.global.js' ) }}"></script>
{%else%}
<script src="{{ url_for( 'static', filename='vue/vue.global.prod.js' ) }}"></script>
{%endif%}
<script src="{{ url_for( 'static', filename='jquery/jquery-3.6.0.js') }}"></script>
<script src="{{ url_for( 'static',
filename = 'jquery-ui/jquery-ui' + WEB_DEBUG_MIN + '.js'
) }}"></script>
<script src="{{ url_for( 'static',
filename = 'socketio/socket.io' + WEB_DEBUG_MIN + '.js'
) }}"></script>
{# NOTE: Module scripts are deferred, so the inline script further down (which defines the globals that prepare.js reads) runs before prepare.js does. #}
<script type="module" src="{{ url_for( 'static', filename='prepare.js' ) }}"></script>
<script>
// globals read by prepare.js
// NOTE(review): This value is emitted inside quotes, so prepare.js always sees a string, and any
// non-empty rendering (e.g. "False") is truthy in JavaScript - confirm what the backend passes in.
gHaveGhostscript = "{{HAVE_GHOSTSCRIPT}}" ;
// nb: this URL ends with a slash (because of the filename passed to url_for)
gImagesBaseUrl = "{{ url_for( 'static', filename='images/' ) }}" ;
gPrepareDataFilesUrl = "{{ url_for( 'prepare_data_files' ) }}" ;
</script>
</html>

@ -0,0 +1,126 @@
""" Test preparing the data files. """
import os
import json
import zipfile
import io
import base64
import pytest
from asl_rulebook2.tests.utils import for_each_easlrb_version
from asl_rulebook2.webapp.tests import pytest_options
from asl_rulebook2.webapp.tests.utils import init_webapp, \
find_child, find_children, wait_for, wait_for_elem
# ---------------------------------------------------------------------
@pytest.mark.skipif( not pytest_options.enable_prepare, reason="Prepare tests are not enabled." )
def test_prepare_logging( webapp, webdriver ):
    """Check what the progress panel shows during a simulated prepare run."""

    # load the webapp in test mode
    # NOTE: No data directory is set beforehand.
    init_webapp( webapp, webdriver,
        test=1, npasses=50, warnings="25,27,42", errors="39,43", delay=0
    )

    # what we expect to see: the caption of each status block, and the messages it shows
    expected = [
        [ "Status #1", [] ],
        [ "Status #2", [] ],
        [ "Status #3", [
            [ "warning.png", "Progress 25: warning" ],
            [ "warning.png", "Progress 27: warning" ],
        ] ],
        [ "Status #4", [
            [ "error.png", "Progress 39: error" ]
        ] ],
        [ "Status #5", [
            [ "warning.png", "Progress 42: warning" ],
            [ "error.png", "Progress 43: error" ],
        ] ],
        [ "All done.", [] ]
    ]

    # start generating progress messages, then wait for the expected results to appear
    find_child( "#upload-panel button" ).click()
    wait_for( 2, lambda: _unload_progress() == expected )
# ---------------------------------------------------------------------
@pytest.mark.skipif( not pytest_options.enable_prepare, reason="Prepare tests are not enabled." )
@pytest.mark.skipif( not pytest_options.easlrb_path, reason="eASLRB not available." )
def test_full_prepare( webapp, webdriver ):
    """Test the full prepare process."""

    def do_test( dname ):
        """Prepare the eASLRB in the specified directory, and check the generated files.

        The directory is expected to contain the PDF (eASLRB.pdf), and a JSON file
        with the expected content of each generated data file.
        """

        # initialize
        # NOTE: We load the webapp without setting a data directory first.
        init_webapp( webapp, webdriver )

        # load the PDF file data into the web page (since we can't manipulate the "Open File" dialog)
        fname = os.path.join( dname, "eASLRB.pdf" )
        with open( fname, "rb" ) as fp:
            pdf_data = fp.read()
        testing_zip_data = find_child( "#testing-zip-data", webdriver )
        webdriver.execute_script( "arguments[0].value = arguments[1]", testing_zip_data,
            base64.b64encode( pdf_data ).decode( "ascii" )
        )

        # start the prepare process, and wait for it to finish
        # NOTE: Clicking on the upload button makes the page use the PDF data we just stored,
        # instead of opening the "Open File" dialog.
        find_child( "button#upload-proxy" ).click()
        wait_for_elem( 30*60, "#download-panel" )

        # get the results
        # NOTE: The page stores the downloaded ZIP file (base64-encoded) in the same element
        # we used to pass in the PDF data.
        find_child( "button#download" ).click()
        zip_data = wait_for( 20, lambda: testing_zip_data.get_attribute( "value" ) )
        zip_data = base64.b64decode( zip_data )

        # check the results
        with zipfile.ZipFile( io.BytesIO( zip_data ) ) as zip_file:
            assert set( zip_file.namelist() ) == set( [
                "ASL Rulebook.pdf", "ASL Rulebook.index",
                "ASL Rulebook.targets", "ASL Rulebook.chapters", "ASL Rulebook.footnotes"
            ] )
            assert zip_file.getinfo( "ASL Rulebook.pdf" ).file_size > 40*1000
            for ftype in [ "index", "targets", "chapters", "footnotes" ]:
                fname = os.path.join( dname, ftype+".json" )
                # nb: we use a context manager so that the file gets closed promptly
                with open( fname, "r", encoding="utf-8" ) as fp:
                    expected = json.load( fp )
                fdata = zip_file.read( "ASL Rulebook.{}".format( ftype ) )
                assert json.loads( fdata ) == expected

    # run the test
    for_each_easlrb_version( do_test )
# ---------------------------------------------------------------------
def _unload_progress():
    """Return what the progress panel is currently showing.

    The result is a list of [ caption, messages ] pairs, one per status block, where each
    message is an [ icon filename, message text ] pair. Only messages that are currently
    displayed are included.
    """

    def get_msg( row ):
        """Return the icon filename and text of a single progress message."""
        cells = find_children( "td", row )
        assert len(cells) == 2
        icon_url = find_child( "img", cells[0] ).get_attribute( "src" )
        return [ os.path.basename( icon_url ), cells[1].text ]

    def get_status_block( root ):
        """Return the caption of a status block, and its visible progress messages."""
        caption = find_child( ".caption", root ).text
        msgs = []
        for row in find_children( "tr", root ):
            if row.is_displayed():
                msgs.append( get_msg( row ) )
        return [ caption, msgs ]

    # collect each status block in the progress panel
    results = []
    for elem in find_children( ".status", find_child( "#progress-panel" ) ):
        results.append( get_status_block( elem ) )
    return results

@ -9,11 +9,15 @@ def test_load_content_docs( webapp, webdriver ):
# test handling of an invalid data directory
webapp.control_tests.set_data_dir( "_unknown_" )
init_webapp( webapp, webdriver, errors=["Invalid data directory."] )
init_webapp( webapp, webdriver,
expected_errors = [ "Invalid data directory." ]
)
# test handling of an invalid index file
webapp.control_tests.set_data_dir( "invalid-index" )
init_webapp( webapp, webdriver, errors=["Couldn't load \"test.index\"."] )
init_webapp( webapp, webdriver,
expected_errors = [ "Couldn't load \"test.index\"." ]
)
# NOTE: If we can't load the index file, the content doc is useless and we don't load it at all.
# If any of the associated files are invalid, the content doc is loaded (i.e. a tab will be shown
# for it), and we degrade gracefully.
@ -26,12 +30,18 @@ def test_init_search( webapp, webdriver ):
# test handling of an invalid search replacements file
webapp.control_tests.set_data_dir( "invalid-search-replacements" )
init_webapp( webapp, webdriver, warnings=["Can't load user search replacements."] )
init_webapp( webapp, webdriver,
expected_warnings = [ "Can't load user search replacements." ]
)
# test handling of an invalid search aliases file
webapp.control_tests.set_data_dir( "invalid-search-aliases" )
init_webapp( webapp, webdriver, warnings=["Can't load user search aliases."] )
init_webapp( webapp, webdriver,
expected_warnings = [ "Can't load user search aliases." ]
)
# test handling of an invalid search synonyms file
webapp.control_tests.set_data_dir( "invalid-search-synonyms" )
init_webapp( webapp, webdriver, warnings=["Can't load user search synonyms."] )
init_webapp( webapp, webdriver,
expected_warnings = [ "Can't load user search synonyms." ]
)

@ -22,8 +22,8 @@ def init_webapp( webapp, webdriver, **options ):
"""Initialize the webapp."""
# initialize
expected_warnings = options.pop( "warnings", [] )
expected_errors = options.pop( "errors", [] )
expected_warnings = options.pop( "expected_warnings", [] )
expected_errors = options.pop( "expected_errors", [] )
# initialize
global _webapp, _webdriver
@ -73,7 +73,10 @@ def refresh_webapp( webdriver ):
def _wait_for_webapp():
"""Wait for the webapp to finish initialization."""
timeout = 5
wait_for( timeout, lambda: find_child("#_mainapp-loaded_") )
wait_for( timeout,
lambda: find_child("#_mainapp-loaded_") \
or ( webapp_tests.pytest_options.enable_prepare and find_child("#_prepareapp-loaded_") )
)
# ---------------------------------------------------------------------
@ -243,9 +246,13 @@ def wait_for_elem( timeout, sel, parent=None ):
def wait_for( timeout, func ):
"""Wait for a condition to become true."""
WebDriverWait( _webdriver, timeout, poll_frequency=0.1 ).until(
lambda driver: func()
)
last_val = None
def wrapper( driver ): #pylint: disable=unused-argument
nonlocal last_val
last_val = func()
return last_val
WebDriverWait( _webdriver, timeout, poll_frequency=0.1 ).until( wrapper )
return last_val
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

@ -1,6 +1,7 @@
"""Helper functions."""
import os
import shutil
import pathlib
import re
import json
@ -66,3 +67,7 @@ def parse_int( val, default=None ):
return int( val )
except (ValueError, TypeError):
return default
def get_gs_path():
    """Return the path to the Ghostscript executable.

    The GS_PATH config setting is used if it is present, otherwise we fall back
    to whatever "gs" resolves to on the PATH (None if it can't be found).
    """
    path_from_search = shutil.which( "gs" )
    return app.config.get( "GS_PATH", path_from_search )

@ -56,6 +56,13 @@ def pytest_addoption( parser ):
"--short-tests", action="store_true", dest="short_tests", default=False,
help="Skip running the longer tests."
)
# NOTE: Care needs to be taken with this option if running the tests against a remote server,
# since it will need to be started with no data directory configured (so that the socketio server
# gets initialized).
parser.addoption(
"--prepare", action="store_true", dest="enable_prepare", default=False,
help="Enable the prepare tests."
)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -122,6 +129,11 @@ def _make_webapp():
app.config.pop( "WEBAPP_INITIAL_QUERY_STRING", None )
app.config.pop( "DISABLE_FIXUP_CONTENT", None )
app.config[ "IGNORE_MISSING_DATA_FILES" ] = True
# check if we will be running the prepare tests
if _pytest_options.enable_prepare:
# yup - initialize the socketio server
from asl_rulebook2.webapp.run_server import init_prepare_socketio
init_prepare_socketio( app )
# NOTE: We run the server thread as a daemon so that it won't prevent the tests from finishing
# when they're done. However, this makes it difficult to know when to shut the server down,
# and, in particular, clean up the gRPC service. We send an EndTests message at the end of each test,

@ -3,5 +3,6 @@
flask==1.1.2
pdfminer.six==20201018
pikepdf==2.5.2
python-socketio==5.2.1
pyyaml==5.4.1
click==7.1.2

@ -40,10 +40,4 @@ setup(
data_files = [
( "asl-rulebook2", ["LICENSE.txt"] ),
],
entry_points = {
"console_scripts": [
"dump-pdf = bin.dump_pdf:main",
"extract-all = asl_rulebook2.extract.all:main"
],
}
)

Loading…
Cancel
Save