Changed how the PDF documents are served.

3 years ago · fdd027cb2b
parent 8767e3453c
commit fdd027cb2b
3 changed files with 40 additions and 11 deletions
--- a/asl_rulebook2/webapp/content.py
+++ b/asl_rulebook2/webapp/content.py
@ -2,7 +2,6 @@

 import os
 import re
-import io

 from flask import jsonify, send_file, url_for, abort

@ -85,8 +84,10 @@ def load_content_sets( startup_msgs, logger ):
                        if ruleid not in _footnote_index[ cdoc_id ]:
                            _footnote_index[ cdoc_id ][ ruleid ] = []
                        _footnote_index[ cdoc_id ][ ruleid ].append( footnote )
-        fname = fname_stem + ".pdf"
-        if not load_file( fname, content_doc, "content", startup_msgs.warning, binary=True ):
+        fname = os.path.join( data_dir, fname_stem+".pdf" )
+        if os.path.isfile( fname ):
+            content_doc["filename"] = fname
+        else:
            # NOTE: Things will work without this file, but from the user's point of view,
            # they've probably set something up incorrectly, so we give them a hint.
            if not app.config.get( "IGNORE_MISSING_DATA_FILES" ):
@ -211,7 +212,7 @@ def _dump_content_sets():
        print( "=== {} ({}) ===".format( cset["title"], cset_id ) )
        for cdoc_id, cdoc in cset["content_docs"].items():
            print( "Content doc: {} ({})".format( cdoc["title"], cdoc_id ) )
-            for key in [ "targets", "footnotes", "content" ]:
+            for key in [ "targets", "footnotes", "filename" ]:
                if key in cdoc:
                    print( "- {}: {}".format( key, len(cdoc[key]) ))

@ -305,7 +306,7 @@ def get_content_docs():
                "parent_cset_id": cset["cset_id"],
                "title": cdoc["title"],
            }
-            if "content" in cdoc:
+            if "filename" in cdoc:
                cdoc2["url"] = url_for( "get_content", cdoc_id=cdoc["cdoc_id"] )
            for key in [ "targets", "chapters", "background", "icon" ]:
                if key in cdoc:
@ -320,9 +321,14 @@ def get_content( cdoc_id ):
    """Return the content for the specified document."""
    for cset in _content_sets.values():
        for cdoc in cset["content_docs"].values():
-            if cdoc["cdoc_id"] == cdoc_id and "content" in cdoc:
-                buf = io.BytesIO( cdoc["content"] )
-                return send_file( buf, mimetype="application/pdf" )
+            if cdoc["cdoc_id"] == cdoc_id and "filename" in cdoc:
+                # NOTE: Important information is stored at the end of a PDF document, and PDF.js
+                # can get it early, *if* the server supports range requests, which will allow it
+                # to start rendering the document before it's received the entire file.
+                #   https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#range
+                #   https://flask.palletsprojects.com/en/1.1.x/api/?highlight=send_file#flask.send_file
+                return send_file( cdoc["filename"], mimetype="application/pdf", conditional=True )
+
    abort( 404 )
    return None # stupid pylint :-/

--- a/asl_rulebook2/webapp/search.py
+++ b/asl_rulebook2/webapp/search.py
@ -825,6 +825,8 @@ def _fixup_searchable_content( sr_type, fixup_row, make_fields ):

    return plural( nrows, "row", "rows" )

+_last_sleep_time = 0
+
 def _tag_ruleids_in_field( obj, key, cset_id ):
    """Tag ruleid's in an optional field."""
    if isinstance( key, int ) or key in obj:
@ -837,6 +839,11 @@ def _tag_ruleids_in_field( obj, key, cset_id ):
        new_val = tag_ruleids( val, cset_id )
        with webapp_startup.fixup_content_lock:
            obj[key] = new_val
+        # FUDGE! Give other threads a chance to run :-/
+        global _last_sleep_time
+        if time.time() - _last_sleep_time > 1:
+            time.sleep( 0.1 )
+            _last_sleep_time = time.time()

 def _get_row_count( conn, table_name ):
    """Get the number of rows in a table."""
--- a/asl_rulebook2/webapp/startup.py
+++ b/asl_rulebook2/webapp/startup.py
@ -71,9 +71,9 @@ def init_webapp():
        # NOTE: It's useful to do this synchronously when running the test suite, since if the tests
        # need the linkified ruleid's, they can't start until the fixup has finished (and if they don't
        # it won't really matter, since there will be so little data, this process will be fast).
-        _do_fixup_content()
+        _do_fixup_content( False )
    else:
-        threading.Thread( target = _do_fixup_content ).start()
+        threading.Thread( target=_do_fixup_content, args=(True,) ).start()

 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

@ -83,10 +83,25 @@ def add_fixup_content_task( ctype, func ):
        return
    _fixup_content_tasks.append( ( ctype, func ) )

-def _do_fixup_content():
+def _do_fixup_content( delay ):
    """Run each task to fixup content."""
+
    if not _fixup_content_tasks:
        return
+
+    # FUDGE! If we start processing straight away, the main PDF loads very slowly because of us :-/,
+    # and since there's no way to set thread priorities in Python, we delay for a short time, to give
+    # the PDF time to load, before we start working.
+    # NOTE: This delay only helps the initial load of the main ASLRB PDF. After processing has started,
+    # if the user reloads the page, or tries to load another PDF, they will have the same problem of
+    # very slow loads. To work around this, _tag_ruleids_in_field() sleeps periodically, to give
+    # other threads a chance to run. The PDFs load a bit slowly, but it's acceptable.
+    if delay:
+        delay = parse_int( app.config.get( "FIXUP_CONTENT_DELAY" ), 5 )
+        time.sleep( delay )
+
+    # process each fixup task
+    _logger.info( "Processing fixup tasks..." )
    start_time = time.time()
    for task_no, (ctype, func) in enumerate( _fixup_content_tasks ):
        _logger.debug( "Fixing up %s (%d/%d)...", ctype, 1+task_no, len(_fixup_content_tasks) )
@ -98,6 +113,7 @@ def _do_fixup_content():
            continue
        elapsed_time = datetime.timedelta( seconds = int( time.time() - start_time2 ) )
        _logger.debug( "- Finished fixing up %s (%s): %s", ctype, elapsed_time, msg )
+
    elapsed_time = datetime.timedelta( seconds = int( time.time() - start_time ) )
    _logger.info( "All fixup tasks completed (%s).", elapsed_time )