Changed how the PDF documents are served.

master
Pacman Ghost 3 years ago
parent 8767e3453c
commit fdd027cb2b
  1. 22
      asl_rulebook2/webapp/content.py
  2. 7
      asl_rulebook2/webapp/search.py
  3. 22
      asl_rulebook2/webapp/startup.py

@ -2,7 +2,6 @@
import os
import re
import io
from flask import jsonify, send_file, url_for, abort
@ -85,8 +84,10 @@ def load_content_sets( startup_msgs, logger ):
if ruleid not in _footnote_index[ cdoc_id ]:
_footnote_index[ cdoc_id ][ ruleid ] = []
_footnote_index[ cdoc_id ][ ruleid ].append( footnote )
fname = fname_stem + ".pdf"
if not load_file( fname, content_doc, "content", startup_msgs.warning, binary=True ):
fname = os.path.join( data_dir, fname_stem+".pdf" )
if os.path.isfile( fname ):
content_doc["filename"] = fname
else:
# NOTE: Things will work without this file, but from the user's point of view,
# they've probably set something up incorrectly, so we give them a hint.
if not app.config.get( "IGNORE_MISSING_DATA_FILES" ):
@ -211,7 +212,7 @@ def _dump_content_sets():
print( "=== {} ({}) ===".format( cset["title"], cset_id ) )
for cdoc_id, cdoc in cset["content_docs"].items():
print( "Content doc: {} ({})".format( cdoc["title"], cdoc_id ) )
for key in [ "targets", "footnotes", "content" ]:
for key in [ "targets", "footnotes", "filename" ]:
if key in cdoc:
print( "- {}: {}".format( key, len(cdoc[key]) ))
@ -305,7 +306,7 @@ def get_content_docs():
"parent_cset_id": cset["cset_id"],
"title": cdoc["title"],
}
if "content" in cdoc:
if "filename" in cdoc:
cdoc2["url"] = url_for( "get_content", cdoc_id=cdoc["cdoc_id"] )
for key in [ "targets", "chapters", "background", "icon" ]:
if key in cdoc:
@ -320,9 +321,14 @@ def get_content( cdoc_id ):
"""Return the content for the specified document."""
for cset in _content_sets.values():
for cdoc in cset["content_docs"].values():
if cdoc["cdoc_id"] == cdoc_id and "content" in cdoc:
buf = io.BytesIO( cdoc["content"] )
return send_file( buf, mimetype="application/pdf" )
if cdoc["cdoc_id"] == cdoc_id and "filename" in cdoc:
# NOTE: Important information is stored at the end of a PDF document, and PDF.js
# can get it early, *if* the server supports range requests, which will allow it
# to start rendering the document before it's received the entire file.
# https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#range
# https://flask.palletsprojects.com/en/1.1.x/api/?highlight=send_file#flask.send_file
return send_file( cdoc["filename"], mimetype="application/pdf", conditional=True )
abort( 404 )
return None # stupid pylint :-/

@ -825,6 +825,8 @@ def _fixup_searchable_content( sr_type, fixup_row, make_fields ):
return plural( nrows, "row", "rows" )
_last_sleep_time = 0
def _tag_ruleids_in_field( obj, key, cset_id ):
"""Tag ruleid's in an optional field."""
if isinstance( key, int ) or key in obj:
@ -837,6 +839,11 @@ def _tag_ruleids_in_field( obj, key, cset_id ):
new_val = tag_ruleids( val, cset_id )
with webapp_startup.fixup_content_lock:
obj[key] = new_val
# FUDGE! Give other threads a chance to run :-/
global _last_sleep_time
if time.time() - _last_sleep_time > 1:
time.sleep( 0.1 )
_last_sleep_time = time.time()
def _get_row_count( conn, table_name ):
"""Get the number of rows in a table."""

@ -71,9 +71,9 @@ def init_webapp():
# NOTE: It's useful to do this synchronously when running the test suite, since if the tests
# need the linkified ruleid's, they can't start until the fixup has finished (and if they don't,
# it won't really matter, since there will be so little data that this process will be fast).
_do_fixup_content()
_do_fixup_content( False )
else:
threading.Thread( target = _do_fixup_content ).start()
threading.Thread( target=_do_fixup_content, args=(True,) ).start()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -83,10 +83,25 @@ def add_fixup_content_task( ctype, func ):
return
_fixup_content_tasks.append( ( ctype, func ) )
def _do_fixup_content():
def _do_fixup_content( delay ):
"""Run each task to fixup content."""
if not _fixup_content_tasks:
return
# FUDGE! If we start processing straight away, the main PDF loads very slowly because of us :-/,
# and since there's no way to set thread priorities in Python, we delay for a short time, to give
# the PDF time to load, before we start working.
# NOTE: This delay only helps the initial load of the main ASLRB PDF. After processing has started,
# if the user reloads the page, or tries to load another PDF, they will have the same problem of
# very slow loads. To work around this, _tag_ruleids_in_field() sleeps periodically, to give
# other threads a chance to run. The PDFs load a bit slowly, but it's acceptable.
if delay:
delay = parse_int( app.config.get( "FIXUP_CONTENT_DELAY" ), 5 )
time.sleep( delay )
# process each fixup task
_logger.info( "Processing fixup tasks..." )
start_time = time.time()
for task_no, (ctype, func) in enumerate( _fixup_content_tasks ):
_logger.debug( "Fixing up %s (%d/%d)...", ctype, 1+task_no, len(_fixup_content_tasks) )
@ -98,6 +113,7 @@ def _do_fixup_content():
continue
elapsed_time = datetime.timedelta( seconds = int( time.time() - start_time2 ) )
_logger.debug( "- Finished fixing up %s (%s): %s", ctype, elapsed_time, msg )
elapsed_time = datetime.timedelta( seconds = int( time.time() - start_time ) )
_logger.info( "All fixup tasks completed (%s).", elapsed_time )

Loading…
Cancel
Save