You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
160 lines
6.4 KiB
160 lines
6.4 KiB
""" Manage the content documents. """
|
|
|
|
import os
|
|
import io
|
|
import json
|
|
import glob
|
|
|
|
from flask import jsonify, send_file, url_for, abort
|
|
|
|
from asl_rulebook2.webapp import app
|
|
from asl_rulebook2.webapp.utils import slugify
|
|
|
|
content_sets = None
|
|
content_doc_index = None
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
def load_content_sets( startup_msgs, logger ):
|
|
"""Load the content from the data directory."""
|
|
|
|
# NOTE: A "content set" is an index file, together with one or more "content docs".
|
|
# A "content doc" is a PDF file, with an associated targets and/or footnote file.
|
|
# This architecture allows us to have:
|
|
# - a single index file that references content spread over multiple PDF's (e.g. the MMP eASLRB,
|
|
# together with additional modules in separate PDF's (e.g. RB or KGP), until such time these
|
|
# get included in the main eASLRB).
|
|
# - rules for completely separate modules (e.g. third-party modules) that are not included
|
|
# in the MMP eASLRB index, and have their own index.
|
|
|
|
# initialize
|
|
global content_sets
|
|
content_sets = {}
|
|
dname = app.config.get( "DATA_DIR" )
|
|
if not dname:
|
|
return
|
|
if not os.path.isdir( dname ):
|
|
startup_msgs.error( "Invalid data directory.", dname )
|
|
return
|
|
|
|
def load_content_doc( fname_stem, title ):
|
|
# load the content doc files
|
|
content_doc = { "title": title }
|
|
load_file( fname_stem+".targets", content_doc, "targets", startup_msgs.warning )
|
|
load_file( fname_stem+".footnotes", content_doc, "footnotes", startup_msgs.warning )
|
|
load_file( fname_stem+".pdf", content_doc, "content", startup_msgs.warning, binary=True )
|
|
return content_doc
|
|
|
|
def load_file( fname, save_loc, key, on_error, binary=False ):
|
|
fname = os.path.join( dname, fname )
|
|
if not os.path.isfile( fname ):
|
|
return False
|
|
# load the specified file
|
|
try:
|
|
if binary:
|
|
with open( fname, mode="rb" ) as fp:
|
|
data = fp.read()
|
|
logger.debug( "- Loaded \"%s\" file: #bytes=%d", key, len(data) )
|
|
else:
|
|
with open( fname, "r", encoding="utf-8" ) as fp:
|
|
data = json.load( fp )
|
|
logger.debug( "- Loaded \"%s\" file.", key )
|
|
except Exception as ex: #pylint: disable=broad-except
|
|
on_error( "Couldn't load \"{}\".".format( os.path.basename(fname) ), str(ex) )
|
|
return False
|
|
# save the file data
|
|
save_loc[ key ] = data
|
|
return True
|
|
|
|
def find_assoc_cdocs( fname_stem ):
|
|
# find other content docs associated with the content set (names have the form "Foo (...)")
|
|
matches = set()
|
|
for fname in os.listdir( dname ):
|
|
if not fname.startswith( fname_stem ):
|
|
continue
|
|
fname = os.path.splitext( fname )[0]
|
|
fname = fname[len(fname_stem):].strip()
|
|
if fname.startswith( "(" ) and fname.endswith( ")" ):
|
|
matches.add( fname[1:-1] )
|
|
return matches
|
|
|
|
# load each content set
|
|
logger.info( "Loading content sets: %s", dname )
|
|
fspec = os.path.join( dname, "*.index" )
|
|
for fname in glob.glob( fspec ):
|
|
fname2 = os.path.basename( fname )
|
|
logger.info( "- %s", fname2 )
|
|
# load the index file
|
|
title = os.path.splitext( fname2 )[0]
|
|
cset_id = slugify( title )
|
|
content_set = {
|
|
"cset_id": cset_id,
|
|
"title": title,
|
|
"content_docs": {},
|
|
"index_fname": fname,
|
|
}
|
|
if not load_file( fname2, content_set, "index", startup_msgs.error ):
|
|
continue # nb: we can't do anything without an index file
|
|
# load the main content doc
|
|
fname_stem = os.path.splitext( fname2 )[0]
|
|
content_doc = load_content_doc( fname_stem, fname_stem )
|
|
cdoc_id = cset_id # nb: because this the main content document
|
|
content_doc[ "cdoc_id" ] = cdoc_id
|
|
content_set[ "content_docs" ][ cdoc_id ] = content_doc
|
|
# load any associated content docs
|
|
for fname_stem2 in find_assoc_cdocs( fname_stem ):
|
|
# nb: we assume there's only one space between the two filename stems :-/
|
|
content_doc = load_content_doc(
|
|
"{} ({})".format( fname_stem, fname_stem2 ),
|
|
fname_stem2
|
|
)
|
|
cdoc_id2 = "{}!{}".format( cdoc_id, slugify(fname_stem2) )
|
|
content_doc[ "cdoc_id" ] = cdoc_id2
|
|
content_set[ "content_docs" ][ cdoc_id2 ] = content_doc
|
|
# save the new content set
|
|
content_sets[ content_set["cset_id"] ] = content_set
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
def _dump_content_sets():
|
|
"""Dump the available content sets."""
|
|
for cset_id, cset in content_sets.items():
|
|
print( "=== {} ({}) ===".format( cset["title"], cset_id ) )
|
|
for cdoc_id, cdoc in cset["content_docs"].items():
|
|
print( "Content doc: {} ({})".format( cdoc["title"], cdoc_id ) )
|
|
for key in [ "targets", "footnotes", "content" ]:
|
|
if key in cdoc:
|
|
print( "- {}: {}".format( key, len(cdoc[key]) ))
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
@app.route( "/content-docs" )
|
|
def get_content_docs():
|
|
"""Return the available content docs."""
|
|
resp = {}
|
|
for cset in content_sets.values():
|
|
for cdoc in cset["content_docs"].values():
|
|
cdoc2 = {
|
|
"cdoc_id": cdoc["cdoc_id"],
|
|
"parent_cset_id": cset["cset_id"],
|
|
"title": cdoc["title"],
|
|
}
|
|
if "content" in cdoc:
|
|
cdoc2["url"] = url_for( "get_content", cdoc_id=cdoc["cdoc_id"] )
|
|
if "targets" in cdoc:
|
|
cdoc2["targets"] = cdoc["targets"]
|
|
resp[ cdoc["cdoc_id"] ] = cdoc2
|
|
return jsonify( resp )
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
@app.route( "/content/<cdoc_id>" )
|
|
def get_content( cdoc_id ):
|
|
"""Return the content for the specified document."""
|
|
for cset in content_sets.values():
|
|
for cdoc in cset["content_docs"].values():
|
|
if cdoc["cdoc_id"] == cdoc_id and "content" in cdoc:
|
|
buf = io.BytesIO( cdoc["content"] )
|
|
return send_file( buf, mimetype="application/pdf" )
|
|
abort( 404 )
|
|
return None # stupid pylint :-/
|
|
|