Implemented a basic search engine.

master
Pacman Ghost 3 years ago
parent 9d2495aa64
commit b387871bbe
  1. 78
      asl_rulebook2/utils.py
  2. 20
      asl_rulebook2/webapp/__init__.py
  3. 1
      asl_rulebook2/webapp/config/constants.py
  4. 25
      asl_rulebook2/webapp/config/search-aliases.json
  5. 14
      asl_rulebook2/webapp/config/search-replacements.json
  6. 51
      asl_rulebook2/webapp/config/search-synonyms.json
  7. 37
      asl_rulebook2/webapp/content.py
  8. 5
      asl_rulebook2/webapp/main.py
  9. 475
      asl_rulebook2/webapp/search.py
  10. 33
      asl_rulebook2/webapp/static/ContentPane.js
  11. 40
      asl_rulebook2/webapp/static/MainApp.js
  12. 15
      asl_rulebook2/webapp/static/NavPane.js
  13. 60
      asl_rulebook2/webapp/static/SearchPane.js
  14. 83
      asl_rulebook2/webapp/static/SearchResult.js
  15. 6
      asl_rulebook2/webapp/static/TabbedPages.js
  16. 2
      asl_rulebook2/webapp/static/css/SearchPane.css
  17. 14
      asl_rulebook2/webapp/static/css/SearchResult.css
  18. 18
      asl_rulebook2/webapp/static/utils.js
  19. 1
      asl_rulebook2/webapp/templates/index.html
  20. 17
      asl_rulebook2/webapp/tests/fixtures/simple/simple.index
  21. BIN
      asl_rulebook2/webapp/tests/fixtures/simple/simple.pdf
  22. 18
      asl_rulebook2/webapp/tests/fixtures/simple/simple.targets
  23. 298
      asl_rulebook2/webapp/tests/test_search.py
  24. 21
      asl_rulebook2/webapp/tests/utils.py
  25. 16
      asl_rulebook2/webapp/utils.py
  26. 66
      bin/add_pdf_dests.py
  27. 2
      bin/extract_pages.py

@ -1,8 +1,82 @@
""" Miscellaneous utilities. """ """ Miscellaneous utilities. """
import os
import pathlib import pathlib
import tempfile
import re import re
import math import math
from io import StringIO
from html.parser import HTMLParser
# ---------------------------------------------------------------------
class TempFile:
    """Manage a temp file that can be closed while it's still being used.

    The underlying file is created with delete=False, so closing it does not
    remove it from disk; it is only removed when close() is called with
    delete=True (which is what happens when the context manager exits).

    Attributes:
        mode: mode the file is opened with.
        extn: suffix given to the temp file's name (or None).
        encoding: text encoding (or None to use the default for the mode).
        temp_file: the underlying file object (None until open() is called).
        name: full path of the temp file (None until open() is called).
    """

    def __init__( self, mode="wb", extn=None, encoding=None ):
        self.mode = mode
        self.extn = extn
        self.encoding = encoding
        self.temp_file = None
        self.name = None

    def open( self ):
        """Allocate a temp file."""
        if self.encoding:
            encoding = self.encoding
        else:
            # nb: binary files can't have an encoding, text files default to UTF-8
            encoding = "utf-8" if "b" not in self.mode else None
        assert self.temp_file is None
        self.temp_file = tempfile.NamedTemporaryFile(
            mode = self.mode,
            encoding = encoding,
            suffix = self.extn,
            delete = False
        )
        self.name = self.temp_file.name

    def close( self, delete ):
        """Close the temp file, and optionally delete it.

        This can be called more than once (e.g. explicitly by the caller, then again
        when the context manager exits) - deleting a file that has already been removed
        is not an error.
        """
        try:
            self.temp_file.close()
        finally:
            # nb: we still want to clean up, even if the close failed
            if delete:
                try:
                    os.unlink( self.temp_file.name )
                except FileNotFoundError:
                    pass # nb: the file has already been deleted

    def write( self, data ):
        """Write data to the temp file."""
        self.temp_file.write( data )

    def __enter__( self ):
        """Enter the context manager."""
        self.open()
        return self

    def __exit__( self, exc_type, exc_val, exc_tb ):
        """Exit the context manager."""
        self.close( delete=True )
# ---------------------------------------------------------------------
def strip_html( val ):
    """Strip HTML tags from a string, returning only the text content.

    Empty values (None or an empty string) are returned unchanged.
    """
    if not val:
        return val
    buf = StringIO()

    class StripHtml( HTMLParser ):
        """Collect the text content of an HTML fragment."""
        def __init__( self ):
            super().__init__()
            self.strict = False
        def handle_data( self, data ):
            buf.write( data )
        def error( self, message ):
            pass

    # strip HTML
    html_stripper = StripHtml()
    html_stripper.feed( val )
    # NOTE: We must close the parser, to force it to process any buffered data, otherwise
    # trailing text that looks like it might be an incomplete character reference
    # (e.g. "AT&T") is held back waiting for more input, and never gets returned.
    html_stripper.close()
    return buf.getvalue()
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
@ -99,6 +173,10 @@ def append_text( buf, new ):
buf += " " buf += " "
return buf + new return buf + new
def plural( n, name1, name2 ):
    """Format a count with its label e.g. "1 index entry" or "3 index entries".

    name1 is used when n is exactly 1, name2 for every other value.
    """
    if n == 1:
        label = name1
    else:
        label = name2
    return "{} {}".format( n, label )
def remove_quotes( val ): def remove_quotes( val ):
"""Remove enclosing quotes from a string.""" """Remove enclosing quotes from a string."""
if val[0] in ('"',"'") and val[-1] == val[0]: if val[0] in ('"',"'") and val[-1] == val[0]:

@ -11,7 +11,7 @@ from flask import Flask
import flask.cli import flask.cli
import yaml import yaml
from asl_rulebook2.webapp.config.constants import BASE_DIR from asl_rulebook2.webapp.config.constants import BASE_DIR, CONFIG_DIR
shutdown_event = threading.Event() shutdown_event = threading.Event()
@ -19,6 +19,7 @@ shutdown_event = threading.Event()
def _load_config( fname, section ): def _load_config( fname, section ):
"""Load config settings from a file.""" """Load config settings from a file."""
fname = os.path.join( CONFIG_DIR, fname )
if not os.path.isfile( fname ): if not os.path.isfile( fname ):
return return
config_parser = configparser.ConfigParser() config_parser = configparser.ConfigParser()
@ -50,21 +51,12 @@ flask.cli.show_server_banner = lambda *args: None
app = Flask( __name__ ) app = Flask( __name__ )
# load the application configuration # load the application configuration
config_dir = os.path.join( BASE_DIR, "config" ) _load_config( "app.cfg", "System" )
_fname = os.path.join( config_dir, "app.cfg" ) _load_config( "site.cfg", "Site Config" )
_load_config( _fname, "System" ) _load_config( "debug.cfg", "Debug" )
# load any site configuration
_fname = os.path.join( config_dir, "site.cfg" )
_load_config( _fname, "Site Config" )
# load any debug configuration
_fname = os.path.join( config_dir, "debug.cfg" )
if os.path.isfile( _fname ) :
_load_config( _fname, "Debug" )
# initialize logging # initialize logging
_fname = os.path.join( config_dir, "logging.yaml" ) _fname = os.path.join( CONFIG_DIR, "logging.yaml" )
if os.path.isfile( _fname ): if os.path.isfile( _fname ):
with open( _fname, "r", encoding="utf-8" ) as fp: with open( _fname, "r", encoding="utf-8" ) as fp:
try: try:

@ -7,3 +7,4 @@ APP_VERSION = "v0.1" # nb: also update setup.py
APP_DESCRIPTION = "Search engine for the ASL Rulebook." APP_DESCRIPTION = "Search engine for the ASL Rulebook."
BASE_DIR = os.path.abspath( os.path.join( os.path.dirname(__file__), ".." ) ) BASE_DIR = os.path.abspath( os.path.join( os.path.dirname(__file__), ".." ) )
CONFIG_DIR = os.path.join( BASE_DIR, "config" )

@ -0,0 +1,25 @@
{
"_comment_": "This file defines search aliases.",
"_comment_": "A key that appears in a query string will match itself or any of its associated values.",
"_comment_": " e.g. searching for 'entrenchments' will actually search for 'entrenchments OR foxhole OR trench OR ditch'",
"_comment_": "These differ from search synonyms in that only the key word will trigger the replacement, not any word from the set.",
"_comment_": "A user-defined version of this file in the data directory will also be loaded.",
"latw": [
"atmm", "atr", "baz", "mol-p", "mol-projector", "piat", "pf", "pfk", "psk"
],
"fortification/fortifications": [
"cave", "a-t ditch", "foxhole", "sangar", "trench", "bunker", "minefield", "mines", "booby trap", "panji", "pillbox", "roadblock", "tetrahedron", "wire"
],
"entrenchment/entrenchments": [
"foxhole", "trench", "ditch"
],
"vehicle/vehicles": [
"tank", "halftrack", "half-track", "jeep", "carrier"
],
"illumination": [
"tarshell", "illuminating round", "trip flare"
]
}

@ -0,0 +1,14 @@
{
"_comment_": "This file defines search replacements.",
"_comment_": "Keys that appear in a query string will be replaced by the value.",
"_comment_": " e.g. searching for '1/2 MF' will actually search for '½ MF'",
"_comment_": "A user-defined version of this file in the data directory will also be loaded.",
"1/2": "½",
"3/4": "¾",
"3/8": "⅜",
"5/8": "⅝",
"(r)": "®"
}

@ -0,0 +1,51 @@
[
"This file defines search synonyms.",
"If a word appears in a query string, it will match any of the words in its set.",
" e.g. searching for 'finn gun' will actually search for '(finn OR finnish) AND gun'",
"These differ from search aliases in that any word from a set will trigger the replacement.",
"A user-defined version of this file in the data directory will also be loaded.",
[ "u.s.", "america", "american" ],
[ "usmc", "marine" ],
[ "finn", "finnish" ],
[ "romania", "romanian" ],
[ "hungary", "hungarian" ],
[ "slovakia", "slovakian" ],
[ "croatia", "croatian" ],
[ "bulgaria", "bulgarian" ],
[ "dc", "demo charge", "demolition charge" ],
[ "ft", "flamethrower", "flame-thrower" ],
[ "baz", "bazooka" ],
[ "pf", "panzerfaust" ],
[ "psk", "panzerschreck" ],
[ "wp", "white phosphorus" ],
[ "mol", "molotov cocktail" ],
[ "ovr", "overrun" ],
[ "cc", "close combat" ],
[ "thh", "t-h hero", "tank-hunter hero" ],
[ "scw", "shaped-charge weapon" ],
[ "sw", "support weapon" ],
[ "mg", "machinegun", "machine-gun", "machine gun" ],
[ "firelane", "fire-lane", "fire lane" ],
[ "firegroup", "fire-group", "fire group" ],
[ "lc", "landing craft" ],
[ "ht", "halftrack", "half-track" ],
[ "wa", "wall advantage" ],
[ "hob", "heat of battle" ],
[ "cg", "campaign game" ],
[ "pbm", "pbem" ],
[ "rb", "red barricades" ],
[ "votg", "valor of the guards" ],
[ "kgp", "kampfgruppe peiper" ],
[ "kgs", "kampfgruppe scherer" ],
[ "brt", "br:t", "blood reef tarawa" ],
[ "pb", "pegasus bridge" ],
[ "ammo", "ammunition" ],
[ "armor", "armour" ],
[ "color", "colour" ]
]

@ -2,6 +2,7 @@
import os import os
import io import io
import json
import glob import glob
from flask import jsonify, send_file, url_for, abort from flask import jsonify, send_file, url_for, abort
@ -13,7 +14,7 @@ content_docs = None
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def load_content_docs(): def load_content_docs( logger ):
"""Load the content documents from the data directory.""" """Load the content documents from the data directory."""
# initialize # initialize
@ -29,26 +30,32 @@ def load_content_docs():
fname = os.path.join( dname, fname ) fname = os.path.join( dname, fname )
if not os.path.isfile( fname ): if not os.path.isfile( fname ):
return return
kwargs = {} if binary:
kwargs["mode"] = "rb" if binary else "r" with open( fname, mode="rb" ) as fp:
if not binary: data = fp.read()
kwargs["encoding"] = "utf-8" logger.debug( "- Loaded \"%s\" file: #bytes=%d", key, len(data) )
with open( fname, **kwargs ) as fp: content_doc[ key ] = data
content_doc[ key ] = fp.read() else:
with open( fname, "r", encoding="utf-8" ) as fp:
content_doc[ key ] = json.load( fp )
logger.debug( "- Loaded \"%s\" file.", key )
# load each content doc # load each content doc
logger.info( "Loading content docs: %s", dname )
fspec = os.path.join( dname, "*.index" ) fspec = os.path.join( dname, "*.index" )
for fname in glob.glob( fspec ): for fname in glob.glob( fspec ):
fname = os.path.basename( fname ) fname2 = os.path.basename( fname )
title = os.path.splitext( fname )[0] logger.info( "- %s", fname2 )
title = os.path.splitext( fname2 )[0]
content_doc = { content_doc = {
"_fname": fname,
"doc_id": slugify( title ), "doc_id": slugify( title ),
"title": title, "title": title,
} }
get_doc( content_doc, "index", fname ) get_doc( content_doc, "index", fname2 )
get_doc( content_doc, "targets", change_extn(fname,".targets") ) get_doc( content_doc, "targets", change_extn(fname2,".targets") )
get_doc( content_doc, "footnotes", change_extn(fname,".footnotes") ) get_doc( content_doc, "footnotes", change_extn(fname2,".footnotes") )
get_doc( content_doc, "content", change_extn(fname,".pdf"), binary=True ) get_doc( content_doc, "content", change_extn(fname2,".pdf"), binary=True )
content_docs[ content_doc["doc_id"] ] = content_doc content_docs[ content_doc["doc_id"] ] = content_doc
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
@ -59,11 +66,13 @@ def get_content_docs():
resp = {} resp = {}
for cdoc in content_docs.values(): for cdoc in content_docs.values():
cdoc2 = { cdoc2 = {
"docId": cdoc["doc_id"], "doc_id": cdoc["doc_id"],
"title": cdoc["title"], "title": cdoc["title"],
} }
if "content" in cdoc: if "content" in cdoc:
cdoc2["url"] = url_for( "get_content", doc_id=cdoc["doc_id"] ) cdoc2["url"] = url_for( "get_content", doc_id=cdoc["doc_id"] )
if "targets" in cdoc:
cdoc2["targets"] = cdoc["targets"]
resp[ cdoc["doc_id"] ] = cdoc2 resp[ cdoc["doc_id"] ] = cdoc2
return jsonify( resp ) return jsonify( resp )

@ -9,6 +9,7 @@ from flask import render_template, jsonify, abort
from asl_rulebook2.webapp import app, globvars, shutdown_event from asl_rulebook2.webapp import app, globvars, shutdown_event
from asl_rulebook2.webapp.content import load_content_docs from asl_rulebook2.webapp.content import load_content_docs
from asl_rulebook2.webapp.search import init_search
from asl_rulebook2.webapp.utils import parse_int from asl_rulebook2.webapp.utils import parse_int
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
@ -20,7 +21,9 @@ def init_webapp():
after that by the test suite, to reset the webapp before each test. after that by the test suite, to reset the webapp before each test.
""" """
# initialize the webapp # initialize the webapp
load_content_docs() logger = logging.getLogger( "startup" )
load_content_docs( logger )
init_search( logger )
# --------------------------------------------------------------------- # ---------------------------------------------------------------------

@ -0,0 +1,475 @@
""" Manage the search engine. """
import os
import sqlite3
import json
import re
import itertools
import string
import tempfile
import logging
import traceback
from flask import request, jsonify
from asl_rulebook2.utils import plural
from asl_rulebook2.webapp import app
from asl_rulebook2.webapp import content as webapp_content
from asl_rulebook2.webapp.utils import make_config_path, make_data_path
# path to the SQLite database that holds the search index (set by init_search())
_sqlite_path = None
# maps FTS rowid's to the index entry each row was created from (populated by init_search())
_fts_index_entries= None
_logger = logging.getLogger( "search" )
# these are used to highlight search matches (nb: the front-end looks for these)
_BEGIN_HIGHLIGHT = "!@:"
_END_HIGHLIGHT = ":@!"
# NOTE: These regex's fix up content returned to us by the SQLite search engine (typically problems
# with highlighting search terms).
# Each entry is a [ compiled regex, replacement string ] pair, with the highlight markers
# substituted into the "{}" placeholders (the examples below show the markers as "((" and "))").
_FIXUP_TEXT_REGEXES = [
[ re.compile( fixup[0].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ) ),
fixup[1].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
]
for fixup in [
[ r"&{}(.+?){};", r"{}&\g<1>;{}" ], # HTML entities e.g. &((frac12)); -> ((&frac12;))
[ r"{}(.+?){}#", r"{}\g<1>#{}" ], # e.g. ((TH))# -> ((TH#))
[ r"{}U\.S{}\.", "{}U.S.{}" ], # ((U.S)). -> ((U.S.))
]
]
# these are used to separate ruleref's in the FTS table (internal use only)
_RULEREF_SEPARATOR = "-:-"
# maps a (lower-case) search term to its replacement string, or to the set of terms
# it should be expanded to (populated by load_search_config())
_SEARCH_TERM_ADJUSTMENTS = None
# ---------------------------------------------------------------------
@app.route( "/search", methods=["POST"] )
def search() :
    """Run a search.

    The search parameters are taken from the POST'ed form data. Returns the JSON response
    generated by _do_search() or, if anything goes wrong, a JSON object containing
    an "error" message.
    """

    # log the request
    _logger.info( "SEARCH REQUEST:" )
    args = dict( request.form.items() )
    for key,val in args.items():
        _logger.info( "- %s: %s", key, val )

    # run the search
    try:
        return _do_search( args )
    except Exception as exc: #pylint: disable=broad-except
        msg = str( exc )
        prefix = "fts5: "
        if msg.startswith( prefix ):
            # nb: this is a sqlite3.OperationalError - we remove the whole prefix (including
            # the trailing space), so that the message doesn't start with a stray space
            msg = msg[ len(prefix) : ]
        _logger.warning( "SEARCH ERROR: %s\n%s", args, traceback.format_exc() )
        return jsonify( { "error": msg } )
def _do_search( args ):
    """Run a search against the FTS index, and return the results as a JSON response.

    args is a dict of request parameters, and must contain a "queryString" entry.
    Each search result contains the matching row's doc ID, type and score, plus
    whichever of its title, subtitle, content and ruleref's are present (with search
    matches highlighted using the _BEGIN_HIGHLIGHT/_END_HIGHLIGHT markers).

    Raises an exception if the query could not be run (e.g. sqlite3.OperationalError
    for a malformed FTS query); the caller is expected to handle this.
    """

    def fixup_text( val ):
        # fix up problems in the text returned to us by SQLite (see _FIXUP_TEXT_REGEXES)
        if val is None:
            return None
        for regex in _FIXUP_TEXT_REGEXES:
            val = regex[0].sub( regex[1], val )
        return val

    def highlight( n ):
        # NOTE: highlight() is an FTS extension function, and takes column numbers :-/
        return "highlight(searchable,{},'{}','{}')".format( n, _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )

    def get_col( sr, key, val ):
        # nb: we only include columns that actually have a value
        if val:
            sr[key] = fixup_text( val )

    # prepare the search
    query_string = args[ "queryString" ].strip()
    if query_string == "!:simulated-error:!":
        raise RuntimeError( "Simulated error." ) # nb: for the test suite
    fts_query_string, search_terms = _make_fts_query_string( query_string )
    _logger.debug( "FTS query string: %s", fts_query_string )
    sql = "SELECT rowid,doc_id,sr_type,rank,{},{},{},{} FROM searchable".format(
        highlight(2), highlight(3), highlight(4), highlight(5)
    )
    sql += " WHERE searchable MATCH ?"
    sql += " ORDER BY rank"

    # run the search
    # NOTE: We make sure the database connection is always closed, otherwise we leak
    # one connection for every search request.
    results = []
    conn = sqlite3.connect( _sqlite_path )
    try:
        curs = conn.execute( sql,
            ( "{title subtitle content rulerefs}: " + fts_query_string, )
        )
        # get the results
        for row in curs:
            if row[2] != "index":
                _logger.error( "Unknown searchable row type (rowid=%d): %s", row[0], row[2] )
                continue
            index_entry = _fts_index_entries[ row[0] ]
            result = {
                "doc_id": row[1],
                "sr_type": row[2],
                "_score": - row[3], # nb: the rank is negated, so that larger scores are better matches
            }
            get_col( result, "title", row[4] )
            get_col( result, "subtitle", row[5] )
            get_col( result, "content", row[6] )
            if index_entry.get( "ruleids" ):
                result["ruleids"] = index_entry["ruleids"]
            if index_entry.get( "see_also" ):
                result["see_also"] = index_entry["see_also"]
            # split the ruleref captions back out (they were joined together when the row was inserted)
            rulerefs = [ r.strip() for r in row[7].split(_RULEREF_SEPARATOR) ] if row[7] else []
            assert len(rulerefs) == len(index_entry.get("rulerefs",[]))
            if rulerefs:
                result[ "rulerefs" ] = []
                for i, ruleref in enumerate(rulerefs):
                    ruleref2 = {}
                    if "caption" in index_entry["rulerefs"][i]:
                        assert ruleref.replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ) \
                            == index_entry["rulerefs"][i]["caption"]
                        ruleref2["caption"] = fixup_text( ruleref )
                    if "ruleids" in index_entry["rulerefs"][i]:
                        ruleref2["ruleids"] = index_entry["rulerefs"][i]["ruleids"]
                    assert ruleref2
                    result["rulerefs"].append( ruleref2 )
            results.append( result )
    finally:
        conn.close()

    # fixup the results
    results = _fixup_results_for_hash_terms( results, search_terms )

    # adjust the sort order
    results = _adjust_sort_order( results )

    # return the results
    _logger.debug( "Search results:" if len(results) > 0 else "Search results: none" )
    for result in results:
        # nb: a search result won't have a title if the index entry didn't have one
        _logger.debug( "- %s (%.3f)",
            result.get( "title", "" ).replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ),
            result["_score"]
        )
    return jsonify( results )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A query string that matches any of these patterns is treated as a raw FTS query
# (i.e. it contains an AND/OR/NOT operator, or an opening parenthesis that isn't part of "(r)").
PASSTHROUGH_REGEXES = {
    re.compile( pattern )
    for pattern in (
        r"\bAND\b",
        r"\bOR\b",
        r"\bNOT\b",
        r"\((?![Rr]\))",
    )
}
def _make_fts_query_string( query_string ):
    """Generate the SQLite query string.

    SQLite's MATCH function recognizes a lot of special characters, which need
    to be enclosed in double-quotes to disable.

    Returns a ( query string, search terms ) tuple. If the query string looks like
    a raw FTS query (see PASSTHROUGH_REGEXES), it is returned as-is (stripped), and
    the search terms are None. Otherwise, the search terms are a list where each
    entry is either a string, or a list of strings that will be OR'ed together.
    """

    # check if this looks like a raw FTS query
    if any( regex.search(query_string) for regex in PASSTHROUGH_REGEXES ):
        return query_string.strip(), None

    # split the search string into words (taking quoted phrases into account)
    ignore = app.config.get( "SQLITE_FTS_IGNORE_CHARS", ",;!?$" )
    query_string = "".join( ch for ch in query_string if ch not in ignore )
    terms = query_string.lower().split()
    # NOTE(review): A quoted phrase is built up by merging each word into the previous one
    # until the closing quote is seen, at which point the quotes are removed. However, a quoted
    # single word (e.g. "foo") already has its closing quote, so it looks like the word after it
    # also gets merged in - TODO confirm whether this is intended.
    i = 0
    while True:
        if i >= len(terms):
            break
        if i > 0 and terms[i-1].startswith( '"' ):
            terms[i-1] += " {}".format( terms[i] )
            del terms[i]
            if terms[i-1].startswith( '"' ) and terms[i-1].endswith( '"' ):
                terms[i-1] = terms[i-1][1:-1]
            continue
        i += 1

    # clean up quoted phrases
    terms = [ t[1:] if t.startswith('"') else t for t in terms ]
    terms = [ t[:-1] if t.endswith('"') else t for t in terms ]
    terms = [ t.strip() for t in terms ]
    terms = [ t for t in terms if t ]

    # adjust search terms
    for term_no, term in enumerate(terms):
        aliases = _SEARCH_TERM_ADJUSTMENTS.get( term )
        if not aliases:
            continue
        if isinstance( aliases, str ):
            # the search term is replaced by a new one
            terms[ term_no ] = aliases
        elif isinstance( aliases, set ):
            # the search term is replaced by multiple new ones (that will be OR'ed together)
            # NOTE: We sort the terms so that the tests will work reliably.
            terms[ term_no ] = sorted( aliases )
        else:
            # nb: asserting on a non-empty string always passes, so we explicitly fail here
            raise AssertionError( "Unknown search alias type: {}".format( type(aliases) ) )

    # fixup each term
    def has_special_char( term ):
        """Check if the term contains any special characters."""
        for ch in term:
            if ch in "*":
                continue # nb: asterisks are allowed through as-is
            if ch.isspace() or ch in string.punctuation:
                return True
            if ord(ch) < 32 or ord(ch) > 127:
                return True
        return False
    def fixup_terms( terms ):
        """Fixup a list of terms (in-place), quoting any that contain special characters."""
        for term_no, term in enumerate(terms):
            if isinstance( term, str ):
                if has_special_char( term ):
                    terms[term_no] = '"{}"'.format( term )
            else:
                fixup_terms( term )
    fixup_terms( terms )

    # return the final FTS query string
    def term_string( term ):
        if isinstance( term, str ):
            return term
        assert isinstance( term, list )
        return "( {} )".format( " OR ".join( term ) )
    return " AND ".join( term_string(t) for t in terms ), terms
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _fixup_results_for_hash_terms( results, search_terms ):
    """Remove search results that were only matched because of a trailing hash.

    SQLite doesn't cope well with search terms that end with a hash. Highlighting
    problems are corrected in fixup_text(), but a search for e.g. "US#" will also
    match "use" and "using", so those bogus results are filtered out here.

    The results are returned unchanged if there are no search terms, or none of
    them end with a hash.
    """

    # collect the (quoted) search terms that end with a hash
    # NOTE: We don't bother descending down into sub-terms.
    if not search_terms:
        return results
    hash_terms = []
    for term in search_terms:
        if not isinstance( term, str ):
            continue
        if not ( term.startswith( '"' ) and term.endswith( '"' ) ):
            continue
        unquoted = term[1:-1]
        if unquoted.endswith( "#" ):
            hash_terms.append( unquoted[:-1].lower() )
    if not hash_terms:
        return results
    if "us" in hash_terms:
        hash_terms.extend( [ "use", "used", "using", "user" ] )

    # these are what an incorrectly matched term looks like (e.g. ((K)) when searching for "K#")
    bogus_matches = [
        "{}{}{}".format( _BEGIN_HIGHLIGHT, term, _END_HIGHLIGHT )
        for term in hash_terms
    ]

    def has_real_match( sr ):
        # blank out every incorrectly matched search term...
        text = json.dumps( sr ).lower()
        for bogus in bogus_matches:
            text = text.replace( bogus, "_removed_" )
        # ...and keep the search result only if something is still highlighted
        return _BEGIN_HIGHLIGHT in text

    return list( filter( has_real_match, results ) )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _adjust_sort_order( results ):
    """Re-order the search results, so that the more relevant ones appear first.

    Results are promoted in a series of passes (best title matches first), with
    everything that was not promoted following in its existing order.
    NOTE: Promoted entries are removed from the list that was passed in.
    """

    def text_of( sr, key ):
        return sr.get( key ) or ""

    # NOTE: Each filter is applied in turn, and claims any remaining results that pass it.
    preferences = [
        # prefer search results whose title is an exact match
        lambda sr: text_of(sr,"title").startswith( _BEGIN_HIGHLIGHT ) and text_of(sr,"title").endswith( _END_HIGHLIGHT ),
        # prefer search results whose title starts with a match
        lambda sr: text_of(sr,"title").startswith( _BEGIN_HIGHLIGHT ),
        # prefer search results that have a match in the title
        lambda sr: _BEGIN_HIGHLIGHT in text_of(sr,"title"),
        # prefer search results that have a match in the subtitle
        lambda sr: _BEGIN_HIGHLIGHT in text_of(sr,"subtitle"),
    ]

    reordered = []
    for is_preferred in preferences:
        leftovers = []
        for sr in results:
            # NOTE: We never prefer small entries (i.e .have no ruleref's)
            # e.g. those that only contain a "see also".
            if is_preferred( sr ) and len( sr.get("rulerefs",[]) ) > 0:
                reordered.append( sr )
            else:
                leftovers.append( sr )
        results[:] = leftovers

    # include any remaining search results
    reordered.extend( results )
    return reordered
# ---------------------------------------------------------------------
def init_search( logger ):
    """Initialize the search engine.

    This (re)creates the SQLite database that holds the search index, loads it with
    the index entries from each content doc, then loads the search config.
    Progress is reported via the specified logger.
    """

    # initialize
    global _fts_index_entries, _sqlite_path
    _fts_index_entries = {}

    # initialize the database
    _sqlite_path = app.config.get( "SQLITE_PATH" )
    if not _sqlite_path:
        # FUDGE! We should be able to create a shared, in-memory database using this:
        #   file::XYZ:?mode=memory&cache=shared
        # but it doesn't seem to work (on Linux) and ends up creating a file with this name :-/
        # We manually create a temp file, which has to have the same name each time, so that we don't
        # keep creating a new database each time we start up. Sigh...
        _sqlite_path = os.path.join( tempfile.gettempdir(), "asl-rulebook2.searchdb" )
    if os.path.isfile( _sqlite_path ):
        os.unlink( _sqlite_path )

    logger.info( "Creating the search index: %s", _sqlite_path )
    # NOTE: We make sure the connection gets closed, even if something goes wrong
    # (searches open their own connection to the database).
    conn = sqlite3.connect( _sqlite_path )
    try:
        # NOTE: Storing everything in a single table allows FTS to rank search results based on
        # the overall content, and also lets us do AND/OR queries across all searchable content.
        conn.execute(
            "CREATE VIRTUAL TABLE searchable USING fts5"
            " ( doc_id, sr_type, title, subtitle, content, rulerefs, tokenize='porter unicode61' )"
        )

        # load the searchable content
        logger.info( "Loading the search index..." )
        conn.execute( "DELETE FROM searchable" )
        curs = conn.cursor()
        for cdoc in webapp_content.content_docs.values():
            logger.info( "- Loading index file: %s", cdoc["_fname"] )
            nrows = 0
            for index_entry in cdoc["index"]:
                # nb: the ruleref captions are stored in a single column, and split back out when searching
                rulerefs = _RULEREF_SEPARATOR.join( r.get("caption","") for r in index_entry.get("rulerefs",[]) )
                # NOTE: We should really strip content before adding it to the search index, otherwise any HTML tags
                # will need to be included in search terms. However, this means that the content returned by a query
                # will be this stripped content. We could go back to the original data to get the original HTML content,
                # but that means we would lose the highlighting of search terms that SQLite gives us. We opt to insert
                # the original content, since none of it should contain HTML, anyway.
                curs.execute(
                    "INSERT INTO searchable (doc_id,sr_type,title,subtitle,content,rulerefs) VALUES (?,?,?,?,?,?)", (
                        cdoc["doc_id"], "index",
                        index_entry.get("title"), index_entry.get("subtitle"), index_entry.get("content"), rulerefs
                ) )
                # remember which index entry each row came from (and vice versa)
                _fts_index_entries[ curs.lastrowid ] = index_entry
                index_entry["_fts_rowid"] = curs.lastrowid
                nrows += 1
            conn.commit()
            logger.info( " - Loaded %s.", plural(nrows,"index entry","index entries"), )
        assert len(_fts_index_entries) == _get_row_count( conn, "searchable" )
    finally:
        conn.close()

    # load the search config
    load_search_config( logger )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def load_search_config( logger ):
    """Load the search config.

    This (re)builds the table of search term adjustments from the search replacements,
    aliases and synonyms files. For each of these, the file in the config directory
    is loaded first, then the file of the same name in the data directory
    (files that don't exist are ignored). Progress is reported via the specified logger.
    """

    # initialize
    global _SEARCH_TERM_ADJUSTMENTS
    _SEARCH_TERM_ADJUSTMENTS = {}

    def add_search_term_adjustment( key, vals ):
        # make sure everything is lower-case
        key = key.lower()
        if isinstance( vals, str ):
            vals = vals.lower()
        elif isinstance( vals, set ):
            vals = set( v.lower() for v in vals )
        else:
            # nb: asserting on a non-empty string always passes, so we explicitly fail here
            raise AssertionError( "Unknown search alias type: {}".format( type(vals) ) )
        # add the new search term adjustment
        if key not in _SEARCH_TERM_ADJUSTMENTS:
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
            return
        # found a multiple definition - try to do something sensible
        logger.warning( " - Duplicate search alias: %s\n- current aliases = %s\n- new aliases = %s", key,
            _SEARCH_TERM_ADJUSTMENTS[key], vals
        )
        if isinstance( _SEARCH_TERM_ADJUSTMENTS[key], str ):
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
        else:
            assert isinstance( _SEARCH_TERM_ADJUSTMENTS[key], set )
            # nb: a string must be added as a single term (updating a set with a string
            # would add each of its characters as a separate term)
            _SEARCH_TERM_ADJUSTMENTS[ key ].update( [vals] if isinstance( vals, str ) else vals )

    # load the search replacements
    def load_search_replacements( fname ):
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search replacements: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for key, val in data.items():
            if key.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", key, val )
            add_search_term_adjustment( key, val )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search replacement","search replacements") )
    load_search_replacements( make_config_path( "search-replacements.json" ) )
    load_search_replacements( make_data_path( "search-replacements.json" ) )

    # load the search aliases
    def load_search_aliases( fname ):
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search aliases: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for keys, aliases in data.items():
            if keys.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", keys, " ; ".join(aliases) )
            # nb: multiple keys can share the same aliases (e.g. "vehicle/vehicles"), and each key
            # matches itself, as well as its aliases
            for key in keys.split( "/" ):
                add_search_term_adjustment( key, set( itertools.chain( aliases, [key] ) ) )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search alias","search aliases") )
    load_search_aliases( make_config_path( "search-aliases.json" ) )
    load_search_aliases( make_data_path( "search-aliases.json" ) )

    # load the search synonyms
    def load_search_synonyms( fname ):
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search synonyms: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for synonyms in data:
            if isinstance( synonyms, str ):
                continue # nb: ignore comments
            logger.debug( "- %s", " ; ".join(synonyms) )
            # nb: every word in the set triggers a search for all the words in the set
            synonyms = set( synonyms )
            for term in synonyms:
                add_search_term_adjustment( term, synonyms )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search synonym","search synonyms") )
    load_search_synonyms( make_config_path( "search-synonyms.json" ) )
    load_search_synonyms( make_data_path( "search-synonyms.json" ) )
# ---------------------------------------------------------------------
def _get_row_count( conn, table_name ):
    """Return how many rows the specified table contains.

    conn is an open database connection. The table name is inserted directly into
    the SQL, so it must come from a trusted source.
    """
    sql = "SELECT count(*) FROM {}".format( table_name )
    row = conn.execute( sql ).fetchone()
    return row[0]

@ -8,13 +8,13 @@ gMainApp.component( "content-pane", {
template: ` template: `
<tabbed-pages ref="tabbedPages"> <tabbed-pages ref="tabbedPages">
<tabbed-page v-for="doc in contentDocs" :tabId=doc.docId :caption=doc.title > <tabbed-page v-for="doc in contentDocs" :tabId=doc.doc_id :caption=doc.title :key=doc.doc_id >
<content-doc :doc=doc /> <content-doc :doc=doc />
</tabbed-page> </tabbed-page>
</tabbed-pages>`, </tabbed-pages>`,
mounted() { mounted() {
gEventBus.on( "show-content-doc", (docId) => { gEventBus.on( "show-target", (docId, target) => { //eslint-disable-line no-unused-vars
this.$refs.tabbedPages.activateTab( docId ) ; // nb: tabId == docId this.$refs.tabbedPages.activateTab( docId ) ; // nb: tabId == docId
} ) ; } ) ;
}, },
@ -27,14 +27,37 @@ gMainApp.component( "content-doc", {
props: [ "doc" ], props: [ "doc" ],
data() { return { data() { return {
target: null,
noContent: gUrlParams.get( "no-content" ), noContent: gUrlParams.get( "no-content" ),
} ; }, } ; },
template: ` template: `
<div class="content-doc"> <div class="content-doc" :data-target=target >
<div v-if=noContent class="disabled"> Content disabled. </div> <div v-if=noContent class="disabled"> Content disabled. <div v-if=target>target = {{target}}</div> </div>
<iframe v-else-if=doc.url :src=doc.url /> <iframe v-else-if=doc.url :src=makeDocUrl />
<div v-else class="disabled"> No content. </div> <div v-else class="disabled"> No content. </div>
</div>`, </div>`,
created() {
gEventBus.on( "show-target", (docId, target) => {
if ( docId != this.doc.doc_id )
return ;
// FUDGE! We give the tab time to show itself before we scroll to the target.
setTimeout( () => {
this.target = target ;
}, 50 ) ;
} ) ;
},
computed: {
makeDocUrl() {
let url = this.doc.url ;
if ( this.target )
url += "#nameddest=" + this.target ;
return url ;
}
},
} ) ; } ) ;

@ -12,6 +12,10 @@ $(document).ready( () => {
gMainApp.mount( "#main-app" ) ; gMainApp.mount( "#main-app" ) ;
} ) ; } ) ;
// FUDGE! Can't seem to get access to the content docs via gMainApp, so we make them available
// to the rest of the program via this global variable :-/
export let gContentDocs = null ;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
gMainApp.component( "main-app", { gMainApp.component( "main-app", {
@ -47,23 +51,27 @@ gMainApp.component( "main-app", {
methods: { methods: {
getContentDocs: (self) => new Promise( (resolve, reject) => { getContentDocs( self ) {
// get the content docs return new Promise( (resolve, reject) => {
$.getJSON( gGetContentDocsUrl, (resp) => { //eslint-disable-line no-undef // get the content docs
self.contentDocs = resp ; $.getJSON( gGetContentDocsUrl, (resp) => { //eslint-disable-line no-undef
let docIds = Object.keys( resp ) ; if ( gUrlParams.get( "add-empty-doc" ) )
if ( docIds.length > 0 ) { resp["empty"] = { "doc_id": "empty", "title": "Empty document" } ; // nb: for testing porpoises
Vue.nextTick( () => { gContentDocs = self.contentDocs = resp ;
gEventBus.emit( "show-content-doc", docIds[0] ) ; // FIXME! which one do we choose? let docIds = Object.keys( resp ) ;
} ) ; if ( docIds.length > 0 ) {
} Vue.nextTick( () => {
resolve() ; gEventBus.emit( "show-target", docIds[0], null ) ; // FIXME! which one do we choose?
} ).fail( (xhr, status, errorMsg) => { } ) ;
const msg = "Couldn't get the content docs." ; }
showErrorMsg( msg + " <div class='pre'>" + errorMsg + "</div>" ) ; resolve() ;
reject( msg ) } ).fail( (xhr, status, errorMsg) => {
const msg = "Couldn't get the content docs." ;
showErrorMsg( msg + " <div class='pre'>" + errorMsg + "</div>" ) ;
reject( msg )
} ) ;
} ) ; } ) ;
} ), },
}, },

@ -4,17 +4,28 @@ import { gMainApp, gEventBus } from "./MainApp.js" ;
gMainApp.component( "nav-pane", { gMainApp.component( "nav-pane", {
data() { return {
seqNo: 0, // nb: for the test suite
} ; },
template: ` template: `
<tabbed-pages> <tabbed-pages>
<tabbed-page tabId="search" caption="Search" data-display="flex" > <tabbed-page tabId="search" caption="Search" data-display="flex" >
<search-box id="search-box" @search=onSearch /> <search-box id="search-box" @search=onSearch />
<search-results id="search-results" /> <search-results id="search-results" :data-seqno=seqNo />
</tabbed-page> </tabbed-page>
</tabbed-pages>`, </tabbed-pages>`,
// Bump seqNo (bound to the "data-seqno" attribute of the search results element) every time a search finishes.
mounted() {
gEventBus.on( "search-done", () => {
// notify the test suite that the search results are now available
this.seqNo += 1 ;
} ) ;
},
methods: { methods: {
onSearch: (queryString) => { onSearch( queryString ) {
gEventBus.emit( "search", queryString ) ; gEventBus.emit( "search", queryString ) ;
}, },

@ -1,5 +1,5 @@
import { gMainApp, gEventBus } from "./MainApp.js" ; import { gMainApp, gEventBus } from "./MainApp.js" ;
import { IndexSearchResult } from "./SearchResult.js" ; import { fixupSearchHilites } from "./utils.js" ;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
@ -30,7 +30,7 @@ gMainApp.component( "search-box", {
}, },
methods: { methods: {
onKeyUp: function( evt ) { onKeyUp( evt ) {
if ( evt.keyCode == 13 ) if ( evt.keyCode == 13 )
this.$refs["submit"].click() ; this.$refs["submit"].click() ;
} }
@ -43,12 +43,15 @@ gMainApp.component( "search-box", {
gMainApp.component( "search-results", { gMainApp.component( "search-results", {
data() { return { data() { return {
searchResults: [], searchResults: null,
errorMsg: null,
} ; }, } ; },
template: `<div> template: `<div>
<div v-for="sr in searchResults" :key=sr.key > <div v-if=errorMsg class="error"> Search error: <div class="pre"> {{errorMsg}} </div> </div>
<index-sr v-if="sr.srType == 'index'" :sr=sr /> <div v-else-if="searchResults != null && searchResults.length == 0" class="no-results"> Nothing was found. </div>
<div v-else v-for="sr in searchResults" :key=sr.key >
<index-sr v-if="sr.sr_type == 'index'" :sr=sr />
<div v-else> ??? </div> <div v-else> ??? </div>
</div> </div>
</div>`, </div>`,
@ -60,22 +63,39 @@ gMainApp.component( "search-results", {
methods: { methods: {
onSearch( queryString ) { onSearch( queryString ) {
// generate some dummy search results // submit the search request
let searchResults = [] ; const onError = (errorMsg) => {
for ( let i=0 ; i < queryString.length ; ++i ) { this.errorMsg = errorMsg ;
let buf = [ "Search result #" + (1+i) ] ; Vue.nextTick( () => {
let nItems = Math.floor( Math.sqrt( 100 * Math.random() ) ) - 1 ; gEventBus.emit( "search-done" ) ;
if ( nItems > 0 ) { } ) ;
buf.push( "<ul style='padding-left:1em;'>" ) ; } ;
for ( let j=0 ; j < nItems ; ++j ) this.errorMsg = null ;
buf.push( "<li> item " + (1+j) ) ; $.ajax( { url: gSearchUrl, type: "POST", //eslint-disable-line no-undef
buf.push( "</ul>" ) ; data: { queryString: queryString },
dataType: "json",
} ).done( (resp) => {
// check if there was an error
if ( resp.error ) {
onError( resp.error ) ;
return ;
} }
searchResults.push( // adjust highlighted text
new IndexSearchResult( i, buf.join("") ) resp.forEach( (sr) => {
) ; [ "title", "subtitle", "content" ].forEach( function( key ) {
} if ( sr[key] )
this.searchResults = searchResults ; sr[key] = fixupSearchHilites( sr[key] ) ;
} ) ;
} ) ;
// load the search results into the UI
this.$el.scrollTop = 0;
this.searchResults = resp ;
Vue.nextTick( () => {
gEventBus.emit( "search-done" ) ;
} ) ;
} ).fail( (xhr, status, errorMsg) => {
onError( errorMsg ) ;
} ) ;
}, },
}, },

@ -1,23 +1,80 @@
import { gMainApp } from "./MainApp.js" ; import { gMainApp, gEventBus, gContentDocs } from "./MainApp.js" ;
import { fixupSearchHilites } from "./utils.js" ;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
export class IndexSearchResult {
constructor( key, content ) {
this.key = key ;
this.srType = "index" ;
this.content = content ;
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gMainApp.component( "index-sr", { gMainApp.component( "index-sr", {
props: [ "sr" ], props: [ "sr" ],
template: ` template: `
<div class="sr index-sr" v-html=sr.content /> <div class="sr index-sr" >
`, <div v-if="sr.title || sr.subtitle" class="title" >
<span v-if=sr.title class="title" v-html=sr.title />
<span v-if=sr.subtitle class="subtitle" v-html=sr.subtitle />
</div>
<div class="body">
<div v-if=sr.content class="content" v-html=sr.content />
<div v-if=makeSeeAlso v-html=makeSeeAlso class="see-also" />
<div v-if=sr.ruleids class="ruleids" >
<ruleid v-for="rid in sr.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
</div>
<ul v-if=sr.rulerefs class="rulerefs" >
<li v-for="rref in sr.rulerefs" :key=rref >
<span v-if=rref.caption class="caption" v-html=fixupHilites(rref.caption) />
<ruleid v-for="rid in rref.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
</li>
</ul>
</div>
</div>`,
computed: {
makeSeeAlso() {
if ( this.sr.see_also )
return "See also: " + this.sr.see_also.join( ", " ) ;
return null ;
},
},
methods: {
fixupHilites( val ) {
return fixupSearchHilites( val ) ;
},
},
} ) ;
// --------------------------------------------------------------------
gMainApp.component( "ruleid", {
props: [ "docId", "ruleId" ],
data() { return {
target: null,
} ; },
template: `<span class="ruleid" v-bind:class="{unknown:!target}">[<a v-if=target @click=onClick>{{ruleId}}</a><span v-else>{{ruleId}}</span>]</span>`,
created() {
// figure out which rule is being referenced
let ruleId = this.ruleId ;
let pos = ruleId.indexOf( "-" ) ;
if ( pos >= 0 ) {
// NOTE: For ruleid's of the form "A12.3-.4", we want to target "A12.3".
ruleId = ruleId.substring( 0, pos ) ;
}
// check if the rule is one we know about
if ( gContentDocs[this.docId] && gContentDocs[this.docId].targets ) {
if ( gContentDocs[this.docId].targets[ ruleId ] )
this.target = ruleId ;
}
},
methods: {
onClick() {
// show the target
gEventBus.emit( "show-target", this.docId, this.target ) ;
},
},
} ) ; } ) ;

@ -13,7 +13,7 @@ gMainApp.component( "tabbed-pages", {
<div class="tabbed-pages"> <div class="tabbed-pages">
<slot /> <slot />
<div class="tab-strip"> <div class="tab-strip">
<div v-for="tab in tabs" :data-tabid=tab.tabId @click=onTabClicked class="tab" v-bind:class="{'active': tab.tabId == activeTabId}" > <div v-for="tab in tabs" :data-tabid=tab.tabId @click=onTabClicked class="tab" v-bind:class="{'active': tab.tabId == activeTabId}" :key=tab.tabId >
{{tab.caption}} {{tab.caption}}
</div> </div>
</div> </div>
@ -44,12 +44,12 @@ gMainApp.component( "tabbed-pages", {
methods: { methods: {
onTabClicked: function( evt ) { onTabClicked( evt ) {
// activate the selected tab // activate the selected tab
this.activateTab( evt.target.dataset.tabid ) ; this.activateTab( evt.target.dataset.tabid ) ;
}, },
activateTab: function( tabId ) { activateTab( tabId ) {
// activate the specified tab // activate the specified tab
this.activeTabId = tabId ; this.activeTabId = tabId ;
$( this.$el ).find( ".tabbed-page" ).each( function() { $( this.$el ).find( ".tabbed-page" ).each( function() {

@ -6,3 +6,5 @@
/* search results */ /* search results */
#search-results { flex-grow: -1 ; margin: 8px 0 2px 0 ; overflow-y: auto ; } #search-results { flex-grow: -1 ; margin: 8px 0 2px 0 ; overflow-y: auto ; }
#search-results .no-results { font-style: italic ; color: #666 ; }
#search-results .error .pre { font-family: monospace ; margin: 0.25em 0 0 0.5em ; }

@ -1 +1,13 @@
#search-results .sr { margin: 0 10px 2px 0 ; border: 1px dotted #666 ; padding: 5px ; } #search-results .sr { margin: 0 10px 2px 0 ; padding: 5px ; }
#search-results .sr .hilite { padding: 0 2px ; background: #ffa ; }
#search-results .index-sr .title { background: #e0e0e0 ; border-bottom: 1px solid #ccc ; padding: 2px 5px ; font-weight: bold ; }
#search-results .index-sr .subtitle { padding: 2px 5px ; font-weight: normal ; font-size: 80% ; font-style: italic ; }
#search-results .index-sr .body { padding: 2px 5px 0 5px ; font-size: 80% ; }
#search-results .index-sr .content { color: #444 ; }
#search-results .index-sr .see-also { color: #444 ; }
#search-results .index-sr ul.rulerefs { margin-left: 1.2em ; }
#search-results .index-sr ul.rulerefs .caption { padding-right: 0.5em ; }
#search-results .index-sr .ruleid { margin-right: 0.25em ; font-style: italic ; color: #444 ; }
#search-results .index-sr .ruleid.unknown { color: #888 ; }
#search-results .index-sr .ruleid a { cursor: pointer ; }

@ -1,3 +1,21 @@
// --------------------------------------------------------------------
const _HILITE_REGEXES = [ /!@:/g, /:@!/g ] ;

export function fixupSearchHilites( val )
{
    // NOTE: The search engine highlights search terms in the content it returns by wrapping them
    // in special markers ("!@:" before, ":@!" after). We convert those markers to HTML span's here.
    if ( val == null )
        return val ; // nb: null and undefined are passed through unchanged
    const [ startMarker, endMarker ] = _HILITE_REGEXES ;
    const withStartTags = val.replace( startMarker, "<span class='hilite'>" ) ;
    return withStartTags.replace( endMarker, "</span>" ) ;
}
// --------------------------------------------------------------------
export function showInfoMsg( msg ) { _doShowNotificationMsg( "notice", msg ) ; } export function showInfoMsg( msg ) { _doShowNotificationMsg( "notice", msg ) ; }
export function showWarningMsg( msg ) { _doShowNotificationMsg( "warning", msg ) ; } export function showWarningMsg( msg ) { _doShowNotificationMsg( "warning", msg ) ; }
export function showErrorMsg( msg ) { _doShowNotificationMsg( "error", msg ) ; } export function showErrorMsg( msg ) { _doShowNotificationMsg( "error", msg ) ; }

@ -43,6 +43,7 @@
<script> <script>
gGetContentDocsUrl = "{{ url_for( 'get_content_docs') }}" ; gGetContentDocsUrl = "{{ url_for( 'get_content_docs') }}" ;
gSearchUrl = "{{ url_for( 'search' ) }}" ;
</script> </script>
<script type="module" src="{{ url_for( 'static', filename='MainApp.js' ) }}"></script> <script type="module" src="{{ url_for( 'static', filename='MainApp.js' ) }}"></script>

@ -11,7 +11,7 @@
{ "title": "Backblast", { "title": "Backblast",
"ruleids": [ "C13.8" ], "ruleids": [ "C13.8" ],
"rulerefs": [ "rulerefs": [
{ "caption": "Huts", "ruleids": [ "G5.62" ] }, { "caption": "HEAT", "ruleids": [ "C13.8" ] },
{ "caption": "RCL", "ruleids": [ "C12.3-.4" ] } { "caption": "RCL", "ruleids": [ "C12.3-.4" ] }
] ]
}, },
@ -31,11 +31,9 @@
"content": "Also known as \"running <em>really</em> fast.\"", "content": "Also known as \"running <em>really</em> fast.\"",
"rulerefs": [ "rulerefs": [
{ "caption": "ENEMY Guard Automatic Action", "ruleids": [ "S6.303" ] }, { "caption": "ENEMY Guard Automatic Action", "ruleids": [ "S6.303" ] },
{ "caption": "Manhandling", "ruleids": [ "C10.3" ] }, { "ruleids": [ "C10.3" ] },
{ "caption": "NA for Pathfinders", "ruleids": [ "T1.2" ] }, { "caption": "NA in Advance Phase", "ruleids": [ "A4.7" ] },
{ "caption": "S? NA", "ruleids": [ "S3.321" ] }, { "caption": "'S?' is \"&lt;NA&gt;\"" }
{ "caption": "Water Shortage", "ruleids": [ "RCG21" ] },
{ "caption": "Wire NA", "ruleids": [ "B26.46" ] }
] ]
}, },
@ -54,6 +52,7 @@
}, },
{ "title": "Firepower", { "title": "Firepower",
"content": "The U.S. has lots of this.",
"ruleids": [ "A1.21" ], "ruleids": [ "A1.21" ],
"see_also": [ "FP" ] "see_also": [ "FP" ]
}, },
@ -71,6 +70,12 @@
{ "title": "Identity, Vehicular", { "title": "Identity, Vehicular",
"ruleids": [ "D1.4" ] "ruleids": [ "D1.4" ]
},
{ "title": "HTML ti<u>tl</u>e",
"subtitle": "HTML subti<u>tl</u>e",
"content": "HTML con<u>ten</u>t: 2&frac34; MP",
"see_also": [ "HTML see-<u>al</u>so" ]
} }
] ]

@ -1,15 +1,15 @@
{ {
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,702] }, "A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,718] },
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,404] }, "C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,503] },
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72.97] }, "A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72,292] },
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,702] }, "A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,718] },
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72.404] }, "A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72,503] },
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,97] }, "A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,292] },
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 3, "pos": [72,702] }, "E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,718] },
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,404] }, "C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,503] },
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,97] } "D1.4": { "caption": "IDENTITY & GROUND PRESSURE", "page_no": 3, "pos": [72,292] }
} }

@ -0,0 +1,298 @@
""" Test search. """
import re
import logging
from selenium.webdriver.common.keys import Keys
from asl_rulebook2.utils import strip_html
from asl_rulebook2.webapp.search import load_search_config, _make_fts_query_string
from asl_rulebook2.webapp.tests.utils import init_webapp, select_tabbed_page, get_classes, \
wait_for, find_child, find_children
# ---------------------------------------------------------------------
def test_search( webapp, webdriver ):
    """Run some searches through the UI and check what gets shown."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # a search that finds nothing is reported as None
    assert _do_search( "oogah, boogah!" ) is None

    # errors are reported as a string (the error message)
    error_msg = _do_search( "!:simulated-error:!" )
    assert "Simulated error." in error_msg

    # do a search
    expected_ccph = {
        "sr_type": "index",
        "title": "CCPh", "subtitle": "Close Combat Phase",
        "ruleids": [ "A3.8" ],
        "rulerefs": [
            { "caption": "((ENEMY)) Attacks", "ruleids": [ "S11.5" ] },
            { "caption": "dropping SW before CC", "ruleids": [ "A4.43" ] },
        ],
    }
    expected_double_time = {
        "sr_type": "index",
        "title": "Double Time",
        "content": "Also known as \"running really fast.\"",
        "see_also": [ "CX" ],
        "ruleids": [ "A4.5-.51", "S6.222" ],
        "rulerefs": [
            { "caption": "((ENEMY)) Guard Automatic Action", "ruleids": [ "S6.303" ] },
            { "ruleids": [ "C10.3" ] },
            { "caption": "NA in Advance Phase", "ruleids": [ "A4.7" ] },
            { "caption": "'S?' is \"<NA>\"" },
        ],
    }
    assert _do_search( "enemy" ) == [ expected_ccph, expected_double_time ]

    # do another search
    expected_gaps = {
        "sr_type": "index",
        "title": "((Gaps)), Convoy",
        "ruleids": [ "E11.21" ],
    }
    assert _do_search( "gap" ) == [ expected_gaps ]
# ---------------------------------------------------------------------
def test_content_fixup( webapp, webdriver ):
    """Check that content returned by the search engine is fixed up correctly."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # each query must return exactly one result, with the search term highlighted in the given field
    test_cases = [
        ( "3/4", "content", "HTML content: 2((\u00be)) MP" ), # nb: a fraction
        ( "H#", "title", "((H#))" ), # nb: something that ends with a hash
        ( "U.S.", "content", "The ((U.S.)) has lots of this." ),
    ]
    for query_string, field, expected in test_cases:
        results = _do_search( query_string )
        assert len(results) == 1
        assert results[0][field] == expected
# ---------------------------------------------------------------------
def test_targets( webapp, webdriver ):
    """Check that clicking on a ruleid in the search results shows its target."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver, no_content=1, add_empty_doc=1 )

    def is_showing_target( expected ):
        """Check if the "simple" document is active and showing the expected target."""
        active_tab = find_child( "#content .tab-strip .tab.active" )
        if active_tab.get_attribute( "data-tabid" ) != "simple":
            return False
        content_doc = find_child( "#content .tabbed-page[data-tabid='simple'] .content-doc" )
        return content_doc.get_attribute( "data-target" ) == expected

    # do the tests
    test_cases = [
        ( "CC", ".sr .ruleids .ruleid a", "A3.8" ),
        ( "time", ".sr .rulerefs .ruleid a", "A4.7" ),
    ]
    for query_string, sel, expected in test_cases:
        # select the dummy document, so that we can tell when the real one gets shown
        select_tabbed_page( "#content", "empty" )
        # do the search, then click on a target
        _do_search( query_string )
        link = find_child( "#search-results " + sel )
        link.click()
        # wait for the target to be shown
        wait_for( 2, lambda expected=expected: is_showing_target( expected ) )
# ---------------------------------------------------------------------
def test_make_fts_query_string():
    """Check the FTS query strings generated for various search queries."""

    # initialize
    load_search_config( logging.getLogger("_unknown_") )

    # NOTE: Each entry is ( query, expected FTS query string ).
    test_cases = [
        # some query strings
        ( "", "" ),
        ( "hello", "hello" ),
        ( " hello, world! ", "hello AND world" ),
        ( "foo 1+2 A-T K# bar", 'foo AND "1+2" AND "a-t" AND "k#" AND bar' ),
        ( "a'b a''b", "\"a'b\" AND \"a''b\"" ),
        ( 'foo "set dc" bar', 'foo AND "set dc" AND bar' ),
        # some quoted phrases
        ( '""', '' ),
        ( ' " " ', '' ),
        ( '"hello world"', '"hello world"' ),
        ( ' foo "hello world" bar ', 'foo AND "hello world" AND bar' ),
        ( ' foo " xyz " bar ', 'foo AND xyz AND bar' ),
        ( ' foo " xyz 123 " bar ', 'foo AND "xyz 123" AND bar' ),
        # some incorrectly quoted phrases
        ( '"', '' ),
        ( ' " " " ', '' ),
        ( ' a "b c d e', 'a AND "b c d e"' ),
        ( ' a b" c d e ', 'a AND b AND c AND d AND e' ),
        # pass-through
        ( "AND", "AND" ),
        ( " OR", "OR" ),
        ( "OR ", "OR" ),
        ( "foo OR bar", "foo OR bar" ),
        ( "(a OR b)", "(a OR b)" ),
        # search replacements
        ( "1/2 3/4 3/8 5/8", '"&frac12;" AND "&frac34;" AND "&frac38;" AND "&frac58;"' ),
        ( "(r)", '"&reg;"' ),
        # search aliases
        ( "entrenchment", "( ditch OR entrenchment OR foxhole OR trench )" ),
        ( "entrenchments", "( ditch OR entrenchments OR foxhole OR trench )" ),
        ( "foxhole", "foxhole" ),
        # search synonyms
        ( "armor", "( armor OR armour )" ),
        ( "american big armor", '( america OR american OR "u.s." ) AND big AND ( armor OR armour )' ),
    ]
    for query, expected in test_cases:
        fts_query_string, _ = _make_fts_query_string( query )
        assert fts_query_string == expected
# ---------------------------------------------------------------------
def _do_search( query_string ):
    """Do a search.

    Returns the error message (a string) if the search failed, None if nothing was found,
    otherwise a list of the search results that were shown.
    """

    def get_seq_no():
        # NOTE: Attribute values come back as strings, so we convert the sequence number to an int,
        # otherwise the check below would compare strings lexicographically (e.g. "10" > "9" is False),
        # and we would time out waiting for the 10th search to finish.
        return int( find_child( "#search-results" ).get_attribute( "data-seqno" ) )

    # submit the search
    select_tabbed_page( "#nav", "search" )
    elem = find_child( "input#query-string" )
    elem.clear()
    elem.send_keys( query_string )
    seq_no = get_seq_no() # nb: we get this *before* submitting the search
    elem.send_keys( Keys.RETURN )

    # unload the results
    wait_for( 2, lambda: get_seq_no() > seq_no )
    elem = find_child( "#search-results .error" )
    if elem:
        return elem.text # nb: string = error message
    elem = find_child( "#search-results .no-results" )
    if elem:
        assert elem.text == "Nothing was found."
        return None # nb: None = no results
    results = _unload_search_results()
    assert isinstance( results, list ) # nb: list = search results
    return results
def _unload_search_results():
    """Unload the search results currently shown in the UI.

    Returns a list of dict's, one per search result, each tagged with its "sr_type".
    Highlighted text is represented as "((...))".
    """

    def get_elem_text( elem ):
        """Get the element's text content."""
        html = elem.get_attribute( "innerHTML" )
        # change how highlighted content is represented
        html = re.sub( r'<span class="hilite">(.*?)</span>', r"((\1))", html )
        # remove HTML tags
        return strip_html( html.strip() )

    def unload_elem( result, key, elem ):
        """Unload a single element (returns True if something was saved)."""
        text = get_elem_text( elem ) if elem else None
        if not text:
            return False
        result[key] = text
        return True

    def unload_ruleids( result, key, parent ):
        """Unload a list of ruleid's."""
        if not parent:
            return
        ruleids = []
        for ruleid_elem in find_children( ".ruleid", parent ):
            text = get_elem_text( ruleid_elem )
            # nb: ruleid's are shown wrapped in square brackets
            assert text.startswith( "[" ) and text.endswith( "]" )
            ruleids.append( text[1:-1] )
        if ruleids:
            result[key] = ruleids

    def unload_rulerefs( result, key, parent ):
        """Unload a list of ruleref's."""
        if not parent:
            return
        rulerefs = []
        for item in find_children( "li", parent ):
            ruleref = {}
            unload_elem( ruleref, "caption", find_child(".caption",item) )
            unload_ruleids( ruleref, "ruleids", item )
            rulerefs.append( ruleref )
        if rulerefs:
            result[key] = rulerefs

    def unload_index_sr( sr_elem ):
        """Unload an "index" search result."""
        prefix = "See also:"
        result = {}
        unload_elem( result, "title", find_child("span.title",sr_elem) )
        unload_elem( result, "subtitle", find_child(".subtitle",sr_elem) )
        unload_elem( result, "content", find_child(".content",sr_elem) )
        if unload_elem( result, "see_also", find_child(".see-also",sr_elem) ):
            assert result["see_also"].startswith( prefix )
            entries = result["see_also"][ len(prefix) : ].split( "," )
            result["see_also"] = [ entry.strip() for entry in entries ]
        unload_ruleids( result, "ruleids", find_child(".ruleids",sr_elem) )
        unload_rulerefs( result, "rulerefs", find_child(".rulerefs",sr_elem) )
        return result

    # nb: maps each search result type to the function that unloads it
    unloaders = {
        "index": unload_index_sr,
    }

    # unload the search results
    results = []
    for sr_elem in find_children( "#search-results .sr" ):
        # figure out what type of search result this is (from its "xxx-sr" CSS class)
        classes = get_classes( sr_elem )
        classes.remove( "sr" )
        assert len(classes) == 1 and classes[0].endswith( "-sr" )
        sr_type = classes[0][:-3]
        # unload the search result
        result = unloaders[ sr_type ]( sr_elem )
        result["sr_type"] = sr_type
        results.append( result )

    return results

@ -17,6 +17,10 @@ def init_webapp( webapp, webdriver, **options ):
global _webapp, _webdriver global _webapp, _webdriver
_webapp = webapp _webapp = webapp
_webdriver = webdriver _webdriver = webdriver
options = {
key.replace("_","-"): val
for key, val in options.items()
}
# load the webapp # load the webapp
if get_pytest_option("webdriver") == "chrome" and get_pytest_option("headless"): if get_pytest_option("webdriver") == "chrome" and get_pytest_option("headless"):
@ -39,6 +43,18 @@ def _wait_for_webapp():
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def select_tabbed_page( parent_sel, tab_id ):
    """Click on a tab, then wait for its page to be displayed."""

    # click on the tab
    tabbed_pages = find_child( ".tabbed-pages", find_child(parent_sel) )
    tab = find_child( f".tab-strip .tab[data-tabid='{tab_id}']", tabbed_pages )
    tab.click()

    # wait for the page to appear
    def is_page_showing():
        page = find_child( f".tabbed-page[data-tabid='{tab_id}']", tabbed_pages )
        return page and page.is_displayed()
    wait_for( 2, is_page_showing )
# ---------------------------------------------------------------------
def get_nav_panels(): def get_nav_panels():
"""Get the available nav panels.""" """Get the available nav panels."""
return _get_tab_ids( "#nav .tab-strip" ) return _get_tab_ids( "#nav .tab-strip" )
@ -72,6 +88,11 @@ def find_children( sel, parent=None ):
except NoSuchElementException: except NoSuchElementException:
return None return None
def get_classes( elem ):
    """Get the element's classes.

    Returns a list of the element's CSS class names (empty if it has none).
    """
    # NOTE: get_attribute() returns None if the element has no "class" attribute at all.
    classes = elem.get_attribute( "class" )
    return classes.split() if classes else []
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def wait_for( timeout, func ): def wait_for( timeout, func ):

@ -1,8 +1,24 @@
"""Helper functions.""" """Helper functions."""
import os
import pathlib import pathlib
import re import re
from asl_rulebook2.webapp import app, CONFIG_DIR
# ---------------------------------------------------------------------
def make_data_path( path ):
    """Return a path within the data directory (None if no data directory has been configured)."""
    data_dir = app.config.get( "DATA_DIR" )
    return os.path.join( data_dir, path ) if data_dir else None
def make_config_path( path ):
    """Return the location of a file within the config directory."""
    config_path = os.path.join( CONFIG_DIR, path )
    return config_path
# --------------------------------------------------------------------- # ---------------------------------------------------------------------
def change_extn( fname, extn ): def change_extn( fname, extn ):

@ -0,0 +1,66 @@
#!/usr/bin/env python3
""" Add named destinations to a PDF file. """
import subprocess
import json
import time
import datetime
import click
from asl_rulebook2.utils import TempFile
# ---------------------------------------------------------------------
def _escape_ps_string( val ):
    """Escape backslashes and parentheses so that a value can be embedded in a PostScript string literal."""
    return val.replace( "\\", "\\\\" ).replace( "(", "\\(" ).replace( ")", "\\)" )

@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ):
    """Add named destinations to a PDF file.

    The targets file is a JSON object that maps each ruleid to its "page_no" and "pos" (an [x,y] pair).
    We generate a pdfmark file from it, then get Ghostscript to combine it with the input PDF.
    """

    # load the targets
    with open( targets_fname, "r", encoding="utf-8" ) as fp:
        targets = json.load( fp )

    # NOTE(review): We do everything inside the "with" block, on the assumption that TempFile cleans up
    # the temp file on exit - confirm against TempFile.
    with TempFile( mode="w" ) as temp_file:

        # generate the pdfmarks
        print( "Generating the pdfmarks..." )
        if title:
            # NOTE: The title goes into a PostScript string literal, so parentheses and backslashes
            # must be escaped, otherwise they would corrupt the pdfmark file.
            print( "[ /Title ({})".format( _escape_ps_string( title ) ), file=temp_file )
        else:
            print( "[", file=temp_file )
        print( " /DOCINFO pdfmark", file=temp_file )
        print( file=temp_file )
        for ruleid, target in targets.items():
            xpos, ypos = target["pos"]
            print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
                ruleid, target["page_no"], xpos, ypos+yoffset
            ), file=temp_file )
        print( file=temp_file )
        # nb: we close the file so that Ghostscript can read it, but keep it on disk
        temp_file.close( delete=False )

        # generate the pdfmark'ed document
        print( "Generating the pdfmark'ed document..." )
        print( "- {} => {}".format( pdf_file, output_fname ) )
        args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ]
        args.extend( [ "-o", output_fname ] )
        args.extend( [ "-f", pdf_file ] )
        args.append( temp_file.name )
        start_time = time.time()
        subprocess.run( args, check=True )
        elapsed_time = time.time() - start_time
        print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
# ---------------------------------------------------------------------
if __name__ == "__main__":
main() #pylint: disable=no-value-for-parameter

@ -10,7 +10,7 @@ from asl_rulebook2.utils import parse_page_numbers
@click.command() @click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) ) @click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False), help="Output PDF file" ) @click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False), help="Output PDF file." )
@click.option( "--pages","-p", help="Page(s) to dump (e.g. 2,5,9-15)." ) @click.option( "--pages","-p", help="Page(s) to dump (e.g. 2,5,9-15)." )
def main( pdf_file, output_fname, pages ): def main( pdf_file, output_fname, pages ):
"""Extract pages from a PDF.""" """Extract pages from a PDF."""

Loading…
Cancel
Save