parent
9d2495aa64
commit
b387871bbe
@ -0,0 +1,25 @@ |
|||||||
|
{ |
||||||
|
|
||||||
|
"_comment_": "This file defines search aliases.", |
||||||
|
"_comment_": "Keys that appear in a query string will match itself or any of its associated values.", |
||||||
|
"_comment_": " e.g. searching for 'entrenchments' will actually search for 'entrenchments OR foxhole OR trench OR ditch'", |
||||||
|
"_comment_": "These differ from search synonyms in that only the key word will trigger the replacement, not any word from the set.", |
||||||
|
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
"latw": [ |
||||||
|
"atmm", "atr", "baz", "mol-p", "mol-projector", "piat", "pf", "pfk", "psk" |
||||||
|
], |
||||||
|
"fortification/fortifications/foritifcations": [ |
||||||
|
"cave", "a-t ditch", "foxhole", "sangar", "trench", "bunker", "minefield", "mines", "booby trap", "panji", "pillbox", "roadblock", "tetrahedron", "wire" |
||||||
|
], |
||||||
|
"entrenchment/entrenchments": [ |
||||||
|
"foxhole", "trench", "ditch" |
||||||
|
], |
||||||
|
"vehicle/vehicles": [ |
||||||
|
"tank", "halftrack", "half-track", "jeep", "carrier" |
||||||
|
], |
||||||
|
"illumination": [ |
||||||
|
"tarshell", "illuminating round", "trip flare" |
||||||
|
] |
||||||
|
|
||||||
|
} |
@ -0,0 +1,14 @@ |
|||||||
|
{ |
||||||
|
|
||||||
|
"_comment_": "This file defines search replacements.", |
||||||
|
"_comment_": "Keys that appear in a query string will be replaced by the value.", |
||||||
|
"_comment_": " e.g. searching for '1/2 MF' will actually search for '½ MF'", |
||||||
|
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
"1/2": "½", |
||||||
|
"3/4": "¾", |
||||||
|
"3/8": "⅜", |
||||||
|
"5/8": "⅝", |
||||||
|
"(r)": "®" |
||||||
|
|
||||||
|
} |
@ -0,0 +1,51 @@ |
|||||||
|
[ |
||||||
|
|
||||||
|
"This file defines search synonyms.", |
||||||
|
"If a word appears in a query string, it will match any of the words in its set.", |
||||||
|
" e.g. searching for 'finn gun' will actually search for '(finn OR finnish) AND gun'", |
||||||
|
"These differ from search aliases in that any word from a set will trigger the replacement.", |
||||||
|
"A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
[ "u.s.", "america", "american" ], |
||||||
|
[ "usmc", "marine" ], |
||||||
|
[ "finn", "finnish" ], |
||||||
|
[ "romania", "romanian" ], |
||||||
|
[ "hungary", "hungarian" ], |
||||||
|
[ "slovakia", "slovakian" ], |
||||||
|
[ "croatia", "croatian" ], |
||||||
|
[ "bulgaria", "bulgarian" ], |
||||||
|
|
||||||
|
[ "dc", "demo charge", "demolition charge" ], |
||||||
|
[ "ft", "flamethrower", "flame-thrower" ], |
||||||
|
[ "baz", "bazooka" ], |
||||||
|
[ "pf", "panzerfaust" ], |
||||||
|
[ "psk", "panzerschreck", "panzershreck" ], |
||||||
|
[ "wp", "white phosphorous" ], |
||||||
|
[ "mol", "molotov cocktail" ], |
||||||
|
[ "ovr", "overrun" ], |
||||||
|
[ "cc", "close combat" ], |
||||||
|
[ "thh", "t-h hero", "tank-hunter hero" ], |
||||||
|
[ "scw", "shaped-charge weapon" ], |
||||||
|
[ "sw", "support weapon" ], |
||||||
|
[ "mg", "machinegun", "machine-gun", "machine gun" ], |
||||||
|
[ "firelane", "fire-lane", "fire lane" ], |
||||||
|
[ "firegroup", "fire-group", "fire group" ], |
||||||
|
[ "lc", "landing craft" ], |
||||||
|
[ "ht", "halftrack", "half-track" ], |
||||||
|
[ "wa", "wall advantage" ], |
||||||
|
[ "hob", "heat of battle" ], |
||||||
|
[ "cg", "campaign game" ], |
||||||
|
[ "pbm", "pbem" ], |
||||||
|
|
||||||
|
[ "rb", "red barricades" ], |
||||||
|
[ "votg", "valor of the guards" ], |
||||||
|
[ "kgp", "kampfgruppe peiper", "kampfgrupper peiper" ], |
||||||
|
[ "kgs", "kampfgruppe scherer", "kampfgrupper scherer" ], |
||||||
|
[ "brt", "br:t", "blood reef tarawa" ], |
||||||
|
[ "pb", "pegasus bridge" ], |
||||||
|
|
||||||
|
[ "ammo", "ammunition" ], |
||||||
|
[ "armor", "armour" ], |
||||||
|
[ "color", "colour" ] |
||||||
|
|
||||||
|
] |
@ -0,0 +1,475 @@ |
|||||||
|
""" Manage the search engine. """ |
||||||
|
|
||||||
|
import os |
||||||
|
import sqlite3 |
||||||
|
import json |
||||||
|
import re |
||||||
|
import itertools |
||||||
|
import string |
||||||
|
import tempfile |
||||||
|
import logging |
||||||
|
import traceback |
||||||
|
|
||||||
|
from flask import request, jsonify |
||||||
|
|
||||||
|
from asl_rulebook2.utils import plural |
||||||
|
from asl_rulebook2.webapp import app |
||||||
|
from asl_rulebook2.webapp import content as webapp_content |
||||||
|
from asl_rulebook2.webapp.utils import make_config_path, make_data_path |
||||||
|
|
||||||
|
# full path of the SQLite search database (set in init_search())
_sqlite_path = None
# maps FTS rowid's to their corresponding index entry (set in init_search())
_fts_index_entries = None

_logger = logging.getLogger( "search" )

# these are used to highlight search matches (nb: the front-end looks for these)
_BEGIN_HIGHLIGHT = "!@:"
_END_HIGHLIGHT = ":@!"

# NOTE: These regex's fix up content returned to us by the SQLite search engine (typically problems
# with highlighting search terms). Each entry is a [ compiled-regex, replacement ] pair, with the
# highlight markers substituted into both the pattern and the replacement string.
_FIXUP_TEXT_REGEXES = [
    [ re.compile( fixup[0].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ) ),
      fixup[1].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
    ]
    for fixup in [
        [ r"&{}(.+?){};", r"{}&\g<1>;{}" ], # HTML entities e.g. &((frac12)); -> (($frac12;))
        [ r"{}(.+?){}#", r"{}\g<1>#{}" ], # e.g. ((TH)# -> ((TH#)
        [ r"{}U\.S{}\.", "{}U.S.{}" ], # ((U.S)). -> ((U.S.))
    ]
]

# these are used to separate ruleref's in the FTS table (internal use only)
_RULEREF_SEPARATOR = "-:-"

# maps a search term to its adjustment: either a replacement string, or a set of
# aliases/synonyms that will be OR'ed together (set in load_search_config())
_SEARCH_TERM_ADJUSTMENTS = None
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
@app.route( "/search", methods=["POST"] )
def search():
    """Run a search (Flask entry point).

    The query parameters arrive in the POST form data; the search results
    (or an error message) are returned as JSON.
    """

    # log the request
    _logger.info( "SEARCH REQUEST:" )
    args = dict( request.form.items() )
    for key, val in args.items():
        _logger.info( "- %s: %s", key, val )

    # run the search
    try:
        return _do_search( args )
    except Exception as exc: #pylint: disable=broad-except
        # return the error to the front-end, so it can be shown to the user
        msg = str( exc )
        if msg.startswith( "fts5: " ):
            # nb: this is a sqlite3.OperationalError - strip the prefix
            # FIX: the original sliced msg[5:], which removed only 5 of the 6 prefix
            # characters and left a leading space in the user-visible message
            msg = msg[ len("fts5: ") : ]
        _logger.warning( "SEARCH ERROR: %s\n%s", args, traceback.format_exc() )
        return jsonify( { "error": msg } )
||||||
|
|
||||||
|
def _do_search( args ):
    """Run a search and return the results as a JSON response.

    args["queryString"] holds the raw query string; it is converted into an FTS
    query, run against the "searchable" table, and the matching index entries
    are returned with matches wrapped in _BEGIN_HIGHLIGHT/_END_HIGHLIGHT markers.
    """

    def fixup_text( val ):
        # fix up problems in highlighted content returned by the search engine
        if val is None:
            return None
        for regex in _FIXUP_TEXT_REGEXES:
            val = regex[0].sub( regex[1], val )
        return val

    # run the search
    query_string = args[ "queryString" ].strip()
    if query_string == "!:simulated-error:!":
        raise RuntimeError( "Simulated error." ) # nb: for the test suite
    fts_query_string, search_terms = _make_fts_query_string( query_string )
    _logger.debug( "FTS query string: %s", fts_query_string )
    conn = sqlite3.connect( _sqlite_path )
    def highlight( n ):
        # NOTE: highlight() is an FTS extension function, and takes column numbers :-/
        return "highlight(searchable,{},'{}','{}')".format( n, _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
    # nb: columns 2-5 are title/subtitle/content/rulerefs (see the CREATE TABLE in init_search())
    sql = "SELECT rowid,doc_id,sr_type,rank,{},{},{},{} FROM searchable".format(
        highlight(2), highlight(3), highlight(4), highlight(5)
    )
    sql += " WHERE searchable MATCH ?"
    sql += " ORDER BY rank"
    curs = conn.execute( sql,
        ( "{title subtitle content rulerefs}: " + fts_query_string, )
    )

    def get_col( sr, key, val ):
        # store a (fixed-up) column value in the search result, if it's non-empty
        if val:
            sr[key] = fixup_text( val )

    # get the results
    results = []
    for row in curs:
        if row[2] != "index":
            _logger.error( "Unknown searchable row type (rowid=%d): %s", row[0], row[2] )
            continue
        index_entry = _fts_index_entries[ row[0] ]
        result = {
            "doc_id": row[1],
            "sr_type": row[2],
            # nb: the SQLite rank is negated to give a score (presumably so that
            # higher = better - TODO confirm against the front-end's use of _score)
            "_score": - row[3],
        }
        get_col( result, "title", row[4] )
        get_col( result, "subtitle", row[5] )
        get_col( result, "content", row[6] )
        # carry over the ruleids/see-also info from the index entry
        if index_entry.get( "ruleids" ):
            result["ruleids"] = index_entry["ruleids"]
        if index_entry.get( "see_also" ):
            result["see_also"] = index_entry["see_also"]
        # rebuild the ruleref's, pairing each highlighted caption with its ruleids
        rulerefs = [ r.strip() for r in row[7].split(_RULEREF_SEPARATOR) ] if row[7] else []
        assert len(rulerefs) == len(index_entry.get("rulerefs",[]))
        if rulerefs:
            result[ "rulerefs" ] = []
            for i, ruleref in enumerate(rulerefs):
                ruleref2 = {}
                if "caption" in index_entry["rulerefs"][i]:
                    # nb: the FTS caption must equal the index entry's caption, modulo highlighting
                    assert ruleref.replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ) \
                        == index_entry["rulerefs"][i]["caption"]
                    ruleref2["caption"] = fixup_text( ruleref )
                if "ruleids" in index_entry["rulerefs"][i]:
                    ruleref2["ruleids"] = index_entry["rulerefs"][i]["ruleids"]
                assert ruleref2
                result["rulerefs"].append( ruleref2 )
        results.append( result )

    # fixup the results (remove false matches for search terms ending with "#", e.g. "US#" matching "use")
    results = _fixup_results_for_hash_terms( results, search_terms )

    # adjust the sort order (prefer matches in the title over subtitle/content matches)
    results = _adjust_sort_order( results )

    # return the results
    _logger.debug( "Search results:" if len(results) > 0 else "Search results: none" )
    for result in results:
        _logger.debug( "- %s (%.3f)",
            result["title"].replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ),
            result["_score"]
        )
    return jsonify( results )
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
# These regexes detect a query string that already looks like a raw FTS query
# (explicit boolean operators, or a parenthesized group), which is then passed
# through to the search engine as-is.
# NOTE: "(r)" / "(R)" is deliberately not treated as a group - presumably because it
# appears literally in queries (it's a search-replacement key) - TODO confirm.
PASSTHROUGH_REGEXES = {
    re.compile( r"\bAND\b" ),
    re.compile( r"\bOR\b" ),
    re.compile( r"\bNOT\b" ),
    re.compile( r"\((?![Rr]\))" ),
}
||||||
|
|
||||||
|
def _make_fts_query_string( query_string ):
    """Generate the SQLite FTS query string.

    SQLite's MATCH function recognizes a lot of special characters, which need
    to be enclosed in double-quotes to disable.

    Returns a 2-tuple: the FTS query string, and the parsed search terms
    (None if the query string was passed through as a raw FTS query).
    """

    # check if this looks like a raw FTS query
    if any( regex.search(query_string) for regex in PASSTHROUGH_REGEXES ):
        return query_string.strip(), None

    # split the search string into words (taking quoted phrases into account)
    ignore = app.config.get( "SQLITE_FTS_IGNORE_CHARS", ",;!?$" )
    query_string = "".join( ch for ch in query_string if ch not in ignore )
    terms = query_string.lower().split()
    i = 0
    while True:
        if i >= len(terms):
            break
        if i > 0 and terms[i-1].startswith( '"' ):
            # the previous term opened a quoted phrase - fold this term into it
            terms[i-1] += " {}".format( terms[i] )
            del terms[i]
            if terms[i-1].startswith( '"' ) and terms[i-1].endswith( '"' ):
                # the quoted phrase is complete - strip the quotes
                terms[i-1] = terms[i-1][1:-1]
            continue
        i += 1

    # clean up quoted phrases
    terms = [ t[1:] if t.startswith('"') else t for t in terms ]
    terms = [ t[:-1] if t.endswith('"') else t for t in terms ]
    terms = [ t.strip() for t in terms ]
    terms = [ t for t in terms if t ]

    # adjust search terms (apply replacements, aliases and synonyms)
    for term_no, term in enumerate(terms):
        aliases = _SEARCH_TERM_ADJUSTMENTS.get( term )
        if not aliases:
            continue
        if isinstance( aliases, str ):
            # the search term is replaced by a new one
            terms[ term_no ] = aliases
        elif isinstance( aliases, set ):
            # the search term is replaced by multiple new ones (that will be OR'ed together)
            # NOTE: We sort the terms so that the tests will work reliably.
            terms[ term_no ] = sorted( aliases )
        else:
            # FIX: the original code did "assert <non-empty string>" here, which always
            # passes silently - raise a real error instead
            raise TypeError( "Unknown search alias type: {}".format( type(aliases) ) )

    # fixup each term
    def has_special_char( term ):
        """Check if the term contains any special characters."""
        for ch in term:
            if ch in "*":
                continue # nb: allow FTS wildcards through un-quoted
            if ch.isspace() or ch in string.punctuation:
                return True
            if ord(ch) < 32 or ord(ch) > 127:
                return True
        return False
    def fixup_terms( terms ):
        """Quote any terms (recursively) that contain special characters."""
        for term_no, term in enumerate(terms):
            if isinstance( term, str ):
                if has_special_char( term ):
                    terms[term_no] = '"{}"'.format( term )
            else:
                fixup_terms( term )
    fixup_terms( terms )

    # return the final FTS query string
    def term_string( term ):
        # nb: a sub-list of terms (aliases/synonyms) is OR'ed together
        if isinstance( term, str ):
            return term
        assert isinstance( term, list )
        return "( {} )".format( " OR ".join( term ) )
    return " AND ".join( term_string(t) for t in terms ), terms
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def _fixup_results_for_hash_terms( results, search_terms ):
    """Drop search results that only matched a hash term incorrectly.

    SQLite doesn't handle search terms that end with a hash particularly well.
    We correct highlighted search terms in fixup_text(), but searching for e.g. "US#"
    will also match "use" and "using" - such results are filtered out here.
    """

    if not search_terms:
        return results

    # collect the quoted search terms that end with a hash
    # NOTE: We don't bother descending down into sub-terms.
    hash_terms = []
    for term in search_terms:
        if not isinstance( term, str ):
            continue
        if term.startswith( '"' ) and term.endswith( '"' ):
            inner = term[1:-1]
            if inner.endswith( "#" ):
                hash_terms.append( inner[:-1].lower() )
    if not hash_terms:
        return results
    if "us" in hash_terms:
        hash_terms += [ "use", "used", "using", "user" ]

    def has_real_match( sr ):
        # blank out every incorrectly-matched search term (e.g. ((K)) when searching for "K#"),
        # then keep the result only if some highlighted search terms remain
        blob = json.dumps( sr ).lower()
        for bad in hash_terms:
            blob = blob.replace( _BEGIN_HIGHLIGHT + bad + _END_HIGHLIGHT, "_removed_" )
        return _BEGIN_HIGHLIGHT in blob

    filtered = []
    for sr in results:
        if has_real_match( sr ):
            filtered.append( sr )
    return filtered
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def _adjust_sort_order( results ):
    """Adjust the sort order of the search results.

    Results with a match in the title are promoted ahead of those with a match
    only in the subtitle/content; everything else keeps its rank-based order.
    """

    def get( sr, key ):
        # nb: treat a missing or None value as an empty string
        return sr.get( key ) or ""

    reordered = []
    remaining = list( results )

    def promote( pred ):
        # move the results that satisfy the predicate into the output list,
        # preserving their relative order
        nonlocal remaining
        kept = []
        for sr in remaining:
            # NOTE: We never prefer small entries (i.e. those with no ruleref's),
            # e.g. those that only contain a "see also".
            if pred( sr ) and sr.get( "rulerefs" ):
                reordered.append( sr )
            else:
                kept.append( sr )
        remaining = kept

    # promote, in order of preference: exact title matches, titles starting with
    # a match, any match in the title, then any match in the subtitle
    promote( lambda sr:
        get(sr,"title").startswith( _BEGIN_HIGHLIGHT ) and get(sr,"title").endswith( _END_HIGHLIGHT )
    )
    promote( lambda sr: get(sr,"title").startswith( _BEGIN_HIGHLIGHT ) )
    promote( lambda sr: _BEGIN_HIGHLIGHT in get(sr,"title") )
    promote( lambda sr: _BEGIN_HIGHLIGHT in get(sr,"subtitle") )

    # include any remaining search results
    reordered.extend( remaining )
    return reordered
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def init_search( logger ):
    """Initialize the search engine.

    Creates the SQLite FTS database, loads it with the searchable content from
    each content doc's index, then loads the search config (aliases/synonyms/etc).
    """

    # initialize
    global _fts_index_entries
    _fts_index_entries = {}

    # initialize the database
    global _sqlite_path
    _sqlite_path = app.config.get( "SQLITE_PATH" )
    if not _sqlite_path:
        # FUDGE! We should be able to create a shared, in-memory database using this:
        #   file::XYZ:?mode=memory&cache=shared
        # but it doesn't seem to work (on Linux) and ends up creating a file with this name :-/
        # We manually create a temp file, which has to have the same name each time, so that we don't
        # keep creating a new database each time we start up. Sigh...
        _sqlite_path = os.path.join( tempfile.gettempdir(), "asl-rulebook2.searchdb" )
    # nb: remove any stale database from a previous run (the table is re-created below)
    if os.path.isfile( _sqlite_path ):
        os.unlink( _sqlite_path )
    logger.info( "Creating the search index: %s", _sqlite_path )
    conn = sqlite3.connect( _sqlite_path )
    # NOTE: Storing everything in a single table allows FTS to rank search results based on
    # the overall content, and also lets us do AND/OR queries across all searchable content.
    conn.execute(
        "CREATE VIRTUAL TABLE searchable USING fts5"
        " ( doc_id, sr_type, title, subtitle, content, rulerefs, tokenize='porter unicode61' )"
    )

    # load the searchable content
    logger.info( "Loading the search index..." )
    conn.execute( "DELETE FROM searchable" )
    curs = conn.cursor()
    for cdoc in webapp_content.content_docs.values():
        logger.info( "- Loading index file: %s", cdoc["_fname"] )
        nrows = 0
        for index_entry in cdoc["index"]:
            # nb: ruleref captions are stored in a single column, joined by _RULEREF_SEPARATOR
            rulerefs = _RULEREF_SEPARATOR.join( r.get("caption","") for r in index_entry.get("rulerefs",[]) )
            # NOTE: We should really strip content before adding it to the search index, otherwise any HTML tags
            # will need to be included in search terms. However, this means that the content returned by a query
            # will be this stripped content. We could go back to the original data to get the original HTML content,
            # but that means we would lose the highlighting of search terms that SQLite gives us. We opt to insert
            # the original content, since none of it should contain HTML, anyway.
            curs.execute(
                "INSERT INTO searchable (doc_id,sr_type,title,subtitle,content,rulerefs) VALUES (?,?,?,?,?,?)", (
                    cdoc["doc_id"], "index",
                    index_entry.get("title"), index_entry.get("subtitle"), index_entry.get("content"), rulerefs
            ) )
            # remember which index entry this FTS row corresponds to (and vice versa)
            _fts_index_entries[ curs.lastrowid ] = index_entry
            index_entry["_fts_rowid"] = curs.lastrowid
            nrows += 1
        conn.commit()
        logger.info( " - Loaded %s.", plural(nrows,"index entry","index entries"), )
    assert len(_fts_index_entries) == _get_row_count( conn, "searchable" )

    # load the search config
    load_search_config( logger )
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def load_search_config( logger ):
    """Load the search config (replacements, aliases and synonyms).

    Each config file is loaded from the config directory, then again from the
    data directory (so that users can define their own adjustments).
    """

    # initialize
    global _SEARCH_TERM_ADJUSTMENTS
    _SEARCH_TERM_ADJUSTMENTS = {}

    def add_search_term_adjustment( key, vals ):
        """Register an adjustment for a search term (a replacement string, or a set of aliases)."""
        # make sure everything is lower-case
        key = key.lower()
        if isinstance( vals, str ):
            vals = vals.lower()
        elif isinstance( vals, set ):
            vals = set( v.lower() for v in vals )
        else:
            # FIX: the original code did "assert <non-empty string>" here, which always
            # passes silently - raise a real error instead
            raise TypeError( "Unknown search alias type: {}".format( type(vals) ) )
        # add the new search term adjustment
        if key not in _SEARCH_TERM_ADJUSTMENTS:
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
        else:
            # found a multiple definition - try to do something sensible
            logger.warning( " - Duplicate search alias: %s\n- current aliases = %s\n- new aliases = %s", key,
                _SEARCH_TERM_ADJUSTMENTS[key], vals
            )
            if isinstance( _SEARCH_TERM_ADJUSTMENTS[key], str ):
                # the previous definition was a simple replacement - the new definition wins
                _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
            else:
                assert isinstance( _SEARCH_TERM_ADJUSTMENTS[key], set )
                # merge the new value(s) into the existing set
                # FIX: the original called set.update() unconditionally, which would add the
                # individual *characters* of a string value - handle strings as a whole
                if isinstance( vals, str ):
                    _SEARCH_TERM_ADJUSTMENTS[ key ].add( vals )
                else:
                    _SEARCH_TERM_ADJUSTMENTS[ key ].update( vals )

    # load the search replacements
    def load_search_replacements( fname ):
        """Load a search replacements file (key -> replacement text)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search replacements: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for key, val in data.items():
            if key.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", key, val )
            add_search_term_adjustment( key, val )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search replacement","search replacements") )
    load_search_replacements( make_config_path( "search-replacements.json" ) )
    load_search_replacements( make_data_path( "search-replacements.json" ) )

    # load the search aliases
    def load_search_aliases( fname ):
        """Load a search aliases file ("key1/key2" -> list of aliases)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search aliases: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for keys, aliases in data.items():
            if keys.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", keys, " ; ".join(aliases) )
            # nb: each key matches itself, as well as all of its aliases
            for key in keys.split( "/" ):
                add_search_term_adjustment( key, set( itertools.chain( aliases, [key] ) ) )
            nitems += 1
        # FIX: the singular form passed to plural() was wrong ("search aliases")
        logger.info( "- Loaded %s.", plural(nitems,"search alias","search aliases") )
    load_search_aliases( make_config_path( "search-aliases.json" ) )
    load_search_aliases( make_data_path( "search-aliases.json" ) )

    # load the search synonyms
    def load_search_synonyms( fname ):
        """Load a search synonyms file (each entry is a set of equivalent words)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search synonyms: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for synonyms in data:
            if isinstance( synonyms, str ):
                continue # nb: ignore comments
            logger.debug( "- %s", " ; ".join(synonyms) )
            # nb: any word in the set will match every word in the set
            synonyms = set( synonyms )
            for term in synonyms:
                add_search_term_adjustment( term, synonyms )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search synonym","search synonyms") )
    load_search_synonyms( make_config_path( "search-synonyms.json" ) )
    load_search_synonyms( make_data_path( "search-synonyms.json" ) )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def _get_row_count( conn, table_name ): |
||||||
|
"""Get the number of rows in a table.""" |
||||||
|
cur = conn.execute( "SELECT count(*) FROM {}".format( table_name ) ) |
||||||
|
return cur.fetchone()[0] |
@ -1,23 +1,80 @@ |
|||||||
import { gMainApp } from "./MainApp.js" ; |
import { gMainApp, gEventBus, gContentDocs } from "./MainApp.js" ; |
||||||
|
import { fixupSearchHilites } from "./utils.js" ; |
||||||
|
|
||||||
// --------------------------------------------------------------------
|
// --------------------------------------------------------------------
|
||||||
|
|
||||||
// A single search result of type "index" (i.e. an index entry that matched the query).
export class IndexSearchResult {

    constructor( key, content ) {
        this.key = key ; // unique key (e.g. for use by Vue when rendering lists)
        this.srType = "index" ; // nb: mirrors the "sr_type" field set by the back-end
        this.content = content ; // the search result payload returned by the back-end
    }

}
|
||||||
|
|
||||||
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
||||||
|
|
||||||
// Renders a single "index" search result: title/subtitle header, then the content,
// "see also" links, ruleid's and ruleref's.
// NOTE: Fields coming from the back-end may contain highlight markers, so they are
// rendered with v-html (after conversion to HTML spans).
gMainApp.component( "index-sr", {

    props: [ "sr" ], // the search result object returned by the back-end

    template: `
<div class="sr index-sr" >
    <div v-if="sr.title || sr.subtitle" class="title" >
        <span v-if=sr.title class="title" v-html=sr.title />
        <span v-if=sr.subtitle class="subtitle" v-html=sr.subtitle />
    </div>
    <div class="body">
        <div v-if=sr.content class="content" v-html=sr.content />
        <div v-if=makeSeeAlso v-html=makeSeeAlso class="see-also" />
        <div v-if=sr.ruleids class="ruleids" >
            <ruleid v-for="rid in sr.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
        </div>
        <ul v-if=sr.rulerefs class="rulerefs" >
            <li v-for="rref in sr.rulerefs" :key=rref >
                <span v-if=rref.caption class="caption" v-html=fixupHilites(rref.caption) />
                <ruleid v-for="rid in rref.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
            </li>
        </ul>
    </div>
</div>`,

    computed: {
        // build the "See also: ..." text (null if there are no see-also entries)
        makeSeeAlso() {
            if ( this.sr.see_also )
                return "See also: " + this.sr.see_also.join( ", " ) ;
            return null ;
        },
    },

    methods: {
        // convert search highlight markers into HTML
        fixupHilites( val ) {
            return fixupSearchHilites( val ) ;
        },
    },

} ) ;
||||||
|
|
||||||
|
// --------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Renders a ruleid, as a clickable link if the rule is a known target in the
// content doc, otherwise as plain (greyed-out) text.
gMainApp.component( "ruleid", {

    props: [ "docId", "ruleId" ],
    data() { return {
        target: null, // the resolved rule target (null if the rule is unknown)
    } ; },

    template: `<span class="ruleid" v-bind:class="{unknown:!target}">[<a v-if=target @click=onClick>{{ruleId}}</a><span v-else>{{ruleId}}</span>]</span>`,

    created() {
        // figure out which rule is being referenced
        let ruleId = this.ruleId ;
        let pos = ruleId.indexOf( "-" ) ;
        if ( pos >= 0 ) {
            // NOTE: For ruleid's of the form "A12.3-.4", we want to target "A12.3".
            ruleId = ruleId.substring( 0, pos ) ;
        }
        // check if the rule is one we know about
        if ( gContentDocs[this.docId] && gContentDocs[this.docId].targets ) {
            if ( gContentDocs[this.docId].targets[ ruleId ] )
                this.target = ruleId ;
        }
    },

    methods: {
        onClick() {
            // show the target rule in the content pane
            gEventBus.emit( "show-target", this.docId, this.target ) ;
        },
    },

} ) ;
||||||
|
@ -1 +1,13 @@ |
|||||||
#search-results .sr { margin: 0 10px 2px 0 ; border: 1px dotted #666 ; padding: 5px ; } |
#search-results .sr { margin: 0 10px 2px 0 ; padding: 5px ; } |
||||||
|
#search-results .sr .hilite { padding: 0 2px ; background: #ffa ; } |
||||||
|
|
||||||
|
#search-results .index-sr .title { background: #e0e0e0 ; border-bottom: 1px solid #ccc ; padding: 2px 5px ; font-weight: bold ; } |
||||||
|
#search-results .index-sr .subtitle { padding: 2px 5px ; font-weight: normal ; font-size: 80% ; font-style: italic ; } |
||||||
|
#search-results .index-sr .body { padding: 2px 5px 0 5px ; font-size: 80% ; } |
||||||
|
#search-results .index-sr .content { color: #444 ; } |
||||||
|
#search-results .index-sr .see-also { color: #444 ; } |
||||||
|
#search-results .index-sr ul.rulerefs { margin-left: 1.2em ; } |
||||||
|
#search-results .index-sr ul.rulerefs .caption { padding-right: 0.5em ; } |
||||||
|
#search-results .index-sr .ruleid { margin-right: 0.25em ; font-style: italic ; color: #444 ; } |
||||||
|
#search-results .index-sr .ruleid.unknown { color: #888 ; } |
||||||
|
#search-results .index-sr .ruleid a { cursor: pointer ; } |
||||||
|
Binary file not shown.
@ -1,15 +1,15 @@ |
|||||||
{ |
{ |
||||||
|
|
||||||
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,702] }, |
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,718] }, |
||||||
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,404] }, |
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,503] }, |
||||||
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72.97] }, |
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72,292] }, |
||||||
|
|
||||||
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,702] }, |
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,718] }, |
||||||
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72.404] }, |
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72,503] }, |
||||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,97] }, |
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,292] }, |
||||||
|
|
||||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 3, "pos": [72,702] }, |
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,718] }, |
||||||
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,404] }, |
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,503] }, |
||||||
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,97] } |
"D1.4": { "caption": "IDENTITY & GROUND PRESSURE", "page_no": 3, "pos": [72,292] } |
||||||
|
|
||||||
} |
} |
||||||
|
@ -0,0 +1,298 @@ |
|||||||
|
""" Test search. """ |
||||||
|
|
||||||
|
import re |
||||||
|
import logging |
||||||
|
|
||||||
|
from selenium.webdriver.common.keys import Keys |
||||||
|
|
||||||
|
from asl_rulebook2.utils import strip_html |
||||||
|
from asl_rulebook2.webapp.search import load_search_config, _make_fts_query_string |
||||||
|
from asl_rulebook2.webapp.tests.utils import init_webapp, select_tabbed_page, get_classes, \ |
||||||
|
wait_for, find_child, find_children |
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_search( webapp, webdriver ):
    """Test running searches and unloading the results.

    Exercises the 3 possible outcomes of _do_search(): no results (None),
    an error (string), and a list of unloaded search results.
    """

    # initialize (the "simple" fixture data directory provides the index entries checked below)
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # test a search that finds nothing
    results = _do_search( "oogah, boogah!" )
    assert results is None

    # test error handling
    # nb: this magic query string makes the backend raise a simulated error - TODO confirm against the search endpoint
    results = _do_search( "!:simulated-error:!" )
    assert "Simulated error." in results

    # do a search
    # nb: ((...)) marks content that was highlighted by the search engine (see _unload_search_results())
    results = _do_search( "enemy" )
    assert results == [
        { "sr_type": "index",
          "title": "CCPh", "subtitle": "Close Combat Phase",
          "ruleids": [ "A3.8" ],
          "rulerefs": [
              { "caption": "((ENEMY)) Attacks", "ruleids": [ "S11.5" ] },
              { "caption": "dropping SW before CC", "ruleids": [ "A4.43" ] },
          ]
        },
        { "sr_type": "index",
          "title": "Double Time",
          "content": "Also known as \"running really fast.\"",
          "see_also": [ "CX" ],
          "ruleids": [ "A4.5-.51", "S6.222" ],
          "rulerefs": [
              { "caption": "((ENEMY)) Guard Automatic Action", "ruleids": [ "S6.303" ] },
              { "ruleids": [ "C10.3" ] },
              { "caption": "NA in Advance Phase", "ruleids": [ "A4.7" ] },
              { "caption": "'S?' is \"<NA>\"" },
          ]
        },
    ]

    # do another search
    results = _do_search( "gap" )
    assert results == [
        { "sr_type": "index",
          "title": "((Gaps)), Convoy",
          "ruleids": [ "E11.21" ],
        },
    ]
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_content_fixup( webapp, webdriver ):
    """Test fixing up of content returned by the search engine."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # each case: ( query string, result field to check, expected fixed-up value )
    test_cases = [
        # a fraction should come back as its Unicode equivalent
        ( "3/4", "content", "HTML content: 2((\u00be)) MP" ),
        # a query term ending with a hash should still be highlighted
        ( "H#", "title", "((H#))" ),
        # "U.S." should be matched and highlighted as a single term
        ( "U.S.", "content", "The ((U.S.)) has lots of this." ),
    ]

    # run each search and check the single result that comes back
    for query_string, field, expected in test_cases:
        results = _do_search( query_string )
        assert len(results) == 1
        assert results[0][field] == expected
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_targets( webapp, webdriver ):
    """Test clicking on search results."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver, no_content=1, add_empty_doc=1 )

    def do_test( query_string, sel, expected ):
        """Search, click on a target in the results, and check where we end up."""

        # switch to the dummy document first, so we can detect the tab change
        select_tabbed_page( "#content", "empty" )

        # run the search
        _do_search( query_string )

        # click on a target in the search results
        target_link = find_child( f"#search-results {sel}" )
        target_link.click()

        def check_target():
            # the "simple" document's tab must become active...
            active_tab = find_child( "#content .tab-strip .tab.active" )
            if active_tab.get_attribute( "data-tabid" ) != "simple":
                return False
            # ...and it must be showing the expected target
            doc_elem = find_child( "#content .tabbed-page[data-tabid='simple'] .content-doc" )
            return doc_elem.get_attribute( "data-target" ) == expected
        wait_for( 2, check_target )

    # do the tests
    do_test( "CC", ".sr .ruleids .ruleid a", "A3.8" )
    do_test( "time", ".sr .rulerefs .ruleid a", "A4.7" )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_make_fts_query_string():
    """Test generating the FTS query string."""

    # initialize
    load_search_config( logging.getLogger("_unknown_") )

    # each case: ( query string, expected FTS query string )
    test_cases = [

        # basic query strings
        ( "", "" ),
        ( "hello", "hello" ),
        ( " hello, world! ", "hello AND world" ),
        ( "foo 1+2 A-T K# bar", 'foo AND "1+2" AND "a-t" AND "k#" AND bar' ),
        ( "a'b a''b", "\"a'b\" AND \"a''b\"" ),
        ( 'foo "set dc" bar', 'foo AND "set dc" AND bar' ),

        # quoted phrases
        ( '""', '' ),
        ( ' " " ', '' ),
        ( '"hello world"', '"hello world"' ),
        ( ' foo "hello world" bar ', 'foo AND "hello world" AND bar' ),
        ( ' foo " xyz " bar ', 'foo AND xyz AND bar' ),
        ( ' foo " xyz 123 " bar ', 'foo AND "xyz 123" AND bar' ),

        # incorrectly quoted phrases
        ( '"', '' ),
        ( ' " " " ', '' ),
        ( ' a "b c d e', 'a AND "b c d e"' ),
        ( ' a b" c d e ', 'a AND b AND c AND d AND e' ),

        # pass-through of FTS operators
        ( "AND", "AND" ),
        ( " OR", "OR" ),
        ( "OR ", "OR" ),
        ( "foo OR bar", "foo OR bar" ),
        ( "(a OR b)", "(a OR b)" ),

        # search replacements
        ( "1/2 3/4 3/8 5/8", '"½" AND "¾" AND "⅜" AND "⅝"' ),
        ( "(r)", '"®"' ),

        # search aliases (only the key word triggers the expansion)
        ( "entrenchment", "( ditch OR entrenchment OR foxhole OR trench )" ),
        ( "entrenchments", "( ditch OR entrenchments OR foxhole OR trench )" ),
        ( "foxhole", "foxhole" ),

        # search synonyms (any word in the set triggers the expansion)
        ( "armor", "( armor OR armour )" ),
        ( "american big armor", '( america OR american OR "u.s." ) AND big AND ( armor OR armour )' ),

    ]

    # run each test case
    for query, expected in test_cases:
        fts_query_string, _ = _make_fts_query_string( query )
        assert fts_query_string == expected
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def _do_search( query_string ):
    """Submit a search and wait for the results.

    Returns:
        None if nothing was found, a string error message if the search
        failed, otherwise a list of unloaded search results.
    """

    def get_seq_no():
        # nb: get_attribute() returns a string; convert to an int so that the
        # comparison below is numeric ("10" > "9" is False as strings, which
        # would make wait_for() time out once the seq# passes 9)
        return int( find_child( "#search-results" ).get_attribute( "data-seqno" ) )

    # submit the search
    select_tabbed_page( "#nav", "search" )
    elem = find_child( "input#query-string" )
    elem.clear()
    elem.send_keys( query_string )
    seq_no = get_seq_no()
    elem.send_keys( Keys.RETURN )

    # unload the results
    wait_for( 2, lambda: get_seq_no() > seq_no )
    elem = find_child( "#search-results .error" )
    if elem:
        return elem.text # nb: string = error message
    elem = find_child( "#search-results .no-results" )
    if elem:
        assert elem.text == "Nothing was found."
        return None # nb: None = no results
    results = _unload_search_results()
    assert isinstance( results, list ) # nb: list = search results
    return results
||||||
|
|
||||||
|
def _unload_search_results():
    """Unload the search results.

    Returns a list of dicts, one per search result shown in the UI.
    Highlighted content is represented as ((...)) in the returned text.
    """

    def unload_elem( result, key, elem ):
        """Unload a single element's text into result[key] (returns True if anything was stored)."""
        if not elem:
            return False
        elem_text = get_elem_text( elem )
        if not elem_text:
            return False
        result[key] = elem_text
        return True

    def get_elem_text( elem ):
        """Get the element's text content."""
        val = elem.get_attribute( "innerHTML" )
        # change how highlighted content is represented
        # nb: process the matches in reverse, so that earlier match offsets stay valid as we edit the string
        matches = list( re.finditer( r'<span class="hilite">(.*?)</span>', val ) )
        for mo in reversed(matches):
            val = val[:mo.start()] + "((" + mo.group(1) + "))" + val[mo.end():]
        # remove HTML tags
        return strip_html( val.strip() )

    def unload_ruleids( result, key, parent ):
        """Unload a list of ruleid's (shown in the UI as e.g. "[A3.8]")."""
        if not parent:
            return
        ruleids = []
        for elem in find_children( ".ruleid", parent ):
            ruleid = get_elem_text( elem )
            # strip the enclosing brackets
            assert ruleid.startswith( "[" ) and ruleid.endswith( "]" )
            ruleids.append( ruleid[1:-1] )
        if ruleids:
            result[key] = ruleids

    def unload_rulerefs( result, key, parent ):
        """Unload a list of ruleref's (each an optional caption plus ruleid's)."""
        if not parent:
            return
        rulerefs = []
        for elem in find_children( "li", parent ):
            ruleref = {}
            unload_elem( ruleref, "caption", find_child(".caption",elem) )
            unload_ruleids( ruleref, "ruleids", elem )
            rulerefs.append( ruleref )
        if rulerefs:
            result[key] = rulerefs

    def unload_index_sr( sr ): #pylint: disable=possibly-unused-variable
        """Unload an "index" search result."""
        result = {}
        unload_elem( result, "title", find_child("span.title",sr) )
        unload_elem( result, "subtitle", find_child(".subtitle",sr) )
        unload_elem( result, "content", find_child(".content",sr) )
        if unload_elem( result, "see_also", find_child(".see-also",sr) ):
            # convert the "See also: a, b, c" text into a list of entries
            assert result["see_also"].startswith( "See also:" )
            result["see_also"] = [ s.strip() for s in result["see_also"][9:].split( "," ) ]
        unload_ruleids( result, "ruleids", find_child(".ruleids",sr) )
        unload_rulerefs( result, "rulerefs", find_child(".rulerefs",sr) )
        return result

    # unload the search results
    results = []
    for sr in find_children( "#search-results .sr"):
        # figure out what type of search result this is from its CSS class (e.g. "index-sr" => "index")
        classes = get_classes( sr )
        classes.remove( "sr" )
        assert len(classes) == 1 and classes[0].endswith( "-sr" )
        sr_type = classes[0][:-3]
        # dispatch to the corresponding unload function defined above (e.g. unload_index_sr)
        # nb: this relies on the nested unload_*_sr functions being visible in locals()
        func = locals()[ "unload_{}_sr".format( sr_type ) ]
        sr = func( sr )
        sr["sr_type"] = sr_type
        results.append( sr )

    return results
@ -0,0 +1,66 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
""" Add named destinations to a PDF file. """ |
||||||
|
|
||||||
|
import subprocess |
||||||
|
import json |
||||||
|
import time |
||||||
|
import datetime |
||||||
|
|
||||||
|
import click |
||||||
|
|
||||||
|
from asl_rulebook2.utils import TempFile |
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ):
    """Add named destinations to a PDF file.

    The targets file maps each destination name to { "page_no", "pos": [x,y] };
    a pdfmarks file is generated from it, then Ghostscript rewrites the PDF
    with the named destinations (and optional /Title) embedded.
    """

    def ps_escape( val ):
        """Escape a string so that it is safe inside a PostScript (...) literal."""
        # nb: the backslash must be escaped first, to avoid double-escaping
        for ch in ( "\\", "(", ")" ):
            val = val.replace( ch, "\\" + ch )
        return val

    # load the targets
    # nb: JSON files are UTF-8; don't rely on the platform default encoding
    with open( targets_fname, "r", encoding="utf-8" ) as fp:
        targets = json.load( fp )

    with TempFile( mode="w" ) as temp_file:

        # generate the pdfmarks
        print( "Generating the pdfmarks..." )
        if title:
            # nb: escape the title, since "(" / ")" / "\" would break the PostScript string
            print( "[ /Title ({})".format( ps_escape(title) ), file=temp_file )
        else:
            print( "[", file=temp_file )
        print( " /DOCINFO pdfmark", file=temp_file )
        print( file=temp_file )
        for ruleid, target in targets.items():
            xpos, ypos = target["pos"]
            print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
                ruleid, target["page_no"], xpos, ypos+yoffset
            ), file=temp_file )
        print( file=temp_file )
        # nb: close the file so Ghostscript can read it, but keep it on disk
        temp_file.close( delete=False )

        # generate the pdfmark'ed document
        print( "Generating the pdfmark'ed document..." )
        print( "- {} => {}".format( pdf_file, output_fname ) )
        args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ]
        args.extend( [ "-o", output_fname ] )
        args.extend( [ "-f", pdf_file ] )
        args.append( temp_file.name )
        start_time = time.time()
        subprocess.run( args, check=True )
        elapsed_time = time.time() - start_time
        print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
# script entry point
if __name__ == "__main__":
    main() #pylint: disable=no-value-for-parameter
Loading…
Reference in new issue