parent
9d2495aa64
commit
b387871bbe
@ -0,0 +1,25 @@ |
||||
{ |
||||
|
||||
"_comment_": "This file defines search aliases.", |
||||
"_comment_": "A key that appears in a query string will match itself or any of its associated values.", |
||||
"_comment_": " e.g. searching for 'entrenchments' will actually search for 'entrenchments OR foxhole OR trench OR ditch'", |
||||
"_comment_": "These differ from search synonyms in that only the key word will trigger the replacement, not any word from the set.", |
||||
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||
|
||||
"latw": [ |
||||
"atmm", "atr", "baz", "mol-p", "mol-projector", "piat", "pf", "pfk", "psk" |
||||
], |
||||
"fortification/fortifications": [ |
||||
"cave", "a-t ditch", "foxhole", "sangar", "trench", "bunker", "minefield", "mines", "booby trap", "panji", "pillbox", "roadblock", "tetrahedron", "wire" |
||||
], |
||||
"entrenchment/entrenchments": [ |
||||
"foxhole", "trench", "ditch" |
||||
], |
||||
"vehicle/vehicles": [ |
||||
"tank", "halftrack", "half-track", "jeep", "carrier" |
||||
], |
||||
"illumination": [ |
||||
"tarshell", "illuminating round", "trip flare" |
||||
] |
||||
|
||||
} |
@ -0,0 +1,14 @@ |
||||
{ |
||||
|
||||
"_comment_": "This file defines search replacements.", |
||||
"_comment_": "Keys that appear in a query string will be replaced by the value.", |
||||
"_comment_": " e.g. searching for '1/2 MF' will actually search for '½ MF'", |
||||
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||
|
||||
"1/2": "½", |
||||
"3/4": "¾", |
||||
"3/8": "⅜", |
||||
"5/8": "⅝", |
||||
"(r)": "®" |
||||
|
||||
} |
@ -0,0 +1,51 @@ |
||||
[ |
||||
|
||||
"This file defines search synonyms.", |
||||
"If a word appears in a query string, it will match any of the words in its set.", |
||||
" e.g. searching for 'finn gun' will actually search for '(finn OR finnish) AND gun'", |
||||
"These differ from search aliases in that any word from a set will trigger the replacement.", |
||||
"A user-defined version of this file in the data directory will also be loaded.", |
||||
|
||||
[ "u.s.", "america", "american" ], |
||||
[ "usmc", "marine" ], |
||||
[ "finn", "finnish" ], |
||||
[ "romania", "romanian" ], |
||||
[ "hungary", "hungarian" ], |
||||
[ "slovakia", "slovakian" ], |
||||
[ "croatia", "croatian" ], |
||||
[ "bulgaria", "bulgarian" ], |
||||
|
||||
[ "dc", "demo charge", "demolition charge" ], |
||||
[ "ft", "flamethrower", "flame-thrower" ], |
||||
[ "baz", "bazooka" ], |
||||
[ "pf", "panzerfaust" ], |
||||
[ "psk", "panzerschreck" ], |
||||
[ "wp", "white phosphorus" ], |
||||
[ "mol", "molotov cocktail" ], |
||||
[ "ovr", "overrun" ], |
||||
[ "cc", "close combat" ], |
||||
[ "thh", "t-h hero", "tank-hunter hero" ], |
||||
[ "scw", "shaped-charge weapon" ], |
||||
[ "sw", "support weapon" ], |
||||
[ "mg", "machinegun", "machine-gun", "machine gun" ], |
||||
[ "firelane", "fire-lane", "fire lane" ], |
||||
[ "firegroup", "fire-group", "fire group" ], |
||||
[ "lc", "landing craft" ], |
||||
[ "ht", "halftrack", "half-track" ], |
||||
[ "wa", "wall advantage" ], |
||||
[ "hob", "heat of battle" ], |
||||
[ "cg", "campaign game" ], |
||||
[ "pbm", "pbem" ], |
||||
|
||||
[ "rb", "red barricades" ], |
||||
[ "votg", "valor of the guards" ], |
||||
[ "kgp", "kampfgruppe peiper" ], |
||||
[ "kgs", "kampfgruppe scherer" ], |
||||
[ "brt", "br:t", "blood reef tarawa" ], |
||||
[ "pb", "pegasus bridge" ], |
||||
|
||||
[ "ammo", "ammunition" ], |
||||
[ "armor", "armour" ], |
||||
[ "color", "colour" ] |
||||
|
||||
] |
@ -0,0 +1,475 @@ |
||||
""" Manage the search engine. """ |
||||
|
||||
import os |
||||
import sqlite3 |
||||
import json |
||||
import re |
||||
import itertools |
||||
import string |
||||
import tempfile |
||||
import logging |
||||
import traceback |
||||
|
||||
from flask import request, jsonify |
||||
|
||||
from asl_rulebook2.utils import plural |
||||
from asl_rulebook2.webapp import app |
||||
from asl_rulebook2.webapp import content as webapp_content |
||||
from asl_rulebook2.webapp.utils import make_config_path, make_data_path |
||||
|
||||
_sqlite_path = None |
||||
_fts_index_entries= None |
||||
|
||||
_logger = logging.getLogger( "search" ) |
||||
|
||||
# these markers are wrapped around search matches (nb: the front-end looks for these)
_BEGIN_HIGHLIGHT = "!@:"
_END_HIGHLIGHT = ":@!"

def _make_fixup_regex( pattern, repl ):
    """Build a [ compiled regex, replacement ] pair.

    The "{}" placeholders in both templates are filled in with the begin/end
    highlight markers (in that order).
    """
    return [
        re.compile( pattern.format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ) ),
        repl.format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ),
    ]

# NOTE: These regex's fix up content returned to us by the SQLite search engine (typically problems
# with highlighting search terms). In the examples, (( and )) stand for the highlight markers.
_FIXUP_TEXT_REGEXES = [
    # HTML entities e.g. &((frac12)); -> ((&frac12;))
    _make_fixup_regex( r"&{}(.+?){};", r"{}&\g<1>;{}" ),
    # a trailing hash e.g. ((TH))# -> ((TH#))
    _make_fixup_regex( r"{}(.+?){}#", r"{}\g<1>#{}" ),
    # a trailing period e.g. ((U.S)). -> ((U.S.))
    _make_fixup_regex( r"{}U\.S{}\.", "{}U.S.{}" ),
]
||||
|
||||
# these are used to separate ruleref's in the FTS table (internal use only) |
||||
_RULEREF_SEPARATOR = "-:-" |
||||
|
||||
_SEARCH_TERM_ADJUSTMENTS = None |
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
@app.route( "/search", methods=["POST"] )
def search() :
    """Run a search.

    The search parameters are taken from the POST'ed form data. The response is
    whatever _do_search() returns or, if anything goes wrong, a JSON object that
    contains the error message (under the key "error").
    """

    # log the request
    _logger.info( "SEARCH REQUEST:" )
    args = dict( request.form.items() )
    for key,val in args.items():
        _logger.info( "- %s: %s", key, val )

    # run the search
    try:
        return _do_search( args )
    except Exception as exc: #pylint: disable=broad-except
        msg = str( exc )
        # NOTE: We remove the whole prefix (including the trailing space), so that
        # the error message doesn't start with a stray space.
        prefix = "fts5: "
        if msg.startswith( prefix ):
            msg = msg[ len(prefix) : ] # nb: this is a sqlite3.OperationalError
        _logger.warning( "SEARCH ERROR: %s\n%s", args, traceback.format_exc() )
        return jsonify( { "error": msg } )
||||
|
||||
def _do_search( args ):
    """Run a search and return the results as a JSON response.

    "args" holds the request parameters; only "queryString" is read here.
    Errors (e.g. a malformed FTS query) are raised as exceptions, for the caller to handle.
    """

    def fixup_text( val ):
        """Apply the fixup regex's to a value returned by SQLite."""
        if val is None:
            return None
        for regex, repl in _FIXUP_TEXT_REGEXES:
            val = regex.sub( repl, val )
        return val

    def get_col( sr, key, val ):
        """Add a column value to a search result (empty values are left out)."""
        if val:
            sr[key] = fixup_text( val )

    def highlight( n ):
        # NOTE: highlight() is an FTS extension function, and takes column numbers :-/
        return "highlight(searchable,{},'{}','{}')".format( n, _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )

    # prepare the query
    query_string = args[ "queryString" ].strip()
    if query_string == "!:simulated-error:!":
        raise RuntimeError( "Simulated error." ) # nb: for the test suite
    fts_query_string, search_terms = _make_fts_query_string( query_string )
    _logger.debug( "FTS query string: %s", fts_query_string )
    sql = "SELECT rowid,doc_id,sr_type,rank,{},{},{},{} FROM searchable".format(
        highlight(2), highlight(3), highlight(4), highlight(5)
    )
    sql += " WHERE searchable MATCH ?"
    sql += " ORDER BY rank"

    # run the search
    # NOTE: We read all the rows straight away, so that the connection always gets closed
    # (even if the query fails), instead of being left open after every request.
    conn = sqlite3.connect( _sqlite_path )
    try:
        rows = conn.execute( sql,
            ( "{title subtitle content rulerefs}: " + fts_query_string, )
        ).fetchall()
    finally:
        conn.close()

    # get the results
    # NOTE: Each row is: rowid, doc_id, sr_type, rank, then the highlighted title, subtitle,
    # content and rulerefs.
    results = []
    for row in rows:
        if row[2] != "index":
            _logger.error( "Unknown searchable row type (rowid=%d): %s", row[0], row[2] )
            continue
        index_entry = _fts_index_entries[ row[0] ]
        result = {
            "doc_id": row[1],
            "sr_type": row[2],
            "_score": - row[3],
        }
        get_col( result, "title", row[4] )
        get_col( result, "subtitle", row[5] )
        get_col( result, "content", row[6] )
        if index_entry.get( "ruleids" ):
            result["ruleids"] = index_entry["ruleids"]
        if index_entry.get( "see_also" ):
            result["see_also"] = index_entry["see_also"]
        # the ruleref captions were stored as a single string - split them up again,
        # and match them up with the ruleref's in the original index entry
        rulerefs = [ r.strip() for r in row[7].split(_RULEREF_SEPARATOR) ] if row[7] else []
        assert len(rulerefs) == len(index_entry.get("rulerefs",[]))
        if rulerefs:
            result[ "rulerefs" ] = []
            for i, ruleref in enumerate(rulerefs):
                ruleref2 = {}
                if "caption" in index_entry["rulerefs"][i]:
                    assert ruleref.replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ) \
                           == index_entry["rulerefs"][i]["caption"]
                    ruleref2["caption"] = fixup_text( ruleref )
                if "ruleids" in index_entry["rulerefs"][i]:
                    ruleref2["ruleids"] = index_entry["rulerefs"][i]["ruleids"]
                assert ruleref2
                result["rulerefs"].append( ruleref2 )
        results.append( result )

    # fixup the results
    results = _fixup_results_for_hash_terms( results, search_terms )

    # adjust the sort order
    results = _adjust_sort_order( results )

    # return the results
    _logger.debug( "Search results:" if results else "Search results: none" )
    for result in results:
        # NOTE: The title is optional, so we can't assume it's there.
        _logger.debug( "- %s (%.3f)",
            result.get( "title", "" ).replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ),
            result["_score"]
        )
    return jsonify( results )
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||
|
||||
# NOTE: A query string that matches any of these (i.e. it contains an upper-case boolean operator,
# or an opening parenthesis that isn't the start of "(r)" or "(R)") is treated as a raw FTS query.
PASSTHROUGH_REGEXES = {
    re.compile( pattern )
    for pattern in ( r"\bAND\b", r"\bOR\b", r"\bNOT\b", r"\((?![Rr]\))" )
}
||||
|
||||
def _make_fts_query_string( query_string ):
    """Generate the SQLite query string.

    SQLite's MATCH function recognizes a lot of special characters, which need
    to be enclosed in double-quotes to disable.

    Returns a ( fts_query_string, terms ) tuple. "terms" is the list of search terms that
    were AND'ed together (an entry is itself a list if the term was expanded into alternatives
    that get OR'ed together), or None if the query string was passed through as a raw FTS query.
    """

    # check if this looks like a raw FTS query
    if any( regex.search(query_string) for regex in PASSTHROUGH_REGEXES ):
        return query_string.strip(), None

    # split the search string into words (taking quoted phrases into account)
    ignore = app.config.get( "SQLITE_FTS_IGNORE_CHARS", ",;!?$" )
    query_string = "".join( ch for ch in query_string if ch not in ignore )
    terms = query_string.lower().split()

    def is_open_quote( term ):
        """Check if a term starts a quoted phrase that hasn't been closed yet."""
        if not term.startswith( '"' ):
            return False
        # NOTE: A quoted single word (e.g. "foo") is already complete, and mustn't
        # swallow the words that follow it.
        return not ( len(term) > 1 and term.endswith( '"' ) )

    i = 1
    while i < len(terms):
        if is_open_quote( terms[i-1] ):
            # the previous term is an unfinished quoted phrase - append this term to it
            terms[i-1] += " {}".format( terms[i] )
            del terms[i]
            if terms[i-1].endswith( '"' ):
                # the quoted phrase is now complete - remove the quotes
                terms[i-1] = terms[i-1][1:-1]
        else:
            i += 1

    # clean up quoted phrases
    terms = [ t[1:] if t.startswith('"') else t for t in terms ]
    terms = [ t[:-1] if t.endswith('"') else t for t in terms ]
    terms = [ t.strip() for t in terms ]
    terms = [ t for t in terms if t ]

    # adjust search terms
    # NOTE: If the search config hasn't been loaded, there are no adjustments to make.
    adjustments = _SEARCH_TERM_ADJUSTMENTS or {}
    for term_no, term in enumerate(terms):
        aliases = adjustments.get( term )
        if not aliases:
            continue
        if isinstance( aliases, str ):
            # the search term is replaced by a new one
            terms[ term_no ] = aliases
        elif isinstance( aliases, set ):
            # the search term is replaced by multiple new ones (that will be OR'ed together)
            # NOTE: We sort the terms so that the tests will work reliably.
            terms[ term_no ] = sorted( aliases )
        else:
            # NOTE: We raise explicitly, since asserting a (non-empty) message string always passes.
            raise AssertionError( "Unknown search alias type: {}".format( type(aliases) ) )

    # fixup each term
    def has_special_char( term ):
        """Check if the term contains any special characters."""
        for ch in term:
            if ch in "*":
                continue
            if ch.isspace() or ch in string.punctuation:
                return True
            if ord(ch) < 32 or ord(ch) > 127:
                return True
        return False
    def fixup_terms( terms ):
        """Fixup a list of terms."""
        for term_no, term in enumerate(terms):
            if isinstance( term, str ):
                if has_special_char( term ):
                    # NOTE: A double-quote inside an FTS string is escaped by doubling it.
                    terms[term_no] = '"{}"'.format( term.replace( '"', '""' ) )
            else:
                fixup_terms( term )
    fixup_terms( terms )

    # return the final FTS query string
    def term_string( term ):
        """Return a term as it should appear in the FTS query string."""
        if isinstance( term, str ):
            return term
        assert isinstance( term, list )
        return "( {} )".format( " OR ".join( term ) )
    return " AND ".join( term_string(t) for t in terms ), terms
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||
|
||||
def _fixup_results_for_hash_terms( results, search_terms ):
    """Remove search results that were only matched because of a search term ending with a hash.

    SQLite doesn't handle search terms that end with a hash particularly well.
    We correct highlighted search terms in fixup_text(), but searching for e.g. "US#"
    will also match "use" and "using" - such results are removed here.

    Returns the search results that should be kept (the list is returned as-is
    if none of the search terms end with a hash).
    """

    # check if there is anything to do
    if not search_terms:
        return results

    # figure out which search terms end with a hash
    # NOTE: We don't bother descending down into sub-terms.
    hash_terms = []
    for term in search_terms:
        if not isinstance( term, str ):
            continue
        if not ( term.startswith( '"' ) and term.endswith( '"' ) ):
            continue
        unquoted = term[1:-1]
        if unquoted.endswith( "#" ):
            hash_terms.append( unquoted[:-1].lower() )
    if not hash_terms:
        return results
    if "us" in hash_terms:
        hash_terms.extend( [ "use", "used", "using", "user" ] )

    # these are the incorrectly matched search terms (e.g. ((K)) when searching for "K#")
    wrong_hilites = [
        "{}{}{}".format( _BEGIN_HIGHLIGHT, term, _END_HIGHLIGHT )
        for term in hash_terms
    ]

    def has_other_hilites( sr ):
        """Check if a search result still has highlighted terms, once the incorrect ones are removed."""
        buf = json.dumps( sr ).lower()
        for hilite in wrong_hilites:
            buf = buf.replace( hilite, "_removed_" )
        return _BEGIN_HIGHLIGHT in buf

    return [ sr for sr in results if has_other_hilites( sr ) ]
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||
|
||||
def _adjust_sort_order( results ):
    """Adjust the sort order of the search results.

    Search results are moved to the front of the list, in this order of preference:
    - the title is an exact match
    - the title starts with a match
    - the title has a match somewhere
    - the subtitle has a match
    Anything else follows, and the existing order is kept within each group.

    NOTE: Preferred search results are removed from the list that was passed in.
    """

    preferred = []

    def title_of( sr ):
        return sr.get( "title" ) or ""

    def is_exact_title_match( sr ):
        title = title_of( sr )
        return title.startswith( _BEGIN_HIGHLIGHT ) and title.endswith( _END_HIGHLIGHT )

    def is_title_prefix_match( sr ):
        return title_of( sr ).startswith( _BEGIN_HIGHLIGHT )

    def is_title_match( sr ):
        return _BEGIN_HIGHLIGHT in title_of( sr )

    def is_subtitle_match( sr ):
        return _BEGIN_HIGHLIGHT in ( sr.get( "subtitle" ) or "" )

    def promote( is_preferred ):
        """Move the search results that pass the filter function to the preferred list."""
        remaining = []
        for sr in results:
            # NOTE: We never prefer small entries (i.e. those that have no ruleref's)
            # e.g. those that only contain a "see also".
            if is_preferred( sr ) and len( sr.get( "rulerefs", [] ) ) > 0:
                preferred.append( sr )
            else:
                remaining.append( sr )
        results[:] = remaining

    # extract the preferred search results (most preferred first)
    for is_preferred in ( is_exact_title_match, is_title_prefix_match, is_title_match, is_subtitle_match ):
        promote( is_preferred )

    # include any remaining search results
    preferred.extend( results )

    return preferred
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def init_search( logger ):
    """Initialize the search engine.

    This (re)builds the search index from the index entries of every content doc
    in webapp_content.content_docs, remembers each index entry by its FTS rowid,
    then loads the search config. Progress is reported via the specified logger.
    """

    # initialize
    global _fts_index_entries, _sqlite_path
    _fts_index_entries = {}

    # initialize the database
    _sqlite_path = app.config.get( "SQLITE_PATH" )
    if not _sqlite_path:
        # FUDGE! We should be able to create a shared, in-memory database using this:
        #   file::XYZ:?mode=memory&cache=shared
        # but it doesn't seem to work (on Linux) and ends up creating a file with this name :-/
        # We manually create a temp file, which has to have the same name each time, so that we don't
        # keep creating a new database each time we start up. Sigh...
        _sqlite_path = os.path.join( tempfile.gettempdir(), "asl-rulebook2.searchdb" )
        if os.path.isfile( _sqlite_path ):
            os.unlink( _sqlite_path )
    logger.info( "Creating the search index: %s", _sqlite_path )
    conn = sqlite3.connect( _sqlite_path )
    try:
        # NOTE: Storing everything in a single table allows FTS to rank search results based on
        # the overall content, and also lets us do AND/OR queries across all searchable content.
        # NOTE: A database at a configured SQLITE_PATH is not deleted above, so the table
        # may already exist from a previous run (we empty it out below).
        conn.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS searchable USING fts5"
            " ( doc_id, sr_type, title, subtitle, content, rulerefs, tokenize='porter unicode61' )"
        )

        # load the searchable content
        logger.info( "Loading the search index..." )
        conn.execute( "DELETE FROM searchable" )
        curs = conn.cursor()
        for cdoc in webapp_content.content_docs.values():
            logger.info( "- Loading index file: %s", cdoc["_fname"] )
            nrows = 0
            for index_entry in cdoc["index"]:
                # NOTE: The ruleref captions are stored as a single string (a ruleref that
                # has no caption is stored as an empty string, to keep the positions in sync).
                rulerefs = _RULEREF_SEPARATOR.join( r.get("caption","") for r in index_entry.get("rulerefs",[]) )
                # NOTE: We should really strip content before adding it to the search index, otherwise any HTML tags
                # will need to be included in search terms. However, this means that the content returned by a query
                # will be this stripped content. We could go back to the original data to get the original HTML content,
                # but that means we would lose the highlighting of search terms that SQLite gives us. We opt to insert
                # the original content, since none of it should contain HTML, anyway.
                curs.execute(
                    "INSERT INTO searchable (doc_id,sr_type,title,subtitle,content,rulerefs) VALUES (?,?,?,?,?,?)", (
                        cdoc["doc_id"], "index",
                        index_entry.get("title"), index_entry.get("subtitle"), index_entry.get("content"), rulerefs
                ) )
                _fts_index_entries[ curs.lastrowid ] = index_entry
                index_entry["_fts_rowid"] = curs.lastrowid
                nrows += 1
            conn.commit()
            logger.info( " - Loaded %s.", plural(nrows,"index entry","index entries"), )
            assert len(_fts_index_entries) == _get_row_count( conn, "searchable" )
    finally:
        # NOTE: Searches open their own connection, so we don't need to keep this one open.
        conn.close()

    # load the search config
    load_search_config( logger )
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||
|
||||
def load_search_config( logger ):
    """Load the search config.

    This rebuilds the table of search term adjustments from the search replacements,
    search aliases and search synonyms files (in that order). Each file is looked for
    in the config directory, then in the data directory (files that don't exist are skipped).
    Progress is reported via the specified logger.
    """

    # initialize
    global _SEARCH_TERM_ADJUSTMENTS
    _SEARCH_TERM_ADJUSTMENTS = {}

    def add_search_term_adjustment( key, vals ):
        """Register an adjustment for a search term.

        "vals" is either a string (that will replace the search term), or a set
        of strings (alternatives that will be OR'ed together).
        """
        # make sure everything is lower-case
        key = key.lower()
        if isinstance( vals, str ):
            vals = vals.lower()
        elif isinstance( vals, set ):
            vals = set( v.lower() for v in vals )
        else:
            # NOTE: We raise explicitly, since asserting a (non-empty) message string always passes.
            raise AssertionError( "Unknown search alias type: {}".format( type(vals) ) )
        # add the new search term adjustment
        if key not in _SEARCH_TERM_ADJUSTMENTS:
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
            return
        # found a multiple definition - try to do something sensible
        current = _SEARCH_TERM_ADJUSTMENTS[ key ]
        logger.warning( " - Duplicate search alias: %s\n- current aliases = %s\n- new aliases = %s", key,
            current, vals
        )
        if isinstance( current, set ) and isinstance( vals, set ):
            current.update( vals )
        else:
            # NOTE: We can only merge two sets (updating a set with a string would add
            # each of its characters), so in every other case, the new definition wins.
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals

    # load the search replacements
    def load_search_replacements( fname ):
        """Load a file that maps search terms to their replacements."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search replacements: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for key, val in data.items():
            if key.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", key, val )
            add_search_term_adjustment( key, val )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search replacement","search replacements") )
    load_search_replacements( make_config_path( "search-replacements.json" ) )
    load_search_replacements( make_data_path( "search-replacements.json" ) )

    # load the search aliases
    def load_search_aliases( fname ):
        """Load a file that maps keys (separated by "/") to their aliases."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search aliases: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for keys, aliases in data.items():
            if keys.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", keys, " ; ".join(aliases) )
            for key in keys.split( "/" ):
                # NOTE: Each key matches itself, as well as its aliases.
                add_search_term_adjustment( key, set( itertools.chain( aliases, [key] ) ) )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search alias","search aliases") )
    load_search_aliases( make_config_path( "search-aliases.json" ) )
    load_search_aliases( make_data_path( "search-aliases.json" ) )

    # load the search synonyms
    def load_search_synonyms( fname ):
        """Load a file that contains sets of synonyms."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search synonyms: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for synonyms in data:
            if isinstance( synonyms, str ):
                continue # nb: ignore comments
            logger.debug( "- %s", " ; ".join(synonyms) )
            synonyms = set( synonyms )
            for term in synonyms:
                # NOTE: Every word in the set triggers a search for the whole set.
                add_search_term_adjustment( term, synonyms )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search synonym","search synonyms") )
    load_search_synonyms( make_config_path( "search-synonyms.json" ) )
    load_search_synonyms( make_data_path( "search-synonyms.json" ) )
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def _get_row_count( conn, table_name ):
    """Return the number of rows in a table.

    NOTE: The table name is inserted directly into the SQL, so it must come from a trusted source.
    """
    sql = "SELECT count(*) FROM {}".format( table_name )
    row = conn.execute( sql ).fetchone()
    return row[0]
@ -1,23 +1,80 @@ |
||||
import { gMainApp } from "./MainApp.js" ; |
||||
import { gMainApp, gEventBus, gContentDocs } from "./MainApp.js" ; |
||||
import { fixupSearchHilites } from "./utils.js" ; |
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Holds a search result of type "index" (a key, plus the content to show).
export class IndexSearchResult {
    constructor( key, content ) {
        // NOTE: The properties are set in this order: key, srType, content.
        Object.assign( this, { key: key, srType: "index", content: content } ) ;
    }
}
||||
|
||||
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
// Shows a search result for an index entry: an optional title/subtitle, followed by
// the content, any "see also" entries, the ruleid's and the ruleref's.
gMainApp.component( "index-sr", {

    props: [ "sr" ],

    // NOTE: The ruleref's are keyed by their position in the list, since a v-for key
    // should be a primitive value (not the ruleref object itself).
    template: `
<div class="sr index-sr" >
    <div v-if="sr.title || sr.subtitle" class="title" >
        <span v-if=sr.title class="title" v-html=sr.title />
        <span v-if=sr.subtitle class="subtitle" v-html=sr.subtitle />
    </div>
    <div class="body">
        <div v-if=sr.content class="content" v-html=sr.content />
        <div v-if=makeSeeAlso v-html=makeSeeAlso class="see-also" />
        <div v-if=sr.ruleids class="ruleids" >
            <ruleid v-for="rid in sr.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
        </div>
        <ul v-if=sr.rulerefs class="rulerefs" >
            <li v-for="(rref, rrefNo) in sr.rulerefs" :key=rrefNo >
                <span v-if=rref.caption class="caption" v-html=fixupHilites(rref.caption) />
                <ruleid v-for="rid in rref.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
            </li>
        </ul>
    </div>
</div>`,

    computed: {
        // Returns the "see also" line to show (or null if there are no "see also" entries).
        makeSeeAlso() {
            if ( this.sr.see_also )
                return "See also: " + this.sr.see_also.join( ", " ) ;
            return null ;
        },
    },

    methods: {
        // Passes a value through fixupSearchHilites(), so that it can be used in the template.
        fixupHilites( val ) {
            return fixupSearchHilites( val ) ;
        },
    },

} ) ;
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Shows a ruleid; it becomes a clickable link if it refers to a known target in its content doc.
gMainApp.component( "ruleid", {

    props: [ "docId", "ruleId" ],
    data() { return {
        target: null, // nb: the ruleid to show when clicked (null if it's not a known target)
    } ; },

    template: `<span class="ruleid" v-bind:class="{unknown:!target}">[<a v-if=target @click=onClick>{{ruleId}}</a><span v-else>{{ruleId}}</span>]</span>`,

    created() {
        // figure out which rule is being referenced
        // NOTE: For ruleid's of the form "A12.3-.4", we want to target "A12.3".
        const targetRuleId = this.ruleId.split( "-" )[0] ;
        // check if the rule is one we know about
        const contentDoc = gContentDocs[ this.docId ] ;
        const targets = contentDoc && contentDoc.targets ;
        if ( targets && targets[ targetRuleId ] )
            this.target = targetRuleId ;
    },

    methods: {
        onClick() {
            // ask for the target to be shown
            gEventBus.emit( "show-target", this.docId, this.target ) ;
        },
    },

} ) ;
||||
|
@ -1 +1,13 @@ |
||||
#search-results .sr { margin: 0 10px 2px 0 ; border: 1px dotted #666 ; padding: 5px ; } |
||||
#search-results .sr { margin: 0 10px 2px 0 ; padding: 5px ; } |
||||
#search-results .sr .hilite { padding: 0 2px ; background: #ffa ; } |
||||
|
||||
#search-results .index-sr .title { background: #e0e0e0 ; border-bottom: 1px solid #ccc ; padding: 2px 5px ; font-weight: bold ; } |
||||
#search-results .index-sr .subtitle { padding: 2px 5px ; font-weight: normal ; font-size: 80% ; font-style: italic ; } |
||||
#search-results .index-sr .body { padding: 2px 5px 0 5px ; font-size: 80% ; } |
||||
#search-results .index-sr .content { color: #444 ; } |
||||
#search-results .index-sr .see-also { color: #444 ; } |
||||
#search-results .index-sr ul.rulerefs { margin-left: 1.2em ; } |
||||
#search-results .index-sr ul.rulerefs .caption { padding-right: 0.5em ; } |
||||
#search-results .index-sr .ruleid { margin-right: 0.25em ; font-style: italic ; color: #444 ; } |
||||
#search-results .index-sr .ruleid.unknown { color: #888 ; } |
||||
#search-results .index-sr .ruleid a { cursor: pointer ; } |
||||
|
Binary file not shown.
@ -1,15 +1,15 @@ |
||||
{ |
||||
|
||||
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,702] }, |
||||
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,404] }, |
||||
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72.97] }, |
||||
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,718] }, |
||||
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,503] }, |
||||
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72,292] }, |
||||
|
||||
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,702] }, |
||||
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72.404] }, |
||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,97] }, |
||||
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,718] }, |
||||
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72,503] }, |
||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,292] }, |
||||
|
||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 3, "pos": [72,702] }, |
||||
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,404] }, |
||||
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,97] } |
||||
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,718] }, |
||||
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,503] }, |
||||
"D1.4": { "caption": "IDENTITY & GROUND PRESSURE", "page_no": 3, "pos": [72,292] } |
||||
|
||||
} |
||||
|
@ -0,0 +1,298 @@ |
||||
""" Test search. """ |
||||
|
||||
import re |
||||
import logging |
||||
|
||||
from selenium.webdriver.common.keys import Keys |
||||
|
||||
from asl_rulebook2.utils import strip_html |
||||
from asl_rulebook2.webapp.search import load_search_config, _make_fts_query_string |
||||
from asl_rulebook2.webapp.tests.utils import init_webapp, select_tabbed_page, get_classes, \ |
||||
wait_for, find_child, find_children |
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def test_search( webapp, webdriver ):
    """Test running searches, and the search results that come back."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # test a search that finds nothing
    assert _do_search( "oogah, boogah!" ) is None

    # test error handling
    assert "Simulated error." in _do_search( "!:simulated-error:!" )

    # do a search
    ccph_entry = {
        "sr_type": "index",
        "title": "CCPh", "subtitle": "Close Combat Phase",
        "ruleids": [ "A3.8" ],
        "rulerefs": [
            { "caption": "((ENEMY)) Attacks", "ruleids": [ "S11.5" ] },
            { "caption": "dropping SW before CC", "ruleids": [ "A4.43" ] },
        ]
    }
    double_time_entry = {
        "sr_type": "index",
        "title": "Double Time",
        "content": "Also known as \"running really fast.\"",
        "see_also": [ "CX" ],
        "ruleids": [ "A4.5-.51", "S6.222" ],
        "rulerefs": [
            { "caption": "((ENEMY)) Guard Automatic Action", "ruleids": [ "S6.303" ] },
            { "ruleids": [ "C10.3" ] },
            { "caption": "NA in Advance Phase", "ruleids": [ "A4.7" ] },
            { "caption": "'S?' is \"<NA>\"" },
        ]
    }
    assert _do_search( "enemy" ) == [ ccph_entry, double_time_entry ]

    # do another search
    gaps_entry = {
        "sr_type": "index",
        "title": "((Gaps)), Convoy",
        "ruleids": [ "E11.21" ],
    }
    assert _do_search( "gap" ) == [ gaps_entry ]
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def test_content_fixup( webapp, webdriver ):
    """Test that content returned by the search engine gets fixed up."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # search for a fraction
    fraction_results = _do_search( "3/4" )
    assert len(fraction_results) == 1
    assert fraction_results[0]["content"] == "HTML content: 2((\u00be)) MP"

    # search for something that ends with a hash
    hash_results = _do_search( "H#" )
    assert len(hash_results) == 1
    assert hash_results[0]["title"] == "((H#))"

    # search for "U.S."
    us_results = _do_search( "U.S." )
    assert len(us_results) == 1
    assert us_results[0]["content"] == "The ((U.S.)) has lots of this."
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def test_targets( webapp, webdriver ):
    """Test that clicking on a ruleid in a search result shows its target."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver, no_content=1, add_empty_doc=1 )

    def do_test( query_string, sel, expected ):
        """Run a search, click on a ruleid, then check which target gets shown."""

        # select the dummy document
        select_tabbed_page( "#content", "empty" )

        # do the search
        _do_search( query_string )

        # click on a target
        find_child( "#search-results {}".format( sel ) ).click()

        def is_showing_target():
            # check the active tab
            active_tab = find_child( "#content .tab-strip .tab.active" )
            if active_tab.get_attribute( "data-tabid" ) != "simple":
                return False
            # check the current target
            content_doc = find_child( "#content .tabbed-page[data-tabid='simple'] .content-doc" )
            return content_doc.get_attribute( "data-target" ) == expected
        wait_for( 2, is_showing_target )

    # do the tests
    test_cases = [
        ( "CC", ".sr .ruleids .ruleid a", "A3.8" ),
        ( "time", ".sr .rulerefs .ruleid a", "A4.7" ),
    ]
    for query_string, sel, expected in test_cases:
        do_test( query_string, sel, expected )
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def test_make_fts_query_string():
    """Check the FTS query strings generated for a variety of user queries."""

    # initialize
    load_search_config( logging.getLogger("_unknown_") )

    # each test case is: ( query string entered by the user, expected FTS query string )
    test_cases = [

        # some query strings
        ( "", "" ),
        ( "hello", "hello" ),
        ( " hello, world! ", "hello AND world" ),
        ( "foo 1+2 A-T K# bar", 'foo AND "1+2" AND "a-t" AND "k#" AND bar' ),
        ( "a'b a''b", "\"a'b\" AND \"a''b\"" ),
        ( 'foo "set dc" bar', 'foo AND "set dc" AND bar' ),

        # some quoted phrases
        ( '""', '' ),
        ( ' " " ', '' ),
        ( '"hello world"', '"hello world"' ),
        ( ' foo "hello world" bar ', 'foo AND "hello world" AND bar' ),
        ( ' foo " xyz " bar ', 'foo AND xyz AND bar' ),
        ( ' foo " xyz 123 " bar ', 'foo AND "xyz 123" AND bar' ),

        # some incorrectly quoted phrases
        ( '"', '' ),
        ( ' " " " ', '' ),
        ( ' a "b c d e', 'a AND "b c d e"' ),
        ( ' a b" c d e ', 'a AND b AND c AND d AND e' ),

        # pass-through
        ( "AND", "AND" ),
        ( " OR", "OR" ),
        ( "OR ", "OR" ),
        ( "foo OR bar", "foo OR bar" ),
        ( "(a OR b)", "(a OR b)" ),

        # search replacements
        ( "1/2 3/4 3/8 5/8", '"½" AND "¾" AND "⅜" AND "⅝"' ),
        ( "(r)", '"®"' ),

        # search aliases (only the key word triggers the replacement)
        ( "entrenchment", "( ditch OR entrenchment OR foxhole OR trench )" ),
        ( "entrenchments", "( ditch OR entrenchments OR foxhole OR trench )" ),
        ( "foxhole", "foxhole" ),

        # search synonyms
        ( "armor", "( armor OR armour )" ),
        ( "american big armor", '( america OR american OR "u.s." ) AND big AND ( armor OR armour )' ),

    ]

    # check each query string (the 2nd value returned by the function is not checked here)
    for query, expected in test_cases:
        fts_query_string, _ = _make_fts_query_string( query )
        assert fts_query_string == expected
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def _do_search( query_string ):
    """Submit a search via the web UI and unload whatever comes back.

    Returns one of:
    - a string: the error message shown in the search results pane
    - None: the search ran, but nothing was found
    - a list: the search results (as unloaded by _unload_search_results())
    """

    def get_seq_no():
        """Get the sequence number currently stamped on the search results pane."""
        # NOTE: Selenium returns attribute values as strings (or None, if the attribute is missing).
        return find_child( "#search-results" ).get_attribute( "data-seqno" )

    # submit the search
    select_tabbed_page( "#nav", "search" )
    elem = find_child( "input#query-string" )
    elem.clear()
    elem.send_keys( query_string )
    seq_no = get_seq_no()
    elem.send_keys( Keys.RETURN )

    # wait for the search results to be updated
    # NOTE: We wait for the sequence number to *change*, rather than testing for "greater than",
    # since the values are strings and would be compared lexicographically (e.g. "10" > "9" is False),
    # and a missing attribute (None) can't be ordered at all.
    wait_for( 2, lambda: get_seq_no() != seq_no )

    # unload the results
    elem = find_child( "#search-results .error" )
    if elem:
        return elem.text # nb: string = error message
    elem = find_child( "#search-results .no-results" )
    if elem:
        assert elem.text == "Nothing was found."
        return None # nb: None = no results
    results = _unload_search_results()
    assert isinstance( results, list ) # nb: list = search results
    return results
||||
|
||||
def _unload_search_results():
    """Extract the search results currently shown in the web UI.

    Returns a list of dicts, one per search result, each tagged with its "sr_type".
    """

    def get_elem_text( elem ):
        """Get an element's text content, with highlighted text shown as ((...))."""
        html = elem.get_attribute( "innerHTML" )
        # change how highlighted content is represented
        html = re.sub(
            r'<span class="hilite">(.*?)</span>',
            lambda mo: "((" + mo.group(1) + "))",
            html
        )
        # remove HTML tags
        return strip_html( html.strip() )

    def unload_elem( result, key, elem ):
        """Store an element's text under the given key (returns True if something was stored)."""
        text = get_elem_text( elem ) if elem else None
        if not text:
            return False
        result[key] = text
        return True

    def unload_ruleids( result, key, parent ):
        """Store the list of ruleid's found under a parent element (if there are any)."""
        if not parent:
            return
        ruleids = []
        for ruleid_elem in find_children( ".ruleid", parent ):
            text = get_elem_text( ruleid_elem )
            # ruleid's are shown wrapped in square brackets
            assert text.startswith( "[" ) and text.endswith( "]" )
            ruleids.append( text[1:-1] )
        if ruleids:
            result[key] = ruleids

    def unload_rulerefs( result, key, parent ):
        """Store the list of ruleref's found under a parent element (if there are any)."""
        if not parent:
            return
        rulerefs = []
        for item in find_children( "li", parent ):
            ruleref = {}
            unload_elem( ruleref, "caption", find_child(".caption",item) )
            unload_ruleids( ruleref, "ruleids", item )
            rulerefs.append( ruleref )
        if rulerefs:
            result[key] = rulerefs

    def unload_index_sr( sr_elem ):
        """Unload an "index" search result."""
        result = {}
        for key, sel in [ ("title","span.title"), ("subtitle",".subtitle"), ("content",".content") ]:
            unload_elem( result, key, find_child(sel,sr_elem) )
        if unload_elem( result, "see_also", find_child(".see-also",sr_elem) ):
            # convert the "See also: ..." text into a list of entries
            prefix = "See also:"
            assert result["see_also"].startswith( prefix )
            entries = result["see_also"][ len(prefix) : ].split( "," )
            result["see_also"] = [ entry.strip() for entry in entries ]
        unload_ruleids( result, "ruleids", find_child(".ruleids",sr_elem) )
        unload_rulerefs( result, "rulerefs", find_child(".rulerefs",sr_elem) )
        return result

    # maps each search result type to the function that unloads it
    unloaders = {
        "index": unload_index_sr,
    }

    # unload the search results
    results = []
    for sr_elem in find_children( "#search-results .sr" ):
        # figure out what type of search result this is (from its "xxx-sr" CSS class)
        classes = get_classes( sr_elem )
        classes.remove( "sr" )
        assert len(classes) == 1 and classes[0].endswith( "-sr" )
        sr_type = classes[0][:-3]
        # unload the search result
        result = unloaders[ sr_type ]( sr_elem )
        result["sr_type"] = sr_type
        results.append( result )

    return results
@ -0,0 +1,66 @@ |
||||
#!/usr/bin/env python3 |
||||
""" Add named destinations to a PDF file. """ |
||||
|
||||
import subprocess |
||||
import json |
||||
import time |
||||
import datetime |
||||
|
||||
import click |
||||
|
||||
from asl_rulebook2.utils import TempFile |
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
def _escape_ps_string( val ):
    """Escape a value so that it can be safely embedded in a PostScript "(...)" string literal."""
    # NOTE: Backslashes must be escaped first, so that we don't double-escape the ones we add.
    return val.replace( "\\", "\\\\" ).replace( "(", "\\(" ).replace( ")", "\\)" )

@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ):
    """Add named destinations to a PDF file."""

    # load the targets
    # NOTE: This is a JSON file that maps each ruleid to a dict that has "page_no" and "pos" ([x,y]) entries.
    with open( targets_fname, "r", encoding="utf-8" ) as fp:
        targets = json.load( fp )

    with TempFile( mode="w" ) as temp_file:

        # generate the pdfmarks
        print( "Generating the pdfmarks..." )
        if title:
            # NOTE: The title goes into a PostScript string literal, so parentheses and backslashes
            # must be escaped, otherwise they could prematurely terminate (or corrupt) the string.
            print( "[ /Title ({})".format( _escape_ps_string( title ) ), file=temp_file )
        else:
            print( "[", file=temp_file )
        print( " /DOCINFO pdfmark", file=temp_file )
        print( file=temp_file )
        for ruleid, target in targets.items():
            xpos, ypos = target["pos"]
            print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
                ruleid, target["page_no"], xpos, ypos+yoffset
            ), file=temp_file )
        print( file=temp_file )
        # close the file, but keep it around, so that Ghostscript can read it
        temp_file.close( delete=False )

        # generate the pdfmark'ed document
        print( "Generating the pdfmark'ed document..." )
        print( "- {} => {}".format( pdf_file, output_fname ) )
        args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ]
        args.extend( [ "-o", output_fname ] )
        args.extend( [ "-f", pdf_file ] )
        args.append( temp_file.name ) # nb: the pdfmarks are processed after the input PDF
        start_time = time.time()
        subprocess.run( args, check=True ) # nb: raises CalledProcessError if Ghostscript fails
        elapsed_time = time.time() - start_time
        print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
||||
|
||||
# --------------------------------------------------------------------- |
||||
|
||||
# run the command-line tool, but only when this file is executed as a script
# NOTE: click supplies the function's arguments, by parsing the command line.
if __name__ == "__main__":
    main() #pylint: disable=no-value-for-parameter
Loading…
Reference in new issue