parent
9d2495aa64
commit
b387871bbe
@ -0,0 +1,25 @@ |
|||||||
|
{ |
||||||
|
|
||||||
|
"_comment_": "This file defines search aliases.", |
||||||
|
"_comment_": "Keys that appear in a query string will match itself or any of its associated values.", |
||||||
|
"_comment_": " e.g. searching for 'entrenchments' will actually search for 'entrenchments OR foxhole OR trench OR ditch'", |
||||||
|
"_comment_": "These differ from search synonyms in that only the key word will trigger the replacement, not any word from the set.", |
||||||
|
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
"latw": [ |
||||||
|
"atmm", "atr", "baz", "mol-p", "mol-projector", "piat", "pf", "pfk", "psk" |
||||||
|
], |
||||||
|
"fortification/fortifications/foritifcations": [ |
||||||
|
"cave", "a-t ditch", "foxhole", "sangar", "trench", "bunker", "minefield", "mines", "booby trap", "panji", "pillbox", "roadblock", "tetrahedron", "wire" |
||||||
|
], |
||||||
|
"entrenchment/entrenchments": [ |
||||||
|
"foxhole", "trench", "ditch" |
||||||
|
], |
||||||
|
"vehicle/vehicles": [ |
||||||
|
"tank", "halftrack", "half-track", "jeep", "carrier" |
||||||
|
], |
||||||
|
"illumination": [ |
||||||
|
"tarshell", "illuminating round", "trip flare" |
||||||
|
] |
||||||
|
|
||||||
|
} |
@ -0,0 +1,14 @@ |
|||||||
|
{ |
||||||
|
|
||||||
|
"_comment_": "This file defines search replacements.", |
||||||
|
"_comment_": "Keys that appear in a query string will be replaced by the value.", |
||||||
|
"_comment_": " e.g. searching for '1/2 MF' will actually search for '½ MF'", |
||||||
|
"_comment_": "A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
"1/2": "½", |
||||||
|
"3/4": "¾", |
||||||
|
"3/8": "⅜", |
||||||
|
"5/8": "⅝", |
||||||
|
"(r)": "®" |
||||||
|
|
||||||
|
} |
@ -0,0 +1,51 @@ |
|||||||
|
[ |
||||||
|
|
||||||
|
"This file defines search synonyms.", |
||||||
|
"If a word appears in a query string, it will match any of the words in its set.", |
||||||
|
" e.g. searching for 'finn gun' will actually search for '(finn OR finnish) AND gun'", |
||||||
|
"These differ from search aliases in that any word from a set will trigger the replacement.", |
||||||
|
"A user-defined version of this file in the data directory will also be loaded.", |
||||||
|
|
||||||
|
[ "u.s.", "america", "american" ], |
||||||
|
[ "usmc", "marine" ], |
||||||
|
[ "finn", "finnish" ], |
||||||
|
[ "romania", "romanian" ], |
||||||
|
[ "hungary", "hungarian" ], |
||||||
|
[ "slovakia", "slovakian" ], |
||||||
|
[ "croatia", "croatian" ], |
||||||
|
[ "bulgaria", "bulgarian" ], |
||||||
|
|
||||||
|
[ "dc", "demo charge", "demolition charge" ], |
||||||
|
[ "ft", "flamethrower", "flame-thrower" ], |
||||||
|
[ "baz", "bazooka" ], |
||||||
|
[ "pf", "panzerfaust" ], |
||||||
|
[ "psk", "panzerschreck", "panzershreck" ], |
||||||
|
[ "wp", "white phosphorous" ], |
||||||
|
[ "mol", "molotov cocktail" ], |
||||||
|
[ "ovr", "overrun" ], |
||||||
|
[ "cc", "close combat" ], |
||||||
|
[ "thh", "t-h hero", "tank-hunter hero" ], |
||||||
|
[ "scw", "shaped-charge weapon" ], |
||||||
|
[ "sw", "support weapon" ], |
||||||
|
[ "mg", "machinegun", "machine-gun", "machine gun" ], |
||||||
|
[ "firelane", "fire-lane", "fire lane" ], |
||||||
|
[ "firegroup", "fire-group", "fire group" ], |
||||||
|
[ "lc", "landing craft" ], |
||||||
|
[ "ht", "halftrack", "half-track" ], |
||||||
|
[ "wa", "wall advantage" ], |
||||||
|
[ "hob", "heat of battle" ], |
||||||
|
[ "cg", "campaign game" ], |
||||||
|
[ "pbm", "pbem" ], |
||||||
|
|
||||||
|
[ "rb", "red barricades" ], |
||||||
|
[ "votg", "valor of the guards" ], |
||||||
|
[ "kgp", "kampfgruppe peiper", "kampfgrupper peiper" ], |
||||||
|
[ "kgs", "kampfgruppe scherer", "kampfgrupper scherer" ], |
||||||
|
[ "brt", "br:t", "blood reef tarawa" ], |
||||||
|
[ "pb", "pegasus bridge" ], |
||||||
|
|
||||||
|
[ "ammo", "ammunition" ], |
||||||
|
[ "armor", "armour" ], |
||||||
|
[ "color", "colour" ] |
||||||
|
|
||||||
|
] |
@ -0,0 +1,475 @@ |
|||||||
|
""" Manage the search engine. """ |
||||||
|
|
||||||
|
import os |
||||||
|
import sqlite3 |
||||||
|
import json |
||||||
|
import re |
||||||
|
import itertools |
||||||
|
import string |
||||||
|
import tempfile |
||||||
|
import logging |
||||||
|
import traceback |
||||||
|
|
||||||
|
from flask import request, jsonify |
||||||
|
|
||||||
|
from asl_rulebook2.utils import plural |
||||||
|
from asl_rulebook2.webapp import app |
||||||
|
from asl_rulebook2.webapp import content as webapp_content |
||||||
|
from asl_rulebook2.webapp.utils import make_config_path, make_data_path |
||||||
|
|
||||||
|
# full path of the SQLite search database (set in init_search())
_sqlite_path = None
# maps FTS rowid's to their corresponding index entry (set in init_search())
_fts_index_entries = None

_logger = logging.getLogger( "search" )

# these are used to highlight search matches (nb: the front-end looks for these)
_BEGIN_HIGHLIGHT = "!@:"
_END_HIGHLIGHT = ":@!"

# NOTE: These regex's fix up content returned to us by the SQLite search engine (typically problems
# with highlighting search terms). Each entry is a [ compiled-regex, replacement ] pair, with the
# highlight markers substituted into both the pattern and the replacement string.
_FIXUP_TEXT_REGEXES = [
    [ re.compile( fixup[0].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ) ),
      fixup[1].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
    ]
    for fixup in [
        [ r"&{}(.+?){};", r"{}&\g<1>;{}" ], # HTML entities e.g. &((frac12)); -> (($frac12;))
        [ r"{}(.+?){}#", r"{}\g<1>#{}" ], # e.g. ((TH)# -> ((TH#)
        [ r"{}U\.S{}\.", "{}U.S.{}" ], # ((U.S)). -> ((U.S.))
    ]
]

# these are used to separate ruleref's in the FTS table (internal use only)
_RULEREF_SEPARATOR = "-:-"

# maps a search term to its adjustment: either a replacement string, or a set of
# aliases/synonyms that will be OR'ed together (set in load_search_config())
_SEARCH_TERM_ADJUSTMENTS = None
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
@app.route( "/search", methods=["POST"] )
def search():
    """Run a search (Flask entry point).

    The query parameters arrive in the POST form data; the search results
    (or an error message) are returned as JSON.
    """

    # log the request
    _logger.info( "SEARCH REQUEST:" )
    args = dict( request.form.items() )
    for key, val in args.items():
        _logger.info( "- %s: %s", key, val )

    # run the search
    try:
        return _do_search( args )
    except Exception as exc: #pylint: disable=broad-except
        # return the error to the front-end, so it can be shown to the user
        msg = str( exc )
        if msg.startswith( "fts5: " ):
            # nb: this is a sqlite3.OperationalError - strip the prefix
            # FIX: the original sliced msg[5:], which removed only 5 of the 6 prefix
            # characters and left a leading space in the user-visible message
            msg = msg[ len("fts5: ") : ]
        _logger.warning( "SEARCH ERROR: %s\n%s", args, traceback.format_exc() )
        return jsonify( { "error": msg } )
||||||
|
|
||||||
|
def _do_search( args ):
    """Run a search and return the results as a JSON response.

    args["queryString"] holds the raw query string; it is converted into an FTS
    query, run against the "searchable" table, and the matching index entries
    are returned with matches wrapped in _BEGIN_HIGHLIGHT/_END_HIGHLIGHT markers.
    """

    def fixup_text( val ):
        # fix up problems in highlighted content returned by the search engine
        if val is None:
            return None
        for regex in _FIXUP_TEXT_REGEXES:
            val = regex[0].sub( regex[1], val )
        return val

    # run the search
    query_string = args[ "queryString" ].strip()
    if query_string == "!:simulated-error:!":
        raise RuntimeError( "Simulated error." ) # nb: for the test suite
    fts_query_string, search_terms = _make_fts_query_string( query_string )
    _logger.debug( "FTS query string: %s", fts_query_string )
    conn = sqlite3.connect( _sqlite_path )
    def highlight( n ):
        # NOTE: highlight() is an FTS extension function, and takes column numbers :-/
        return "highlight(searchable,{},'{}','{}')".format( n, _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
    # nb: columns 2-5 are title/subtitle/content/rulerefs (see the CREATE TABLE in init_search())
    sql = "SELECT rowid,doc_id,sr_type,rank,{},{},{},{} FROM searchable".format(
        highlight(2), highlight(3), highlight(4), highlight(5)
    )
    sql += " WHERE searchable MATCH ?"
    sql += " ORDER BY rank"
    curs = conn.execute( sql,
        ( "{title subtitle content rulerefs}: " + fts_query_string, )
    )

    def get_col( sr, key, val ):
        # store a (fixed-up) column value in the search result, if it's non-empty
        if val:
            sr[key] = fixup_text( val )

    # get the results
    results = []
    for row in curs:
        if row[2] != "index":
            _logger.error( "Unknown searchable row type (rowid=%d): %s", row[0], row[2] )
            continue
        index_entry = _fts_index_entries[ row[0] ]
        result = {
            "doc_id": row[1],
            "sr_type": row[2],
            # nb: the SQLite rank is negated to give a score (presumably so that
            # higher = better - TODO confirm against the front-end's use of _score)
            "_score": - row[3],
        }
        get_col( result, "title", row[4] )
        get_col( result, "subtitle", row[5] )
        get_col( result, "content", row[6] )
        # carry over the ruleids/see-also info from the index entry
        if index_entry.get( "ruleids" ):
            result["ruleids"] = index_entry["ruleids"]
        if index_entry.get( "see_also" ):
            result["see_also"] = index_entry["see_also"]
        # rebuild the ruleref's, pairing each highlighted caption with its ruleids
        rulerefs = [ r.strip() for r in row[7].split(_RULEREF_SEPARATOR) ] if row[7] else []
        assert len(rulerefs) == len(index_entry.get("rulerefs",[]))
        if rulerefs:
            result[ "rulerefs" ] = []
            for i, ruleref in enumerate(rulerefs):
                ruleref2 = {}
                if "caption" in index_entry["rulerefs"][i]:
                    # nb: the FTS caption must equal the index entry's caption, modulo highlighting
                    assert ruleref.replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ) \
                        == index_entry["rulerefs"][i]["caption"]
                    ruleref2["caption"] = fixup_text( ruleref )
                if "ruleids" in index_entry["rulerefs"][i]:
                    ruleref2["ruleids"] = index_entry["rulerefs"][i]["ruleids"]
                assert ruleref2
                result["rulerefs"].append( ruleref2 )
        results.append( result )

    # fixup the results (remove false matches for search terms ending with "#", e.g. "US#" matching "use")
    results = _fixup_results_for_hash_terms( results, search_terms )

    # adjust the sort order (prefer matches in the title over subtitle/content matches)
    results = _adjust_sort_order( results )

    # return the results
    _logger.debug( "Search results:" if len(results) > 0 else "Search results: none" )
    for result in results:
        _logger.debug( "- %s (%.3f)",
            result["title"].replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ),
            result["_score"]
        )
    return jsonify( results )
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
# These regexes detect a query string that already looks like a raw FTS query
# (explicit boolean operators, or a parenthesized group), which is then passed
# through to the search engine as-is.
# NOTE: "(r)" / "(R)" is deliberately not treated as a group - presumably because it
# appears literally in queries (it's a search-replacement key) - TODO confirm.
PASSTHROUGH_REGEXES = {
    re.compile( r"\bAND\b" ),
    re.compile( r"\bOR\b" ),
    re.compile( r"\bNOT\b" ),
    re.compile( r"\((?![Rr]\))" ),
}
||||||
|
|
||||||
|
def _make_fts_query_string( query_string ):
    """Generate the SQLite FTS query string.

    SQLite's MATCH function recognizes a lot of special characters, which need
    to be enclosed in double-quotes to disable.

    Returns a 2-tuple: the FTS query string, and the parsed search terms
    (None if the query string was passed through as a raw FTS query).
    """

    # check if this looks like a raw FTS query
    if any( regex.search(query_string) for regex in PASSTHROUGH_REGEXES ):
        return query_string.strip(), None

    # split the search string into words (taking quoted phrases into account)
    ignore = app.config.get( "SQLITE_FTS_IGNORE_CHARS", ",;!?$" )
    query_string = "".join( ch for ch in query_string if ch not in ignore )
    terms = query_string.lower().split()
    i = 0
    while True:
        if i >= len(terms):
            break
        if i > 0 and terms[i-1].startswith( '"' ):
            # the previous term opened a quoted phrase - fold this term into it
            terms[i-1] += " {}".format( terms[i] )
            del terms[i]
            if terms[i-1].startswith( '"' ) and terms[i-1].endswith( '"' ):
                # the quoted phrase is complete - strip the quotes
                terms[i-1] = terms[i-1][1:-1]
            continue
        i += 1

    # clean up quoted phrases
    terms = [ t[1:] if t.startswith('"') else t for t in terms ]
    terms = [ t[:-1] if t.endswith('"') else t for t in terms ]
    terms = [ t.strip() for t in terms ]
    terms = [ t for t in terms if t ]

    # adjust search terms (apply replacements, aliases and synonyms)
    for term_no, term in enumerate(terms):
        aliases = _SEARCH_TERM_ADJUSTMENTS.get( term )
        if not aliases:
            continue
        if isinstance( aliases, str ):
            # the search term is replaced by a new one
            terms[ term_no ] = aliases
        elif isinstance( aliases, set ):
            # the search term is replaced by multiple new ones (that will be OR'ed together)
            # NOTE: We sort the terms so that the tests will work reliably.
            terms[ term_no ] = sorted( aliases )
        else:
            # FIX: the original code did "assert <non-empty string>" here, which always
            # passes silently - raise a real error instead
            raise TypeError( "Unknown search alias type: {}".format( type(aliases) ) )

    # fixup each term
    def has_special_char( term ):
        """Check if the term contains any special characters."""
        for ch in term:
            if ch in "*":
                continue # nb: allow FTS wildcards through un-quoted
            if ch.isspace() or ch in string.punctuation:
                return True
            if ord(ch) < 32 or ord(ch) > 127:
                return True
        return False
    def fixup_terms( terms ):
        """Quote any terms (recursively) that contain special characters."""
        for term_no, term in enumerate(terms):
            if isinstance( term, str ):
                if has_special_char( term ):
                    terms[term_no] = '"{}"'.format( term )
            else:
                fixup_terms( term )
    fixup_terms( terms )

    # return the final FTS query string
    def term_string( term ):
        # nb: a sub-list of terms (aliases/synonyms) is OR'ed together
        if isinstance( term, str ):
            return term
        assert isinstance( term, list )
        return "( {} )".format( " OR ".join( term ) )
    return " AND ".join( term_string(t) for t in terms ), terms
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def _fixup_results_for_hash_terms( results, search_terms ):
    """Drop search results that only matched a hash term incorrectly.

    SQLite doesn't handle search terms that end with a hash particularly well.
    We correct highlighted search terms in fixup_text(), but searching for e.g. "US#"
    will also match "use" and "using" - such results are filtered out here.
    """

    if not search_terms:
        return results

    # collect the quoted search terms that end with a hash
    # NOTE: We don't bother descending down into sub-terms.
    hash_terms = []
    for term in search_terms:
        if not isinstance( term, str ):
            continue
        if term.startswith( '"' ) and term.endswith( '"' ):
            inner = term[1:-1]
            if inner.endswith( "#" ):
                hash_terms.append( inner[:-1].lower() )
    if not hash_terms:
        return results
    if "us" in hash_terms:
        hash_terms += [ "use", "used", "using", "user" ]

    def has_real_match( sr ):
        # blank out every incorrectly-matched search term (e.g. ((K)) when searching for "K#"),
        # then keep the result only if some highlighted search terms remain
        blob = json.dumps( sr ).lower()
        for bad in hash_terms:
            blob = blob.replace( _BEGIN_HIGHLIGHT + bad + _END_HIGHLIGHT, "_removed_" )
        return _BEGIN_HIGHLIGHT in blob

    filtered = []
    for sr in results:
        if has_real_match( sr ):
            filtered.append( sr )
    return filtered
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def _adjust_sort_order( results ):
    """Adjust the sort order of the search results.

    Results with a match in the title are promoted ahead of those with a match
    only in the subtitle/content; everything else keeps its rank-based order.
    """

    def get( sr, key ):
        # nb: treat a missing or None value as an empty string
        return sr.get( key ) or ""

    reordered = []
    remaining = list( results )

    def promote( pred ):
        # move the results that satisfy the predicate into the output list,
        # preserving their relative order
        nonlocal remaining
        kept = []
        for sr in remaining:
            # NOTE: We never prefer small entries (i.e. those with no ruleref's),
            # e.g. those that only contain a "see also".
            if pred( sr ) and sr.get( "rulerefs" ):
                reordered.append( sr )
            else:
                kept.append( sr )
        remaining = kept

    # promote, in order of preference: exact title matches, titles starting with
    # a match, any match in the title, then any match in the subtitle
    promote( lambda sr:
        get(sr,"title").startswith( _BEGIN_HIGHLIGHT ) and get(sr,"title").endswith( _END_HIGHLIGHT )
    )
    promote( lambda sr: get(sr,"title").startswith( _BEGIN_HIGHLIGHT ) )
    promote( lambda sr: _BEGIN_HIGHLIGHT in get(sr,"title") )
    promote( lambda sr: _BEGIN_HIGHLIGHT in get(sr,"subtitle") )

    # include any remaining search results
    reordered.extend( remaining )
    return reordered
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def init_search( logger ):
    """Initialize the search engine.

    Creates the SQLite FTS database, loads it with the searchable content from
    each content doc's index, then loads the search config (aliases/synonyms/etc).
    """

    # initialize
    global _fts_index_entries
    _fts_index_entries = {}

    # initialize the database
    global _sqlite_path
    _sqlite_path = app.config.get( "SQLITE_PATH" )
    if not _sqlite_path:
        # FUDGE! We should be able to create a shared, in-memory database using this:
        #   file::XYZ:?mode=memory&cache=shared
        # but it doesn't seem to work (on Linux) and ends up creating a file with this name :-/
        # We manually create a temp file, which has to have the same name each time, so that we don't
        # keep creating a new database each time we start up. Sigh...
        _sqlite_path = os.path.join( tempfile.gettempdir(), "asl-rulebook2.searchdb" )
    # nb: remove any stale database from a previous run (the table is re-created below)
    if os.path.isfile( _sqlite_path ):
        os.unlink( _sqlite_path )
    logger.info( "Creating the search index: %s", _sqlite_path )
    conn = sqlite3.connect( _sqlite_path )
    # NOTE: Storing everything in a single table allows FTS to rank search results based on
    # the overall content, and also lets us do AND/OR queries across all searchable content.
    conn.execute(
        "CREATE VIRTUAL TABLE searchable USING fts5"
        " ( doc_id, sr_type, title, subtitle, content, rulerefs, tokenize='porter unicode61' )"
    )

    # load the searchable content
    logger.info( "Loading the search index..." )
    conn.execute( "DELETE FROM searchable" )
    curs = conn.cursor()
    for cdoc in webapp_content.content_docs.values():
        logger.info( "- Loading index file: %s", cdoc["_fname"] )
        nrows = 0
        for index_entry in cdoc["index"]:
            # nb: ruleref captions are stored in a single column, joined by _RULEREF_SEPARATOR
            rulerefs = _RULEREF_SEPARATOR.join( r.get("caption","") for r in index_entry.get("rulerefs",[]) )
            # NOTE: We should really strip content before adding it to the search index, otherwise any HTML tags
            # will need to be included in search terms. However, this means that the content returned by a query
            # will be this stripped content. We could go back to the original data to get the original HTML content,
            # but that means we would lose the highlighting of search terms that SQLite gives us. We opt to insert
            # the original content, since none of it should contain HTML, anyway.
            curs.execute(
                "INSERT INTO searchable (doc_id,sr_type,title,subtitle,content,rulerefs) VALUES (?,?,?,?,?,?)", (
                    cdoc["doc_id"], "index",
                    index_entry.get("title"), index_entry.get("subtitle"), index_entry.get("content"), rulerefs
            ) )
            # remember which index entry this FTS row corresponds to (and vice versa)
            _fts_index_entries[ curs.lastrowid ] = index_entry
            index_entry["_fts_rowid"] = curs.lastrowid
            nrows += 1
        conn.commit()
        logger.info( " - Loaded %s.", plural(nrows,"index entry","index entries"), )
    assert len(_fts_index_entries) == _get_row_count( conn, "searchable" )

    # load the search config
    load_search_config( logger )
||||||
|
|
||||||
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
||||||
|
|
||||||
|
def load_search_config( logger ):
    """Load the search config (replacements, aliases and synonyms).

    Each config file is loaded from the config directory, then again from the
    data directory (so that users can define their own adjustments).
    """

    # initialize
    global _SEARCH_TERM_ADJUSTMENTS
    _SEARCH_TERM_ADJUSTMENTS = {}

    def add_search_term_adjustment( key, vals ):
        """Register an adjustment for a search term (a replacement string, or a set of aliases)."""
        # make sure everything is lower-case
        key = key.lower()
        if isinstance( vals, str ):
            vals = vals.lower()
        elif isinstance( vals, set ):
            vals = set( v.lower() for v in vals )
        else:
            # FIX: the original code did "assert <non-empty string>" here, which always
            # passes silently - raise a real error instead
            raise TypeError( "Unknown search alias type: {}".format( type(vals) ) )
        # add the new search term adjustment
        if key not in _SEARCH_TERM_ADJUSTMENTS:
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
        else:
            # found a multiple definition - try to do something sensible
            logger.warning( " - Duplicate search alias: %s\n- current aliases = %s\n- new aliases = %s", key,
                _SEARCH_TERM_ADJUSTMENTS[key], vals
            )
            if isinstance( _SEARCH_TERM_ADJUSTMENTS[key], str ):
                # the previous definition was a simple replacement - the new definition wins
                _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
            else:
                assert isinstance( _SEARCH_TERM_ADJUSTMENTS[key], set )
                # merge the new value(s) into the existing set
                # FIX: the original called set.update() unconditionally, which would add the
                # individual *characters* of a string value - handle strings as a whole
                if isinstance( vals, str ):
                    _SEARCH_TERM_ADJUSTMENTS[ key ].add( vals )
                else:
                    _SEARCH_TERM_ADJUSTMENTS[ key ].update( vals )

    # load the search replacements
    def load_search_replacements( fname ):
        """Load a search replacements file (key -> replacement text)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search replacements: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for key, val in data.items():
            if key.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", key, val )
            add_search_term_adjustment( key, val )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search replacement","search replacements") )
    load_search_replacements( make_config_path( "search-replacements.json" ) )
    load_search_replacements( make_data_path( "search-replacements.json" ) )

    # load the search aliases
    def load_search_aliases( fname ):
        """Load a search aliases file ("key1/key2" -> list of aliases)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search aliases: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for keys, aliases in data.items():
            if keys.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", keys, " ; ".join(aliases) )
            # nb: each key matches itself, as well as all of its aliases
            for key in keys.split( "/" ):
                add_search_term_adjustment( key, set( itertools.chain( aliases, [key] ) ) )
            nitems += 1
        # FIX: the singular form passed to plural() was wrong ("search aliases")
        logger.info( "- Loaded %s.", plural(nitems,"search alias","search aliases") )
    load_search_aliases( make_config_path( "search-aliases.json" ) )
    load_search_aliases( make_data_path( "search-aliases.json" ) )

    # load the search synonyms
    def load_search_synonyms( fname ):
        """Load a search synonyms file (each entry is a set of equivalent words)."""
        if not os.path.isfile( fname ):
            return
        logger.info( "Loading search synonyms: %s", fname )
        with open( fname, "r", encoding="utf-8" ) as fp:
            data = json.load( fp )
        nitems = 0
        for synonyms in data:
            if isinstance( synonyms, str ):
                continue # nb: ignore comments
            logger.debug( "- %s", " ; ".join(synonyms) )
            # nb: any word in the set will match every word in the set
            synonyms = set( synonyms )
            for term in synonyms:
                add_search_term_adjustment( term, synonyms )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search synonym","search synonyms") )
    load_search_synonyms( make_config_path( "search-synonyms.json" ) )
    load_search_synonyms( make_data_path( "search-synonyms.json" ) )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def _get_row_count( conn, table_name ): |
||||||
|
"""Get the number of rows in a table.""" |
||||||
|
cur = conn.execute( "SELECT count(*) FROM {}".format( table_name ) ) |
||||||
|
return cur.fetchone()[0] |
@ -1,23 +1,80 @@ |
|||||||
import { gMainApp } from "./MainApp.js" ; |
import { gMainApp, gEventBus, gContentDocs } from "./MainApp.js" ; |
||||||
|
import { fixupSearchHilites } from "./utils.js" ; |
||||||
|
|
||||||
// --------------------------------------------------------------------
|
// --------------------------------------------------------------------
|
||||||
|
|
||||||
// A single search result of type "index" (i.e. an index entry that matched the query).
export class IndexSearchResult {

    constructor( key, content ) {
        this.key = key ; // unique key (e.g. for use by Vue when rendering lists)
        this.srType = "index" ; // nb: mirrors the "sr_type" field set by the back-end
        this.content = content ; // the search result payload returned by the back-end
    }

}
|
||||||
|
|
||||||
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
||||||
|
|
||||||
// Renders a single "index" search result: title/subtitle header, then the content,
// "see also" links, ruleid's and ruleref's.
// NOTE: Fields coming from the back-end may contain highlight markers, so they are
// rendered with v-html (after conversion to HTML spans).
gMainApp.component( "index-sr", {

    props: [ "sr" ], // the search result object returned by the back-end

    template: `
<div class="sr index-sr" >
    <div v-if="sr.title || sr.subtitle" class="title" >
        <span v-if=sr.title class="title" v-html=sr.title />
        <span v-if=sr.subtitle class="subtitle" v-html=sr.subtitle />
    </div>
    <div class="body">
        <div v-if=sr.content class="content" v-html=sr.content />
        <div v-if=makeSeeAlso v-html=makeSeeAlso class="see-also" />
        <div v-if=sr.ruleids class="ruleids" >
            <ruleid v-for="rid in sr.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
        </div>
        <ul v-if=sr.rulerefs class="rulerefs" >
            <li v-for="rref in sr.rulerefs" :key=rref >
                <span v-if=rref.caption class="caption" v-html=fixupHilites(rref.caption) />
                <ruleid v-for="rid in rref.ruleids" :docId=sr.doc_id :ruleId=rid :key=rid />
            </li>
        </ul>
    </div>
</div>`,

    computed: {
        // build the "See also: ..." text (null if there are no see-also entries)
        makeSeeAlso() {
            if ( this.sr.see_also )
                return "See also: " + this.sr.see_also.join( ", " ) ;
            return null ;
        },
    },

    methods: {
        // convert search highlight markers into HTML
        fixupHilites( val ) {
            return fixupSearchHilites( val ) ;
        },
    },

} ) ;
||||||
|
|
||||||
|
// --------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Renders a ruleid, as a clickable link if the rule is a known target in the
// content doc, otherwise as plain (greyed-out) text.
gMainApp.component( "ruleid", {

    props: [ "docId", "ruleId" ],
    data() { return {
        target: null, // the resolved rule target (null if the rule is unknown)
    } ; },

    template: `<span class="ruleid" v-bind:class="{unknown:!target}">[<a v-if=target @click=onClick>{{ruleId}}</a><span v-else>{{ruleId}}</span>]</span>`,

    created() {
        // figure out which rule is being referenced
        let ruleId = this.ruleId ;
        let pos = ruleId.indexOf( "-" ) ;
        if ( pos >= 0 ) {
            // NOTE: For ruleid's of the form "A12.3-.4", we want to target "A12.3".
            ruleId = ruleId.substring( 0, pos ) ;
        }
        // check if the rule is one we know about
        if ( gContentDocs[this.docId] && gContentDocs[this.docId].targets ) {
            if ( gContentDocs[this.docId].targets[ ruleId ] )
                this.target = ruleId ;
        }
    },

    methods: {
        onClick() {
            // show the target rule in the content pane
            gEventBus.emit( "show-target", this.docId, this.target ) ;
        },
    },

} ) ;
||||||
|
@ -1 +1,13 @@ |
|||||||
#search-results .sr { margin: 0 10px 2px 0 ; border: 1px dotted #666 ; padding: 5px ; } |
#search-results .sr { margin: 0 10px 2px 0 ; padding: 5px ; } |
||||||
|
#search-results .sr .hilite { padding: 0 2px ; background: #ffa ; } |
||||||
|
|
||||||
|
#search-results .index-sr .title { background: #e0e0e0 ; border-bottom: 1px solid #ccc ; padding: 2px 5px ; font-weight: bold ; } |
||||||
|
#search-results .index-sr .subtitle { padding: 2px 5px ; font-weight: normal ; font-size: 80% ; font-style: italic ; } |
||||||
|
#search-results .index-sr .body { padding: 2px 5px 0 5px ; font-size: 80% ; } |
||||||
|
#search-results .index-sr .content { color: #444 ; } |
||||||
|
#search-results .index-sr .see-also { color: #444 ; } |
||||||
|
#search-results .index-sr ul.rulerefs { margin-left: 1.2em ; } |
||||||
|
#search-results .index-sr ul.rulerefs .caption { padding-right: 0.5em ; } |
||||||
|
#search-results .index-sr .ruleid { margin-right: 0.25em ; font-style: italic ; color: #444 ; } |
||||||
|
#search-results .index-sr .ruleid.unknown { color: #888 ; } |
||||||
|
#search-results .index-sr .ruleid a { cursor: pointer ; } |
||||||
|
Binary file not shown.
@ -1,15 +1,15 @@ |
|||||||
{ |
{ |
||||||
|
|
||||||
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,702] }, |
"A4.7": { "caption": "ADVANCE PHASE", "page_no": 1, "pos": [72,718] }, |
||||||
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,404] }, |
"C13.8": { "caption": "BACKBLAST", "page_no": 1, "pos": [72,503] }, |
||||||
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72.97] }, |
"A3.8": { "caption": "CLOSE COMBAT PHASE (CCPh)", "page_no": 1, "pos": [72,292] }, |
||||||
|
|
||||||
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,702] }, |
"A4.5": { "caption": "DOUBLE TIME", "page_no": 2, "pos": [72,718] }, |
||||||
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72.404] }, |
"A19.1": { "caption": "EXPERIENCE LEVEL RATING (ELR)", "page_no": 2, "pos": [72,503] }, |
||||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,97] }, |
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 2, "pos": [72,292] }, |
||||||
|
|
||||||
"A1.21": { "caption": "FIREPOWER (FP)", "page_no": 3, "pos": [72,702] }, |
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,718] }, |
||||||
"E11.21": { "caption": "GAPS", "page_no": 3, "pos":[72,404] }, |
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,503] }, |
||||||
"C8.3": { "caption": "HEAT (H)", "page_no": 3, "pos": [72,97] } |
"D1.4": { "caption": "IDENTITY & GROUND PRESSURE", "page_no": 3, "pos": [72,292] } |
||||||
|
|
||||||
} |
} |
||||||
|
@ -0,0 +1,298 @@ |
|||||||
|
""" Test search. """ |
||||||
|
|
||||||
|
import re |
||||||
|
import logging |
||||||
|
|
||||||
|
from selenium.webdriver.common.keys import Keys |
||||||
|
|
||||||
|
from asl_rulebook2.utils import strip_html |
||||||
|
from asl_rulebook2.webapp.search import load_search_config, _make_fts_query_string |
||||||
|
from asl_rulebook2.webapp.tests.utils import init_webapp, select_tabbed_page, get_classes, \ |
||||||
|
wait_for, find_child, find_children |
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_search( webapp, webdriver ):
    """Test running searches and unloading the results.

    Exercises the 3 possible outcomes of _do_search(): no results (None),
    an error (string), and a list of unloaded search results.
    """

    # initialize (the "simple" fixture data directory provides the index entries checked below)
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # test a search that finds nothing
    results = _do_search( "oogah, boogah!" )
    assert results is None

    # test error handling
    # nb: this magic query string makes the backend raise a simulated error - TODO confirm against the search endpoint
    results = _do_search( "!:simulated-error:!" )
    assert "Simulated error." in results

    # do a search
    # nb: ((...)) marks content that was highlighted by the search engine (see _unload_search_results())
    results = _do_search( "enemy" )
    assert results == [
        { "sr_type": "index",
          "title": "CCPh", "subtitle": "Close Combat Phase",
          "ruleids": [ "A3.8" ],
          "rulerefs": [
              { "caption": "((ENEMY)) Attacks", "ruleids": [ "S11.5" ] },
              { "caption": "dropping SW before CC", "ruleids": [ "A4.43" ] },
          ]
        },
        { "sr_type": "index",
          "title": "Double Time",
          "content": "Also known as \"running really fast.\"",
          "see_also": [ "CX" ],
          "ruleids": [ "A4.5-.51", "S6.222" ],
          "rulerefs": [
              { "caption": "((ENEMY)) Guard Automatic Action", "ruleids": [ "S6.303" ] },
              { "ruleids": [ "C10.3" ] },
              { "caption": "NA in Advance Phase", "ruleids": [ "A4.7" ] },
              { "caption": "'S?' is \"<NA>\"" },
          ]
        },
    ]

    # do another search
    results = _do_search( "gap" )
    assert results == [
        { "sr_type": "index",
          "title": "((Gaps)), Convoy",
          "ruleids": [ "E11.21" ],
        },
    ]
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_content_fixup( webapp, webdriver ):
    """Test fixing up of content returned by the search engine."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver )

    # each case: ( query string, result field to check, expected fixed-up value )
    test_cases = [
        # a fraction should come back as its Unicode equivalent
        ( "3/4", "content", "HTML content: 2((\u00be)) MP" ),
        # a query term ending with a hash should still be highlighted
        ( "H#", "title", "((H#))" ),
        # "U.S." should be matched and highlighted as a single term
        ( "U.S.", "content", "The ((U.S.)) has lots of this." ),
    ]

    # run each search and check the single result that comes back
    for query_string, field, expected in test_cases:
        results = _do_search( query_string )
        assert len(results) == 1
        assert results[0][field] == expected
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_targets( webapp, webdriver ):
    """Test clicking on search results."""

    # initialize
    webapp.control_tests.set_data_dir( "simple" )
    init_webapp( webapp, webdriver, no_content=1, add_empty_doc=1 )

    def do_test( query_string, sel, expected ):
        """Search, click on a target in the results, and check where we end up."""

        # switch to the dummy document first, so we can detect the tab change
        select_tabbed_page( "#content", "empty" )

        # run the search
        _do_search( query_string )

        # click on a target in the search results
        target_link = find_child( f"#search-results {sel}" )
        target_link.click()

        def check_target():
            # the "simple" document's tab must become active...
            active_tab = find_child( "#content .tab-strip .tab.active" )
            if active_tab.get_attribute( "data-tabid" ) != "simple":
                return False
            # ...and it must be showing the expected target
            doc_elem = find_child( "#content .tabbed-page[data-tabid='simple'] .content-doc" )
            return doc_elem.get_attribute( "data-target" ) == expected
        wait_for( 2, check_target )

    # do the tests
    do_test( "CC", ".sr .ruleids .ruleid a", "A3.8" )
    do_test( "time", ".sr .rulerefs .ruleid a", "A4.7" )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def test_make_fts_query_string():
    """Test generating the FTS query string."""

    # initialize
    load_search_config( logging.getLogger("_unknown_") )

    # each case: ( query string, expected FTS query string )
    test_cases = [

        # basic query strings
        ( "", "" ),
        ( "hello", "hello" ),
        ( " hello, world! ", "hello AND world" ),
        ( "foo 1+2 A-T K# bar", 'foo AND "1+2" AND "a-t" AND "k#" AND bar' ),
        ( "a'b a''b", "\"a'b\" AND \"a''b\"" ),
        ( 'foo "set dc" bar', 'foo AND "set dc" AND bar' ),

        # quoted phrases
        ( '""', '' ),
        ( ' " " ', '' ),
        ( '"hello world"', '"hello world"' ),
        ( ' foo "hello world" bar ', 'foo AND "hello world" AND bar' ),
        ( ' foo " xyz " bar ', 'foo AND xyz AND bar' ),
        ( ' foo " xyz 123 " bar ', 'foo AND "xyz 123" AND bar' ),

        # incorrectly quoted phrases
        ( '"', '' ),
        ( ' " " " ', '' ),
        ( ' a "b c d e', 'a AND "b c d e"' ),
        ( ' a b" c d e ', 'a AND b AND c AND d AND e' ),

        # pass-through of FTS operators
        ( "AND", "AND" ),
        ( " OR", "OR" ),
        ( "OR ", "OR" ),
        ( "foo OR bar", "foo OR bar" ),
        ( "(a OR b)", "(a OR b)" ),

        # search replacements
        ( "1/2 3/4 3/8 5/8", '"½" AND "¾" AND "⅜" AND "⅝"' ),
        ( "(r)", '"®"' ),

        # search aliases (only the key word triggers the expansion)
        ( "entrenchment", "( ditch OR entrenchment OR foxhole OR trench )" ),
        ( "entrenchments", "( ditch OR entrenchments OR foxhole OR trench )" ),
        ( "foxhole", "foxhole" ),

        # search synonyms (any word in the set triggers the expansion)
        ( "armor", "( armor OR armour )" ),
        ( "american big armor", '( america OR american OR "u.s." ) AND big AND ( armor OR armour )' ),

    ]

    # run each test case
    for query, expected in test_cases:
        fts_query_string, _ = _make_fts_query_string( query )
        assert fts_query_string == expected
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
def _do_search( query_string ):
    """Submit a search and wait for the results.

    Returns:
        None if nothing was found, a string error message if the search
        failed, otherwise a list of unloaded search results.
    """

    def get_seq_no():
        # nb: get_attribute() returns a string; convert to an int so that the
        # comparison below is numeric ("10" > "9" is False as strings, which
        # would make wait_for() time out once the seq# passes 9)
        return int( find_child( "#search-results" ).get_attribute( "data-seqno" ) )

    # submit the search
    select_tabbed_page( "#nav", "search" )
    elem = find_child( "input#query-string" )
    elem.clear()
    elem.send_keys( query_string )
    seq_no = get_seq_no()
    elem.send_keys( Keys.RETURN )

    # unload the results
    wait_for( 2, lambda: get_seq_no() > seq_no )
    elem = find_child( "#search-results .error" )
    if elem:
        return elem.text # nb: string = error message
    elem = find_child( "#search-results .no-results" )
    if elem:
        assert elem.text == "Nothing was found."
        return None # nb: None = no results
    results = _unload_search_results()
    assert isinstance( results, list ) # nb: list = search results
    return results
||||||
|
|
||||||
|
def _unload_search_results():
    """Unload the search results.

    Returns a list of dicts, one per search result shown in the UI.
    Highlighted content is represented as ((...)) in the returned text.
    """

    def unload_elem( result, key, elem ):
        """Unload a single element's text into result[key] (returns True if anything was stored)."""
        if not elem:
            return False
        elem_text = get_elem_text( elem )
        if not elem_text:
            return False
        result[key] = elem_text
        return True

    def get_elem_text( elem ):
        """Get the element's text content."""
        val = elem.get_attribute( "innerHTML" )
        # change how highlighted content is represented
        # nb: process the matches in reverse, so that earlier match offsets stay valid as we edit the string
        matches = list( re.finditer( r'<span class="hilite">(.*?)</span>', val ) )
        for mo in reversed(matches):
            val = val[:mo.start()] + "((" + mo.group(1) + "))" + val[mo.end():]
        # remove HTML tags
        return strip_html( val.strip() )

    def unload_ruleids( result, key, parent ):
        """Unload a list of ruleid's (shown in the UI as e.g. "[A3.8]")."""
        if not parent:
            return
        ruleids = []
        for elem in find_children( ".ruleid", parent ):
            ruleid = get_elem_text( elem )
            # strip the enclosing brackets
            assert ruleid.startswith( "[" ) and ruleid.endswith( "]" )
            ruleids.append( ruleid[1:-1] )
        if ruleids:
            result[key] = ruleids

    def unload_rulerefs( result, key, parent ):
        """Unload a list of ruleref's (each an optional caption plus ruleid's)."""
        if not parent:
            return
        rulerefs = []
        for elem in find_children( "li", parent ):
            ruleref = {}
            unload_elem( ruleref, "caption", find_child(".caption",elem) )
            unload_ruleids( ruleref, "ruleids", elem )
            rulerefs.append( ruleref )
        if rulerefs:
            result[key] = rulerefs

    def unload_index_sr( sr ): #pylint: disable=possibly-unused-variable
        """Unload an "index" search result."""
        result = {}
        unload_elem( result, "title", find_child("span.title",sr) )
        unload_elem( result, "subtitle", find_child(".subtitle",sr) )
        unload_elem( result, "content", find_child(".content",sr) )
        if unload_elem( result, "see_also", find_child(".see-also",sr) ):
            # convert the "See also: a, b, c" text into a list of entries
            assert result["see_also"].startswith( "See also:" )
            result["see_also"] = [ s.strip() for s in result["see_also"][9:].split( "," ) ]
        unload_ruleids( result, "ruleids", find_child(".ruleids",sr) )
        unload_rulerefs( result, "rulerefs", find_child(".rulerefs",sr) )
        return result

    # unload the search results
    results = []
    for sr in find_children( "#search-results .sr"):
        # figure out what type of search result this is from its CSS class (e.g. "index-sr" => "index")
        classes = get_classes( sr )
        classes.remove( "sr" )
        assert len(classes) == 1 and classes[0].endswith( "-sr" )
        sr_type = classes[0][:-3]
        # dispatch to the corresponding unload function defined above (e.g. unload_index_sr)
        # nb: this relies on the nested unload_*_sr functions being visible in locals()
        func = locals()[ "unload_{}_sr".format( sr_type ) ]
        sr = func( sr )
        sr["sr_type"] = sr_type
        results.append( sr )

    return results
@ -0,0 +1,66 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
""" Add named destinations to a PDF file. """ |
||||||
|
|
||||||
|
import subprocess |
||||||
|
import json |
||||||
|
import time |
||||||
|
import datetime |
||||||
|
|
||||||
|
import click |
||||||
|
|
||||||
|
from asl_rulebook2.utils import TempFile |
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
@click.command()
@click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
@click.option( "--title", help="Document title." )
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
)
@click.option( "--gs","gs_path", default="gs", help="Path to the Ghostscript executable." )
def main( pdf_file, title, targets_fname, yoffset, output_fname, gs_path ):
    """Add named destinations to a PDF file.

    The targets file maps each destination name to { "page_no", "pos": [x,y] };
    a pdfmarks file is generated from it, then Ghostscript rewrites the PDF
    with the named destinations (and optional /Title) embedded.
    """

    def ps_escape( val ):
        """Escape a string so that it is safe inside a PostScript (...) literal."""
        # nb: the backslash must be escaped first, to avoid double-escaping
        for ch in ( "\\", "(", ")" ):
            val = val.replace( ch, "\\" + ch )
        return val

    # load the targets
    # nb: JSON files are UTF-8; don't rely on the platform default encoding
    with open( targets_fname, "r", encoding="utf-8" ) as fp:
        targets = json.load( fp )

    with TempFile( mode="w" ) as temp_file:

        # generate the pdfmarks
        print( "Generating the pdfmarks..." )
        if title:
            # nb: escape the title, since "(" / ")" / "\" would break the PostScript string
            print( "[ /Title ({})".format( ps_escape(title) ), file=temp_file )
        else:
            print( "[", file=temp_file )
        print( " /DOCINFO pdfmark", file=temp_file )
        print( file=temp_file )
        for ruleid, target in targets.items():
            xpos, ypos = target["pos"]
            print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
                ruleid, target["page_no"], xpos, ypos+yoffset
            ), file=temp_file )
        print( file=temp_file )
        # nb: close the file so Ghostscript can read it, but keep it on disk
        temp_file.close( delete=False )

        # generate the pdfmark'ed document
        print( "Generating the pdfmark'ed document..." )
        print( "- {} => {}".format( pdf_file, output_fname ) )
        args = [ gs_path, "-q", "-dBATCH", "-dNOPAUSE", "-sDEVICE=pdfwrite" ]
        args.extend( [ "-o", output_fname ] )
        args.extend( [ "-f", pdf_file ] )
        args.append( temp_file.name )
        start_time = time.time()
        subprocess.run( args, check=True )
        elapsed_time = time.time() - start_time
        print( "- Elapsed time: {}".format( datetime.timedelta(seconds=int(elapsed_time)) ) )
||||||
|
|
||||||
|
# --------------------------------------------------------------------- |
||||||
|
|
||||||
|
# script entry point
if __name__ == "__main__":
    main() #pylint: disable=no-value-for-parameter
Loading…
Reference in new issue