|
|
|
""" Manage the search engine. """
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sqlite3
|
|
|
|
import json
|
|
|
|
import re
|
|
|
|
import itertools
|
|
|
|
import string
|
|
|
|
import tempfile
|
|
|
|
import logging
|
|
|
|
import traceback
|
|
|
|
|
|
|
|
from flask import request, jsonify
|
|
|
|
|
|
|
|
from asl_rulebook2.utils import plural
|
|
|
|
from asl_rulebook2.webapp import app
|
|
|
|
from asl_rulebook2.webapp import content as webapp_content
|
|
|
|
from asl_rulebook2.webapp.utils import make_config_path, make_data_path
|
|
|
|
|
|
|
|
# path to the SQLite database file holding the FTS search index (set in init_search())
_sqlite_path = None

# maps FTS rowid's to their index entry (populated in init_search())
_fts_index_entries= None

_logger = logging.getLogger( "search" )
|
|
|
|
|
|
|
|
# these are used to highlight search matches (nb: the front-end looks for these)
|
|
|
|
_BEGIN_HIGHLIGHT = "!@:"
|
|
|
|
_END_HIGHLIGHT = ":@!"
|
|
|
|
|
|
|
|
# NOTE: These regex's fix up content returned to us by the SQLite search engine (typically problems
|
|
|
|
# with highlighting search terms).
|
|
|
|
_FIXUP_TEXT_REGEXES = [
|
|
|
|
[ re.compile( fixup[0].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT ) ),
|
|
|
|
fixup[1].format( _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
|
|
|
|
]
|
|
|
|
for fixup in [
|
|
|
|
[ r"&{}(.+?){};", r"{}&\g<1>;{}" ], # HTML entities e.g. &((frac12)); -> (($frac12;))
|
|
|
|
[ r"{}(.+?){}#", r"{}\g<1>#{}" ], # e.g. ((TH)# -> ((TH#)
|
|
|
|
[ r"{}U\.S{}\.", "{}U.S.{}" ], # ((U.S)). -> ((U.S.))
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
# these are used to separate ruleref's in the FTS table (internal use only)
|
|
|
|
_RULEREF_SEPARATOR = "-:-"
|
|
|
|
|
|
|
|
_SEARCH_TERM_ADJUSTMENTS = None
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
@app.route( "/search", methods=["POST"] )
def search():
    """Run a search.

    The query string is taken from the "queryString" form parameter; the results
    (or an {"error": ...} object) are returned as JSON.
    """

    # log the request
    _logger.info( "SEARCH REQUEST:" )
    args = dict( request.form.items() )
    for key, val in args.items():
        _logger.info( "- %s: %s", key, val )

    # run the search
    try:
        return _do_search( args )
    except Exception as exc: #pylint: disable=broad-except
        msg = str( exc )
        if msg.startswith( "fts5: " ):
            # nb: this is a sqlite3.OperationalError - strip the prefix off the message
            # NOTE: The original code sliced off only 5 characters ("fts5: " is 6),
            # which left a leading space in the message shown to the user.
            msg = msg[ len("fts5: ") : ]
        _logger.warning( "SEARCH ERROR: %s\n%s", args, traceback.format_exc() )
        return jsonify( { "error": msg } )
|
|
|
|
|
|
|
|
def _do_search( args ):
    """Run a search against the FTS index and return the results as a JSON response.

    args: the request parameters; "queryString" contains the query to run.
    """

    def fixup_text( val ):
        """Fix up problems in content returned to us by the search engine."""
        if val is None:
            return None
        for regex in _FIXUP_TEXT_REGEXES:
            val = regex[0].sub( regex[1], val )
        return val

    # run the search
    query_string = args[ "queryString" ].strip()
    if query_string == "!:simulated-error:!":
        raise RuntimeError( "Simulated error." ) # nb: for the test suite
    fts_query_string, search_terms = _make_fts_query_string( query_string )
    _logger.debug( "FTS query string: %s", fts_query_string )
    conn = sqlite3.connect( _sqlite_path )
    def highlight( n ):
        """Generate an SQL expression that highlights matches in the n'th column."""
        # NOTE: highlight() is an FTS extension function, and takes column numbers :-/
        return "highlight(searchable,{},'{}','{}')".format( n, _BEGIN_HIGHLIGHT, _END_HIGHLIGHT )
    # NOTE: Columns 2-5 are title/subtitle/content/rulerefs (as created in init_search()),
    # returned with the search terms wrapped in the highlight markers.
    sql = "SELECT rowid,doc_id,sr_type,rank,{},{},{},{} FROM searchable".format(
        highlight(2), highlight(3), highlight(4), highlight(5)
    )
    sql += " WHERE searchable MATCH ?"
    sql += " ORDER BY rank"
    # nb: the "{cols}:" prefix restricts the MATCH to the specified columns
    curs = conn.execute( sql,
        ( "{title subtitle content rulerefs}: " + fts_query_string, )
    )

    def get_col( sr, key, val ):
        """Store a fixed-up column value in the search result (if it's non-empty)."""
        if val:
            sr[key] = fixup_text( val )

    # get the results
    results = []
    for row in curs:
        if row[2] != "index":
            # nb: "index" is currently the only row type inserted by init_search()
            _logger.error( "Unknown searchable row type (rowid=%d): %s", row[0], row[2] )
            continue
        # build the search result for this row, using the rowid to locate the index entry
        index_entry = _fts_index_entries[ row[0] ]
        result = {
            "doc_id": row[1],
            "sr_type": row[2],
            "_key": "{}:{}:{}".format( row[1], row[2], row[0] ),
            "_score": - row[3], # nb: the FTS rank is negated to give the score
        }
        get_col( result, "title", row[4] )
        get_col( result, "subtitle", row[5] )
        get_col( result, "content", row[6] )
        # attach the ruleid's and "see also" info from the underlying index entry
        if index_entry.get( "ruleids" ):
            result["ruleids"] = index_entry["ruleids"]
        if index_entry.get( "see_also" ):
            result["see_also"] = index_entry["see_also"]
        # rebuild the ruleref's (the FTS table stores only their captions, joined together
        # with _RULEREF_SEPARATOR - see init_search())
        rulerefs = [ r.strip() for r in row[7].split(_RULEREF_SEPARATOR) ] if row[7] else []
        assert len(rulerefs) == len(index_entry.get("rulerefs",[]))
        if rulerefs:
            result[ "rulerefs" ] = []
            for i, ruleref in enumerate(rulerefs):
                ruleref2 = {}
                if "caption" in index_entry["rulerefs"][i]:
                    # nb: the highlighted caption, sans highlight markers, must match the original
                    assert ruleref.replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ) \
                        == index_entry["rulerefs"][i]["caption"]
                    ruleref2["caption"] = fixup_text( ruleref )
                if "ruleids" in index_entry["rulerefs"][i]:
                    ruleref2["ruleids"] = index_entry["rulerefs"][i]["ruleids"]
                assert ruleref2
                result["rulerefs"].append( ruleref2 )
        results.append( result )

    # fixup the results (remove entries incorrectly matched by hash-terminated terms)
    results = _fixup_results_for_hash_terms( results, search_terms )

    # adjust the sort order (prefer title/subtitle matches)
    results = _adjust_sort_order( results )

    # return the results
    _logger.debug( "Search results:" if len(results) > 0 else "Search results: none" )
    for result in results:
        _logger.debug( "- %s (%.3f)",
            result["title"].replace( _BEGIN_HIGHLIGHT, "" ).replace( _END_HIGHLIGHT, "" ),
            result["_score"]
        )
    return jsonify( results )
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
# if a query matches any of these, it is passed through as a raw FTS query
PASSTHROUGH_REGEXES = {
    re.compile( r"\bAND\b" ),
    re.compile( r"\bOR\b" ),
    re.compile( r"\bNOT\b" ),
    re.compile( r"\((?![Rr]\))" ),
}
|
|
|
|
|
|
|
|
def _make_fts_query_string( query_string ):
    """Generate the SQLite query string.

    SQLite's MATCH function recognizes a lot of special characters, which need
    to be enclosed in double-quotes to disable.

    Returns a ( fts_query_string, search_terms ) tuple; search_terms is None
    if the query was passed through unchanged as a raw FTS query.
    """

    # check if this looks like a raw FTS query
    if any( regex.search(query_string) for regex in PASSTHROUGH_REGEXES ):
        return query_string.strip(), None

    # split the search string into words (taking quoted phrases into account)
    ignore = app.config.get( "SQLITE_FTS_IGNORE_CHARS", ",;!?$" )
    query_string = "".join( ch for ch in query_string if ch not in ignore )
    terms = query_string.lower().split()
    i = 0
    while True:
        if i >= len(terms):
            break
        if i > 0 and terms[i-1].startswith( '"' ):
            # the previous term is an unterminated quoted phrase - merge this term into it
            terms[i-1] += " {}".format( terms[i] )
            del terms[i]
            if terms[i-1].startswith( '"' ) and terms[i-1].endswith( '"' ):
                terms[i-1] = terms[i-1][1:-1]
            continue
        i += 1

    # clean up quoted phrases
    terms = [ t[1:] if t.startswith('"') else t for t in terms ]
    terms = [ t[:-1] if t.endswith('"') else t for t in terms ]
    terms = [ t.strip() for t in terms ]
    terms = [ t for t in terms if t ]

    # adjust search terms (replacements/aliases/synonyms)
    for term_no, term in enumerate(terms):
        aliases = _SEARCH_TERM_ADJUSTMENTS.get( term )
        if not aliases:
            continue
        if isinstance( aliases, str ):
            # the search term is replaced by a new one
            terms[ term_no ] = aliases
        elif isinstance( aliases, set ):
            # the search term is replaced by multiple new ones (that will be OR'ed together)
            # NOTE: We sort the terms so that the tests will work reliably.
            terms[ term_no ] = sorted( aliases )
        else:
            # NOTE: The original code assert'ed a non-empty string here, which always
            # passes; assert False actually reports the bad alias type.
            assert False, "Unknown search alias type: {}".format( type(aliases) )

    # fixup each term
    def has_special_char( term ):
        """Check if the term contains any special characters."""
        for ch in term:
            if ch in "*":
                continue # nb: allow the FTS wildcard through unquoted
            if ch.isspace() or ch in string.punctuation:
                return True
            if ord(ch) < 32 or ord(ch) > 127:
                return True
        return False
    def fixup_terms( terms ):
        """Fixup a list of terms (quoting any that contain special characters)."""
        for term_no, term in enumerate(terms):
            if isinstance( term, str ):
                if has_special_char( term ):
                    terms[term_no] = '"{}"'.format( term )
            else:
                # nb: this is a list of aliases - fix up each one
                fixup_terms( term )
    fixup_terms( terms )

    # return the final FTS query string
    def term_string( term ):
        """Convert a term (a plain string, or a list of aliases) to FTS syntax."""
        if isinstance( term, str ):
            return term
        assert isinstance( term, list )
        return "( {} )".format( " OR ".join( term ) )
    return " AND ".join( term_string(t) for t in terms ), terms
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
def _fixup_results_for_hash_terms( results, search_terms ):
|
|
|
|
"""Fixup search results for search terms that end with a hash.
|
|
|
|
|
|
|
|
SQLite doesn't handle search terms that end with a hash particularly well.
|
|
|
|
We correct highlighted search terms in fixup_text(), but searching for e.g. "US#"
|
|
|
|
will also match "use" and "using" - we remove such results here.
|
|
|
|
"""
|
|
|
|
|
|
|
|
# figure out which search terms end with a hash
|
|
|
|
# NOTE: We don't bother descending down into sub-terms.
|
|
|
|
if not search_terms:
|
|
|
|
return results
|
|
|
|
terms = [
|
|
|
|
t[1:-1] for t in search_terms
|
|
|
|
if isinstance(t,str) and t.startswith('"') and t.endswith('"')
|
|
|
|
]
|
|
|
|
terms = [
|
|
|
|
t[:-1].lower() for t in terms
|
|
|
|
if isinstance(t,str) and t.endswith("#")
|
|
|
|
]
|
|
|
|
if not terms:
|
|
|
|
return results
|
|
|
|
if "us" in terms:
|
|
|
|
terms.extend( [ "use", "used", "using", "user" ] )
|
|
|
|
|
|
|
|
def keep( sr ):
|
|
|
|
# remove every incorrectly matched search term (e.g. ((K)) when searching for "K#")
|
|
|
|
buf = json.dumps( sr ).lower()
|
|
|
|
for term in terms:
|
|
|
|
buf = buf.replace( "{}{}{}".format( _BEGIN_HIGHLIGHT, term, _END_HIGHLIGHT ), "_removed_" )
|
|
|
|
# we keep this search result if there are still some highlighted search terms
|
|
|
|
return _BEGIN_HIGHLIGHT in buf
|
|
|
|
|
|
|
|
return [
|
|
|
|
result for result in results if keep(result)
|
|
|
|
]
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
def _adjust_sort_order( results ):
|
|
|
|
"""Adjust the sort order of the search results."""
|
|
|
|
|
|
|
|
results2 = []
|
|
|
|
def extract_sr( func ):
|
|
|
|
# move results that pass the filter function to the new list
|
|
|
|
i = 0
|
|
|
|
while True:
|
|
|
|
if i >= len(results):
|
|
|
|
break
|
|
|
|
# NOTE: We never prefer small entries (i.e .have no ruleref's)
|
|
|
|
# e.g. those that only contain a "see also".
|
|
|
|
if func( results[i] ) and len(results[i].get("rulerefs",[])) > 0:
|
|
|
|
results2.append( results[i] )
|
|
|
|
del results[i]
|
|
|
|
else:
|
|
|
|
i += 1
|
|
|
|
|
|
|
|
def get( sr, key ):
|
|
|
|
val = sr.get( key )
|
|
|
|
return val if val else ""
|
|
|
|
|
|
|
|
# prefer search results whose title is an exact match
|
|
|
|
extract_sr(
|
|
|
|
lambda sr: get(sr,"title").startswith( _BEGIN_HIGHLIGHT ) and get(sr,"title").endswith( _END_HIGHLIGHT )
|
|
|
|
)
|
|
|
|
# prefer search results whose title starts with a match
|
|
|
|
extract_sr(
|
|
|
|
lambda sr: get(sr,"title").startswith( _BEGIN_HIGHLIGHT )
|
|
|
|
)
|
|
|
|
# prefer search results that have a match in the title
|
|
|
|
extract_sr(
|
|
|
|
lambda sr: _BEGIN_HIGHLIGHT in get(sr,"title")
|
|
|
|
)
|
|
|
|
# prefer search results that have a match in the subtitle
|
|
|
|
extract_sr(
|
|
|
|
lambda sr: _BEGIN_HIGHLIGHT in get(sr,"subtitle")
|
|
|
|
)
|
|
|
|
|
|
|
|
# include any remaining search results
|
|
|
|
results2.extend( results )
|
|
|
|
|
|
|
|
return results2
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
def init_search( startup_msgs, logger ):
    """Initialize the search engine.

    Creates the SQLite FTS database and loads the searchable content into it,
    then loads the search config (replacements/aliases/synonyms).

    startup_msgs: collects warnings for problems found during startup.
    logger: where progress messages are logged.
    """

    # initialize
    global _fts_index_entries
    _fts_index_entries = {}

    # initialize the database
    global _sqlite_path
    _sqlite_path = app.config.get( "SQLITE_PATH" )
    if not _sqlite_path:
        # FUDGE! We should be able to create a shared, in-memory database using this:
        #   file::XYZ:?mode=memory&cache=shared
        # but it doesn't seem to work (on Linux) and ends up creating a file with this name :-/
        # We manually create a temp file, which has to have the same name each time, so that we don't
        # keep creating a new database each time we start up. Sigh...
        _sqlite_path = os.path.join( tempfile.gettempdir(), "asl-rulebook2.searchdb" )
    # nb: always start with a fresh database (the index is rebuilt on every startup)
    if os.path.isfile( _sqlite_path ):
        os.unlink( _sqlite_path )
    logger.info( "Creating the search index: %s", _sqlite_path )
    conn = sqlite3.connect( _sqlite_path )
    # NOTE: Storing everything in a single table allows FTS to rank search results based on
    # the overall content, and also lets us do AND/OR queries across all searchable content.
    conn.execute(
        "CREATE VIRTUAL TABLE searchable USING fts5"
        " ( doc_id, sr_type, title, subtitle, content, rulerefs, tokenize='porter unicode61' )"
    )

    # load the searchable content
    logger.info( "Loading the search index..." )
    conn.execute( "DELETE FROM searchable" )
    curs = conn.cursor()
    for cdoc in webapp_content.content_docs.values():
        logger.info( "- Loading index file: %s", cdoc["_fname"] )
        nrows = 0
        for index_entry in cdoc["index"]:
            # nb: ruleref captions are stored joined together; _do_search() splits them apart again
            rulerefs = _RULEREF_SEPARATOR.join( r.get("caption","") for r in index_entry.get("rulerefs",[]) )
            # NOTE: We should really strip content before adding it to the search index, otherwise any HTML tags
            # will need to be included in search terms. However, this means that the content returned by a query
            # will be this stripped content. We could go back to the original data to get the original HTML content,
            # but that means we would lose the highlighting of search terms that SQLite gives us. We opt to insert
            # the original content, since none of it should contain HTML, anyway.
            curs.execute(
                "INSERT INTO searchable (doc_id,sr_type,title,subtitle,content,rulerefs) VALUES (?,?,?,?,?,?)", (
                cdoc["doc_id"], "index",
                index_entry.get("title"), index_entry.get("subtitle"), index_entry.get("content"), rulerefs
            ) )
            # remember which index entry this FTS row corresponds to (used by _do_search())
            _fts_index_entries[ curs.lastrowid ] = index_entry
            index_entry["_fts_rowid"] = curs.lastrowid
            nrows += 1
        conn.commit()
        logger.info( " - Loaded %s.", plural(nrows,"index entry","index entries"), )
    assert len(_fts_index_entries) == _get_row_count( conn, "searchable" )

    # load the search config
    load_search_config( startup_msgs, logger )
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
def load_search_config( startup_msgs, logger ):
    """Load the search config (search replacements, aliases and synonyms).

    startup_msgs: collects warnings for problems found during startup.
    logger: where progress messages are logged.
    """

    # initialize
    global _SEARCH_TERM_ADJUSTMENTS
    _SEARCH_TERM_ADJUSTMENTS = {}

    def add_search_term_adjustment( key, vals ):
        """Add a search term adjustment (a replacement string, or a set of aliases)."""
        # make sure everything is lower-case
        key = key.lower()
        if isinstance( vals, str ):
            vals = vals.lower()
        elif isinstance( vals, set ):
            vals = set( v.lower() for v in vals )
        else:
            # NOTE: The original code assert'ed a non-empty string here, which always
            # passes; assert False actually reports the bad alias type.
            assert False, "Unknown search alias type: {}".format( type(vals) )
        # add the new search term adjustment
        if key not in _SEARCH_TERM_ADJUSTMENTS:
            _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
        else:
            # found a multiple definition - try to do something sensible
            logger.warning( " - Duplicate search alias: %s\n- current aliases = %s\n- new aliases = %s", key,
                _SEARCH_TERM_ADJUSTMENTS[key], vals
            )
            if isinstance( _SEARCH_TERM_ADJUSTMENTS[key], str ):
                # the previous adjustment was a replacement - the new values overwrite it
                _SEARCH_TERM_ADJUSTMENTS[ key ] = vals
            else:
                # the previous adjustment was a set of aliases - merge the new values in
                assert isinstance( _SEARCH_TERM_ADJUSTMENTS[key], set )
                # NOTE: If the new value is a string, it must be wrapped in a list, otherwise
                # set.update() would add each of its *characters* as a separate alias.
                _SEARCH_TERM_ADJUSTMENTS[ key ].update(
                    vals if isinstance( vals, set ) else [ vals ]
                )

    def load_json_config( fname, log_desc, warn_msg ):
        """Load a JSON config file (returns None if it doesn't exist or couldn't be parsed)."""
        if not os.path.isfile( fname ):
            return None
        logger.info( "Loading %s: %s", log_desc, fname )
        try:
            with open( fname, "r", encoding="utf-8" ) as fp:
                return json.load( fp )
        except Exception as ex: #pylint: disable=broad-except
            startup_msgs.warning( warn_msg, str(ex) )
            return None

    # load the search replacements
    def load_search_replacements( fname, ftype ):
        """Load search term replacements from a config file."""
        data = load_json_config( fname, "search replacements",
            "Can't load {} search replacements.".format( ftype )
        )
        if data is None:
            return
        nitems = 0
        for key, val in data.items():
            if key.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", key, val )
            add_search_term_adjustment( key, val )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search replacement","search replacements") )
    load_search_replacements( make_config_path( "search-replacements.json" ), "default" )
    load_search_replacements( make_data_path( "search-replacements.json" ), "user" )

    # load the search aliases
    def load_search_aliases( fname, ftype ):
        """Load search term aliases from a config file."""
        data = load_json_config( fname, "search aliases",
            "Can't load {} search aliases.".format( ftype )
        )
        if data is None:
            return
        nitems = 0
        for keys, aliases in data.items():
            if keys.startswith( "_" ):
                continue # nb: ignore comments
            logger.debug( "- %s -> %s", keys, " ; ".join(aliases) )
            # nb: each key gets the aliases, plus itself, as its adjustment
            for key in keys.split( "/" ):
                add_search_term_adjustment( key, set( itertools.chain( aliases, [key] ) ) )
            nitems += 1
        # NOTE: The original code passed "search aliases" as both the singular and plural form.
        logger.info( "- Loaded %s.", plural(nitems,"search alias","search aliases") )
    load_search_aliases( make_config_path( "search-aliases.json" ), "default" )
    load_search_aliases( make_data_path( "search-aliases.json" ), "user" )

    # load the search synonyms
    def load_search_synonyms( fname, ftype ):
        """Load search term synonyms from a config file."""
        data = load_json_config( fname, "search synonyms",
            "Can't load {} search synonyms.".format( ftype )
        )
        if data is None:
            return
        nitems = 0
        for synonyms in data:
            if isinstance( synonyms, str ):
                continue # nb: ignore comments
            logger.debug( "- %s", " ; ".join(synonyms) )
            # nb: every synonym in the group maps to the full group
            synonyms = set( synonyms )
            for term in synonyms:
                add_search_term_adjustment( term, synonyms )
            nitems += 1
        logger.info( "- Loaded %s.", plural(nitems,"search synonym","search synonyms") )
    load_search_synonyms( make_config_path( "search-synonyms.json" ), "default" )
    load_search_synonyms( make_data_path( "search-synonyms.json" ), "user" )
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
def _get_row_count( conn, table_name ):
|
|
|
|
"""Get the number of rows in a table."""
|
|
|
|
cur = conn.execute( "SELECT count(*) FROM {}".format( table_name ) )
|
|
|
|
return cur.fetchone()[0]
|