Allow fields to be weighted when doing searches.

master
Pacman Ghost 4 years ago
parent 9d4572d3d8
commit 1c074703e5
  1. 8
      asl_articles/config/app.cfg
  2. 25
      asl_articles/search.py

@ -5,6 +5,14 @@
;HTML_ATTR_WHITELIST =
;HTML_TAG_WHITELIST =
[Search weights]
; This section defines the relative weights of the searchable fields (see _SEARCHABLE_COL_NAMES).
; Each hit in a field scores 1 point, unless specified otherwise here.
tags = 10
name = 5
name2 = 3
authors = 5
[Search aliases]
; This section defines search term aliases.
; For example, the entry "a = b ; c" means that searching for "a" will become "( a OR b OR c )".

@ -22,6 +22,7 @@ from asl_articles.utils import decode_tags, to_bool
_search_index_path = None
_search_aliases = {}
_search_weights = {}
_logger = logging.getLogger( "search" )
_SQLITE_FTS_SPECIAL_CHARS = "+-#':/.@$"
@ -252,15 +253,22 @@ def _do_fts_search( fts_query_string, col_names, results=None ): #pylint: disabl
# in the same thread they were created in.
with SearchDbConn() as dbconn:
# generate the search weights
weights = []
weights.append( 0.0 ) # nb: this is for the "owner" column
for col_name in _SEARCHABLE_COL_NAMES:
weights.append( _search_weights.get( col_name, 1.0 ) )
# run the search
hilites = [ "", "" ] if no_hilite else [ BEGIN_HILITE, END_HILITE ]
def highlight( n ):
return "highlight( searchable, {}, '{}', '{}' )".format(
n, hilites[0], hilites[1]
)
sql = "SELECT owner, rank, {}, {}, {}, {}, {}, {}, rating FROM searchable" \
sql = "SELECT owner, bm25(searchable,{}) AS rank, {}, {}, {}, {}, {}, {}, rating FROM searchable" \
" WHERE searchable MATCH ?" \
" ORDER BY rating DESC, rank".format(
",".join( str(w) for w in weights ),
highlight(1), highlight(2), highlight(3), highlight(4), highlight(5), highlight(6)
)
match = "{{ {} }}: {}".format(
@ -387,6 +395,8 @@ def init_search( session, logger ):
# NOTE: We would like to make "owner" the primary key, but FTS doesn't support primary keys
# (nor UNIQUE constraints), so we have to manage this manually :-(
# IMPORTANT: The column order is important here, since we use the column index to generate
# the bm25() clause when doing searches.
dbconn.conn.execute(
"CREATE VIRTUAL TABLE searchable USING fts5"
" ( owner, {}, rating, tokenize='porter unicode61' )".format(
@ -421,6 +431,19 @@ def init_search( session, logger ):
return []
_search_aliases = _load_search_aliases( get_section("Search aliases"), get_section("Search aliases 2") )
# load the search weights
_logger.debug( "Loading search weights:" )
global _search_weights
for row in get_section( "Search weights" ):
if row[0] not in _SEARCHABLE_COL_NAMES:
_logger.warning( "- Unknown search weight field: %s", row[0] )
continue
try:
_search_weights[ row[0] ] = float( row[1] )
_logger.debug( "- %s = %s", row[0], row[1] )
except ValueError:
_logger.warning( "- Invalid search weight for \"%s\": %s", row[0], row[1] )
def _load_search_aliases( aliases, aliases2 ):
"""Load the search aliases."""

Loading…
Cancel
Save