Added search aliases.

master
Pacman Ghost 4 years ago
parent bf8549f861
commit b76fa3cd84
  1. 9
      asl_articles/config/app.cfg
  2. 43
      asl_articles/search.py
  3. 14
      asl_articles/tests/fixtures/search.json
  4. 26
      asl_articles/tests/test_search.py

@ -4,3 +4,12 @@
; https://github.com/lxml/lxml/blob/master/src/lxml/html/defs.py
;HTML_ATTR_WHITELIST =
;HTML_TAG_WHITELIST =
[Search aliases]
asl = Advanced Squad Leader
mmp = Multi-Man Publishing ; Multiman Publishing
ah = Avalon Hill
vftt = View From The Trenches
dftb = Dispatches From The Bunker
ch = Critical Hit
aslj = ASL Journal

@ -2,12 +2,15 @@
import os
import sqlite3
import configparser
import itertools
import tempfile
import re
import logging
from flask import request, jsonify
import asl_articles
from asl_articles import app, db
from asl_articles.models import Publisher, Publication, Article, Author, Scenario, get_model_from_table_name
from asl_articles.publishers import get_publisher_vals
@ -16,6 +19,7 @@ from asl_articles.articles import get_article_vals
from asl_articles.utils import decode_tags, to_bool
_search_index_path = None
_search_aliases = {}
_logger = logging.getLogger( "search" )
_SQLITE_FTS_SPECIAL_CHARS = "+-#':/."
@ -143,7 +147,7 @@ def _do_search(): #pylint: disable=too-many-locals,too-many-statements,too-many-
return jsonify( results )
# prepare the query
fts_query_string = _make_fts_query_string( query_string )
fts_query_string = _make_fts_query_string( query_string, _search_aliases )
_logger.debug( "FTS query string: %s", fts_query_string )
# NOTE: We would like to cache the connection, but SQLite connections can only be used
@ -201,7 +205,7 @@ def _do_search(): #pylint: disable=too-many-locals,too-many-statements,too-many-
return jsonify( results )
def _make_fts_query_string( query_string ):
def _make_fts_query_string( query_string, search_aliases ):
"""Generate the SQLite query string."""
# check if this looks like a raw FTS query
@ -235,6 +239,19 @@ def _make_fts_query_string( query_string ):
return '"{}"'.format(word) if has_special_char(word) else word
words = [ quote_word(w) for w in words ]
# handle search aliases
for i,word in enumerate(words):
word = word.lower()
if word.startswith( '"' ) and word.endswith( '"' ):
word = word[1:-1]
aliases = search_aliases.get( word )
if aliases:
aliases = [ quote_word( a ) for a in aliases ]
aliases.sort() # nb: so that tests will work reliably
words[i] = "({})".format(
" OR ".join( aliases )
)
# escape any special characters
words = [ w.replace("'","''") for w in words ]
@ -280,6 +297,28 @@ def init_search( session, logger ):
for article in session.query( Article ):
add_or_update_article( dbconn, article )
# load the search aliases
cfg = configparser.ConfigParser()
fname = os.path.join( asl_articles.config_dir, "app.cfg" )
_logger.debug( "Loading search aliases: %s", fname )
cfg.read( fname )
global _search_aliases
_search_aliases = _load_search_aliases( cfg.items( "Search aliases" ) )
def _load_search_aliases( aliases ):
    """Load the search aliases.

    `aliases` is an iterable of (key, value) pairs, where the value is a ";"-separated list
    of alternative terms for the key, e.g. ( "mmp", "Multi-Man Publishing ; Multiman Publishing" ).

    Returns a dict that maps every term (stripped and lower-cased) to the list of all the terms
    in its group: the key first, then the alternatives in the order they were given.
    All the terms in a group share the same list object.
    """
    search_aliases = {}
    for row in aliases:
        # normalize the terms for this group
        # NOTE: We drop blank terms (e.g. from a trailing or doubled ";"), since an empty string
        # is never a meaningful alias, and terms repeated within the same row, since they would
        # otherwise be reported as duplicates of themselves.
        vals = []
        for val in itertools.chain( [row[0]], row[1].split(";") ):
            val = val.strip().lower()
            if val and val not in vals:
                vals.append( val )
        _logger.debug( "- %s", vals )
        for v in vals:
            if v in search_aliases:
                # nb: the term was already defined by an earlier row - the first definition wins
                _logger.warning( "Found duplicate search alias: %s", v )
                continue
            search_aliases[ v ] = vals
    return search_aliases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def add_or_update_publisher( dbconn, publ ):

@ -16,20 +16,20 @@
{ "pub_id": 10,
"pub_name": "ASL Journal",
"pub_edition": 4,
"pub_tags": "aslj",
"pub_tags": "#aslj",
"publ_id": 1
},
{ "pub_id": 11,
"pub_name": "ASL Journal",
"pub_edition": 5,
"pub_tags": "aslj",
"pub_tags": "#aslj",
"publ_id": 1
},
{ "pub_id": 12,
"pub_name": "View From The Trenches",
"pub_edition": 100,
"pub_description": "Fantastic 100th issue!",
"pub_tags": "vftt",
"pub_tags": "#vftt",
"publ_id": 2
}
],
@ -39,27 +39,27 @@
"article_title": "Hit 'Em High, Or Hit 'Em Low",
"article_subtitle": "Some things about light mortars you might like to know",
"article_snippet": "Light mortars in ASL can be game winners depending on what they can shoot at, how low you roll and how often you get rate.",
"article_tags": "aslj\nmortars",
"article_tags": "#aslj\n#mortars",
"pub_id": 10
},
{ "article_id": 501,
"article_title": "'Bolts From Above",
"article_snippet": "Infantry often found itself battling the elements as well as the enemy. ASL has made provisions for the inclusion of inclement weather conditions, such as rain and snow.",
"article_tags": "aslj\nweather",
"article_tags": "#aslj\n#weather",
"pub_id": 10
},
{ "article_id": 510,
"article_title": "The Jungle Isn't Neutral",
"article_subtitle": "Up close and personal in the PTO",
"article_snippet": "British Lieutenant Colonel F. Spencer Chapman wrote a memoir of jungle fighting in Malaysia titled \"The Jungle Is Neutral.\"",
"article_tags": "aslj\nPTO",
"article_tags": "#aslj\n#PTO",
"pub_id": 11
},
{ "article_id": 511,
"article_title": "Hunting DUKWs and Buffalos",
"article_subtitle": "Scenario Analysis: HS17 \"Water Foul\"",
"article_snippet": "This scenario features a late-war Canadian assault on a German-occupied flooded town - an unusual tactical challenge in ASL.",
"article_tags": "aslj",
"article_tags": "#aslj",
"pub_id": 11
},
{ "article_id": 520,

@ -1,6 +1,6 @@
""" Test search operations. """
from asl_articles.search import _make_fts_query_string
from asl_articles.search import _load_search_aliases, _make_fts_query_string
from asl_articles.tests.test_publishers import create_publisher, edit_publisher
from asl_articles.tests.test_publications import create_publication, edit_publication
@ -135,11 +135,11 @@ def test_search_tags( webdriver, flask_app, dbconn ):
init_tests( webdriver, flask_app, dbconn, fixtures="search.json" )
# search for some publication tags
_do_test_search( "vftt", ["View From The Trenches (100)"] )
_do_test_search( "#vftt", ["View From The Trenches (100)"] )
# search for some article tags
_do_test_search( "pto", ["The Jungle Isn't Neutral"] )
_do_test_search( "aslj", [
_do_test_search( "#aslj", [
"ASL Journal (4)", "ASL Journal (5)",
"'Bolts From Above", "The Jungle Isn't Neutral", "Hunting DUKWs and Buffalos", "Hit 'Em High, Or Hit 'Em Low"
] )
@ -168,7 +168,7 @@ def test_multiple_search_results( webdriver, flask_app, dbconn ):
init_tests( webdriver, flask_app, dbconn, fixtures="search.json" )
# do a search
_do_test_search( "asl", [
_do_test_search( "#asl", [
"View From The Trenches",
"ASL Journal (4)", "ASL Journal (5)",
"Hunting DUKWs and Buffalos", "'Bolts From Above", "Hit 'Em High, Or Hit 'Em Low"
@ -217,7 +217,7 @@ def test_highlighting( webdriver, flask_app, dbconn ):
"View From The Trenches (100)",
["View"], ["Fantastic"], []
)
check_publication_highlights( "vftt",
check_publication_highlights( "#vftt",
"View From The Trenches (100)",
[], [], ["vftt"]
)
@ -274,8 +274,13 @@ def test_highlighting( webdriver, flask_app, dbconn ):
def test_make_fts_query_string():
"""Test generating FTS query strings."""
# initialize
search_aliases = _load_search_aliases( [
( "mmp", "Multi-Man Publishing ; Multiman Publishing" )
] )
def do_test( query, expected ):
assert _make_fts_query_string(query) == expected
assert _make_fts_query_string( query, search_aliases ) == expected
# test some query strings
do_test( "", "" )
@ -327,6 +332,15 @@ def test_make_fts_query_string():
do_test( "foo OR bar", "foo OR bar" )
do_test( "(a OR b)", "(a OR b)" )
# test search aliases
do_test( "MMP", '("multi-man publishing" OR "multiman publishing" OR mmp)' )
do_test( "Xmmp", "Xmmp" )
do_test( "mmpX", "mmpX" )
do_test( "multi-man publishing", '"multi-man" AND publishing' )
do_test( 'abc "multi-man publishing" xyz',
'abc AND ("multi-man publishing" OR "multiman publishing" OR mmp) AND xyz'
)
# ---------------------------------------------------------------------
def _do_test_search( query, expected ):

Loading…
Cancel
Save