Handle quoted words in author names.

master
Pacman Ghost 2 years ago
parent 11c8f0dced
commit 51ff9e960b
  1. 14
      asl_articles/search.py
  2. 11
      asl_articles/tests/test_search.py

@ -410,20 +410,24 @@ def _make_fts_query_string( query_string, search_aliases ): #pylint: disable=too
if is_raw_query:
return [ val.strip() ]
tokens = []
DQUOTE_MARKER = "<!~!>"
for word in val.split():
# FUDGE! It's difficult to figure out if we have a multi-word quoted phrase when the query string
# contains nested quotes, so we hack around this by temporarily removing the inner quotes.
word = word.replace( '""', DQUOTE_MARKER )
if len(tokens) > 0:
if tokens[-1].startswith( '"' ) and not tokens[-1].endswith( '"' ):
# the previous token is a quoted phrase, continue it
# the previous token is the start of a quoted phrase - continue it
tokens[-1] += " " + word
continue
if not tokens[-1].startswith( '"' ) and word.endswith( '"' ):
tokens.append( quote( word[:-1] ) )
continue
tokens.append( quote( word ) )
if len(tokens) > 0 and tokens[-1].startswith( '"' ) and not tokens[-1].endswith( '"' ):
# we have an unterminated quoted phrase, terminate it
tokens[-1] += '"'
return [ t for t in tokens if t ]
return [
t.replace( DQUOTE_MARKER, '""' )
for t in tokens if t
]
# split the query string into parts (alias replacement texts, and everything else)
parts, pos = [], 0

@ -588,6 +588,17 @@ def test_make_fts_query_string():
do_test( '"Mr. Jones"', '"Mr. Jones"' )
do_test( 'foo "Mr. Jones" bar', 'foo AND "Mr. Jones" AND bar' )
# test nested quoted phrases
# NOTE: This is important since searching for an author wraps their name in double quotes,
# so we need to be able to handle a quoted phrase (e.g. a nickname) within the name.
do_test( 'Joseph "Joey" Blow', 'Joseph AND "Joey" AND Blow' )
do_test( 'Joseph "Joey Joe" Blow', 'Joseph AND "Joey Joe" AND Blow' )
do_test( 'Joseph ""Joey"" Blow', 'Joseph AND ""Joey"" AND Blow' )
# NOTE: This one doesn't work properly, but no-one is going to be doing this :-/
# do_test( 'Joseph ""Joey Joe"" Blow', 'Joseph AND ""Joey Joe"" AND Blow' )
do_test( '"Joseph ""Joey"" Blow"', '"Joseph ""Joey"" Blow"' )
do_test( '"Joseph ""Joey Joe"" Blow"', '"Joseph ""Joey Joe"" Blow"' )
# test some incorrectly quoted phrases
do_test( '"', '' )
do_test( ' " " " ', '' )

Loading…
Cancel
Save