Added integration with an eASLRB.

master
Pacman Ghost 4 years ago
parent 64141166f1
commit 922479ff57
  1. 3
      asl_articles/config/site.cfg.example
  2. 124
      asl_articles/search.py
  3. 54
      asl_articles/tests/test_search.py
  4. 3
      docker-compose.yml
  5. 7
      run-containers.sh
  6. 1
      web/src/SearchResults.css

@ -7,3 +7,6 @@ DB_CONN_STRING = ...
; Base directory for external documents.
EXTERNAL_DOCS_BASEDIR = ...
; Base URL for the eASLRB.
ASLRB_BASE_URL = ...

@ -27,6 +27,7 @@ _logger = logging.getLogger( "search" )
_SQLITE_FTS_SPECIAL_CHARS = "+-#':/.@$"
# NOTE: The column order defined here is important, since we have to access row results by column index.
_SEARCHABLE_COL_NAMES = [ "name", "name2", "description", "authors", "scenarios", "tags" ]
_get_publisher_vals = lambda p: get_publisher_vals( p, True )
@ -147,7 +148,9 @@ def search_publication( pub_id ):
results = [ get_publication_vals( pub, True, True ) ]
articles = sorted( pub.articles, key=get_article_sort_key )
for article in articles:
results.append( get_article_vals( article, True ) )
article = get_article_vals( article, True )
_create_aslrb_links( article )
results.append( article )
return jsonify( results )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -158,9 +161,11 @@ def search_article( article_id ):
article = Article.query.get( article_id )
if not article:
return jsonify( [] )
results = [ get_article_vals( article, True ) ]
if article.pub_id:
pub = Publication.query.get( article.pub_id )
article = get_article_vals( article, True )
_create_aslrb_links( article )
results = [ article ]
if article["pub_id"]:
pub = Publication.query.get( article["pub_id"] )
if pub:
results.append( get_publication_vals( pub, True, True ) )
return jsonify( results )
@ -293,7 +298,8 @@ def _do_fts_search( fts_query_string, col_names, results=None ): #pylint: disabl
# return highlighted versions of the content to the caller
fields = _FIELD_MAPPINGS[ owner_type ]
for col_no,col_name in enumerate(["name","name2","description"]):
assert _SEARCHABLE_COL_NAMES[:3] == [ "name", "name2", "description" ]
for col_no,col_name in enumerate(_SEARCHABLE_COL_NAMES[:3]):
field = fields.get( col_name )
if not field:
continue
@ -309,6 +315,10 @@ def _do_fts_search( fts_query_string, col_names, results=None ): #pylint: disabl
if row[7] and BEGIN_HILITE in row[7]:
result[ "tags!" ] = row[7].split( "\n" )
# create links to the eASLRB
if owner_type == "article":
_create_aslrb_links( result )
# add the result to the list
results.append( result )
@ -374,6 +384,110 @@ def _make_fts_query_string( query_string, search_aliases ):
# ---------------------------------------------------------------------
# Regex's that specify what a ruleid looks like.
# NOTE: A ruleid is taken to be an upper-case letter, up to 3 digits, a dot, 1-5 digits and an optional
# trailing letter, all delimited by word boundaries e.g. "A1.23" or "B.4".
_RULEID_REGEXES = [
    re.compile( r"\b[A-Z]\d{0,3}\.\d{1,5}[A-Za-z]?\b" ),
    # nb: while there are ruleid's like "C5", it's far more likely this is referring to a hex :-/
    #re.compile( r"\b[A-Z]\d{1,4}[A-Za-z]?\b" ),
]
def _create_aslrb_links( article ):
    """Replace ruleid's in an article's snippet with links to the eASLRB.

    The snippet is read from the "article_snippet!" entry if there is one, otherwise from "article_snippet",
    and the result is always written back to "article_snippet!". Nothing is done if ASLRB_BASE_URL
    has not been configured (in the app config, or failing that, the environment).
    """

    # check if links to the eASLRB have been enabled
    base_url = app.config.get( "ASLRB_BASE_URL", os.environ.get("ASLRB_BASE_URL") )
    if not base_url:
        return

    # figure out which version of the snippet to work with
    # NOTE(review): the "!" version is presumably the one with search highlighting - confirm.
    key = "article_snippet!" if "article_snippet!" in article else "article_snippet"
    snippet = article[ key ]

    # find ruleid's in the snippet and replace them with links to the ASLRB
    # NOTE: We work backwards through the matches, so that replacing one doesn't shift
    # the positions of those that are still to be done.
    for startpos, endpos, ruleid, caption in reversed( _find_aslrb_ruleids( snippet ) ):
        if ruleid:
            replacement = "<a href='{}#{}' class='aslrb' target='_blank'>{}</a>".format(
                base_url, ruleid, caption
            )
        else:
            # NOTE: We can get here when a manually-created link has no ruleid e.g. because the content
            # contains something that is incorrectly being detected as a ruleid, and the user has fixed it up.
            replacement = caption
        snippet = snippet[:startpos] + replacement + snippet[endpos:]

    article[ "article_snippet!" ] = snippet
def _find_aslrb_ruleids( val ): #pylint: disable=too-many-branches
    """Find ruleid's (and manually-created links) in a piece of text.

    Returns a list of [ startpos, endpos, ruleid, caption ] entries, sorted by start position, where:
    - val[startpos:endpos] is the text that should be replaced
    - ruleid is the rule to link to (empty for a manually-created link that has no ruleid)
    - caption is the text to show in place of the replaced text
    """

    # locate any manually-created links; format is "{:ruleid|caption:}"
    # NOTE: The ruleid is optional, so that if something is incorrectly being detected as a ruleid,
    # the user can disable the link by creating one of these with no ruleid.
    manual = list( re.finditer( r"{:(.*?)\|(.+?):}", val ) )
    def is_manual( target ):
        """Check if a match lies entirely within a manually-created link."""
        return any(
            target.start() >= mo.start() and target.end() <= mo.end()
            for mo in manual
        )

    # look for ruleid's
    matches = []
    for regex in _RULEID_REGEXES:
        for mo in regex.finditer( val ):
            if is_manual( mo ):
                continue # nb: ignore any ruleid's that are part of a manually-created link
            matches.append( mo )

    # FUDGE! Remove overlapping matches e.g. if we have "B1.23", we will have matches for "B1" and "B1.23".
    matches2, prev_mo = [], None
    matches.sort( key=lambda mo: mo.start() )
    for mo in matches:
        if prev_mo and mo.start() == prev_mo.start() and len(mo.group()) < len(prev_mo.group()):
            continue
        matches2.append( mo )
        prev_mo = mo

    # extract the start/end positions of each match, ruleid and caption
    matches = [
        [ mo.start(), mo.end(), mo.group(), mo.group() ]
        for mo in matches2
    ]

    # NOTE: If we have something like "C1.23-.45", we want to link to "C1.23",
    # but have the <a> tag wrap the whole thing.
    # NOTE: This won't work if the user searched for "C1.23", since it will be wrapped
    # in a highlight <span>.
    for match in matches:
        endpos = match[1]
        if endpos == len(val) or val[endpos] != "-":
            continue
        # count how many characters after the ruleid belong to the range (including the "-")
        nchars, allow_dot = 1, True
        while endpos + nchars < len(val):
            ch = val[ endpos + nchars ]
            if ch.isdigit():
                nchars += 1
            elif ch == "." and allow_dot:
                nchars += 1
                allow_dot = False
            else:
                break
        # NOTE: A "." at the very end is punctuation (e.g. the end of a sentence), not part of the range,
        # so we don't want to include it in the link e.g. "A1.23-45." should give us "A1.23-45".
        if val[ endpos + nchars - 1 ] == ".":
            nchars -= 1
        if nchars > 1:
            match[1] += nchars
            match[3] = val[ match[0] : match[1] ]

    # add any manually-created links
    for mo in manual:
        matches.append( [ mo.start(), mo.end(), mo.group(1), mo.group(2) ] )

    return sorted( matches, key=lambda m: m[0] )
# ---------------------------------------------------------------------
def init_search( session, logger ):
"""Initialize the search engine."""

@ -1,6 +1,6 @@
""" Test search operations. """
from asl_articles.search import _load_search_aliases, _make_fts_query_string
from asl_articles.search import _load_search_aliases, _make_fts_query_string, _find_aslrb_ruleids
from asl_articles.search import SEARCH_ALL
from asl_articles.tests.test_publishers import create_publisher, edit_publisher
@ -584,6 +584,58 @@ def test_make_fts_query_string():
# ---------------------------------------------------------------------
def test_aslrb_links():
    """Test detecting the ruleid's that will be turned into links to the ASLRB."""

    def check( snippet, expected ):
        """Check the ruleid's found in a snippet.

        Each expected entry is either a string (the ruleid and caption must both be that string,
        as must the matched text), or a ( ruleid, caption ) tuple.
        """
        found = _find_aslrb_ruleids( snippet )
        if not expected:
            assert found == []
            return
        assert len(found) == len(expected)
        for (startpos, endpos, ruleid, caption), exp in zip( found, expected ):
            if isinstance( exp, str ):
                assert exp == ruleid == caption
                assert exp == snippet[ startpos : endpos ]
                continue
            assert isinstance( exp, tuple )
            assert exp[0] == ruleid
            assert exp[1] == caption

    test_cases = [
        # test detecting ruleid's
        ( "A1.23", ["A1.23"] ),
        ( " A1.23 ", ["A1.23"] ),
        ( ".A1.23,", ["A1.23"] ),
        ( "xA1.23,", None ),
        ( "A1.23 B.4 C5. D6", ["A1.23","B.4"] ),
        ( "A1.23 B.4,C5.;D6", ["A1.23","B.4"] ),
        # test ruleid ranges
        ( "A1.23-", ["A1.23"] ),
        ( "A1.23-4", [ ("A1.23","A1.23-4") ] ),
        ( "A1.23-45", [ ("A1.23","A1.23-45") ] ),
        ( "A1.23-.6", [ ("A1.23","A1.23-.6") ] ),
        ( "A1.23-.6.7", [ ("A1.23","A1.23-.6") ] ),
        # test manually created links
        ( "A1.23 Z9.99", [ "A1.23", "Z9.99" ] ),
        ( "A1.23 {:D5.6|foo:} Z9.99", [ "A1.23", ("D5.6","foo"), "Z9.99" ] ),
        ( "A1.23 {:|foo:} Z9.99", [ "A1.23", ("","foo"), "Z9.99" ] ),
        # NOTE: Because the following manual link has no caption, it won't get detected as a manual link,
        # and so the ruleid is detected as a normal ruleid.
        ( "A1.23 {:D5.6|:} Z9.99", [ "A1.23", "D5.6", "Z9.99" ] ),
    ]
    for snippet, expected in test_cases:
        check( snippet, expected )
# ---------------------------------------------------------------------
def _do_test_search( query, expected ):
"""Run a search and check the results."""
results = do_search( query )

@ -12,6 +12,8 @@
# Similarly, EXTERNAL_DOCS_BASEDIR is the base directory for external documents that we want to link to,
# but it needs to be set even if it's not being used :-/
#
# Similarly, ASLRB_BASE_URL is the base URL for an external eASLRB.
#
# See the run-containers.sh script that manages all of this.
version: "3"
@ -36,3 +38,4 @@ services:
environment:
- DBCONN
- EXTERNAL_DOCS_BASEDIR
- ASLRB_BASE_URL

@ -3,7 +3,7 @@
# parse the command-line arguments
if [ -z "$1" ]; then
echo "Usage: `basename "$0"` <db-conn> <external-docs>"
echo "Usage: `basename "$0"` <db-conn> <external-docs> <aslrb-url>"
echo " Build and launch the \"asl-articles\" containers, using the specified database e.g."
echo " ~/asl-articles.db (path to a SQLite database)"
echo " postgresql://USER:PASS@host/dbname (database connection string)"
@ -11,6 +11,8 @@ if [ -z "$1" ]; then
echo
echo " If you want link articles to their original documents, specify a base directory for the documents."
echo
echo " If you want to have links to an eASLRB, specify its base URL."
echo
echo " The TAG env variable can also be set to specify which containers to run e.g."
echo " TAG=testing ./run.sh /tmp/asl-articles.db"
exit 0
@ -35,6 +37,9 @@ else
# FUDGE! This needs to be set, even if it's not being used :-/
export EXTERNAL_DOCS_BASEDIR=/dev/null
fi
if [ ! -z "$3" ]; then
export ASLRB_BASE_URL=$3
fi
# initialize
if [ "$TAG" == "testing" ]; then

@ -29,6 +29,7 @@
.search-result .content { padding: 2px 5px ; font-size: 90% ; }
.search-result .content p:not(:first-child) { margin-top: 0.25em ; }
.search-result .content ul p, .search-result .content ol p { margin-top: 0.1em ; }
.search-result .content a.aslrb { color: #804040 ; text-decoration: none ; border-bottom: 1px dotted #804040 ; }
.search-result .content .image { float: left ; margin: 0.25em 0.5em 0.5em 0 ; max-height: 8em ; max-width: 6em ; }
.search-result .content .collapsible { margin-top:0.5em ; font-size: 90% ; color: #333 ; }
.search-result .content .collapsible a { color: #333 ; text-decoration: none ; }

Loading…
Cancel
Save