Allow author names to be aliased.

master
Pacman Ghost 4 years ago
parent 59f9c8ccd1
commit 6b44b8fd33
  1. 6
      asl_articles/articles.py
  2. 42
      asl_articles/config/author-aliases.cfg.example
  3. 4
      asl_articles/publications.py
  4. 4
      asl_articles/publishers.py
  5. 127
      asl_articles/search.py
  6. 3
      asl_articles/tests/fixtures/author-aliases.cfg
  7. 31
      asl_articles/tests/fixtures/author-aliases.json
  8. 34
      asl_articles/tests/test_search.py
  9. 16
      asl_articles/utils.py
  10. 1
      docker-compose.yml
  11. 20
      run-containers.sh

@ -99,7 +99,7 @@ def create_article():
_save_image( article, updated )
db.session.commit()
_logger.debug( "- New ID: %d", new_article_id )
search.add_or_update_article( None, article )
search.add_or_update_article( None, article, None )
# generate the response
extras = { "article_id": new_article_id }
@ -251,7 +251,7 @@ def update_article():
_save_scenarios( article, updated )
_save_image( article, updated )
db.session.commit()
search.add_or_update_article( None, article )
search.add_or_update_article( None, article, None )
# generate the response
extras = {}
@ -287,7 +287,7 @@ def update_article_rating():
abort( 404 )
article.article_rating = new_rating
db.session.commit()
search.add_or_update_article( None, article )
search.add_or_update_article( None, article, None )
return "OK"

@ -0,0 +1,42 @@
[Author aliases]
Andrew Hershey = Andrew H. Hershey
Andy Goldin = CPT Andy Goldin
Bob Medrow = Robert Medrow
Bruce Bakken = Bruce E. Bakken
Carl Fago = Carl D. Fago
Charlie Kibler = Charles Kibler
Chas Smith = Captain Chas Smith
Chris Doary = Chris "Clouseaux" Doary
Derek Tocher = Derek A. Tocher
Ed Beekman = Edward Beekman
Jeff Shields = Jeffrey Shields
Joe Suchar = Joseph Suchar
John Slotwinski = Dr. John Slotwinski
Jon Mishcon = M. J. Mishcon = M. Johnathon Mishcon
JR Van Mechelen = Jonathan Van Mechelen
Mark Nixon = Mark C. Nixon
Mark Walz = Mark S. Walz
Matt Cicero = Matthew Cicero
Matt Shostak = Matthew Shostak
Michael Dorosh = Michael A. Dorosh
Mike Clay = Dr. Michael Clay
Mike Conklin = Michael Conklin = Michael "6+3" Conklin
Mike Licari = Michael Licari = Michael J. Licari
Paul Venard = Paul J. Venard
Ray Tapio = Raymond J. Tapio
Rex Martin = Rex A. Martin
Robert Seulowitz = Dr. Rob Seulowitz
Robert Walden = Bob Walden
Rob Modarelli = Robert Modarelli = Captain Robert Modarelli III
Roy Connelly = Roy W. Connelly
Russ Bunten = Russell Bunten
Sam Rockwell = Samuel Rockwell
Scott Jackson = Scott "Stonewall" Jackson
Scott Thompson = Scott E. Thompson
Seth Fancher = Seth W. Fancher
Steve Linton = Steven Linton
Steve Pleva = Steven J. Pleva = Steve "Gor Gor" Pleva
Steve Swann = Steve C. Swann = Steven Swann = Steven C. Swann
Tom Huntington = Thomas Huntington
Trevor Edwards = Trev Edwards

@ -112,7 +112,7 @@ def create_publication():
_save_image( pub, updated )
db.session.commit()
_logger.debug( "- New ID: %d", pub.pub_id )
search.add_or_update_publication( None, pub )
search.add_or_update_publication( None, pub, None )
# generate the response
extras = { "pub_id": pub.pub_id }
@ -209,7 +209,7 @@ def update_publication():
pub_id, ", ".join(str(k) for k in articles)
)
db.session.commit()
search.add_or_update_publication( None, pub )
search.add_or_update_publication( None, pub, None )
# generate the response
extras = {}

@ -85,7 +85,7 @@ def create_publisher():
_save_image( publ, updated )
db.session.commit()
_logger.debug( "- New ID: %d", publ.publ_id )
search.add_or_update_publisher( None, publ )
search.add_or_update_publisher( None, publ, None )
# generate the response
extras = { "publ_id": publ.publ_id }
@ -139,7 +139,7 @@ def update_publisher():
vals[ "time_updated" ] = datetime.datetime.now()
apply_attrs( publ, vals )
db.session.commit()
search.add_or_update_publisher( None, publ )
search.add_or_update_publisher( None, publ, None )
# generate the response
extras = {}

@ -2,7 +2,6 @@
import os
import sqlite3
import configparser
import itertools
import random
import tempfile
@ -18,11 +17,12 @@ from asl_articles.models import Publisher, Publication, Article, Author, Scenari
from asl_articles.publishers import get_publisher_vals
from asl_articles.publications import get_publication_vals, get_publication_sort_key
from asl_articles.articles import get_article_vals, get_article_sort_key
from asl_articles.utils import decode_tags, to_bool
from asl_articles.utils import AppConfigParser, decode_tags, to_bool
_search_index_path = None
_search_aliases = {}
_search_weights = {}
_author_aliases = {}
_logger = logging.getLogger( "search" )
_SQLITE_FTS_SPECIAL_CHARS = "+-#':/.@$"
@ -67,17 +67,17 @@ class SearchDbConn:
# ---------------------------------------------------------------------
def _get_authors( article ):
def _get_authors( article, session ):
"""Return the searchable authors for an article."""
query = db.session.query( Author, ArticleAuthor ) \
query = (session or db.session).query( Author, ArticleAuthor ) \
.filter( ArticleAuthor.article_id == article.article_id ) \
.join( Author, ArticleAuthor.author_id == Author.author_id ) \
.order_by( ArticleAuthor.seq_no )
return "\n".join( a[0].author_name for a in query )
def _get_scenarios( article ):
def _get_scenarios( article, session ):
"""Return the searchable scenarios for an article."""
query = db.session.query( Scenario, ArticleScenario ) \
query = (session or db.session).query( Scenario, ArticleScenario ) \
.filter( ArticleScenario.article_id == article.article_id ) \
.join( Scenario, ArticleScenario.scenario_id == Scenario.scenario_id ) \
.order_by( ArticleScenario.seq_no )
@ -97,11 +97,11 @@ def _get_tags( tags ):
_FIELD_MAPPINGS = {
"publisher": { "name": "publ_name", "description": "publ_description" },
"publication": { "name": "pub_name", "description": "pub_description",
"tags": lambda pub: _get_tags( pub.pub_tags )
"tags": lambda pub,_: _get_tags( pub.pub_tags )
},
"article": { "name": "article_title", "name2": "article_subtitle", "description": "article_snippet",
"authors": _get_authors, "scenarios": _get_scenarios,
"tags": lambda article: _get_tags( article.article_tags ),
"tags": lambda article,_: _get_tags( article.article_tags ),
"rating": "article_rating"
}
}
@ -175,11 +175,19 @@ def search_article( article_id ):
@app.route( "/search/author/<author_id>", methods=["POST","GET"] )
def search_author( author_id ):
"""Search for an author."""
author = Author.query.get( author_id )
if not author:
try:
author_id = int( author_id )
except ValueError:
return jsonify( [] )
author_ids = _author_aliases.get( author_id, [author_id] )
authors = Author.query.filter( Author.author_id.in_( author_ids ) ).all()
if not authors:
return jsonify( [] )
author_name = '"{}"'.format( author.author_name.replace( '"', '""' ) )
return _do_search( author_name, [ "authors" ] )
author_names = [
'"{}"'.format( a.author_name.replace( '"', '""' ) )
for a in authors
]
return _do_search( " OR ".join(author_names), [ "authors" ] )
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -402,6 +410,8 @@ def _create_aslrb_links( article ):
snippet = article[ "article_snippet!" ]
else:
snippet = article[ "article_snippet" ]
if not snippet:
return
def make_link( startpos, endpos, ruleid, caption ):
nonlocal snippet
@ -524,31 +534,28 @@ def init_search( session, logger ):
logger.debug( "Loading the search index..." )
logger.debug( "- Loading publishers." )
for publ in session.query( Publisher ).order_by( Publisher.time_created.desc() ):
add_or_update_publisher( dbconn, publ )
add_or_update_publisher( dbconn, publ, session )
logger.debug( "- Loading publications." )
for pub in session.query( Publication ).order_by( Publication.time_created.desc() ):
add_or_update_publication( dbconn, pub )
add_or_update_publication( dbconn, pub, session )
logger.debug( "- Loading articles." )
for article in session.query( Article ).order_by( Article.time_created.desc() ):
add_or_update_article( dbconn, article )
add_or_update_article( dbconn, article, session )
# load the search aliases
cfg = configparser.ConfigParser()
fname = os.path.join( asl_articles.config_dir, "app.cfg" )
_logger.debug( "Loading search aliases: %s", fname )
cfg.read( fname )
cfg = AppConfigParser( fname )
global _search_aliases
def get_section( section_name ):
try:
return cfg.items( section_name )
except configparser.NoSectionError:
return []
_search_aliases = _load_search_aliases( get_section("Search aliases"), get_section("Search aliases 2") )
_search_aliases = _load_search_aliases(
cfg.get_section( "Search aliases" ),
cfg.get_section( "Search aliases 2" )
)
# load the search weights
_logger.debug( "Loading search weights:" )
global _search_weights
for row in get_section( "Search weights" ):
for row in cfg.get_section( "Search weights" ):
if row[0] not in _SEARCHABLE_COL_NAMES:
asl_articles.startup.log_startup_msg( "warning",
"Unknown search weight field: {}", row[0],
@ -564,6 +571,25 @@ def init_search( session, logger ):
logger = _logger
)
# load the author aliases
# NOTE: These should really be stored in the database, but the UI would be so insanely hairy,
# we just keep them in a text file and let the user manage them manually :-/
global _author_aliases
fname = os.path.join( asl_articles.config_dir, "author-aliases.cfg" )
if os.path.isfile( fname ):
_logger.debug( "Loading author aliases: %s", fname )
cfg = AppConfigParser( fname )
_author_aliases = _load_author_aliases( cfg.get_section("Author aliases"), session, False )
# NOTE: We load the test aliases here as well (the test suite can't mock them because
# the Flask server might be running in a different process).
fname = os.path.join( os.path.split(__file__)[0], "tests/fixtures/author-aliases.cfg" )
if os.path.isfile( fname ):
_logger.debug( "Loading test author aliases: %s", fname )
cfg = AppConfigParser( fname )
_author_aliases.update(
_load_author_aliases( cfg.get_section("Author aliases"), session, True )
)
def _load_search_aliases( aliases, aliases2 ):
"""Load the search aliases."""
@ -595,33 +621,70 @@ def _load_search_aliases( aliases, aliases2 ):
return search_aliases
def _load_author_aliases( aliases, session, silent ):
"""Load the author aliases."""
# initialize
if not session:
session = db.session
# load the author aliases
author_aliases = {}
for row in aliases:
vals = itertools.chain( [row[0]], row[1].split("=") )
vals = [ v.strip() for v in vals ]
authors = []
for author_name in vals:
author = session.query( Author ).filter(
Author.author_name == author_name
).one_or_none()
if author:
authors.append( author )
else:
if not silent:
asl_articles.startup.log_startup_msg( "warning",
"Unknown author for alias: {}", author_name,
logger = _logger
)
if len(authors) <= 1:
continue
_logger.debug( "- %s", " ; ".join( str(a) for a in authors ) )
authors = [ a.author_id for a in authors ]
for author_id in authors:
author_aliases[ author_id ] = authors
return author_aliases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def add_or_update_publisher( dbconn, publ ):
def add_or_update_publisher( dbconn, publ, session ):
"""Add/update a publisher in the search index."""
_do_add_or_update_searchable( dbconn, "publisher",
_make_publisher_key(publ), publ
_make_publisher_key(publ), publ,
session
)
def add_or_update_publication( dbconn, pub ):
def add_or_update_publication( dbconn, pub, session ):
"""Add/update a publication in the search index."""
_do_add_or_update_searchable( dbconn, "publication",
_make_publication_key(pub.pub_id), pub
_make_publication_key(pub.pub_id), pub,
session
)
def add_or_update_article( dbconn, article ):
def add_or_update_article( dbconn, article, session ):
"""Add/update an article in the search index."""
_do_add_or_update_searchable( dbconn, "article",
_make_article_key(article.article_id), article
_make_article_key(article.article_id), article,
session
)
def _do_add_or_update_searchable( dbconn, owner_type, owner, obj ):
def _do_add_or_update_searchable( dbconn, owner_type, owner, obj, session ):
"""Add or update a record in the search index."""
# prepare the fields
fields = _FIELD_MAPPINGS[ owner_type ]
vals = {
f: getattr( obj,fields[f] ) if isinstance( fields[f], str ) else fields[f]( obj )
f: getattr( obj, fields[f] ) if isinstance( fields[f], str ) else fields[f]( obj, session )
for f in fields
}
# NOTE: We used to strip HTML here, but we prefer to see formatted content

@ -0,0 +1,3 @@
[Author aliases]
Chuck Jones = Charles M. Jones = Charles Martin Jones
Joe Blow = Joseph Blow

@ -0,0 +1,31 @@
{
"article": [
{ "article_id": 101, "article_title": "By Charles M. Jones" },
{ "article_id": 102, "article_title": "By Chuck Jones" },
{ "article_id": 103, "article_title": "By Charles Martin Jones" },
{ "article_id": 104, "article_title": "By Joseph Blow" },
{ "article_id": 105, "article_title": "By Joe Blow" },
{ "article_id": 106, "article_title": "By John Doe" }
],
"author": [
{ "author_id": 1, "author_name": "Charles M. Jones" },
{ "author_id": 2, "author_name": "Joseph Blow" },
{ "author_id": 3, "author_name": "Chuck Jones" },
{ "author_id": 4, "author_name": "Joe Blow" },
{ "author_id": 5, "author_name": "Charles Martin Jones" },
{ "author_id": 6, "author_name": "John Doe" }
],
"article_author": [
{ "seq_no": 1, "article_id": 101, "author_id": 1 },
{ "seq_no": 1, "article_id": 102, "author_id": 3 },
{ "seq_no": 1, "article_id": 103, "author_id": 5 },
{ "seq_no": 1, "article_id": 104, "author_id": 2 },
{ "seq_no": 1, "article_id": 105, "author_id": 4 },
{ "seq_no": 1, "article_id": 106, "author_id": 6 }
]
}

@ -506,6 +506,40 @@ def test_special_searches( webdriver, flask_app, dbconn ):
# ---------------------------------------------------------------------
def test_author_aliases( webdriver, flask_app, dbconn ):
    """Test author aliases."""

    # initialize
    # NOTE: The author aliases table can't be monkeypatched, because the Flask server
    # might be remote and not under our control (e.g. in a Docker container). Instead,
    # the aliases we need are defined in a test config file, which is always loaded.
    init_tests( webdriver, flask_app, dbconn, fixtures="author-aliases.json" )

    def do_test( author_names ):
        # clicking on any author in the alias group should show the articles for all of them
        expected = { "By {}".format( name ) for name in author_names }
        for name in author_names:
            # search for the author's own article
            results = do_search( '"{}"'.format( name ) )
            assert len(results) == 1
            # click on the author's name in the search result
            author_elems = find_children( ".author", results[0] )
            assert len(author_elems) == 1
            author_elems[0].click()
            # check that the articles for all the aliased names were found
            wait_for( 2, lambda: set( get_search_result_names() ) == expected )

    # test author aliases (the last group is an author who has no aliases)
    alias_groups = [
        [ "Charles M. Jones", "Chuck Jones", "Charles Martin Jones" ],
        [ "Joseph Blow", "Joe Blow" ],
        [ "John Doe" ],
    ]
    for alias_group in alias_groups:
        do_test( alias_group )
# ---------------------------------------------------------------------
def test_make_fts_query_string():
"""Test generating FTS query strings."""

@ -1,6 +1,7 @@
""" Helper utilities. """
import re
import configparser
import typing
import itertools
import logging
@ -196,6 +197,21 @@ def decode_tags( tags ):
# ---------------------------------------------------------------------
class AppConfigParser():
    """Thin wrapper around the standard ConfigParser."""

    def __init__( self, fname ):
        """Load the specified config file."""
        parser = configparser.ConfigParser()
        # NOTE: ConfigParser lower-cases the keys by default, so we install
        # a pass-through transform to preserve their case.
        parser.optionxform = str
        parser.read( fname )
        self._configparser = parser

    def get_section( self, section_name ):
        """Return a section's (key, value) pairs (or an empty list if there is no such section)."""
        try:
            items = self._configparser.items( section_name )
        except configparser.NoSectionError:
            items = []
        return items
# ---------------------------------------------------------------------
def apply_attrs( obj, vals ):
"""Update an object's attributes."""
for k,v in vals.items():

@ -35,6 +35,7 @@ services:
volumes:
- $SQLITE:/data/sqlite.db
- $EXTERNAL_DOCS_BASEDIR:/data/docs/
- $AUTHOR_ALIASES:/app/asl_articles/config/author-aliases.cfg
environment:
- DBCONN
- EXTERNAL_DOCS_BASEDIR

@ -14,6 +14,7 @@ function print_help {
echo " Note that the database server address is relative to the container i.e. NOT \"localhost\"."
echo " -e --extdocs Base directory for external documents (to allow articles to link to them)."
echo " -r --aslrb Base URL for an eASLRB."
echo " -a --author-aliases Author aliases config file (see config/author-aliases.cfg.example)."
echo
echo " The TAG env variable can also be set to specify which containers to run e.g."
echo " TAG=testing `basename "$0"` /tmp/asl-articles.db"
@ -28,6 +29,7 @@ export DBCONN=
export SQLITE=
export EXTERNAL_DOCS_BASEDIR=
export ASLRB_BASE_URL=
export AUTHOR_ALIASES=
export ENABLE_TESTS=
# parse the command-line arguments
@ -35,7 +37,7 @@ if [ $# -eq 0 ]; then
print_help
exit 0
fi
params="$(getopt -o t:d:e:r:h -l tag:,dbconn:,extdocs:,aslrb:,help --name "$0" -- "$@")"
params="$(getopt -o t:d:e:r:a:h -l tag:,dbconn:,extdocs:,aslrb:,author-aliases:,help --name "$0" -- "$@")"
if [ $? -ne 0 ]; then exit 1; fi
eval set -- "$params"
while true; do
@ -52,6 +54,9 @@ while true; do
-r | --aslrb )
ASLRB_BASE_URL=$2
shift 2 ;;
-a | --author-aliases )
AUTHOR_ALIASES=$2
shift 2 ;;
-h | --help )
print_help
exit 0 ;;
@ -90,7 +95,7 @@ fi
# check the external documents directory
if [ -n "$EXTERNAL_DOCS_BASEDIR" ]; then
if [ ! -d "$EXTERNAL_DOCS_BASEDIR" ]; then
echo "Invalid document base directory: $EXTERNAL_DOCS_BASEDIR"
echo "Can't find the document base directory: $EXTERNAL_DOCS_BASEDIR"
exit 1
fi
else
@ -98,6 +103,17 @@ else
EXTERNAL_DOCS_BASEDIR=/dev/null
fi
# check the author aliases
if [ -n "$AUTHOR_ALIASES" ]; then
if [ ! -f "$AUTHOR_ALIASES" ]; then
echo "Can't find the author aliases config file: $AUTHOR_ALIASES"
exit 1
fi
else
# FUDGE! This needs to be set, even if it's not being used :-/
AUTHOR_ALIASES=/dev/null
fi
# build the containers
echo Building the \"$TAG\" containers...
docker-compose build --build-arg ENABLE_TESTS=$ENABLE_TESTS 2>&1 \

Loading…
Cancel
Save