From 1963558ba97d7080902403eded5ce890e1e99516 Mon Sep 17 00:00:00 2001 From: Taka Date: Sat, 14 Nov 2020 17:07:16 +1100 Subject: [PATCH] Download the scenario index files continuously. --- vasl_templates/webapp/downloads.py | 134 ++++++++++++---------- vasl_templates/webapp/scenarios.py | 49 +++++--- vasl_templates/webapp/static/roar.js | 38 +++--- vasl_templates/webapp/static/scenarios.js | 60 +++++++--- 4 files changed, 170 insertions(+), 111 deletions(-) diff --git a/vasl_templates/webapp/downloads.py b/vasl_templates/webapp/downloads.py index 53b7c45..57e63d4 100644 --- a/vasl_templates/webapp/downloads.py +++ b/vasl_templates/webapp/downloads.py @@ -7,6 +7,7 @@ import os import threading import json import urllib.request +import urllib.error import time import datetime import tempfile @@ -18,6 +19,8 @@ from vasl_templates.webapp.utils import parse_int _registry = set() _logger = logging.getLogger( "downloads" ) +_etags = {} + # --------------------------------------------------------------------- class DownloadedFile: @@ -92,68 +95,75 @@ class DownloadedFile: """Download fresh copies of each file.""" #pylint: disable=protected-access - # process each DownloadedFile - for df in _registry: - - # check if we should simulate slow downloads - delay = parse_int( app.config.get( "DOWNLOADED_FILES_DELAY" ) ) - if delay: - _logger.debug( "Simulating a slow download for the %s file: delay=%s", df.key, delay ) - time.sleep( delay ) - - # get the download URL - url = app.config.get( "{}_DOWNLOAD_URL".format( df.key.upper() ), df.url ) - if os.path.isfile( url ): - # read the data directly from a file (for debugging porpoises) - _logger.info( "Loading the %s data directly from a file: %s", df.key, url ) - df._set_data( url ) - continue - - # check if we have a cached copy of the file - ttl = parse_int( app.config.get( "{}_DOWNLOAD_CACHE_TTL".format( df.key ), df.ttl ), 24 ) - if ttl <= 0: - _logger.info( "Download of the %s file has been disabled.", df.key ) - continue - ttl *= 60*60 - if os.path.isfile( df.cache_fname ): - # yup - check how long ago it was downloaded - mtime = os.path.getmtime( df.cache_fname ) - age = int( time.time() - mtime ) - _logger.debug( "Checking the cached %s file: age=%s, ttl=%s (mtime=%s)", - df.key, - datetime.timedelta( seconds=age ), - datetime.timedelta( seconds=ttl ), - time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(mtime) ) - ) - if age < ttl: + # loop forever (until the program exits) + while True: + + # process each DownloadedFile + # NOTE: The DownloadedFile registry is built once at startup, so we don't need to lock it. + for df in _registry: + + # get the download URL + url = app.config.get( "{}_DOWNLOAD_URL".format( df.key.upper() ), df.url ) + if os.path.isfile( url ): + # read the data directly from a file (for debugging porpoises) + _logger.info( "Loading the %s data directly from a file: %s", df.key, url ) + df._set_data( url ) continue - # download the file - _logger.info( "Downloading the %s file: %s", df.key, url ) - try: - req = urllib.request.Request( url, + # check if we have a cached copy of the file + ttl = parse_int( app.config.get( "{}_DOWNLOAD_CACHE_TTL".format( df.key ), df.ttl ), 24 ) + if ttl <= 0: + _logger.info( "Download of the %s file has been disabled.", df.key ) + continue + ttl *= 60*60 + if os.path.isfile( df.cache_fname ): + # yup - check how long ago it was downloaded + mtime = os.path.getmtime( df.cache_fname ) + age = int( time.time() - mtime ) + _logger.debug( "Checking the cached %s file: age=%s, ttl=%s (mtime=%s)", + df.key, + datetime.timedelta( seconds=age ), + datetime.timedelta( seconds=ttl ), + time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(mtime) ) + ) + if age < ttl: + continue + + # download the file + _logger.info( "Downloading the %s file: %s", df.key, url ) + try: headers = { "Accept-Encoding": "gzip, deflate" } - ) - fp = urllib.request.urlopen( req ) - data = fp.read().decode( "utf-8" ) - except Exception as ex: #pylint: disable=broad-except - msg = str( getattr(ex,"reason",None) or ex ) - _logger.error( "Can't download the %s file: %s", df.key, msg ) - df.error_msg = msg - continue - _logger.info( "Downloaded the %s file OK: %d bytes", df.key, len(data) ) - - # install the new data - df._set_data( data ) - # NOTE: We only need to worry about thread-safety because a fresh copy of the file is downloaded - # while the old one is in use, but because downloads are only done once at startup, once we get here, - # we could delete the lock and allow unfettered access to the underlying data (since it's all - # going to be read-only). - # For simplicty, we leave the lock in place. It will slow things down a bit, since we will be - # serializing access to the data (unnecessarily, because it's all read-only) but none of the code - # is performance-critical and we can probably live it. - - # save a cached copy of the data - _logger.debug( "Saving a cached copy of the %s file: %s", df.key, df.cache_fname ) - with open( df.cache_fname, "w", encoding="utf-8" ) as fp: - fp.write( data ) + if url in _etags: + _logger.debug( "- If-None-Match = %s", _etags[url] ) + headers[ "If-None-Match" ] = _etags[ url ] + req = urllib.request.Request( url, headers=headers ) + resp = urllib.request.urlopen( req ) + data = resp.read().decode( "utf-8" ) + etag = resp.headers.get( "ETag" ) + _logger.info( "Downloaded the %s file OK: %d bytes", df.key, len(data) ) + if etag: + _logger.debug( "- Got etag: %s", etag ) + _etags[ url ] = etag + except Exception as ex: #pylint: disable=broad-except + if isinstance( ex, urllib.error.HTTPError ) and ex.code == 304: #pylint: disable=no-member + _logger.info( "Download %s file: 304 Not Modified", df.key ) + if os.path.isfile( df.cache_fname ): + # NOTE: We touch the file so that the TTL check will work the next time around. + os.utime( df.cache_fname ) + continue + msg = str( getattr(ex,"reason",None) or ex ) + _logger.error( "Can't download the %s file: %s", df.key, msg ) + df.error_msg = msg + continue + + # install the new data + df._set_data( data ) + + # save a cached copy of the data + _logger.debug( "Saving a cached copy of the %s file: %s", df.key, df.cache_fname ) + with open( df.cache_fname, "w", encoding="utf-8" ) as fp: + fp.write( data ) + + # sleep before looping back and doing it all again + delay = parse_int( app.config.get( "DOWNLOAD_CHECK_INTERVAL" ), 2 ) + time.sleep( delay * 60*60 ) diff --git a/vasl_templates/webapp/scenarios.py b/vasl_templates/webapp/scenarios.py index ec75aa7..2a97ad1 100644 --- a/vasl_templates/webapp/scenarios.py +++ b/vasl_templates/webapp/scenarios.py @@ -10,9 +10,10 @@ import base64 import re import time import math +import hashlib import logging -from flask import request, render_template, jsonify, abort +from flask import request, render_template, make_response, jsonify, abort from PIL import Image, ImageOps from vasl_templates.webapp import app @@ -26,15 +27,18 @@ from vasl_templates.webapp.utils import TempFile, \ def _build_asa_scenario_index( df, new_data, logger ): """Build the ASL Scenario Archive index.""" - df.index = { + # parse the scenario index + index = { scenario["scenario_id"]: scenario for scenario in new_data["scenarios"] } + # install the results + df.index = index if logger: logger.debug( "Loaded the ASL Secenario Archive index: #scenarios=%d", len(df.index) ) logger.debug( "- Generated at: %s", new_data.get( "_generatedAt_", "n/a" ) ) -_asa_scenarios = DownloadedFile( "ASA", 1*24, +_asa_scenarios = DownloadedFile( "ASA", 6, # nb: TTL = #hours "asl-scenario-archive.json", "https://vasl-templates.org/services/asl-scenario-archive/scenario-index.json", _build_asa_scenario_index, @@ -45,14 +49,17 @@ _asa_scenarios = DownloadedFile( "ASA", 1*24, def _build_roar_scenario_index( df, new_data, logger ): """Build the ROAR scenario index.""" - df.index, df.title_matching, df.id_matching = {}, {}, {} + # parse the scenario index + index, title_matching, id_matching = {}, {}, {} for roar_id,scenario in new_data.items(): if roar_id.startswith( "_" ): continue scenario[ "roar_id" ] = roar_id - df.index[ roar_id ] = scenario - _update_roar_matching_index( df.title_matching, scenario.get("name"), roar_id ) - _update_roar_matching_index( df.id_matching, scenario.get("scenario_id"), roar_id ) + index[ roar_id ] = scenario + _update_roar_matching_index( title_matching, scenario.get("name"), roar_id ) + _update_roar_matching_index( id_matching, scenario.get("scenario_id"), roar_id ) + # install the results + df.index, df.title_matching, df.id_matching = index, title_matching, id_matching if logger: logger.debug( "Loaded the ROAR scenario index: #scenarios=%d", len(df.index) ) logger.debug( "- Generated at: %s", new_data.get( "_generatedAt_", "n/a" ) ) @@ -74,7 +81,7 @@ def _make_roar_matching_key( val ): return val return re.sub( "[^a-z0-9]", "", val.lower() ) -_roar_scenarios = DownloadedFile( "ROAR", 1*24, +_roar_scenarios = DownloadedFile( "ROAR", 6, # nb: TTL = #hours "roar-scenario-index.json", "https://vasl-templates.org/services/roar/scenario-index.json", _build_roar_scenario_index, @@ -115,10 +122,15 @@ def get_scenario_index(): return _make_not_available_response( "Please wait, the scenario index is still downloading.", None ) - return jsonify( [ + etag = hashlib.md5( json.dumps( _asa_scenarios.index ).encode( "utf-8" ) ).hexdigest() + if request.headers.get( "If-None-Match" ) == etag: + return "Not Modified", 304 + resp = make_response( jsonify( [ make_entry( scenario ) for scenario in _asa_scenarios.index.values() - ] ) + ] ) ) + resp.headers["ETag"] = etag + return resp @app.route( "/roar/scenario-index" ) def get_roar_scenario_index(): @@ -133,7 +145,12 @@ def get_roar_scenario_index(): return _make_not_available_response( "Please wait, the ROAR scenarios are still downloading.", None ) - return jsonify( _roar_scenarios.index ) + etag = hashlib.md5( json.dumps( _roar_scenarios.index ).encode( "utf-8" ) ).hexdigest() + if request.headers.get( "If-None-Match" ) == etag: + return "Not Modified", 304 + resp = make_response( jsonify( _roar_scenarios.index ) ) + resp.headers["ETag"] = etag + return resp def _make_not_available_response( msg, msg2 ): """Generate a "not available" response.""" @@ -595,7 +612,7 @@ def test_asa_upload( scenario_id ): fp.write( data ) logger.info( " - Saved to: %s", fname ) - def make_response( fname ): + def make_resp( fname ): """Generate a response.""" dname = os.path.join( os.path.dirname(__file__), "tests/fixtures/asa-responses/" ) fname = os.path.join( dname, "{}.json".format( fname ) ) @@ -610,12 +627,12 @@ def test_asa_upload( scenario_id ): # parse the request user_name = request.args.get( "user" ) if not user_name: - return make_response( "missing-user-name" ) + return make_resp( "missing-user-name" ) api_token = request.args.get( "token" ) if not api_token: - return make_response( "missing-token" ) + return make_resp( "missing-token" ) if api_token == "incorrect-token": - return make_response( "incorrect-token" ) + return make_resp( "incorrect-token" ) # process the request logger.info( "ASA upload: id=%s ; user=\"%s\" ; token=\"%s\"", scenario_id, user_name,api_token ) @@ -628,7 +645,7 @@ def test_asa_upload( scenario_id ): global _last_asa_upload _last_asa_upload = asa_upload - return make_response( "ok" ) + return make_resp( "ok" ) # --------------------------------------------------------------------- diff --git a/vasl_templates/webapp/static/roar.js b/vasl_templates/webapp/static/roar.js index 9086e9f..b4ab319 100644 --- a/vasl_templates/webapp/static/roar.js +++ b/vasl_templates/webapp/static/roar.js @@ -158,19 +158,26 @@ function loadScenarios( $sel, scenarios ) // -------------------------------------------------------------------- var _roarScenarioIndex = null ; // nb: don't access this directly, use getRoarScenarioIndex() +var _roarScenarioIndexETag ; function getRoarScenarioIndex( onReady ) { - // check if we already have the ROAR scenario index - if ( _roarScenarioIndex ) { - - // yup - just do it - onReady( _roarScenarioIndex ) ; - - } else { - - // nope - download it - $.getJSON( gGetRoarScenarioIndexUrl, function( resp ) { + // nope - download it + $.ajax( { + url: gGetRoarScenarioIndexUrl, + type: "GET", + datatype: "json", + beforeSend: function( xhr ) { + if ( _roarScenarioIndexETag ) + xhr.setRequestHeader( "If-None-Match", _roarScenarioIndexETag ) ; + }, + success: function( resp, status, xhr ) { + if ( xhr.status == 304 ) { + // our cached copy is still valid + onReady( _roarScenarioIndex ) ; + return ; + } + // check if a warning was issued if ( resp.warning ) { var msg = resp.warning ; if ( resp.message ) @@ -178,13 +185,16 @@ function getRoarScenarioIndex( onReady ) showWarningMsg( msg ) ; return ; } + // save a copy of the data, then notify the caller _roarScenarioIndex = resp ; + _roarScenarioIndexETag = xhr.getResponseHeader( "ETag" ) ; onReady( resp ) ; - } ).fail( function( xhr, status, errorMsg ) { + }, + error: function( xhr, status, errorMsg ) { showErrorMsg( "Can't get the ROAR scenario index:
" + escapeHTML(errorMsg) + "
" ) ; - } ) ; - - } + return ; + }, + } ) ; } // -------------------------------------------------------------------- diff --git a/vasl_templates/webapp/static/scenarios.js b/vasl_templates/webapp/static/scenarios.js index 28b4a19..3c9bb61 100644 --- a/vasl_templates/webapp/static/scenarios.js +++ b/vasl_templates/webapp/static/scenarios.js @@ -28,7 +28,7 @@ window.searchForScenario = function() var eventHandlers = new jQueryHandlers() ; // NOTE: We have to get the scenario index before we can do anything. - getScenarioIndex( function( scenarios ) { + getScenarioIndex( function( scenarios, isNewScenarios ) { // show the dialog $( "#scenario-search" ).dialog( { @@ -42,7 +42,6 @@ window.searchForScenario = function() minHeight: 400, position: { my: "center center", at: "center center", of: window }, create: function() { - initPrefixIndex( scenarios ) ; initDialog( $(this), scenarios ) ; // FUDGE! This works around a weird layout problem. The very first time the dialog opens, // the search input box (the whole .select2-dropdown, actually) is too far left. The layout @@ -52,6 +51,8 @@ window.searchForScenario = function() open: function() { // initialize $dlg = $(this) ; + if ( isNewScenarios ) + initSearchResults( $dlg, scenarios ) ; // reset everything $gSearchQueryInputBox.val( "" ) ; $gDialog.find( ".select2-results__option" ).remove() ; @@ -116,8 +117,14 @@ function initDialog( $dlg, scenarios ) } ) ; var $gripper = $( "" ) ; $dlg.find( ".gutter.gutter-horizontal" ).append( $gripper ) ; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // initialize the select2 +function initSearchResults( $dlg, scenarios ) +{ + // initialize the search results + initPrefixIndex( scenarios ) ; var options = [] ; scenarios.forEach( function( scenario ) { options.push( { @@ -127,6 +134,12 @@ function initDialog( $dlg, scenarios ) } ) ; } ) ; sortScenarios( options ) ; + + // load the search results + if ( $gScenariosSelect ) { + // clean up the previous select2 + $gScenariosSelect.empty().select2( "destroy" ) ; + } $gScenariosSelect = $dlg.find( ".scenarios select" ) ; $gScenariosSelect.select2( { data: options, @@ -146,6 +159,8 @@ function initDialog( $dlg, scenarios ) $gScenariosSelect.on( "select2:select", function( evt ) { onItemSelected( evt.params.data.id ) ; } ) ; + + // keep the UI up-to-date as items are selected $gSearchQueryInputBox = $dlg.find( ".select2-search__field" ) ; $gSearchQueryInputBox.on( "input", function() { // FUDGE! select2 rebuilds the list of matching items, and selects the first one, @@ -1333,20 +1348,25 @@ window.updateForConnectedScenario = function( scenarioId, roarId ) // -------------------------------------------------------------------- var _scenarioIndex ; // nb: don't access this directly, use getScenarioIndex() +var _scenarioIndexETag ; function getScenarioIndex( onReady ) { - // check if we already have the scenario index - if ( _scenarioIndex ) { - - // yup - just do it - onReady( _scenarioIndex ) ; - - } else { - - // nope - download it (nb: we do this on-demand, instead of during startup, - // to give the backend time if it wants to download a fresh copy). - $.getJSON( gGetScenarioIndexUrl, function( resp ) { + $.ajax( { + url: gGetScenarioIndexUrl, + type: "GET", + datatype: "json", + beforeSend: function( xhr ) { + if ( _scenarioIndexETag ) + xhr.setRequestHeader( "If-None-Match", _scenarioIndexETag ) ; + }, + success: function( resp, status, xhr ) { + if ( xhr.status == 304 ) { + // our cached copy is still valid + onReady( _scenarioIndex, false ) ; + return ; + } + // check if a warning was issued if ( resp.warning ) { var msg = resp.warning ; if ( resp.message ) @@ -1354,14 +1374,16 @@ function getScenarioIndex( onReady ) showWarningMsg( msg ) ; return ; } + // save a copy of the data, then notify the caller _scenarioIndex = resp ; - onReady( resp ) ; - } ).fail( function( xhr, status, errorMsg ) { + _scenarioIndexETag = xhr.getResponseHeader( "ETag" ) ; + onReady( resp, true ) ; + }, + error: function( xhr, status, errorMsg ) { showErrorMsg( "Can't get the scenario index:
" + escapeHTML(errorMsg) + "
" ) ; return ; - } ) ; - - } + }, + } ) ; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -