Download the scenario index files continuously.

master
Pacman Ghost 3 years ago
parent 6fe7a693c3
commit 1963558ba9
  1. 134
      vasl_templates/webapp/downloads.py
  2. 49
      vasl_templates/webapp/scenarios.py
  3. 38
      vasl_templates/webapp/static/roar.js
  4. 60
      vasl_templates/webapp/static/scenarios.js

@ -7,6 +7,7 @@ import os
import threading
import json
import urllib.request
import urllib.error
import time
import datetime
import tempfile
@ -18,6 +19,8 @@ from vasl_templates.webapp.utils import parse_int
_registry = set()
_logger = logging.getLogger( "downloads" )
_etags = {}
# ---------------------------------------------------------------------
class DownloadedFile:
@ -92,68 +95,75 @@ class DownloadedFile:
"""Download fresh copies of each file."""
#pylint: disable=protected-access
# process each DownloadedFile
for df in _registry:
# check if we should simulate slow downloads
delay = parse_int( app.config.get( "DOWNLOADED_FILES_DELAY" ) )
if delay:
_logger.debug( "Simulating a slow download for the %s file: delay=%s", df.key, delay )
time.sleep( delay )
# get the download URL
url = app.config.get( "{}_DOWNLOAD_URL".format( df.key.upper() ), df.url )
if os.path.isfile( url ):
# read the data directly from a file (for debugging porpoises)
_logger.info( "Loading the %s data directly from a file: %s", df.key, url )
df._set_data( url )
continue
# check if we have a cached copy of the file
ttl = parse_int( app.config.get( "{}_DOWNLOAD_CACHE_TTL".format( df.key ), df.ttl ), 24 )
if ttl <= 0:
_logger.info( "Download of the %s file has been disabled.", df.key )
continue
ttl *= 60*60
if os.path.isfile( df.cache_fname ):
# yup - check how long ago it was downloaded
mtime = os.path.getmtime( df.cache_fname )
age = int( time.time() - mtime )
_logger.debug( "Checking the cached %s file: age=%s, ttl=%s (mtime=%s)",
df.key,
datetime.timedelta( seconds=age ),
datetime.timedelta( seconds=ttl ),
time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(mtime) )
)
if age < ttl:
# loop forever (until the program exits)
while True:
# process each DownloadedFile
# NOTE: The DownloadedFile registry is built once at startup, so we don't need to lock it.
for df in _registry:
# get the download URL
url = app.config.get( "{}_DOWNLOAD_URL".format( df.key.upper() ), df.url )
if os.path.isfile( url ):
# read the data directly from a file (for debugging porpoises)
_logger.info( "Loading the %s data directly from a file: %s", df.key, url )
df._set_data( url )
continue
# download the file
_logger.info( "Downloading the %s file: %s", df.key, url )
try:
req = urllib.request.Request( url,
# check if we have a cached copy of the file
ttl = parse_int( app.config.get( "{}_DOWNLOAD_CACHE_TTL".format( df.key ), df.ttl ), 24 )
if ttl <= 0:
_logger.info( "Download of the %s file has been disabled.", df.key )
continue
ttl *= 60*60
if os.path.isfile( df.cache_fname ):
# yup - check how long ago it was downloaded
mtime = os.path.getmtime( df.cache_fname )
age = int( time.time() - mtime )
_logger.debug( "Checking the cached %s file: age=%s, ttl=%s (mtime=%s)",
df.key,
datetime.timedelta( seconds=age ),
datetime.timedelta( seconds=ttl ),
time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(mtime) )
)
if age < ttl:
continue
# download the file
_logger.info( "Downloading the %s file: %s", df.key, url )
try:
headers = { "Accept-Encoding": "gzip, deflate" }
)
fp = urllib.request.urlopen( req )
data = fp.read().decode( "utf-8" )
except Exception as ex: #pylint: disable=broad-except
msg = str( getattr(ex,"reason",None) or ex )
_logger.error( "Can't download the %s file: %s", df.key, msg )
df.error_msg = msg
continue
_logger.info( "Downloaded the %s file OK: %d bytes", df.key, len(data) )
# install the new data
df._set_data( data )
# NOTE: We only need to worry about thread-safety because a fresh copy of the file is downloaded
# while the old one is in use, but because downloads are only done once at startup, once we get here,
# we could delete the lock and allow unfettered access to the underlying data (since it's all
# going to be read-only).
# For simplicity, we leave the lock in place. It will slow things down a bit, since we will be
# serializing access to the data (unnecessarily, because it's all read-only) but none of the code
# is performance-critical and we can probably live with it.
# save a cached copy of the data
_logger.debug( "Saving a cached copy of the %s file: %s", df.key, df.cache_fname )
with open( df.cache_fname, "w", encoding="utf-8" ) as fp:
fp.write( data )
if url in _etags:
_logger.debug( "- If-None-Match = %s", _etags[url] )
headers[ "If-None-Match" ] = _etags[ url ]
req = urllib.request.Request( url, headers=headers )
resp = urllib.request.urlopen( req )
data = resp.read().decode( "utf-8" )
etag = resp.headers.get( "ETag" )
_logger.info( "Downloaded the %s file OK: %d bytes", df.key, len(data) )
if etag:
_logger.debug( "- Got etag: %s", etag )
_etags[ url ] = etag
except Exception as ex: #pylint: disable=broad-except
if isinstance( ex, urllib.error.HTTPError ) and ex.code == 304: #pylint: disable=no-member
_logger.info( "Download %s file: 304 Not Modified", df.key )
if os.path.isfile( df.cache_fname ):
# NOTE: We touch the file so that the TTL check will work the next time around.
os.utime( df.cache_fname )
continue
msg = str( getattr(ex,"reason",None) or ex )
_logger.error( "Can't download the %s file: %s", df.key, msg )
df.error_msg = msg
continue
# install the new data
df._set_data( data )
# save a cached copy of the data
_logger.debug( "Saving a cached copy of the %s file: %s", df.key, df.cache_fname )
with open( df.cache_fname, "w", encoding="utf-8" ) as fp:
fp.write( data )
# sleep before looping back and doing it all again
delay = parse_int( app.config.get( "DOWNLOAD_CHECK_INTERVAL" ), 2 )
time.sleep( delay * 60*60 )

@ -10,9 +10,10 @@ import base64
import re
import time
import math
import hashlib
import logging
from flask import request, render_template, jsonify, abort
from flask import request, render_template, make_response, jsonify, abort
from PIL import Image, ImageOps
from vasl_templates.webapp import app
@ -26,15 +27,18 @@ from vasl_templates.webapp.utils import TempFile, \
def _build_asa_scenario_index( df, new_data, logger ):
    """Build the ASL Scenario Archive index.

    The index is built into a local variable first, then installed into the
    DownloadedFile in a single assignment, so that readers on other threads
    never see a partially-built index.
    """
    # parse the scenario index (map each scenario ID to its scenario entry)
    index = {
        scenario["scenario_id"]: scenario
        for scenario in new_data["scenarios"]
    }
    # install the results (single atomic assignment - see docstring)
    df.index = index
    if logger:
        logger.debug( "Loaded the ASL Scenario Archive index: #scenarios=%d", len(df.index) )
        logger.debug( "- Generated at: %s", new_data.get( "_generatedAt_", "n/a" ) )
_asa_scenarios = DownloadedFile( "ASA", 1*24,
_asa_scenarios = DownloadedFile( "ASA", 6, # nb: TTL = #hours
"asl-scenario-archive.json",
"https://vasl-templates.org/services/asl-scenario-archive/scenario-index.json",
_build_asa_scenario_index,
@ -45,14 +49,17 @@ _asa_scenarios = DownloadedFile( "ASA", 1*24,
def _build_roar_scenario_index( df, new_data, logger ):
    """Build the ROAR scenario index.

    The three indexes are built into local variables first, then installed
    into the DownloadedFile in a single assignment, so that readers on other
    threads never see a partially-built index.
    """
    # parse the scenario index
    index, title_matching, id_matching = {}, {}, {}
    for roar_id, scenario in new_data.items():
        # nb: keys starting with "_" are metadata (e.g. "_generatedAt_"), not scenarios
        if roar_id.startswith( "_" ):
            continue
        scenario[ "roar_id" ] = roar_id
        index[ roar_id ] = scenario
        _update_roar_matching_index( title_matching, scenario.get("name"), roar_id )
        _update_roar_matching_index( id_matching, scenario.get("scenario_id"), roar_id )
    # install the results (single atomic assignment - see docstring)
    df.index, df.title_matching, df.id_matching = index, title_matching, id_matching
    if logger:
        logger.debug( "Loaded the ROAR scenario index: #scenarios=%d", len(df.index) )
        logger.debug( "- Generated at: %s", new_data.get( "_generatedAt_", "n/a" ) )
@ -74,7 +81,7 @@ def _make_roar_matching_key( val ):
return val
return re.sub( "[^a-z0-9]", "", val.lower() )
_roar_scenarios = DownloadedFile( "ROAR", 1*24,
_roar_scenarios = DownloadedFile( "ROAR", 6, # nb: TTL = #hours
"roar-scenario-index.json",
"https://vasl-templates.org/services/roar/scenario-index.json",
_build_roar_scenario_index,
@ -115,10 +122,15 @@ def get_scenario_index():
return _make_not_available_response(
"Please wait, the scenario index is still downloading.", None
)
return jsonify( [
etag = hashlib.md5( json.dumps( _asa_scenarios.index ).encode( "utf-8" ) ).hexdigest()
if request.headers.get( "If-None-Match" ) == etag:
return "Not Modified", 304
resp = make_response( jsonify( [
make_entry( scenario )
for scenario in _asa_scenarios.index.values()
] )
] ) )
resp.headers["ETag"] = etag
return resp
@app.route( "/roar/scenario-index" )
def get_roar_scenario_index():
@ -133,7 +145,12 @@ def get_roar_scenario_index():
return _make_not_available_response(
"Please wait, the ROAR scenarios are still downloading.", None
)
return jsonify( _roar_scenarios.index )
etag = hashlib.md5( json.dumps( _roar_scenarios.index ).encode( "utf-8" ) ).hexdigest()
if request.headers.get( "If-None-Match" ) == etag:
return "Not Modified", 304
resp = make_response( jsonify( _roar_scenarios.index ) )
resp.headers["ETag"] = etag
return resp
def _make_not_available_response( msg, msg2 ):
"""Generate a "not available" response."""
@ -595,7 +612,7 @@ def test_asa_upload( scenario_id ):
fp.write( data )
logger.info( " - Saved to: %s", fname )
def make_response( fname ):
def make_resp( fname ):
"""Generate a response."""
dname = os.path.join( os.path.dirname(__file__), "tests/fixtures/asa-responses/" )
fname = os.path.join( dname, "{}.json".format( fname ) )
@ -610,12 +627,12 @@ def test_asa_upload( scenario_id ):
# parse the request
user_name = request.args.get( "user" )
if not user_name:
return make_response( "missing-user-name" )
return make_resp( "missing-user-name" )
api_token = request.args.get( "token" )
if not api_token:
return make_response( "missing-token" )
return make_resp( "missing-token" )
if api_token == "incorrect-token":
return make_response( "incorrect-token" )
return make_resp( "incorrect-token" )
# process the request
logger.info( "ASA upload: id=%s ; user=\"%s\" ; token=\"%s\"", scenario_id, user_name,api_token )
@ -628,7 +645,7 @@ def test_asa_upload( scenario_id ):
global _last_asa_upload
_last_asa_upload = asa_upload
return make_response( "ok" )
return make_resp( "ok" )
# ---------------------------------------------------------------------

@ -158,19 +158,26 @@ function loadScenarios( $sel, scenarios )
// --------------------------------------------------------------------
var _roarScenarioIndex = null ; // nb: don't access this directly, use getRoarScenarioIndex()
var _roarScenarioIndexETag ;
function getRoarScenarioIndex( onReady )
{
// check if we already have the ROAR scenario index
if ( _roarScenarioIndex ) {
// yup - just do it
onReady( _roarScenarioIndex ) ;
} else {
// nope - download it
$.getJSON( gGetRoarScenarioIndexUrl, function( resp ) {
// nope - download it
$.ajax( {
url: gGetRoarScenarioIndexUrl,
type: "GET",
datatype: "json",
beforeSend: function( xhr ) {
if ( _roarScenarioIndexETag )
xhr.setRequestHeader( "If-None-Match", _roarScenarioIndexETag ) ;
},
success: function( resp, status, xhr ) {
if ( xhr.status == 304 ) {
// our cached copy is still valid
onReady( _roarScenarioIndex ) ;
return ;
}
// check if a warning was issued
if ( resp.warning ) {
var msg = resp.warning ;
if ( resp.message )
@ -178,13 +185,16 @@ function getRoarScenarioIndex( onReady )
showWarningMsg( msg ) ;
return ;
}
// save a copy of the data, then notify the caller
_roarScenarioIndex = resp ;
_roarScenarioIndexETag = xhr.getResponseHeader( "ETag" ) ;
onReady( resp ) ;
} ).fail( function( xhr, status, errorMsg ) {
},
error: function( xhr, status, errorMsg ) {
showErrorMsg( "Can't get the ROAR scenario index:<div class='pre'>" + escapeHTML(errorMsg) + "</div>" ) ;
} ) ;
}
return ;
},
} ) ;
}
// --------------------------------------------------------------------

@ -28,7 +28,7 @@ window.searchForScenario = function()
var eventHandlers = new jQueryHandlers() ;
// NOTE: We have to get the scenario index before we can do anything.
getScenarioIndex( function( scenarios ) {
getScenarioIndex( function( scenarios, isNewScenarios ) {
// show the dialog
$( "#scenario-search" ).dialog( {
@ -42,7 +42,6 @@ window.searchForScenario = function()
minHeight: 400,
position: { my: "center center", at: "center center", of: window },
create: function() {
initPrefixIndex( scenarios ) ;
initDialog( $(this), scenarios ) ;
// FUDGE! This works around a weird layout problem. The very first time the dialog opens,
// the search input box (the whole .select2-dropdown, actually) is too far left. The layout
@ -52,6 +51,8 @@ window.searchForScenario = function()
open: function() {
// initialize
$dlg = $(this) ;
if ( isNewScenarios )
initSearchResults( $dlg, scenarios ) ;
// reset everything
$gSearchQueryInputBox.val( "" ) ;
$gDialog.find( ".select2-results__option" ).remove() ;
@ -116,8 +117,14 @@ function initDialog( $dlg, scenarios )
} ) ;
var $gripper = $( "<img src='" + gImagesBaseUrl + "/gripper-vert.png'>" ) ;
$dlg.find( ".gutter.gutter-horizontal" ).append( $gripper ) ;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// initialize the select2
function initSearchResults( $dlg, scenarios )
{
// initialize the search results
initPrefixIndex( scenarios ) ;
var options = [] ;
scenarios.forEach( function( scenario ) {
options.push( {
@ -127,6 +134,12 @@ function initDialog( $dlg, scenarios )
} ) ;
} ) ;
sortScenarios( options ) ;
// load the search results
if ( $gScenariosSelect ) {
// clean up the previous select2
$gScenariosSelect.empty().select2( "destroy" ) ;
}
$gScenariosSelect = $dlg.find( ".scenarios select" ) ;
$gScenariosSelect.select2( {
data: options,
@ -146,6 +159,8 @@ function initDialog( $dlg, scenarios )
$gScenariosSelect.on( "select2:select", function( evt ) {
onItemSelected( evt.params.data.id ) ;
} ) ;
// keep the UI up-to-date as items are selected
$gSearchQueryInputBox = $dlg.find( ".select2-search__field" ) ;
$gSearchQueryInputBox.on( "input", function() {
// FUDGE! select2 rebuilds the list of matching items, and selects the first one,
@ -1333,20 +1348,25 @@ window.updateForConnectedScenario = function( scenarioId, roarId )
// --------------------------------------------------------------------
var _scenarioIndex ; // nb: don't access this directly, use getScenarioIndex()
var _scenarioIndexETag ;
function getScenarioIndex( onReady )
{
// check if we already have the scenario index
if ( _scenarioIndex ) {
// yup - just do it
onReady( _scenarioIndex ) ;
} else {
// nope - download it (nb: we do this on-demand, instead of during startup,
// to give the backend time if it wants to download a fresh copy).
$.getJSON( gGetScenarioIndexUrl, function( resp ) {
$.ajax( {
url: gGetScenarioIndexUrl,
type: "GET",
datatype: "json",
beforeSend: function( xhr ) {
if ( _scenarioIndexETag )
xhr.setRequestHeader( "If-None-Match", _scenarioIndexETag ) ;
},
success: function( resp, status, xhr ) {
if ( xhr.status == 304 ) {
// our cached copy is still valid
onReady( _scenarioIndex, false ) ;
return ;
}
// check if a warning was issued
if ( resp.warning ) {
var msg = resp.warning ;
if ( resp.message )
@ -1354,14 +1374,16 @@ function getScenarioIndex( onReady )
showWarningMsg( msg ) ;
return ;
}
// save a copy of the data, then notify the caller
_scenarioIndex = resp ;
onReady( resp ) ;
} ).fail( function( xhr, status, errorMsg ) {
_scenarioIndexETag = xhr.getResponseHeader( "ETag" ) ;
onReady( resp, true ) ;
},
error: function( xhr, status, errorMsg ) {
showErrorMsg( "Can't get the scenario index:<div class='pre'>" + escapeHTML(errorMsg) + "</div>" ) ;
return ;
} ) ;
}
},
} ) ;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Loading…
Cancel
Save