From f3e487d03d33ef56dfb223d4f64ba880489c3af5 Mon Sep 17 00:00:00 2001 From: Taka Date: Sat, 12 Dec 2020 22:18:01 +1100 Subject: [PATCH] Handle encoding when reading text files. --- vasl_templates/webapp/__init__.py | 2 +- vasl_templates/webapp/main.py | 4 ++-- vasl_templates/webapp/scenarios.py | 6 +++--- vasl_templates/webapp/snippets.py | 33 +++++++++++++++--------------- vasl_templates/webapp/utils.py | 26 +++++++++++++++++++++++ vasl_templates/webapp/vasl_mod.py | 6 +++--- vasl_templates/webapp/vassal.py | 2 +- vasl_templates/webapp/vo.py | 4 ++-- vasl_templates/webapp/vo_notes.py | 5 ++--- vasl_templates/webapp/vo_utils.py | 2 +- 10 files changed, 58 insertions(+), 32 deletions(-) diff --git a/vasl_templates/webapp/__init__.py b/vasl_templates/webapp/__init__.py index f4ede85..048347e 100644 --- a/vasl_templates/webapp/__init__.py +++ b/vasl_templates/webapp/__init__.py @@ -216,7 +216,7 @@ _set_config_from_env( "USER_FILES_DIR" ) # initialize logging _fname = os.path.join( config_dir, "logging.yaml" ) if os.path.isfile( _fname ): - with open( _fname, "r" ) as fp: + with open( _fname, "r", encoding="utf-8" ) as fp: try: logging.config.dictConfig( yaml.safe_load( fp ) ) except Exception as ex: #pylint: disable=broad-except diff --git a/vasl_templates/webapp/main.py b/vasl_templates/webapp/main.py index eff370a..48ae90e 100644 --- a/vasl_templates/webapp/main.py +++ b/vasl_templates/webapp/main.py @@ -147,7 +147,7 @@ def get_app_config(): vals[ key ] = app.config.get( key ) fname = os.path.join( DATA_DIR, "asl-scenario-archive.json" ) if os.path.isfile( fname ): - with open( fname, "r" ) as fp: + with open( fname, "r", encoding="utf-8" ) as fp: try: vals[ "SCENARIOS_CONFIG" ] = json.load( fp ) except json.decoder.JSONDecodeError as ex: @@ -299,7 +299,7 @@ def get_default_scenario(): fname = os.path.join( app.config.get("DATA_DIR",DATA_DIR), "default-scenario.json" ) # return the default scenario - with open(fname,"r") as fp: + with open( fname, "r", 
encoding="utf-8" ) as fp: return jsonify( json.load( fp ) ) # --------------------------------------------------------------------- diff --git a/vasl_templates/webapp/scenarios.py b/vasl_templates/webapp/scenarios.py index 233f4ef..8373d6e 100644 --- a/vasl_templates/webapp/scenarios.py +++ b/vasl_templates/webapp/scenarios.py @@ -576,10 +576,10 @@ def on_successful_asa_upload( scenario_id ): # the most-recent version is the one that will ultimately be used. This lets us identify these temporary changes # that have been made to the cached index file (which will be overwritten when we download a fresh copy). # This is not particularly efficient, but it won't happen too often. - with open( _asa_scenarios.cache_fname, "r" ) as fp: + with open( _asa_scenarios.cache_fname, "r", encoding="utf-8" ) as fp: data = json.load( fp ) data["scenarios"].append( new_scenario ) - with open( _asa_scenarios.cache_fname, "w" ) as fp: + with open( _asa_scenarios.cache_fname, "w", encoding="utf-8" ) as fp: json.dump( data, fp ) # update the in-memory scenario index @@ -618,7 +618,7 @@ def test_asa_upload( scenario_id ): """Generate a response.""" dname = os.path.join( os.path.dirname(__file__), "tests/fixtures/asa-responses/" ) fname = os.path.join( dname, "{}.json".format( fname ) ) - resp = json.load( open( fname, "r" ) ) + resp = json.load( open( fname, "r", encoding="utf-8" ) ) return jsonify( resp ) # simulate a slow response diff --git a/vasl_templates/webapp/snippets.py b/vasl_templates/webapp/snippets.py index cf51bfe..cdef7ff 100644 --- a/vasl_templates/webapp/snippets.py +++ b/vasl_templates/webapp/snippets.py @@ -15,6 +15,7 @@ from PIL import Image from vasl_templates.webapp import app, globvars from vasl_templates.webapp.config.constants import DATA_DIR from vasl_templates.webapp.webdriver import WebDriver +from vasl_templates.webapp.utils import read_text_file default_template_pack = None @@ -44,9 +45,9 @@ def load_default_template_pack(): #pylint: disable=too-many-locals 
"default-template-pack/" ) data = { "templates": {} } - with open( os.path.join( base_dir, "nationalities.json" ), "r") as fp: + with open( os.path.join( base_dir, "nationalities.json" ), "r", encoding="utf-8" ) as fp: data["nationalities"] = json.load( fp ) - with open( os.path.join( base_dir, "national-capabilities.json" ), "r" ) as fp: + with open( os.path.join( base_dir, "national-capabilities.json" ), "r", encoding="utf-8" ) as fp: data["national-capabilities"] = json.load( fp ) # NOTE: Similarly, we always load the default extras templates, and user-defined template packs @@ -111,20 +112,20 @@ def _do_get_template_pack( dname ): for fname in fnames: # add the next file to the results fname_stem, extn = os.path.splitext( fname ) - fname = os.path.join( root, fname ) - with open( fname, "r" ) as fp: - if (fname_stem, extn) == ("nationalities", ".json"): - nationalities = json.load( fp ) - continue - if extn == ".j2": - relpath = os.path.relpath( os.path.abspath(fname), dname ) - if relpath.startswith( "extras" + os.sep ): - fname_stem = "extras/" + fname_stem - templates[ fname_stem ] = fp.read() - elif extn == ".css": - css[ fname_stem ] = fp.read() - elif extn == ".include": - includes[ fname_stem ] = fp.read() + fname = os.path.join( root, fname ) + buf = read_text_file( fname ) + if (fname_stem, extn) == ("nationalities", ".json"): + nationalities = json.loads( buf ) + continue + if extn == ".j2": + relpath = os.path.relpath( os.path.abspath(fname), dname ) + if relpath.startswith( "extras" + os.sep ): + fname_stem = "extras/" + fname_stem + templates[ fname_stem ] = buf + elif extn == ".css": + css[ fname_stem ] = buf + elif extn == ".include": + includes[ fname_stem ] = buf return nationalities, templates, css, includes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vasl_templates/webapp/utils.py b/vasl_templates/webapp/utils.py index 1781999..b22f7d6 100644 --- a/vasl_templates/webapp/utils.py +++ 
b/vasl_templates/webapp/utils.py
@@ -7,11 +7,14 @@ import tempfile
 import pathlib
 import math
 import re
+import logging
 from collections import defaultdict
 
 from flask import request, Response, send_file
 from PIL import Image, ImageChops
 
+from vasl_templates.webapp import app
+
 # ---------------------------------------------------------------------
 
 class MsgStore:
@@ -108,6 +111,29 @@ class TempFile:
 
 # ---------------------------------------------------------------------
 
+def read_text_file( fname ):
+    """Read a text file, trying each configured encoding in turn (returns an error message if none work)."""
+    # NOTE: There are several places where we read user-generated files (e.g. template packs, Chapter H notes),
+    # which contain HTML content, so the ideal case is that they be plain ASCII, with special characters specified
+    # as HTML entities. However, people are copy-and-pasting Chapter H content from their eASLRB's, which means
+    # we need to handle encoding. chardet is overkill for what we need, and we simply try the most common cases.
+    encodings = app.config.get( "TEXT_FILE_ENCODINGS", "ascii,utf-8,windows-1252,iso-8859-1" )
+    with open( fname, "rb" ) as fp:
+        buf = fp.read()
+    if buf[0:3] == b"\xEF\xBB\xBF":
+        buf = buf[3:]
+        encodings = "utf-8"
+    for enc in encodings.split( "," ):
+        try:
+            return buf.decode( enc.strip() )
+        except (UnicodeDecodeError, LookupError):
+            continue # wrong encoding, or an unknown/empty name in TEXT_FILE_ENCODINGS - try the next one
+    msg = "Can't decode text file: {}".format( fname )
+    logging.warning( msg )
+    return msg
+
+# ---------------------------------------------------------------------
+
 def resize_image_response( resp, default_width=None, default_height=None, default_scaling=None ):
     """Resize an image that will be returned as a Flask response."""
 
diff --git a/vasl_templates/webapp/vasl_mod.py b/vasl_templates/webapp/vasl_mod.py
index a9808c8..b5e62bf 100644
--- a/vasl_templates/webapp/vasl_mod.py
+++ b/vasl_templates/webapp/vasl_mod.py
@@ -78,7 +78,7 @@ def _load_vasl_extns( extn_dir, msg_store ): #pylint: disable=too-many-locals,to
     # NOTE: We sort the filenames so that the test results are stable.
for fname in sorted( glob.glob( os.path.join(dname,"*.json") ) ): _logger.debug( "Loading VASL extension info: %s", fname ) - with open( fname, "r" ) as fp: + with open( fname, "r", encoding="utf-8" ) as fp: extn_info = json.load( fp ) all_extn_info[ ( extn_info["extensionId"], extn_info["version"] ) ] = extn_info _logger.debug( "- id=%s ; version=%s", extn_info["extensionId"], extn_info["version"] ) @@ -237,9 +237,9 @@ class VaslMod: # load our overrides fname = os.path.join( data_dir, "vasl-overrides.json" ) - vasl_overrides = json.load( open( fname, "r" ) ) + vasl_overrides = json.load( open( fname, "r", encoding="utf-8" ) ) fname = os.path.join( data_dir, "expected-multiple-images.json" ) - expected_multiple_images = json.load( open( fname, "r" ) ) + expected_multiple_images = json.load( open( fname, "r", encoding="utf-8" ) ) # get the VASL version build_info = self._files[0][0].read( "buildFile" ) diff --git a/vasl_templates/webapp/vassal.py b/vasl_templates/webapp/vassal.py index 0a6e25a..1a102db 100644 --- a/vasl_templates/webapp/vassal.py +++ b/vasl_templates/webapp/vassal.py @@ -336,7 +336,7 @@ class VassalShim: with TempFile() as temp_file: temp_file.close( delete=False ) VassalShim()._run_vassal_shim( "version", temp_file.name ) #pylint: disable=protected-access - with open( temp_file.name, "r" ) as fp: + with open( temp_file.name, "r", encoding="utf-8" ) as fp: return fp.read() def dump_scenario( self, fname ): diff --git a/vasl_templates/webapp/vo.py b/vasl_templates/webapp/vo.py index 91654ce..c8acc6e 100644 --- a/vasl_templates/webapp/vo.py +++ b/vasl_templates/webapp/vo.py @@ -83,11 +83,11 @@ def _do_load_vo_listings( vasl_mod, vo_type, merge_common, real_data_dir, msg_st nat = minor_type + "-common" else: minor_nats[minor_type].add( nat ) - with open( os.path.join(root,fname), "r" ) as fp: + with open( os.path.join(root,fname), "r", encoding="utf-8" ) as fp: ( _kfw_listings[vo_type] if is_kfw else listings )[ nat ] = json.load( fp ) fname2 = 
os.path.join( root, "{}.lend-lease.json".format( fname_stem ) ) if os.path.isfile( fname2 ): - with open( fname2, "r" ) as fp: + with open( fname2, "r", encoding="utf-8" ) as fp: listings[nat].extend( json.load( fp ) ) # apply any changes for VASL extensions diff --git a/vasl_templates/webapp/vo_notes.py b/vasl_templates/webapp/vo_notes.py index c591b64..fd23a96 100644 --- a/vasl_templates/webapp/vo_notes.py +++ b/vasl_templates/webapp/vo_notes.py @@ -13,7 +13,7 @@ from flask import request, render_template, jsonify, send_file, abort, Response, from vasl_templates.webapp import app, globvars from vasl_templates.webapp.files import FileServer from vasl_templates.webapp.webdriver import WebDriver -from vasl_templates.webapp.utils import resize_image_response, is_image_file, is_empty_file +from vasl_templates.webapp.utils import read_text_file, resize_image_response, is_image_file, is_empty_file # --------------------------------------------------------------------- @@ -138,8 +138,7 @@ def load_vo_notes( msg_store ): #pylint: disable=too-many-statements,too-many-lo # HTML file - read the content fname = os.path.join( root, fname ) - with open( fname, "r" ) as fp: - html_content = fp.read().strip() + html_content = read_text_file( fname ).strip() if "½" in html_content: # NOTE: VASSAL doesn't like this, use "frac12;" :-/ logging.warning( "Found ½ in HTML: %s", fname ) diff --git a/vasl_templates/webapp/vo_utils.py b/vasl_templates/webapp/vo_utils.py index aaf63fb..64ceaf9 100644 --- a/vasl_templates/webapp/vo_utils.py +++ b/vasl_templates/webapp/vo_utils.py @@ -206,7 +206,7 @@ def add_vo_comments( listings, vo_type, msg_store ): global _vo_comments if not _vo_comments: fname = os.path.join( app.config.get("DATA_DIR",DATA_DIR), "vo-comments.json" ) - _vo_comments = json.load( open( fname, "r" ) ) + _vo_comments = json.load( open( fname, "r", encoding="utf-8" ) ) # process each vehicle/ordnance for nat,vo_entries in listings.items():