From e1fe8df938fff65c932ee57f88650857a124aa73 Mon Sep 17 00:00:00 2001
From: Taka <taka@vm-linux-dev>
Date: Sat, 1 May 2021 06:03:20 +1000
Subject: [PATCH] Updated for v1.05 of the MMP eASLRB PDF.

---
 asl_rulebook2/bin/fixup_mmp_pdf.py            | 80 ++++++++++---------
 asl_rulebook2/extract/all.py                  |  2 +-
 asl_rulebook2/extract/content.py              |  9 +++
 asl_rulebook2/extract/data/index-fixups.json  | 27 ++-----
 asl_rulebook2/extract/data/target-fixups.json | 23 ++++++
 asl_rulebook2/tests/utils.py                  |  6 +-
 asl_rulebook2/webapp/prepare.py               |  2 +-
 asl_rulebook2/webapp/static/css/prepare.css   |  2 +-
 asl_rulebook2/webapp/static/prepare.js        | 15 ++--
 doc/prepare.md                                |  2 +-
 10 files changed, 98 insertions(+), 70 deletions(-)

diff --git a/asl_rulebook2/bin/fixup_mmp_pdf.py b/asl_rulebook2/bin/fixup_mmp_pdf.py
index 091178f..867574f 100755
--- a/asl_rulebook2/bin/fixup_mmp_pdf.py
+++ b/asl_rulebook2/bin/fixup_mmp_pdf.py
@@ -10,9 +10,14 @@ from asl_rulebook2.utils import log_msg_stderr
 
 # ---------------------------------------------------------------------
 
-def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ):
+def fixup_mmp_pdf( fname, output_fname, fix_zoom, optimize_web, rotate, log=None ):
     """Fixup the MMP eASLRB PDF."""
 
+    # NOTE: v1.03 had problems with links within the PDF being of type /Fit rather than /XYZ,
+    # which meant that the document viewer kept changing the zoom when you clicked on them :-/
+    # This seems to have been fixed in v1.05 (even in the non-"inherit zoom" version), but
+    # we leave the code in place, just in case, accessible via the fix_zoom switch.
+
     def log_msg( msg_type, msg, *args, **kwargs ):
         if not log:
             return
@@ -35,29 +40,30 @@ def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ):
         log_msg( None, "" )
 
         # fixup bookmarks in the TOC
-        log_msg( "progress", "Fixing up the TOC..." )
-        def walk_toc( items, depth ):
-            for item_no,item in enumerate(items):
-                if item.destination[0].Type != "/Page" or item.destination[1] != "/Fit" \
-                   or item.page_location is not None or item.page_location_kwargs != {}:
-                    log_msg( "warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) )
-                    continue
-                page = Page( item.destination[0] )
-                page_height = page.mediabox[3]
-                bullet = "#" if depth <= 1 else "-"
-                log_msg( "verbose", "  {}{} {} => p{}",
-                    depth*"  ", bullet, item.title, 1+page.index
-                )
-                walk_toc( item.children, depth+1 )
-                new_item = OutlineItem( item.title, page.index, "XYZ", top=page_height )
-                new_item.children = item.children
-                new_item.is_closed = True
-                items[ item_no ] = new_item
-        with pdf.open_outline() as outline:
-            walk_toc( outline.root, 0 )
-            # NOTE: The TOC will be updated when we exit the context manager, and can take some time.
-            log_msg( "progress", "Installing the new TOC..." )
-        log_msg( None, "" )
+        if fix_zoom:
+            log_msg( "progress", "Fixing up the TOC..." )
+            def walk_toc( items, depth ):
+                for item_no,item in enumerate(items):
+                    if item.destination[0].Type != "/Page" or item.destination[1] != "/Fit" \
+                       or item.page_location is not None or item.page_location_kwargs != {}:
+                        log_msg( "warning", "Unexpected TOC item: {}/{}".format( depth, item_no ) )
+                        continue
+                    page = Page( item.destination[0] )
+                    page_height = page.mediabox[3]
+                    bullet = "#" if depth <= 1 else "-"
+                    log_msg( "verbose", "  {}{} {} => p{}",
+                        depth*"  ", bullet, item.title, 1+page.index
+                    )
+                    walk_toc( item.children, depth+1 )
+                    new_item = OutlineItem( item.title, page.index, "XYZ", top=page_height )
+                    new_item.children = item.children
+                    new_item.is_closed = True
+                    items[ item_no ] = new_item
+            with pdf.open_outline() as outline:
+                walk_toc( outline.root, 0 )
+                # NOTE: The TOC will be updated when we exit the context manager, and can take some time.
+                log_msg( "progress", "Installing the new TOC..." )
+            log_msg( None, "" )
 
         # fixup up each page
         log_msg( "progress", "Fixing up the content..." )
@@ -70,16 +76,17 @@ def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ):
                     raw_page.Rotate = 270
                 else:
                     raw_page.Rotate = 0
-            page = Page( raw_page )
-            page_height = page.mediabox[3]
-            for annot in raw_page.get( "/Annots", [] ):
-                dest = annot.get( "/Dest" )
-                if dest:
-                    page_no = Page( dest[0] ).index
-                    log_msg( "verbose", "  - {} => p{}",
-                        repr(annot.Rect), 1+page_no
-                    )
-                    annot.Dest = make_page_destination( pdf, page_no, "XYZ", top=page_height )
+            if fix_zoom:
+                page = Page( raw_page )
+                page_height = page.mediabox[3]
+                for annot in raw_page.get( "/Annots", [] ):
+                    dest = annot.get( "/Dest" )
+                    if dest:
+                        page_no = Page( dest[0] ).index
+                        log_msg( "verbose", "  - {} => p{}",
+                            repr(annot.Rect), 1+page_no
+                        )
+                        annot.Dest = make_page_destination( pdf, page_no, "XYZ", top=page_height )
         log_msg( None, "" )
 
         # save the updated PDF
@@ -110,11 +117,12 @@ def fixup_mmp_pdf( fname, output_fname, optimize_web, rotate, log=None ):
 @click.command()
 @click.argument( "pdf_file", nargs=1, type=click.Path(exists=True,dir_okay=False) )
 @click.option( "--output","-o", required=True, type=click.Path(dir_okay=False), help="Where to save the fixed-up PDF." )
+@click.option( "--fix-zoom", is_flag=True, default=False, help="Fix zoom problems for links within the PDF." )
 @click.option( "--optimize-web", is_flag=True, default=False, help="Optimize for use in a browser (larger file)." )
 @click.option( "--rotate", is_flag=True, default=False, help="Rotate landscape pages." )
 @click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." )
 @click.option( "--verbose","-v", is_flag=True, default=False, help="Verbose output." )
-def main( pdf_file, output, optimize_web, rotate, progress, verbose ):
+def main( pdf_file, output, fix_zoom, optimize_web, rotate, progress, verbose ):
     """Fixup the eASLRB."""
 
     def log_msg( msg_type, msg ):
@@ -123,7 +131,7 @@ def main( pdf_file, output, optimize_web, rotate, progress, verbose ):
         if msg_type == "verbose" and not verbose:
             return
         log_msg_stderr( msg_type, msg )
-    fixup_mmp_pdf( pdf_file, output, optimize_web, rotate, log=log_msg )
+    fixup_mmp_pdf( pdf_file, output, fix_zoom, optimize_web, rotate, log=log_msg )
 
 if __name__ == "__main__":
     main() #pylint: disable=no-value-for-parameter
diff --git a/asl_rulebook2/extract/all.py b/asl_rulebook2/extract/all.py
index 5cb4ab1..51018d3 100755
--- a/asl_rulebook2/extract/all.py
+++ b/asl_rulebook2/extract/all.py
@@ -112,7 +112,7 @@ class ExtractAll( ExtractBase ):
                     first = False
                 errors = [ "- {}".format( e ) for e in errors ]
                 self.log_msg( "warning", "{}:\n{}",
-                    index_entry["caption"], "\n".join(errors)
+                    index_entry.get("caption","???"), "\n".join(errors)
                 )
 
 # ---------------------------------------------------------------------
diff --git a/asl_rulebook2/extract/content.py b/asl_rulebook2/extract/content.py
index a809314..ba2fe95 100755
--- a/asl_rulebook2/extract/content.py
+++ b/asl_rulebook2/extract/content.py
@@ -20,7 +20,9 @@ from asl_rulebook2.utils import parse_page_numbers, fixup_text, append_text, rem
 # they appear in the PDF document.
 _DISABLE_SORT_ITEMS = [
     "B40", # nb: to detect B31.1 NARROW STREET
+    "A16",
     "A58","A59","A60", # Chapter A footnotes (nb: page A61 is a mess wrt element order :-/)
+    "B1",
     "B45", "B46", # Chapter B footnotes
     "C25", "C26", # Chapter C footnotes
     "D27", # Chapter D footnotes
@@ -113,6 +115,13 @@ class ExtractContent( ExtractBase ):
             sort_elems = self._curr_pageid not in disable_sort_items
             for _, elem in PageElemIterator( lt_page, elem_filter=elem_filter, sort_elems=sort_elems ):
 
+                # skip problematic elements
+                if elem.fontname == "OYULKV+MyriadPro-Regular":
+                    # FUDGE! Some symbols are represented as characters, which can sometimes cause problems
+                    # (e.g. in v1.05, the diamond for A7.8 PIN broke caption parsing), and the easiest way
+                    # to work around this is to just ignore those characters.
+                    continue
+
                 # keep track of the top-left-most bold element
                 if self._is_bold( elem ):
                     if self._top_left_elem is None \
diff --git a/asl_rulebook2/extract/data/index-fixups.json b/asl_rulebook2/extract/data/index-fixups.json
index 11eb320..4ff4728 100644
--- a/asl_rulebook2/extract/data/index-fixups.json
+++ b/asl_rulebook2/extract/data/index-fixups.json
@@ -30,6 +30,12 @@
     "new_content": null
 },
 
+"Bog": {
+    "replace": [
+        [ "BarbedWire", "Barbed-Wire" ]
+    ]
+},
+
 "Broken Units": {
     "replace": [
         [ "[Pin NA: A7.8(EXCInterdiction and Huts)]", "[Pin NA (EXC Interdiction and Huts): A7.8]" ]
@@ -133,12 +139,6 @@
     ]
 },
 
-"Minefield": {
-    "replace": [
-        [ "[fully-tracked A FV T B: B 8.61]", "[fully-tracked AFV TB: B8.61]" ]
-    ]
-},
-
 "Morale": {
     "replace": [
         [ "[Gain:", "Gain:" ],
@@ -169,12 +169,6 @@
     "new_content": "(Pre-AFV Advance/Attack Task Check; NA to berserk/Fanatic/Japanese/SMC): A11.6, G1.62 [vs Armored Cupola: O.7] [DC Placement: A23.3] [ENEMY Advance into CC/Melee: S11.4] [1PAATC: Chinese, Non-Elite Italians, Inexperienced, Allied/Axis Minors] [OVR vs \"?\": A12.41] [CC Reaction Fire: D7.21]"
 },
 
-"PBF": {
-    "replace": [
-        [ "A11.l", "A11.1" ]
-    ]
-},
-
 "Pillbox": {
     "replace": [
         [ "[Control: B30.91; in BRT: TCG15]", "[Control: B30.91] [Control (in BRT): TCG15]" ]
@@ -236,21 +230,12 @@
     ]
 },
 
-"Stall": {
-    "old_content": "(Rules are givenin a ChapterH Vehicle Note ifa nationality's AFV are subject to Stall; for example, German Multi-Applicable Vehicle Note H): [Platoon Movement: D14.22]",
-    "new_content": "(Rules are given in a Chapter H Vehicle Note if a nationality's AFV are subject to Stall; for example, German Multi-Applicable Vehicle Note H): [Platoon Movement: D14.22]"
-},
-
 "Target Size": {
     "replace": [
         [ "[Vehicular: D1.7, Concealment: D1.76]", "[Vehicular: D1.7] [Vehicular (Concealment): D1.76]" ]
     ]
 },
 
-"Uncon irmed Kill": {
-    "new_title": "Unconfirmed Kill"
-},
-
 "Unarmored Vehicles": {
     "replace": [
         [ "[AFV (vs A-P mines: B28.42) (vs A-T mines: B28.52)]", "[AFV (vs A-P mines): B28.42] [AFV (vs A-T mines): B28.52]" ]
diff --git a/asl_rulebook2/extract/data/target-fixups.json b/asl_rulebook2/extract/data/target-fixups.json
index 873b3bc..5b7b0df 100644
--- a/asl_rulebook2/extract/data/target-fixups.json
+++ b/asl_rulebook2/extract/data/target-fixups.json
@@ -26,6 +26,16 @@
     }
 },
 
+"A24": {
+    "K2M2": { "new_ruleid": null },
+    "J2L2N2": { "new_ruleid": null },
+    "K3M3": { "new_ruleid": null },
+    "J3L3N3": { "new_ruleid": null },
+    "K4M4O4": { "new_ruleid": null },
+    "J4L4N4": { "new_ruleid": null },
+    "K5": { "new_ruleid": null }
+},
+
 "A28": {
     "1 -": { "new_ruleid": null }
 },
@@ -112,6 +122,19 @@
     }
 },
 
+"B1": {
+    "N5": { "new_ruleid": null },
+    "N6": { "new_ruleid": null },
+    "N8": { "new_ruleid": null },
+    "S5": { "new_ruleid": null },
+    "S6": { "new_ruleid": null },
+    "R5": { "new_ruleid": null },
+    "T10": { "new_ruleid": null },
+    "W5": { "new_ruleid": null },
+    "V5": { "new_ruleid": null },
+    "X6": { "new_ruleid": null }
+},
+
 "B4": {
     "6.BRIDGES": {
         "new_ruleid": "B6",
diff --git a/asl_rulebook2/tests/utils.py b/asl_rulebook2/tests/utils.py
index 2ec40b6..f9de869 100644
--- a/asl_rulebook2/tests/utils.py
+++ b/asl_rulebook2/tests/utils.py
@@ -11,9 +11,9 @@ def for_each_easlrb_version( func ):
     assert pytest_options.easlrb_path
     base_dir = pytest_options.easlrb_path
     ncalls = 0
-    for name in os.listdir( base_dir ):
-        dname = os.path.join( base_dir, name )
-        if os.path.isfile( os.path.join( dname, "eASLRB.pdf" ) ):
+    for dname, _, _ in os.walk( base_dir ):
+        fname = os.path.join( dname, "eASLRB.pdf" )
+        if os.path.isfile( fname ):
             func( dname )
             ncalls += 1
     assert ncalls > 0
diff --git a/asl_rulebook2/webapp/prepare.py b/asl_rulebook2/webapp/prepare.py
index 222df8f..79bd8d4 100644
--- a/asl_rulebook2/webapp/prepare.py
+++ b/asl_rulebook2/webapp/prepare.py
@@ -147,7 +147,7 @@ def _do_prepare_data_files( args, download_url ):
             fixedup_file.close( delete=False )
             fixup_mmp_pdf( prepared_file.name,
                 fixedup_file.name,
-                True, True,
+                False, True, True,
                 log_msg
             )
             # read the final PDF data
diff --git a/asl_rulebook2/webapp/static/css/prepare.css b/asl_rulebook2/webapp/static/css/prepare.css
index 4599b40..7deea7a 100644
--- a/asl_rulebook2/webapp/static/css/prepare.css
+++ b/asl_rulebook2/webapp/static/css/prepare.css
@@ -3,7 +3,7 @@ code { display: block ; margin: 5px 0 5px 20px ; }
 .info {
     margin-top: 10px ; min-height: 25px ;
     padding-left: 30px ; background: no-repeat url(../images/info.png) ;
-    font-size: 80% ; font-style: italic ; color: #444 ;
+    font-style: italic ; color: #444 ;
 }
 
 #prepare-app { height: 100% ; display: flex ; }
diff --git a/asl_rulebook2/webapp/static/prepare.js b/asl_rulebook2/webapp/static/prepare.js
index d44883b..b6a0d12 100644
--- a/asl_rulebook2/webapp/static/prepare.js
+++ b/asl_rulebook2/webapp/static/prepare.js
@@ -28,7 +28,11 @@ gPrepareApp.component( "prepare-app", {
     <div id="header">
         No data directory has been configured.
         <p> If you haven't used this program before, a few things need to be prepared first.
-        It will take around 10-15 minutes. </p>
+            It will take around 10-15 minutes.
+        </p>
+        <p> If there are problems, you can try to prepare your data files manually,
+            as described <a href="https://github.com/pacman-ghost/asl-rulebook2/blob/master/doc/prepare.md" target="_blank">here</a>.
+        </p>
     </div>
     <div v-show=fatalErrorMsg id="fatal-error" >
         <img :src=fatalErrorIconUrl style="float:left;margin-right:5px;" />
@@ -132,12 +136,11 @@ gPrepareApp.component( "upload-panel", {
     <div v-else style="display:flex;">
         <input type="file" @change=onFileSelected accept=".pdf" style="display:none;" ref="selectFile" >
         <button @click=onUploadProxy id="upload-proxy" ref="uploadProxy"> <img :src=uploadIconUrl /> </button>
-        <div> Click on the button, and select your copy <br> of MMP's electronic ASLRB.
+        <div style="width:29em;">
+            Click on the button, and select your copy of MMP's eASLRB.
             <div class="info"> You <u>must</u> use the <a href="https://www.wargamevault.com/product/344879/Electronic-Advanced-Squad-Leader-Rulebook" target="_blank">offical MMP eASLRB</a>. <br>
-                A scanned copy of a printed RB <u>will not work</u>!
-            </div>
-            <div class="info"> If there are problems here, you can try to prepare <br>
-                your data files <a href="https://github.com/pacman-ghost/asl-rulebook2/blob/master/doc/prepare.md" target="_blank">manually</a>.
+                A scan of a printed rulebook <u>will not work</u>!
+                <p> You should use v1.05 of the eASLRB PDF (normal version, not the "inherited zoom" version). Other versions <i>may</i> work, but may have warnings and/or errors. </p>
             </div>
         </div>
     </div>
diff --git a/doc/prepare.md b/doc/prepare.md
index 412bc4b..b79e600 100644
--- a/doc/prepare.md
+++ b/doc/prepare.md
@@ -47,7 +47,7 @@ Finally, we need to fixup some issues in the PDF:
         --output /tmp/prepared/ASL\ Rulebook.pdf \
         --progress
 ```
-This fixes the zoom problem when clicking on links within the PDF, and rotates any landscape pages.
+This rotates any landscape pages, so that the browser shows pages at the correct width (without a horizontal scrollbar).
 
 ### Using the prepared files