Located each Chapter H vehicle/ordnance note.

3 years ago · cabab224e7
parent 463df8bb6c
commit cabab224e7
12 changed files with 1050 additions and 51 deletions
--- a/asl_rulebook2/bin/prepare_pdf.py
+++ b/asl_rulebook2/bin/prepare_pdf.py
@ -23,12 +23,17 @@ _COMPRESSION_CHOICES = [

 # ---------------------------------------------------------------------

-def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, log_msg ):
+def prepare_pdf( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, log_msg ):
    """Prepare the MMP eASLRB PDF."""

    # load the targets
    with open( targets_fname, "r" ) as fp:
        targets = json.load( fp )
+    if vo_notes_fname:
+        with open( vo_notes_fname, "r" ) as fp:
+            vo_notes_targets = json.load( fp )
+    else:
+        vo_notes_targets = None

    with TempFile(mode="w") as compressed_file, TempFile(mode="w") as pdfmarks_file:

@ -49,6 +54,16 @@ def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compress
            )
            pdf_file = compressed_file.name

+        def add_vo_notes_dests( key, vo_entries, yoffset, out ):
+            for vo_note_id, vo_entry in vo_entries.items():
+                dest = "{}:{}".format( key, vo_note_id )
+                xpos, ypos = vo_entry.get( "pos", ["null","null"] )
+                if isinstance( ypos, int ):
+                    ypos += yoffset
+                print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
+                    dest, vo_entry["page_no"], xpos, ypos
+                ), file=out )
+
        # generate the pdfmarks
        log_msg( "progress", "Generating the pdfmarks..." )
        if title:
@ -68,7 +83,15 @@ def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compress
            print( "[ /Dest /{} /Page {} /View [/XYZ {} {}] /DEST pdfmark".format(
                ruleid, target["page_no"], xpos, ypos
            ), file=pdfmarks_file )
-        print( file=pdfmarks_file )
+        if vo_notes_targets:
+            print( file=pdfmarks_file )
+            for nat in vo_notes_targets:
+                if nat == "landing-craft":
+                    add_vo_notes_dests( nat, vo_notes_targets[nat], yoffset, pdfmarks_file )
+                    continue
+                for vo_type, vo_entries in vo_notes_targets[nat].items():
+                    key = "{}_{}".format( nat, vo_type )
+                    add_vo_notes_dests( key, vo_entries, yoffset, pdfmarks_file )
        pdfmarks_file.close( delete=False )

        # generate the pdfmark'ed document
@ -92,6 +115,9 @@ def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compress
@click.option( "--targets","-t","targets_fname", required=True, type=click.Path(dir_okay=False),
    help="Target definition file."
 )
+@click.option( "--vo-notes","vo_notes_fname", required=False, type=click.Path(dir_okay=False),
+    help="Vehicle/ordnance notes definition file."
+)
@click.option( "--yoffset", default=5, help="Offset to add to y co-ordinates." )
@click.option( "--output","-o","output_fname", required=True, type=click.Path(dir_okay=False),
    help="Output PDF file."
@ -101,7 +127,7 @@ def prepare_pdf( pdf_file, title, targets_fname, yoffset, output_fname, compress
 )
@click.option( "--gs","gs_path", default="gs",  help="Path to the Ghostscript executable." )
@click.option( "--progress","-p", is_flag=True, default=False, help="Log progress." )
-def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs_path, progress ):
+def main( pdf_file, title, targets_fname, vo_notes_fname, yoffset, output_fname, compression, gs_path, progress ):
    """Prepare the MMP eASLRB PDF."""

    # initialize
@ -113,7 +139,7 @@ def main( pdf_file, title, targets_fname, yoffset, output_fname, compression, gs
    # prepare the PDF
    prepare_pdf(
        pdf_file, title,
-        targets_fname, yoffset,
+        targets_fname, vo_notes_fname, yoffset,
        output_fname, compression,
        gs_path,
        log_msg
--- a/asl_rulebook2/extract/all.py
+++ b/asl_rulebook2/extract/all.py
@ -128,8 +128,11 @@ class ExtractAll( ExtractBase ):
@click.option( "--save-targets","save_targets_fname", required=True, help="Where to save the extracted targets." )
@click.option( "--save-chapters","save_chapters_fname", required=True, help="Where to save the extracted chaopters." )
@click.option( "--save-footnotes","save_footnotes_fname", required=True, help="Where to save the extracted footnotes." )
+@click.option( "--save-vo-notes","save_vo_notes_fname", required=True,
+    help="Where to save the extracted vehicle/ordnance notes targets."
+)
 def main( pdf_file, args, progress, output_fmt,
-  save_index_fname, save_targets_fname, save_chapters_fname, save_footnotes_fname
+  save_index_fname, save_targets_fname, save_chapters_fname, save_footnotes_fname, save_vo_notes_fname
 ):
    """Extract everything we need from the MMP eASLRB."""

@ -147,9 +150,12 @@ def main( pdf_file, args, progress, output_fmt,
    with open( save_index_fname, "w", encoding="utf-8" ) as index_out, \
         open( save_targets_fname, "w", encoding="utf-8" ) as targets_out, \
         open( save_chapters_fname, "w", encoding="utf-8" ) as chapters_out, \
-         open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out:
+         open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out, \
+         open( save_vo_notes_fname, "w", encoding="utf-8" ) as vo_notes_out:
        getattr( extract.extract_index, "save_as_"+output_fmt )( index_out )
-        getattr( extract.extract_content, "save_as_"+output_fmt )( targets_out, chapters_out, footnotes_out )
+        getattr( extract.extract_content, "save_as_"+output_fmt )(
+            targets_out, chapters_out, footnotes_out, vo_notes_out
+        )

 if __name__ == "__main__":
    main() #pylint: disable=no-value-for-parameter
--- a/asl_rulebook2/extract/base.py
+++ b/asl_rulebook2/extract/base.py
@ -38,7 +38,7 @@ class ExtractBase:
    @staticmethod
    def _is_bold( elem ):
        """Check if an element is using a bold font."""
-        return elem.fontname.endswith( ( "-Bold", ",Bold", "-BoldMT" ) )
+        return elem.fontname.endswith( ( ",Bold", "-BoldMT" ) ) or "-Bold" in elem.fontname

    def log_msg( self, msg_type, msg, *args, **kwargs ):
        """Log a message."""
--- a/asl_rulebook2/extract/content.py
+++ b/asl_rulebook2/extract/content.py
@ -5,6 +5,7 @@ import os
 import json
 import re
 import math
+from collections import defaultdict

 import click
 from pdfminer.layout import LTChar
@ -30,6 +31,8 @@ _DISABLE_SORT_ITEMS = [
    "F20", "F21", # Chapter F footnotes
    "G48", "G49", "G50", # Chapter G footnotes
    "H9", # Chapter H footnotes
+    429,431,432,433,434,435, # Italian vehicle notes
+    436,437,438,439, # Italian ordnance notes
 ]

 _DEFAULT_ARGS = {
@ -38,9 +41,44 @@ _DEFAULT_ARGS = {
    "chapter-j": "593",
    "chapter-w": "647-664",
    "content_vp_left": 0, "content_vp_right": 565, "content_vp_top": 715, "content_vp_bottom": 28, # viewport
-    "disable-sort-items": ",".join( _DISABLE_SORT_ITEMS )
+    "disable-sort-items": ",".join( str(si) for si in _DISABLE_SORT_ITEMS )
 }

+# NOTE: The exact mappings here are actually not that important. What's important is:
+# - the order of the nationality + V/O types
+# - the page numbers themselves (so that they get parsed)
+_VO_NOTE_SECTIONS = [
+    [ "german", "vehicles", "330,332,334-343", True ],
+    [ "german", "ordnance", "344-348", True ],
+    [ "russian", "vehicles", "348,350-355", True ],
+    [ "russian", "ordnance", "356-358", True ],
+    [ "russian", "vehicles", "362,364-368", False ],
+    [ "russian", "ordnance", "369", False ],
+    [ "american", "vehicles", "371,373-383", True ],
+    [ "american", "ordnance", "385-389", True ],
+    [ "british", "vehicles", "395,398-417", True ],
+    [ "british", "ordnance", "419-423", True ],
+    [ "italian", "vehicles", "429,431-435", True ],
+    [ "italian", "ordnance", "436-439", True ],
+    [ "japanese", "vehicles", "443-448", True ],
+    [ "japanese", "ordnance", "448-452", True ],
+    [ "chinese", "vehicles", "456-459", True ],
+    [ "chinese", "ordnance", "459-463", True ],
+    [ "landing-craft", "vehicles", "467-468", True ],
+    [ "french", "vehicles", "470,472-480", True ],
+    [ "french", "ordnance", "482-487", True ],
+    [ "allied-minor", "vehicles", "492-493,495-500", True ],
+    [ "allied-minor", "ordnance", "501-504", True ],
+    [ "axis-minor", "vehicles", "506,508-515", True ],
+    [ "axis-minor", "ordnance", "516,518-527", True ],
+    [ "finnish", "vehicles", "536,538-541", True ],
+    [ "finnish", "ordnance", "543,545-549", True ],
+    [ "un-forces", "vehicles", "554,556-565", True ],
+    [ "un-forces", "ordnance", "567-570", True ],
+    [ "communist-forces", "vehicles", "580", True ],
+    [ "communist-forces", "ordnance", "581-585", True ],
+]
+
 # ---------------------------------------------------------------------

 class ExtractContent( ExtractBase ):
@ -51,6 +89,8 @@ class ExtractContent( ExtractBase ):
        self.targets = {}
        self._chapters = []
        self._footnotes = {}
+        self._vo_notes = self._prev_vo_note_id = None
+        self._curr_vo_note_section = 0
        self._curr_chapter = self._curr_footnote = self._curr_pageid = None
        self._prev_elem = self._top_left_elem = None
        # prepare to fixup problems in the content
@ -61,6 +101,7 @@ class ExtractContent( ExtractBase ):
        self._target_fixups = load_fixup( "target-fixups.json" )
        self._chapter_fixups = load_fixup( "chapter-fixups.json" )
        self._footnote_fixups = load_fixup( "footnote-fixups.json" )
+        self._vo_note_fixups = load_fixup( "vo-note-fixups.json" )

    def extract_content( self, pdf ):
        """Extract content from the MMP eASLRB."""
@ -78,6 +119,12 @@ class ExtractContent( ExtractBase ):
                    page_index[ page_no ] = chapter
        disable_sort_items = set( self._args["disable-sort-items"].split( "," ) )

+        # include the pages for the Chapter H vehicle/ordnance notes
+        for _, _, page_nos, _ in _VO_NOTE_SECTIONS:
+            page_nos = parse_page_numbers( page_nos )
+            for page_no in page_nos:
+                page_index[ page_no ] = "H"
+
        # initialize
        self._curr_chapter = None
        curr_chapter_pageno = None
@ -109,13 +156,18 @@ class ExtractContent( ExtractBase ):
            self._curr_pageid = "{}{}".format( # nb: this is the ASL page# (e.g. "A42"), not the PDF page#
                self._curr_chapter, curr_chapter_pageno
            )
-            self.log_msg( "progress", "- Analyzing page {} ({}).", page_no, self._curr_pageid )
+            # NOTE: There have been so many extra pages added to Chapter H, there's no easy way to calculate
+            # the page ID. We could set up a table mapping physical page numbers to page ID's, but that's
+            # far more trouble than it's worth.
+            self.log_msg( "progress", "- Analyzing page {}{}.",
+                page_no, " ({})".format( self._curr_pageid ) if not self._curr_pageid.startswith("H") else ""
+            )

            # process each element on the page
            curr_caption = None
            self._top_left_elem = self._prev_elem = None
            elem_filter = lambda e: isinstance( e, LTChar )
-            sort_elems = self._curr_pageid not in disable_sort_items
+            sort_elems = self._curr_pageid not in disable_sort_items and str(page_no) not in disable_sort_items
            for _, elem in PageElemIterator( lt_page, elem_filter=elem_filter, sort_elems=sort_elems ):

                # skip problematic elements
@ -137,7 +189,7 @@ class ExtractContent( ExtractBase ):

                # check if we're currently extracting footnotes
                if self._curr_footnote is not None:
-                    self._on_footnote_elem( elem, lt_page )
+                    self._on_footnote_elem( elem, lt_page, page_no )
                    self._prev_elem = elem
                    continue

@ -195,6 +247,8 @@ class ExtractContent( ExtractBase ):
            self.log_msg( "warning", "Unused fixups: {}", self._target_fixups )
        if self._footnote_fixups:
            self.log_msg( "warning", "Unused fixups: {}", self._footnote_fixups )
+        if self._vo_note_fixups:
+            self.log_msg( "warning", "Unused fixups: {}", self._vo_note_fixups )

        # extract the chapters
        self._extract_chapters()
@ -215,7 +269,7 @@ class ExtractContent( ExtractBase ):
            # yup - notify the main loop
            self._curr_footnote = []
            if elem:
-                self._on_footnote_elem( elem, lt_page )
+                self._on_footnote_elem( elem, lt_page, page_no )
            return

        # check if the entry needs to be fixed up
@ -266,14 +320,15 @@ class ExtractContent( ExtractBase ):
            "raw_caption": orig_caption
        }

-    def _on_footnote_elem( self, elem, lt_page ):
+    def _on_footnote_elem( self, elem, lt_page, page_no ):
        """Process an element while we're parsing footnotes."""
        # check if we've found the start of a new footnote
        if self._is_bold( elem ):
            if elem.get_text().isdigit() and self._is_start_of_line( elem, lt_page ):
                # yup - save the current footnote, start collecting the new one
                self._save_footnote()
-                self._curr_footnote = [ elem.get_text(), "" ]
+                elem_pos = ( elem.x0, elem.y1 )
+                self._curr_footnote = [ elem.get_text(), "", page_no, elem_pos ]
            else:
                if self._curr_footnote[1]:
                    # FUDGE! Some footnote content has bold text hard-up at the left margin,
@ -334,10 +389,17 @@ class ExtractContent( ExtractBase ):
            if footnote_id == "9" and "9" in footnote_ids:
                footnote_id = "29"

-        # check if we've gone past the end of the Chapter H footnotes
-        if self._curr_chapter == "H" and len(footnote_id) > 1:
-            self._curr_footnote = None
-            return
+        if self._curr_chapter == "H":
+            # check if we've gone past the end of the Chapter H footnotes
+            if self._vo_notes is None and len(footnote_id) > 1:
+                # yup - start collecting vehicle/ordnance notes
+                self._vo_notes = defaultdict( lambda: defaultdict( list ) )
+            # check if we're collecting Chapter H vehicle/ordnance notes
+            if self._vo_notes is not None:
+                # yup - save the next entry (the "footnote" is actually a vehicle/ordnance note)
+                self._save_vo_note( footnote_id, self._curr_footnote[2], self._curr_footnote[3] )
+                self._curr_footnote = None
+                return

        # clean up the content
        content = re.sub( r"\s+", " ", content ).strip()
@ -415,6 +477,108 @@ class ExtractContent( ExtractBase ):
        } )
        self._curr_footnote = None

+    def _save_vo_note( self, caption, page_no, page_pos ):
+        """Save an extracted vehicle/ordnance note."""
+
+        # NOTE: Some pieces of text cause the parsing code to go wonky (typically because it's seen
+        # a "1" and so thinks it's found the start of a new section), so we manually skip over these.
+        skips = self._vo_note_fixups.get( "skips", {} ).get( str(page_no) )
+        if skips:
+            for i, target in enumerate(skips):
+                if self._check_string( caption, target ):
+                    # we've got a caption that should be skipped - remove it from the list, and return
+                    del skips[i]
+                    if not skips:
+                        del self._vo_note_fixups["skips"][ str(page_no) ]
+                        if not self._vo_note_fixups["skips"]:
+                            del self._vo_note_fixups["skips"]
+                    return
+        if caption.isdigit() and page_no not in (354, 417):
+            return
+
+        def apply_fixups( vo_note_id, caption ):
+            nat, vo_type, _, _ = _VO_NOTE_SECTIONS[ self._curr_vo_note_section ]
+            fixup = self._vo_note_fixups.get( nat, {} ).get( vo_type, {} ).get( vo_note_id )
+            if fixup:
+                if self._check_string( caption, fixup["old_caption"] ):
+                    # remove the fixup
+                    del self._vo_note_fixups[ nat ][ vo_type ][ vo_note_id ]
+                    cleanup_fixups( nat, vo_type )
+                    # apply the fixup
+                    if "new_vo_note_id" in fixup:
+                        vo_note_id = fixup["new_vo_note_id"]
+                    if "new_caption" in fixup:
+                        caption = fixup["new_caption"]
+            return vo_note_id, caption
+
+        def cleanup_fixups( nat, vo_type ):
+            if nat not in self._vo_note_fixups:
+                return
+            if vo_type in self._vo_note_fixups[nat] and not self._vo_note_fixups[ nat ][ vo_type ]:
+                del self._vo_note_fixups[ nat ][ vo_type ]
+                if nat in self._vo_note_fixups and not self._vo_note_fixups[ nat ]:
+                    del self._vo_note_fixups[ nat ]
+
+        # extract the note number and caption
+        mo = re.search( r"^[1-9][0-9.]*", caption )
+        if not mo:
+            return
+        vo_note_id = mo.group()
+        caption = caption[ mo.end() : ].strip()
+        if vo_note_id.endswith( "." ):
+            vo_note_id = vo_note_id[:-1]
+        if caption.endswith( ":" ):
+            caption = caption[:-1].strip()
+        if caption.startswith( ( "cm ", "mm ", "pdr", "-cwt" ) ):
+            # FUDGE! Things like "5.1 2.2cm Big Gun" are getting parsed as "5.12.2" + "cm Big Gun" :-/
+            pos = vo_note_id.find( "." )
+            if pos >= 0:
+                caption = vo_note_id[pos+1:] + caption
+                vo_note_id = vo_note_id[:pos]
+
+        # check for any fixups
+        vo_note_id, caption = apply_fixups( vo_note_id, caption )
+
+        # compare the note ID with the previous one
+        nat, vo_type, _, check_seq = _VO_NOTE_SECTIONS[ self._curr_vo_note_section ]
+        def get_base_note_id( val ):
+            pos = val.find( "." )
+            return int( val[:pos] if pos >= 0 else val )
+        base_note_id = get_base_note_id( vo_note_id )
+        if self._prev_vo_note_id:
+            # check if we've found the start of the next section
+            if base_note_id == 1:
+                # yup - add any extra entries to the current section
+                add_vo_entries = self._vo_note_fixups.get( nat, {} ).get( vo_type, {} ).pop( "add", [] )
+                for vo_entry in add_vo_entries:
+                    self._vo_notes[ nat ][ vo_type ].append( vo_entry )
+                cleanup_fixups( nat, vo_type )
+                # get the next nationality + V/O type
+                self._curr_vo_note_section += 1
+                nat, vo_type, _, _ = _VO_NOTE_SECTIONS[ self._curr_vo_note_section ]
+                # check for any fixups
+                vo_note_id, caption = apply_fixups( vo_note_id, caption )
+            elif check_seq:
+                # compare the note ID with the previous one
+                prev_base_note_id = get_base_note_id( self._prev_vo_note_id )
+                if base_note_id == prev_base_note_id + 1:
+                    pass # nb: this is the normal case, we've found the next V/O note
+                elif base_note_id == prev_base_note_id and "." in vo_note_id:
+                    pass # nb: this is to allow things like "9.1" following "9"
+                else:
+                    return # nb: we got some junk that can be ignored
+
+        # save the V/O note
+        self._vo_notes[ nat ][ vo_type ].append( {
+            "vo_note_id": vo_note_id, "caption": caption,
+            "page_no": page_no, "page_pos": page_pos
+        } )
+        if nat == "allied-minor" and vo_type == "ordnance" and vo_note_id == "19":
+            # FUDGE! Because we're not seeing Allied Minor Ordnance Note 20 :-/
+            self._prev_vo_note_id = "20"
+        else:
+            self._prev_vo_note_id = vo_note_id
+
    def _extract_chapters( self ):
        """Extract the chapters and their sections."""

@ -492,15 +656,15 @@ class ExtractContent( ExtractBase ):
            return True # the element is at the top of the right column
        return False

-    def save_as_raw( self, targets_out, chapters_out, footnotes_out ):
+    def save_as_raw( self, targets_out, chapters_out, footnotes_out, vo_notes_out ):
        """Save the raw results."""
-        self._save_as_raw_or_text( targets_out, chapters_out, footnotes_out, True )
+        self._save_as_raw_or_text( targets_out, chapters_out, footnotes_out, vo_notes_out, True )

-    def save_as_text( self, targets_out, chapters_out, footnotes_out ):
+    def save_as_text( self, targets_out, chapters_out, footnotes_out, vo_notes_out ):
        """Save the results as plain-text."""
-        self._save_as_raw_or_text( targets_out, chapters_out, footnotes_out, False )
+        self._save_as_raw_or_text( targets_out, chapters_out, footnotes_out, vo_notes_out, False )

-    def _save_as_raw_or_text( self, targets_out, chapters_out, footnotes_out, raw ):
+    def _save_as_raw_or_text( self, targets_out, chapters_out, footnotes_out, vo_notes_out, raw ):
        """Save the results as raw or plain-text."""

        # save the targets
@ -511,7 +675,7 @@ class ExtractContent( ExtractBase ):
                    print( file=targets_out )
                print( "=== p{} ===".format( target["page_no"] ), file=targets_out )
                curr_page_no = target["page_no"]
-            xpos, ypos = self._get_target_pos( target )
+            xpos, ypos = self._get_page_pos( target["pos"] )
            if raw:
                print( "[{},{}] = {}".format(
                    xpos, ypos, target["raw_caption"]
@ -556,13 +720,35 @@ class ExtractContent( ExtractBase ):
                    print( " ; ".join( make_caption(c) for c in footnote["captions"] ), file=footnotes_out )
                    print( footnote["content"], file=footnotes_out )

-    def save_as_json( self, targets_out, chapters_out, footnotes_out ):
+        # save the vehicle/ordnance notes
+        first = True
+        for nat, vo_types in self._vo_notes.items():
+            for vo_type, vo_entries in vo_types.items():
+                if first:
+                    first = False
+                else:
+                    print( file=vo_notes_out )
+                print( "=== {} ===".format(
+                    nat if nat == "landing-craft" else "{} {}".format( nat, vo_type )
+                ), file=vo_notes_out )
+                for vo_entry in vo_entries:
+                    if "page_pos" in vo_entry:
+                        xpos, ypos = ExtractContent._get_page_pos( vo_entry["page_pos"] )
+                        page_pos = "[{},{}]".format( xpos, ypos )
+                    else:
+                        page_pos = None
+                    print( "{:<5} {} @p{}{}".format(
+                        vo_entry["vo_note_id"]+":", vo_entry["caption"], vo_entry["page_no"],
+                        ":"+page_pos if page_pos else ""
+                    ), file=vo_notes_out )
+
+    def save_as_json( self, targets_out, chapters_out, footnotes_out, vo_notes_out ):
        """Save the results as JSON."""

        # save the targets
        targets, curr_chapter = [], None
        for ruleid, target in self.targets.items():
-            xpos, ypos = self._get_target_pos( target )
+            xpos, ypos = self._get_page_pos( target["pos"] )
            targets.append( "{}: {{ \"caption\": {}, \"page_no\": {}, \"pos\": [{},{}] }}".format(
                jsonval( ruleid ),
                jsonval(target["caption"]), target["page_no"], xpos, ypos
@ -613,12 +799,46 @@ class ExtractContent( ExtractBase ):
            ",\n\n".join( chapters )
        ), file=footnotes_out )

+        # save the vehicle/ordnance notes
+        vo_notes = []
+        for nat in self._vo_notes:
+            vo_types = []
+            for vo_type, vo_entries in self._vo_notes[nat].items():
+                entries = []
+                for vo_entry in vo_entries:
+                    val = "{}: {{ \"caption\": {}, \"page_no\": {}".format(
+                        jsonval(vo_entry["vo_note_id"]), jsonval(vo_entry["caption"]), jsonval(vo_entry["page_no"])
+                    )
+                    if "page_pos" in vo_entry:
+                        xpos, ypos = self._get_page_pos( vo_entry["page_pos"] )
+                        val += ", \"pos\": [{},{}]".format( xpos, ypos )
+                    val += " }"
+                    entries.append( "    {}".format( val ) )
+                if nat == "landing-craft":
+                    vo_types.append( ",\n".join( entries ) )
+                else:
+                    vo_types.append( "{}: {{\n{}\n}}".format(
+                        jsonval(vo_type), ",\n".join( entries )
+                    ) )
+            vo_notes.append( "{}: {{\n{}\n}}".format(
+                jsonval(nat), ",\n".join( vo_types )
+            ) )
+        print( "{{\n\n{}\n\n}}".format(
+            ",\n\n".join( vo_notes )
+        ), file=vo_notes_out )
+
    @staticmethod
-    def _get_target_pos( target ):
-        """Return a target's X/Y position on the page."""
-        xpos = math.floor( target["pos"][0] )
-        ypos = math.ceil( target["pos"][1] )
-        return xpos, ypos
+    def _check_string( val, target ):
+        """Check if a string matches a target."""
+        if target.startswith( "^" ):
+            return val.startswith( target[1:] )
+        else:
+            return val == target
+
+    @staticmethod
+    def _get_page_pos( pos ):
+        """Return a X/Y position on the page."""
+        return math.floor( pos[0] ), math.ceil( pos[1] )

 # ---------------------------------------------------------------------

@ -632,7 +852,12 @@ class ExtractContent( ExtractBase ):
@click.option( "--save-targets","save_targets_fname", required=True, help="Where to save the extracted targets." )
@click.option( "--save-chapters","save_chapters_fname", required=True, help="Where to save the extracted chaopters." )
@click.option( "--save-footnotes","save_footnotes_fname", required=True, help="Where to save the extracted footnotes." )
-def main( pdf_file, args, progress, output_fmt, save_targets_fname, save_chapters_fname, save_footnotes_fname ):
+@click.option( "--save-vo-notes","save_vo_notes_fname", required=True,
+    help="Where to save the extracted vehicle/ordnance notes."
+)
+def main( pdf_file, args, progress, output_fmt,
+  save_targets_fname, save_chapters_fname, save_footnotes_fname, save_vo_notes_fname
+):
    """Extract content from the MMP eASLRB."""

    # initialize
@ -651,8 +876,9 @@ def main( pdf_file, args, progress, output_fmt, save_targets_fname, save_chapter
    # save the results
    with open( save_targets_fname, "w", encoding="utf-8" ) as targets_out, \
         open( save_chapters_fname, "w", encoding="utf-8" ) as chapters_out, \
-         open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out:
-        getattr( extract, "save_as_"+output_fmt, )( targets_out, chapters_out, footnotes_out )
+         open( save_footnotes_fname, "w", encoding="utf-8" ) as footnotes_out, \
+         open( save_vo_notes_fname, "w", encoding="utf-8" ) as vo_notes_out:
+        getattr( extract, "save_as_"+output_fmt, )( targets_out, chapters_out, footnotes_out, vo_notes_out )

 if __name__ == "__main__":
    main() #pylint: disable=no-value-for-parameter
--- a/asl_rulebook2/extract/data/index-fixups.json
+++ b/asl_rulebook2/extract/data/index-fixups.json
@ -83,12 +83,6 @@
    ]
 },

-"EX": {
-    "old_content": "ExampleEXC: Exception",
-    "new_content": "Example",
-    "_comment_": "The code manually inserts an entry for EXC: Exception"
-},
-
 "Fortification": {
    "replace": [
        [ "[in BRT: SSR1 (BRT Sand: T3.2) (NA in Betio Piers: T9.2)]", "[in BRT: SSR1 (BRT Sand): T3.2] [in BRT: SSR1 (NA in Betio Piers): T9.2]" ]
--- a/asl_rulebook2/extract/data/target-fixups.json
+++ b/asl_rulebook2/extract/data/target-fixups.json
@ -394,7 +394,8 @@
    "10": { "new_ruleid": null },
    "11": { "new_ruleid": null },
    "12": { "new_ruleid": null },
-    "55": { "new_ruleid": null }
+    "55": { "new_ruleid": null },
+    "678876987": { "new_ruleid": null }
 },

 "H2": {
--- a/asl_rulebook2/extract/data/vo-note-fixups.json
+++ b/asl_rulebook2/extract/data/vo-note-fixups.json
@ -0,0 +1,734 @@
+{
+
+"skips": {
+    "382": [ "1, 3" ],
+    "429": [ "^1,660,", "^1and Fiat 3000", "^9/43 armistice", "^4/41 (.9)" ],
+    "431": [ "^1, for East Africa", "^9/42 (1.4)," ],
+    "432": [ "^1 (l.2),", "^1. Sources vary" ],
+    "434": [ "1-", "^1.5 for 11/41-6/42," ],
+    "438": [ "^1/41-5/43" ],
+    "439": [ "1 (1", "^1/43 ( 1.2),", "^1/43 (1.3),", "^1/42-5/43." ],
+    "492": [ "1B11CE/FPNA", "1B11CE/FPNA" ],
+    "493": [ "1T", "1B" ],
+    "496": [ "1B" ],
+    "501": [ "1h-d" ],
+    "502": [ "1s5", "1s5" ],
+    "503": [ "1AP5", "1s6" ],
+    "504": [ "^1.3)" ],
+    "514": [ "1.4 for 45" ],
+    "556": [ "1#" ],
+    "560": [ "1, 3" ]
+},
+
+"german": {
+"vehicles": {
+    "9.1": {
+        "old_caption": "FT-17 730(f) &",
+        "new_caption": "FT-17 730(f) & FT-17 730m(f)"
+    },
+    "22": {
+        "old_caption": "PzKpfw IVF: 1",
+        "new_caption": "PzKpfw IVF(1)"
+    },
+    "23": {
+        "old_caption": "PzKpfw IVF:2",
+        "new_caption": "PzKpfw IVF(2)"
+    },
+    "35": {
+        "old_caption": "StuG IIIG (L) &",
+        "new_caption": "StuG IIIG (L) & StuH 42 (L)"
+    },
+    "52": {
+        "old_caption": "JgdPz IV & JgdPz",
+        "new_caption": "JgdPz IV & JgdPz IV(L)"
+    },
+    "58": {
+        "old_caption": "SPW 250/sMG",
+        "new_caption": "SPW 250/sMG & 251/sMG"
+    },
+    "59": {
+        "old_caption": "SPW 250/7 &",
+        "new_caption": "SPW 250/7 & 251/2"
+    },
+    "72": {
+        "old_caption": "PSW 231(8 rad)",
+        "new_caption": "PSW 231(8 rad) & 232"
+    },
+    "85.37": {
+        "old_caption": "FlaK/Pz IV",
+        "new_vo_note_id": "85", "new_caption": "37 FlaK/Pz IV"
+    },
+    "92": {
+        "old_caption": "2cm & 3.7cm",
+        "new_caption": "2cm & 3.7cm FlaK LKW"
+    },
+    "96": {
+        "old_caption": "Opel 6700 &Buessing-NAG",
+        "new_caption": "Opel 6700 & Buessing-NAG 4500"
+    },
+    "add": [
+        { "_comment_": "This gets parsed as '4' and '5.1 GSW 39H(f) PaK' :-/",
+          "vo_note_id": "45.1", "caption": "GSW 39H(f) PaK", "page_no": 337, "page_pos": [380,561]
+        },
+        { "vo_note_id": "37.1", "caption": "Sturmtiger", "page_no": 532, "page_pos": [118,640] },
+        { "vo_note_id": "88.1", "caption": "SdKfz 10/5", "page_no": 532, "page_pos": [399,713] }
+    ]
+}
+},
+
+"russian": {
+"vehicles": {
+    "4": {
+        "old_caption": "T-60 M40 &",
+        "new_caption": "T-60 M40 & M42"
+    },
+    "6": {
+        "old_caption": "T-26 M33 & T-",
+        "new_caption": "T-26 M33 & T-26S M37/39"
+    },
+    "1": {
+        "old_caption": "T-28 M34",
+        "new_vo_note_id": "11"
+    },
+    "11.1": {
+        "old_caption": "T-28 M34(L) & 12.1. T-28E M40(L)",
+        "new_caption": "T-28 M34(L)"
+    },
+    "23": {
+        "old_caption": "KV-lE, KV-1 M41,",
+        "new_caption": "KV-1E, KV-1 M41, & KV-1 M-42"
+    },
+    "34": {
+        "old_caption": "ISU-122 & ISU-",
+        "new_caption": "ISU-122 & ISU-152"
+    },
+    "add": [
+        { "vo_note_id": "12.1", "caption": "T-28E M40(L)", "page_no": 364, "page_pos": [394,289] }
+    ]
+}
+},
+
+"american": {
+"vehicles": {
+    "14": {
+        "old_caption": "M4A3E2 & M4A3E2 (L) MediumTanks",
+        "new_caption": "M4A3E2 & M4A3E2 (L) Medium Tanks"
+    },
+    "17": {
+        "old_caption": "M4(105) & M4A3(105) MediumTanks",
+        "new_caption": "M4(105) & M4A3(105) Medium Tanks"
+    }
+}
+},
+
+"british": {
+"vehicles": {
+    "2": {
+        "old_caption": "(A17) Tetrarch & Tetrarch CS[Light Tanks Mk VII & Mk VII CS]",
+        "new_caption": "(A17) Tetrarch & Tetrarch CS [Light Tanks Mk VII & Mk VII CS]"
+    },
+    "6": {
+        "old_caption": "A9 & A9 CS [Cruiser Tanks Mk I& Mk I CS]",
+        "new_caption": "A9 & A9 CS [Cruiser Tanks Mk I & Mk I CS]"
+    },
+    "26": {
+        "old_caption": "(A12) Matilda II & II CS [In-fantry Tank Mk II]",
+        "new_caption": "(A12) Matilda II & II CS [Infantry Tank Mk II]"
+    },
+    "36": {
+        "old_caption": "Valentine & Churchill Bridgelay-ers",
+        "new_caption": "Valentine & Churchill Bridgelayers"
+    },
+    "45": {
+        "old_caption": "Humber III & Otter Light Re-connaissance Cars",
+        "new_caption": "Humber III & Otter Light Reconnaissance Cars"
+    },
+    "82": {
+        "old_caption": "",
+        "new_caption": "30-cwt Lorry"
+    },
+    "83": {
+        "old_caption": "",
+        "new_caption": "3-Ton Lorry"
+    }
+}
+},
+
+"italian": {
+"vehicles": {
+    "1": {
+        "old_caption": "LS/21 & LS/3",
+        "new_caption": "L5/21 & L5/30"
+    },
+    "2": {
+        "old_caption": "^L3/35: Derived from",
+        "new_caption": "L3/35"
+    },
+    "3": {
+        "old_caption": "^L3 aa: Some L3",
+        "new_caption": "L3 aa"
+    },
+    "4": {
+        "old_caption": "^L3 cc: During the early months",
+        "new_caption": "L3 cc"
+    },
+    "5": {
+        "old_caption": "^L3 Lf: Development of",
+        "new_caption": "L3 Lf"
+    },
+    "6": {
+        "old_caption": "^L6/40: Designed to replace",
+        "new_caption": "L6/40"
+    },
+    "7": {
+        "old_caption": "^Mll/39: This tank carried",
+        "new_caption": "M11/39"
+    },
+    "8": {
+        "old_caption": "^Ml3/40: Replacing the",
+        "new_caption": "M13/40"
+    },
+    "9": {
+        "old_caption": "^M14/41: This tank,",
+        "new_caption": "M14/41"
+    },
+    "10": {
+        "old_caption": "^M15/42: This, the last version",
+        "new_caption": "M15/42"
+    },
+    "11": {
+        "old_caption": "^MR/35(f): The Germans provided",
+        "new_caption": "MR/35(f)"
+    },
+    "12": {
+        "old_caption": "Semovente M40 & M41 da",
+        "new_caption": "Semovente M40 & M41 da 75/18"
+    },
+    "13": {
+        "old_caption": "^Semovente M42 da 75/1&75/32: The last model",
+        "new_caption": "Semovente M42 da 75/18 & 75/32"
+    },
+    "14": {
+        "old_caption": "^Semovente M43 da 105/25: Nicknathe",
+        "new_caption": "Semovente M43 da 105/25"
+    },
+    "15": {
+        "old_caption": "Semovente L40 da 47/32: The SMV",
+        "new_caption": "Semovente L40 da 47/32"
+    },
+    "16": {
+        "old_caption": "^Semovente M41M da 90/53: This AFV",
+        "new_caption": "Semovente M41M da 90/53"
+    },
+    "18": {
+        "old_caption": "^Lince: The Lince (Lynx)",
+        "new_caption": "Lince"
+    },
+    "19": {
+        "old_caption": "^Lancia lZM: In late 1912",
+        "new_caption": "Lancia 1ZM"
+    },
+    "20": {
+        "old_caption": "^Fiat 611A & 611BThese armoredcars",
+        "new_caption": "Fiat 611A & 611B"
+    },
+    "21": {
+        "old_caption": "^AB 40 & AB41These two auto",
+        "new_caption": "AB 40 & AB 41"
+    },
+    "22": {
+        "old_caption": "^Autoprotetto S37: This APC",
+        "new_caption": "Autoprotetto S37"
+    },
+    "23": {
+        "old_caption": "Autocannoni da",
+        "new_caption": "Autocannoni da 20/65(b) & 65/17(b)"
+    },
+    "24": {
+        "old_caption": "Autocannoni da",
+        "new_caption": "Autocannoni da 75/27 CK & 90/53"
+    },
+    "25": {
+        "old_caption": "^TL 37, TM 40 &TP 32",
+        "new_caption": "TL 37, TM 40 & TP 32"
+    },
+    "26": {
+        "old_caption": "^Autocarretta: As the portee",
+        "new_caption": "Autocarretta"
+    },
+    "27": {
+        "old_caption": "^Fiat 508 MC: Derived from",
+        "new_caption": "Fiat 508 MC"
+    },
+    "28": {
+        "old_caption": "^Autocarri L, M & P: The ItalianArmy",
+        "new_caption": "Autocarri L, M & P"
+    }
+},
+"ordnance": {
+    "1": {
+        "old_caption": "^Mortaio da 45 \"Brixia\": This weapon,",
+        "new_caption": "Mortaio da 45 \"Brixia\""
+    },
+    "2": {
+        "old_caption": "^Mortaio da 81/14: First usedi",
+        "new_caption": "Mortaio da 81/14"
+    },
+    "3": {
+        "old_caption": "^Fucile-cc S: Like several other",
+        "new_caption": "Fucile-cc S"
+    },
+    "4": {
+        "old_caption": "^Cannone-cc da 37/45: This was",
+        "new_caption": "Cannone-cc da 37/45"
+    },
+    "5": {
+        "old_caption": "^Cannone da 47/32: This was",
+        "new_caption": "Cannone da 47/32"
+    },
+    "6": {
+        "old_caption": "^Cannone da 65/17: This was",
+        "new_caption": "Cannone da 65/17"
+    },
+    "7": {
+        "old_caption": "^Cannone da 70/15: This",
+        "new_caption": "Cannone da 70/15"
+    },
+    "8": {
+        "old_caption": "^Obice da 75/13: The Skoda",
+        "new_caption": "Obice da 75/13"
+    },
+    "9": {
+        "old_caption": "^Cannone da 75/27: This was",
+        "new_caption": "Cannone da 75/27"
+    },
+    "10": {
+        "old_caption": "^Obice da 75/18: This game piece",
+        "new_caption": "Obice da 75/18"
+    },
+    "11": {
+        "old_caption": "^Cannone da 75/32: The 75/32",
+        "new_caption": "Cannone da 75/32"
+    },
+    "12": {
+        "old_caption": "^Obice da 100/17: Another old",
+        "new_caption": "Obice da 100/17"
+    },
+    "13": {
+        "old_caption": "^Cannone da 105/28: This was",
+        "new_caption": "Cannone da 105/28"
+    },
+    "14": {
+        "old_caption": "^Obice da 149/13: This piece",
+        "new_caption": "Obice da 149/13"
+    },
+    "15": {
+        "old_caption": "^Cannone da 149/35: Another",
+        "new_caption": "Cannone da 149/35"
+    },
+    "16": {
+        "old_caption": "^Cannone da 149/40: To replace",
+        "new_caption": "Cannone da 149/40"
+    },
+    "17": {
+        "old_caption": "^Cannone-mitragliera da 20/65: Thiswas",
+        "new_caption": "Cannone-mitragliera da 20/65"
+    },
+    "18": {
+        "old_caption": "^Cannone-aa da 75/39: This was",
+        "new_caption": "Cannone-aa da 75/39"
+    },
+    "add": [
+        { "vo_note_id": "19", "caption": "Cannone-aa da 75/46", "page_no": 439, "page_pos": [283,42] },
+        { "vo_note_id": "20", "caption": "Cannone-aa da 90/53", "page_no": 439, "page_pos": [384,541] }
+    ]
+}
+},
+
+"japanese": {
+"vehicles": {
+    "3": {
+        "old_caption": "Type 95 SO-KI Armored Railway Ve-hicle",
+        "new_caption": "Type 95 SO-KI Armored Railway Vehicle"
+    },
+    "4": {
+        "old_caption": "Types 97A & 97B TE-KE Light Ar-mored Vehicles",
+        "new_caption": "Types 97A & 97B TE-KE Light Armored Vehicles"
+    },
+    "7": {
+        "old_caption": "Types 89A & 89B CHI-RO MediumTanks",
+        "new_caption": "Types 89A & 89B CHI-RO Medium Tanks"
+    },
+    "8": {
+        "old_caption": "Types 97A & 97B CHI-HA MediumTanks",
+        "new_caption": "Types 97A & 97B CHI-HA Medium Tanks"
+    },
+    "13": {
+        "old_caption": "Type 4 HO-RO Self-Propelled How-itzer",
+        "new_caption": "Type 4 HO-RO Self-Propelled Howitzer"
+    },
+    "14": {
+        "old_caption": "Type 1 HO-KI Armored Troop-Vehi-cle",
+        "new_caption": "Type 1 HO-KI Armored Troop-Vehicle"
+    }
+},
+"ordnance": {
+    "2": {
+        "old_caption": "Year-11 Type Curved-Fire InfantryGun",
+        "new_caption": "Year-11 Type Curved-Fire Infantry Gun"
+    },
+    "1": {
+        "old_caption": "Type 98 High-Angle Machine Can-non",
+        "new_vo_note_id": "21",
+        "new_caption": "Type 98 High-Angle Machine Cannon"
+    },
+    "17": {
+        "old_caption": "Year-3 Type 14cm Naval SeacoastGun",
+        "new_caption": "Year-3 Type 14cm Naval Seacoast Gun"
+    },
+    "20": {
+        "old_caption": "Type 93 Twin-Mount High-Angle Ma-chine Gun",
+        "new_caption": "Type 93 Twin-Mount High-Angle Machine Gun"
+    },
+    "22": {
+        "old_caption": "Type 96 Single-, Twin-, & Triple-Mount Naval High-Angle Machine Can-",
+        "new_caption": "Type 96 Single-, Twin-, & Triple-Mount Naval High-Angle Machine Cannons"
+    },
+    "24": {
+        "old_caption": "Year-10 Type 12cm Naval High-AngleGun",
+        "new_caption": "Year-10 Type 12cm Naval High-Angle Gun"
+    }
+}
+},
+
+"chinese": {
+"ordnance": {
+    "2": {
+        "old_caption": "Mortaio da 45 “Brixia”, 5cm leGrW 36,",
+        "new_caption": "Mortaio da 45 “Brixia”, 5cm leGrW 36, 50mm RM obr.38, & Type 89 Heavy Grenade Launcher"
+    },
+    "4": {
+        "old_caption": "Stokes 3-in., 8cm GrW 34, & 82mmBM obr. 37",
+        "new_caption": "Stokes 3-in., 8cm GrW 34, & 82mm BM obr. 37"
+    },
+    "7": {
+        "old_caption": "37mm PP obr. 15R & Cannone da",
+        "new_caption": "37mm PP obr. 15R & Cannone da 70/15"
+    },
+    "11": {
+        "old_caption": "7.7cm FK 16, 76.2mm P obr. 02/30, &OQF 18-Pounder",
+        "new_caption": "7.7cm FK 16, 76.2mm P obr. 02/30, & OQF 18-Pounder"
+    },
+    "12": {
+        "old_caption": "10.5cm leFH 16, Cannone da 105/28,& M2A1 105mm Howitzer",
+        "new_caption": "10.5cm leFH 16, Cannone da 105/28, & M2A1 105mm Howitzer"
+    },
+    "15": {
+        "old_caption": "Oerlikon FF, Cannone-mitragliera da",
+        "new_caption": "Oerlikon FF, Cannone-mitragliera da 20/65, & 2cm FlaK 30"
+    },
+    "16": {
+        "old_caption": "3.7cm FlaK 36 o. 37 & Bofors 40mmL/60",
+        "new_caption": "3.7cm FlaK 36 o. 37 & Bofors 40mm L/60"
+    }
+}
+},
+
+"french": {
+"vehicles": {
+    "20": {
+        "old_caption": "Autocanon de 75 mle 97 & Autocanonde 75 Conus(b)",
+        "new_caption": "Autocanon de 75 mle 97 & Autocanon de 75 Conus(b)"
+    },
+    "21": {
+        "old_caption": "Camion de Mitrailleuse Contre-Avions, Camion de 13.2 CAJ, Camion de",
+        "new_caption": "Camion de Mitrailleuse Contre-Avions, Camion de 13.2 CAJ, Camion de 20 CA, & Autocanon de 25 CA"
+    },
+    "36": {
+        "old_caption": "Peugeot 202, Citroën 23, & RenaultAGR2",
+        "new_caption": "Peugeot 202, Citroën 23, & Renault AGR2"
+    },
+    "40": {
+        "old_caption": "M4A3(75)W, M4A3(76)W, & M4A3(105) Medium Tanks, & M4Tankdozer",
+        "new_caption": "M4A3(75)W, M4A3(76)W, & M4A3(105) Medium Tanks, & M4 Tankdozer"
+    }
+},
+"ordnance": {
+    "6": {
+        "old_caption": "Canon Antichar de 47SA mle 37 APX",
+        "new_caption": "Canon Antichar de 47 SA mle 37 APX"
+    },
+    "18": {
+        "old_caption": "Mitrailleuse de 13.2 CAJmle 30",
+        "new_caption": "Mitrailleuse de 13.2 CAJ mle 30"
+    }
+}
+},
+
+"allied-minor": {
+"vehicles": {
+    "1": {
+        "old_caption": "TKS&TKS(L)",
+        "new_caption": "TKS & TKS(L)"
+    },
+    "2": {
+        "old_caption": "VickersEdw(b)&Ejw(b)",
+        "new_caption": "Vickers Edw(b) & Ejw(b)"
+    },
+    "3.7": {
+        "old_caption": "TPdw & 7TPjw",
+        "new_vo_note_id": "3",
+        "new_caption": "7TPdw & 7TPjw"
+    },
+    "6": {
+        "old_caption": "wz.29",
+        "new_caption": "wz.29 \"Ursus\""
+    },
+    "10.302": {
+        "old_caption": "T",
+        "new_vo_note_id": "10",
+        "new_caption": "302T"
+    },
+    "13": {
+        "old_caption": "Horse-Drawn",
+        "new_caption": "Horse-Drawn \"Taczanka\""
+    },
+    "22": {
+        "old_caption": "M3A3(a) FlaK38",
+        "new_caption": "M3A3(a) FlaK 38"
+    },
+    "29": {
+        "old_caption": "Marmon-Herrington III(b) Armored",
+        "new_caption": "Marmon-Herrington III(b) Armored Cars"
+    },
+    "31": {
+        "old_caption": "L5/30(i) & L3/35(i) &",
+        "new_caption": "L5/30(i) & L3/35(i) & L6/40(i) & M13/40(i)"
+    },
+    "37": {
+        "old_caption": "Light Truck & Medium Truck &",
+        "new_caption": "Light Truck & Medium Truck & Heavy Truck"
+    }
+},
+"ordnance": {
+    "30.75": {
+        "old_caption": "M 19S",
+        "new_vo_note_id": "30",
+        "new_caption": "75M 19S"
+    },
+    "add": [
+        { "vo_note_id": "20", "caption": "3.7cm Infantry Gun", "page_no": 502, "page_pos": [393,616] }
+    ]
+}
+},
+
+"axis-minor": {
+"vehicles": {
+    "7.38": {
+        "old_caption": "M Toldi I",
+        "new_vo_note_id": "7",
+        "new_caption": "38M Toldi I"
+    },
+    "8.38": {
+        "old_caption": "M Toldi IIA",
+        "new_vo_note_id": "8",
+        "new_caption": "38M Toldi IIA"
+    },
+    "9.40": {
+        "old_caption": "M Turan I(r)",
+        "new_vo_note_id": "9",
+        "new_caption": "40M Turan I(r)"
+    },
+    "10.41": {
+        "old_caption": "M Turan II(r)",
+        "new_vo_note_id": "10",
+        "new_caption": "41M Turan II(r)"
+    },
+    "13.43": {
+        "old_caption": "M Zrinyi II",
+        "new_vo_note_id": "13",
+        "new_caption": "43M Zrinyi II"
+    },
+    "14.39": {
+        "old_caption": "M Csaba & 40M Csaba",
+        "new_vo_note_id": "14",
+        "new_caption": "39M Csaba & 40M Csaba"
+    },
+    "16.40": {
+        "old_caption": "M Nimrod",
+        "new_vo_note_id": "16",
+        "new_caption": "40M Nimrod"
+    },
+    "50": {
+        "old_caption": "Light Truck, Medium Truck, &Heavy Truck",
+        "new_caption": "Light Truck, Medium Truck, & Heavy Truck"
+    }
+},
+"ordnance": {
+    "20": {
+        "old_caption": "Canon Automatique de 25 CAmle 38",
+        "new_caption": "Canon Automatique de 25 CA mle 38"
+    },
+    "29": {
+        "old_caption": "47mm KanonPUV vz. 36(t)",
+        "new_caption": "47mm Kanon PUV vz. 36(t)"
+    }
+}
+},
+
+"finnish": {
+"vehicles": {
+    "27": {
+        "old_caption": "Light Truck, Medium Truck, &Heavy Truck",
+        "new_caption": "Light Truck, Medium Truck, & Heavy Truck"
+    }
+},
+"ordnance": {
+    "1.47": {
+        "old_caption": "Krh/41",
+        "new_vo_note_id": "1",
+        "new_caption": "47 Krh/41"
+    },
+    "2.50": {
+        "old_caption": "Krh/39(r)",
+        "new_vo_note_id": "2",
+        "new_caption": "50 Krh/39(r)"
+    },
+    "3.81": {
+        "old_caption": "Krh/32",
+        "new_vo_note_id": "3",
+        "new_caption": "81 Krh/32"
+    },
+    "4.81": {
+        "old_caption": "Savunheitin M/42",
+        "new_vo_note_id": "4",
+        "new_caption": "81 Savunheitin M/42"
+    },
+    "5.120": {
+        "old_caption": "Krh/40",
+        "new_vo_note_id": "5",
+        "new_caption": "120 Krh/40"
+    },
+    "8.20": {
+        "old_caption": "PstK/40",
+        "new_vo_note_id": "8",
+        "new_caption": "20 PstK/40"
+    },
+    "11.37": {
+        "old_caption": "PstK/37(g)",
+        "new_vo_note_id": "11",
+        "new_caption": "37 PstK/37(g)"
+    },
+    "12.45": {
+        "old_caption": "PstK/32(r)",
+        "new_vo_note_id": "12",
+        "new_caption": "45 PstK/32(r)"
+    },
+    "13.50": {
+        "old_caption": "PstK/38(g)",
+        "new_vo_note_id": "13",
+        "new_caption": "50 PstK/38(g)"
+    },
+    "14.75": {
+        "old_caption": "PstK/97-38(g)",
+        "new_vo_note_id": "14",
+        "new_caption": "75 PstK/97-38(g)"
+    },
+    "15.75": {
+        "old_caption": "PstK/40(g)",
+        "new_vo_note_id": "15",
+        "new_caption": "75 PstK/40(g)"
+    },
+    "16.76": {
+        "old_caption": "RK/27(r)",
+        "new_vo_note_id": "16",
+        "new_caption": "76 RK/27(r)"
+    },
+    "17.75": {
+        "old_caption": "K/02",
+        "new_vo_note_id": "17",
+        "new_caption": "75 K/02"
+    },
+    "18.76": {
+        "old_caption": "LK/13",
+        "new_vo_note_id": "18",
+        "new_caption": "76 LK/13"
+    },
+    "30.150": {
+        "old_caption": "H/40(g)",
+        "new_vo_note_id": "30",
+        "new_caption": "150 H/40(g)"
+    },
+    "31.155": {
+        "old_caption": "H/17(f)",
+        "new_vo_note_id": "31",
+        "new_caption": "155 H/17(f)"
+    },
+    "33.20": {
+        "old_caption": "ItK/30 BSW(g)",
+        "new_vo_note_id": "33",
+        "new_caption": "20 ItK/30 BSW(g)"
+    },
+    "34.20": {
+        "old_caption": "ItK/38 BSW(g)",
+        "new_vo_note_id": "34",
+        "new_caption": "20 ItK/38 BSW(g)"
+    },
+    "35.20": {
+        "old_caption": "ItK/35 Br",
+        "new_vo_note_id": "35",
+        "new_caption": "20 ItK/35 Br"
+    },
+    "36.20": {
+        "old_caption": "ItK/40 VKT",
+        "new_vo_note_id": "36",
+        "new_caption": "20 ItK/40 VKT"
+    },
+    "38.76": {
+        "old_caption": "ItK/28 B(s)",
+        "new_vo_note_id": "38",
+        "new_caption": "76 ItK/28 B(s)"
+    },
+    "39.76": {
+        "old_caption": "ItK/31(r)",
+        "new_vo_note_id": "39",
+        "new_caption": "76 ItK/31(r)"
+    }
+}
+},
+
+"un-forces": {
+"vehicles": {
+    "19": {
+        "old_caption": "M39 Armored Utility Vehicle & M39Mortar Carrier",
+        "new_caption": "M39 Armored Utility Vehicle & M39 Mortar Carrier"
+    },
+    "7": {
+        "old_caption": "Churchill VII",
+        "new_vo_note_id": "37"
+    },
+    "34": {
+        "old_caption": "M4A3E8(a) Medium Tank & M4A3E8Dozer(a)",
+        "new_caption": "M4A3E8(a) Medium Tank & M4A3E8 Dozer(a)"
+    },
+    "47": {
+        "old_caption": "Oxford Carrier, MMG & Oxford Car-rier, HMG",
+        "new_caption": "Oxford Carrier, MMG & Oxford Carrier, HMG"
+    },
+    "57": {
+        "old_caption": "Jeep, ⁄-Ton Truck, & 2½-Ton Truck:4",
+        "new_caption": "Jeep, ¾-Ton Truck, & 2½-Ton Truck"
+    }
+}
+},
+
+"communist-forces": {
+"ordnance": {
+    "32": {
+        "old_caption": "Type 93 Twin-Mount High-Angle Ma-chine Gun",
+        "new_caption": "Type 93 Twin-Mount High-Angle Machine Gun"
+    }
+}
+}
+
+}
--- a/asl_rulebook2/extract/index.py
+++ b/asl_rulebook2/extract/index.py
@ -146,9 +146,6 @@ class ExtractIndex( ExtractBase ):
            index_entry = self._make_index_entry( title, content )
            if index_entry:
                self.index_entries.append( index_entry )
-            # FUDGE! EX/EXC are mis-parsed as a single index entry - we correct that in the fixups, and here.
-            if title == "EX":
-                self.index_entries.append( self._make_index_entry( "EXC", "Exception" ) )

    def _make_index_entry( self, title, content ):
        """Create a new index entry."""
--- a/asl_rulebook2/tests/test_extract.py
+++ b/asl_rulebook2/tests/test_extract.py
@ -52,10 +52,12 @@ def test_extract_content():
            extract = ExtractContent( args={}, log=_check_log_msg )
            extract.extract_content( pdf )
        targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
-        extract.save_as_text( targets_buf, chapters_buf, footnotes_buf )
+        vo_notes_buf = io.StringIO()
+        extract.save_as_text( targets_buf, chapters_buf, footnotes_buf, vo_notes_buf )
        targets_buf = targets_buf.getvalue()
        chapters_buf = chapters_buf.getvalue()
        footnotes_buf = footnotes_buf.getvalue()
+        vo_notes_buf = vo_notes_buf.getvalue()

        # check the results
        fname2 = os.path.join( dname, "targets.txt" )
@ -64,6 +66,8 @@ def test_extract_content():
        assert open( fname2, "r", encoding="utf-8" ).read() == chapters_buf
        fname2 = os.path.join( dname, "footnotes.txt" )
        assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf
+        fname2 = os.path.join( dname, "vo-notes.txt" )
+        assert open( fname2, "r", encoding="utf-8" ).read() == vo_notes_buf

    # run the test
    for_each_easlrb_version( do_test )
@ -86,10 +90,12 @@ def test_extract_all():
        extract.extract_index.save_as_json( index_buf )
        index_buf = index_buf.getvalue()
        targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
-        extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf )
+        vo_notes_buf = io.StringIO()
+        extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf, vo_notes_buf )
        targets_buf = targets_buf.getvalue()
        chapters_buf = chapters_buf.getvalue()
        footnotes_buf = footnotes_buf.getvalue()
+        vo_notes_buf = vo_notes_buf.getvalue()

        # check the results
        fname2 = os.path.join( dname, "index.json" )
@ -100,6 +106,8 @@ def test_extract_all():
        assert open( fname2, "r", encoding="utf-8" ).read() == chapters_buf
        fname2 = os.path.join( dname, "footnotes.json" )
        assert open( fname2, "r", encoding="utf-8" ).read() == footnotes_buf
+        fname2 = os.path.join( dname, "vo-notes.json" )
+        assert open( fname2, "r", encoding="utf-8" ).read() == vo_notes_buf

    # run the test
    for_each_easlrb_version( do_test )
--- a/asl_rulebook2/webapp/prepare.py
+++ b/asl_rulebook2/webapp/prepare.py
@ -114,28 +114,33 @@ def _do_prepare_data_files( args, download_url ):
        index_buf = io.StringIO()
        extract.extract_index.save_as_json( index_buf )
        targets_buf, chapters_buf, footnotes_buf = io.StringIO(), io.StringIO(), io.StringIO()
-        extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf )
+        vo_notes_buf = io.StringIO()
+        extract.extract_content.save_as_json( targets_buf, chapters_buf, footnotes_buf, vo_notes_buf )
        file_data = {
            "index": index_buf.getvalue(),
            "targets": targets_buf.getvalue(),
            "chapters": chapters_buf.getvalue(),
            "footnotes": footnotes_buf.getvalue(),
+            "vo-notes": vo_notes_buf.getvalue(),
        }

        # prepare the PDF
        gs_path = get_gs_path()
        if not gs_path:
            raise RuntimeError( "Ghostscript is not available." )
-        with TempFile( mode="w", encoding="utf-8" ) as targets_file:
+        with TempFile( mode="w", encoding="utf-8" ) as targets_file, \
+          TempFile( mode="w", encoding="utf-8" ) as vo_notes_file:
            log_msg( "status", "Preparing the final PDF..." )
            # save the extracted targets
            targets_file.temp_file.write( file_data["targets"] )
            targets_file.close( delete=False )
+            vo_notes_file.temp_file.write( file_data["vo-notes"] )
+            vo_notes_file.close( delete=False )
            # prepare the PDF
            prepared_file.close( delete=False )
            prepare_pdf( input_file.name,
                "ASL Rulebook",
-                targets_file.name, 5,
+                targets_file.name, vo_notes_file.name, 5,
                prepared_file.name, "ebook",
                gs_path,
                log_msg
--- a/asl_rulebook2/webapp/tests/test_prepare.py
+++ b/asl_rulebook2/webapp/tests/test_prepare.py
@ -83,7 +83,7 @@ def test_full_prepare( webapp, webdriver ):
        with zipfile.ZipFile( io.BytesIO( zip_data ) ) as zip_file:
            assert set( zip_file.namelist() ) == set( [
                "ASL Rulebook.pdf", "ASL Rulebook.index",
-                "ASL Rulebook.targets", "ASL Rulebook.chapters", "ASL Rulebook.footnotes"
+                "ASL Rulebook.targets", "ASL Rulebook.chapters", "ASL Rulebook.footnotes", "ASL Rulebook.vo-notes"
            ] )
            assert zip_file.getinfo( "ASL Rulebook.pdf" ).file_size > 40*1000
            for ftype in [ "index", "targets", "chapters", "footnotes" ]:
--- a/doc/prepare.md
+++ b/doc/prepare.md
@ -18,6 +18,7 @@ The first step is to extract the information we need from the eASLRB PDF.
        --save-targets /tmp/prepared/ASL\ Rulebook.targets \
        --save-chapters /tmp/prepared/ASL\ Rulebook.chapters \
        --save-footnotes /tmp/prepared/ASL\ Rulebook.footnotes \
+        --save-vo-notes /tmp/prepared/ASL\ Rulebook.vo-notes \
        --progress
 ```
 This extracts the information we need, and saves it in the 5 data files.
@ -29,6 +30,7 @@ Next, we need to prepare the eASLRB PDF, namely create bookmarks for each rule,
    asl_rulebook2/bin/prepare_pdf.py \
        $EASLRB \
        --targets /tmp/prepared/ASL\ Rulebook.targets \
+        --vo-notes /tmp/prepared/ASL\ Rulebook.vo-notes \
        --yoffset 5 \
        --output /tmp/prepared.pdf \
        --compression ebook \