Try to keep going after a file fails processing.

master
Pacman Ghost 7 years ago
parent 21c005aa3e
commit 049eb322f9
  1. 47
      asl_cards/parse.py
  2. 22
      startup_widget.py

@ -20,14 +20,21 @@ from asl_cards.db import AslCard , AslCardImage
# ---------------------------------------------------------------------
class AnalyzeCancelledException( RuntimeError ) :
def __init__( self ) :
super().__init__( "Cancelled." )
# ---------------------------------------------------------------------
class PdfParser:
def __init__( self , index_dir , ask=None , progress=None , progress2=None ) :
def __init__( self , index_dir , progress=None , progress2=None , on_ask=None , on_error=None ) :
# initialize
self.index_dir = index_dir
self.ask = ask # nb: for asking the user something during processing
self.progress = progress # nb: for tracking file progress
self.progress2 = progress2 # nb: for tracking page progress within a file
self.on_ask = on_ask # nb: for asking the user something during processing
self.on_error = on_error # nb: for showing the user an error message
self.cancelling = False
def parse( self , target , max_pages=-1 , images=True ) :
@ -44,8 +51,21 @@ class PdfParser:
# parse each file
cards = []
for file_no,fname in enumerate(fnames) :
if self.cancelling : raise RuntimeError("Cancelled.")
file_cards = self._do_parse_file( float(file_no)/len(fnames) , fname , max_pages , images )
if self.cancelling : raise AnalyzeCancelledException()
try :
file_cards = self._do_parse_file( float(file_no)/len(fnames) , fname , max_pages , images )
except AnalyzeCancelledException as ex :
raise
except Exception as ex :
# notify the caller of the error
if not self.on_error :
raise
self.on_error(
"An error occured while processing {}:\n\n{}\n\nThis file will be ignored.".format(
os.path.split(fname)[1] , str(ex)
)
)
continue
if file_cards :
cards.extend( file_cards )
self._progress( 1.0 , "Done." )
@ -87,8 +107,8 @@ class PdfParser:
) )
else :
# ask the user if they want to try parsing the PDF
if self.ask :
rc = self.ask(
if self.on_ask :
rc = self.on_ask(
"Can't find an index file for {}.\n\nDo you want to try parsing the PDF (slow and unreliable)?".format(
os.path.split( fname )[ 1 ]
) ,
@ -97,6 +117,11 @@ class PdfParser:
if rc != QMessageBox.Yes :
return None
# extract each AslCard from the file
# NOTE: Some of the PDFs have cards that have not been filled out - we detect this correctly (because
# they don't have a "Vehicle" or "Ordnance" tag), but we barf later because the image extractor thinks
# they're a valid card, and so we get a different number of cards vs. images.
# It's not really worth fixing this, since we're now using index files instead of extracting the info
# from the PDFs (because extraction is giving such poor results :-/).
self._progress( pval , "Analyzing {}...".format( os.path.split(fname)[1] ) )
rmgr = PDFResourceManager()
laparams = LAParams()
@ -105,7 +130,7 @@ class PdfParser:
with open(fname,"rb") as fp :
pages = list( PDFPage.get_pages( fp ) )
for page_no,page in enumerate(pages) :
if self.cancelling : raise RuntimeError("Cancelled.")
if self.cancelling : raise AnalyzeCancelledException()
self._progress2( float(page_no) / len(pages) )
page_cards = self._parse_page( cards , interp , page_no , page )
cards.extend( page_cards )
@ -122,7 +147,7 @@ class PdfParser:
)
)
for i in range(0,len(cards)) :
if self.cancelling : raise RuntimeError("Cancelled.")
if self.cancelling : raise AnalyzeCancelledException()
cards[i].card_image = AslCardImage( image_data=card_images[i] )
return cards
@ -134,14 +159,14 @@ class PdfParser:
# locate the info box for each card (in the top-left corner)
info_boxes = []
for item in lt_page :
if self.cancelling : raise RuntimeError("Cancelled.")
if self.cancelling : raise AnalyzeCancelledException()
if type(item) is not LTTextBoxHorizontal : continue
item_text = item.get_text().strip()
if item_text.startswith( ("Vehicle","Ordnance") ) :
info_boxes.append( [item] )
# get the details from each info box
for item in lt_page :
if self.cancelling : raise RuntimeError("Cancelled.")
if self.cancelling : raise AnalyzeCancelledException()
if type(item) is not LTTextBoxHorizontal : continue
# check if the next item could be part of an info box - it must be within the left/right boundary
# of the first item (within a certain tolerance), and below it (but not too far)
@ -203,7 +228,7 @@ class PdfParser:
# extract the cards from each page
card_images = []
for page_no in range(0,npages) :
if self.cancelling : raise RuntimeError("Cancelled.")
if self.cancelling : raise AnalyzeCancelledException()
# open the next page image
self._progress2( float(page_no) / npages )
fname = fname_template % (1+page_no)

@ -35,9 +35,10 @@ class AnalyzeThread( QThread ) :
# parse the files
self.parser = PdfParser(
os.path.join( globals.base_dir , "index" ) ,
ask = self.ask ,
progress = lambda pval,msg: self.progress_signal.emit( -1 if pval is None else pval , msg ) ,
progress2 = lambda pval: self.progress2_signal.emit( pval )
progress2 = lambda pval: self.progress2_signal.emit( pval ) ,
on_ask = self.on_ask ,
on_error = self.on_error ,
)
cards = self.parser.parse( self.cards_dir )
db.open_database( self.db_fname , True )
@ -52,7 +53,16 @@ class AnalyzeThread( QThread ) :
# notify slots that we've finished
self.completed_signal.emit( "" )
def ask( self , msg , btns , default ) :
def on_error( self , msg ) :
"""Show the user an error message."""
# NOTE: We are running in a worker thread, so we need to delegate showing the message box
# to the GUI thread.
QMetaObject.invokeMethod(
StartupWidget._instance , "on_error" , Qt.BlockingQueuedConnection ,
Q_ARG( str , msg )
)
def on_ask( self , msg , btns , default ) :
"""Ask the user a question."""
# NOTE: We are running in a worker thread, so we need to delegate showing the message box
# to the GUI thread.
@ -77,6 +87,7 @@ class StartupWidget( QWidget ) :
def __init__( self , db_fname , parent=None ) :
# initialize
super(StartupWidget,self).__init__( parent=parent )
assert StartupWidget._instance is None
StartupWidget._instance = self
self.analyze_thread = None
# FUDGE! Work around recursive imports :-/
@ -190,6 +201,11 @@ class StartupWidget( QWidget ) :
"""Update the analysis progress in the UI."""
self.pb_pages.setValue( int( 100*pval + 0.5 ) )
@pyqtSlot( str )
def on_error( self , msg ) :
"""Show an error message box."""
MainWindow.show_error_msg( msg )
@pyqtSlot( str , QMessageBox.StandardButtons , QMessageBox.StandardButton , result=QMessageBox.StandardButton )
def on_ask( self , msg , buttons , default ) :
"""Ask the user a question."""

Loading…
Cancel
Save