Clean up old extracted image temp files before doing a new extraction.

master
Pacman Ghost 7 years ago
parent ea8676e7eb
commit fe5ffd4324
  1. 42
      asl_cards/parse.py

@ -26,6 +26,25 @@ class AnalyzeCancelledException( RuntimeError ) :
# ---------------------------------------------------------------------
# NOTE: Ghostscript extracts PDF pages to image files - this value defines where to put them.
_EXTRACTED_IMAGES_FILENAME_TEMPLATE = os.path.join( tempfile.gettempdir() , "asl_cards-%d.png" )
def _find_extracted_image_files() :
"""Find the image files extracted by Ghostscript."""
fnames = []
# NOTE: We assume there are never more than 500 of these.
# This method is used to clean up files left over from a previous (failed) run, so we can't
# just start at 1 and increment as we look for files. We could do some funky stuff with
# os.listdir() and regex's, but we need the extracted files in page order, and it's more
# trouble than it's worth... :-/
for i in range(1,500) :
fname = _EXTRACTED_IMAGES_FILENAME_TEMPLATE % i
if os.path.isfile( fname ) :
fnames.append( fname )
return fnames
# ---------------------------------------------------------------------
class PdfParser:
def __init__( self , index_dir , progress=None , progress2=None , on_ask=None , on_error=None ) :
@ -203,35 +222,30 @@ class PdfParser:
def _extract_images( self , fname , max_pages ) :
"""Extract card images from a file."""
# clean up any leftover extracted images from a previous run
# NOTE: It's important we do this, otherwise we might think they're part of this run.
for f in _find_extracted_image_files() :
os.unlink( f )
# extract each page from the PDF as an image
fname_template = os.path.join( tempfile.gettempdir() , "asl_cards-%d.png" )
resolution = 300 # pixels/inch
args = [
"_ignored_" , "-dQUIET" , "-dSAFER" , "-dNOPAUSE" ,
"-sDEVICE=png16m" , "-r"+str(resolution) ,
"-sOutputFile="+fname_template ,
"-sOutputFile="+_EXTRACTED_IMAGES_FILENAME_TEMPLATE
]
if max_pages > 0 :
args.append( "-dLastPage={}".format(max_pages) )
args.extend( [ "-f" , fname ] )
# FIXME! clean up left-over temp files before we start
args = [ s.encode(locale.getpreferredencoding()) for s in args ]
# FIXME! stop GhostScript from issuing warnings (stdout).
ghostscript.Ghostscript( *args )
# figure out how many files were created (so we can show progress)
npages = 0
for i in range(0,99999) :
fname = fname_template % (1+i)
if not os.path.isfile( fname ) :
break
npages += 1
image_fnames = _find_extracted_image_files()
# extract the cards from each page
card_images = []
for page_no in range(0,npages) :
for page_no,fname in enumerate(image_fnames) :
if self.cancelling : raise AnalyzeCancelledException()
# open the next page image
self._progress2( float(page_no) / npages )
fname = fname_template % (1+page_no)
self._progress2( float(page_no) / len(image_fnames) )
img = Image.open( fname )
img_width , img_height = img.size
# extract the cards (by splitting the page in half)
@ -247,7 +261,7 @@ class PdfParser:
os.path.join( fname2[0] , fname2[1][0]+"b"+fname2[1][1] )
)
# check if this is the last page, and it has just 1 card on it
if page_no == npages-1 and size1[1] < 1000 and size2[1] < 1000 :
if page_no == len(image_fnames)-1 and size1[1] < 1000 and size2[1] < 1000 :
# yup - extract it
buf , _ = self._crop_image(
img , (0,0,img_width,img_height) ,

Loading…
Cancel
Save