Added some tests using synthetic data files.

master
Pacman Ghost 7 years ago
parent bd26c9a38a
commit 8e65f68994
  1. 4
      asl_cards/parse.py
  2. 39
      asl_cards/tests/_test_case_base.py
  3. BIN
      asl_cards/tests/synthetic-data/1-card.doc
  4. BIN
      asl_cards/tests/synthetic-data/1-card.pdf
  5. BIN
      asl_cards/tests/synthetic-data/2-cards.doc
  6. BIN
      asl_cards/tests/synthetic-data/2-cards.pdf
  7. BIN
      asl_cards/tests/synthetic-data/3-cards.doc
  8. BIN
      asl_cards/tests/synthetic-data/3-cards.pdf
  9. BIN
      asl_cards/tests/synthetic-data/bad-spacing.doc
  10. BIN
      asl_cards/tests/synthetic-data/bad-spacing.pdf
  11. BIN
      asl_cards/tests/synthetic-data/empty.doc
  12. BIN
      asl_cards/tests/synthetic-data/empty.pdf
  13. 0
      asl_cards/tests/synthetic-data/null.pdf
  14. 29
      asl_cards/tests/test_real_data.py
  15. 67
      asl_cards/tests/test_synthetic_data.py

@ -116,8 +116,8 @@ class PdfParser:
page_pos = 0 if items[0].y0 > lt_page.height/2 else 1
return AslCard(
card_tag = _tidy( item_texts[0] ).replace( "# ", "#" ) ,
nationality = _tidy( item_texts[1] ) ,
name = _tidy( item_texts[2] ) ,
nationality = _tidy(item_texts[1]) if len(item_texts) > 1 else "" ,
name = _tidy(item_texts[2]) if len(item_texts) > 2 else "" ,
page_id = lt_page.pageid ,
page_pos = page_pos ,
)

@ -0,0 +1,39 @@
import sys
import os
import unittest
base_dir = os.path.split( __file__ )[ 0 ]
sys.path.append( "../.." ) # fudge! need this to allow a script to run within a package :-/
from asl_cards.parse import PdfParser
# ---------------------------------------------------------------------
class TestCaseBase( unittest.TestCase ) :
    """Base for all test classes."""

    def _test_pdf_parser( self , fname , expected_cards ) :
        """Parse a PDF data file and compare the extracted cards with the expected ones.

        fname is resolved relative to the directory containing this module.
        expected_cards is the list of cards the parser is expected to return, in order.

        Raises RuntimeError if the data file is missing, if the number of extracted
        cards differs from the number expected, or if any card differs from its
        expected counterpart. Exceptions raised by the parser itself propagate unchanged.
        """
        # parse the specified PDF
        fname2 = os.path.join( base_dir , fname )
        if not os.path.isfile( fname2 ) :
            raise RuntimeError( "Missing data file: {}".format( fname2 ) )
        pdf_parser = PdfParser(
            #progress = lambda _,msg: print( msg , file=sys.stderr , flush=True )
        )
        cards = pdf_parser.parse( fname2 , images=False )
        # check the results
        if len(cards) != len(expected_cards) :
            raise RuntimeError( "{}: got {} cards, expected {}.".format( fname , len(cards) , len(expected_cards) ) )
        if len(cards) == 0 :
            # nothing to compare (and no card to take the attribute names from)
            return
        # get the attributes we're interested in: public, non-callable ones.
        # "card_image" is excluded by name *before* it is read, since it messes up the comparison;
        # filtering (rather than list.remove) also copes with cards that don't have it.
        sample = expected_cards[0]
        attrs = [
            a for a in dir(sample)
            if not a.startswith("_")
            and a != "card_image"
            and not callable( getattr(sample,a) )
        ]
        # compare the extracted cards with the expected results
        for i , (card , expected) in enumerate( zip(cards,expected_cards) ) :
            if not all( getattr(card,a) == getattr(expected,a) for a in attrs ) :
                raise RuntimeError( "{}: Card mismatch ({}): got {}, expected {}.".format( fname , i , card , expected ) )

@ -4,36 +4,17 @@ import sys
import os
import unittest
base_dir = os.path.split( __file__ )[ 0 ]
sys.path.append( ".." ) # fudge! need this to allow a script to run within a package :-/
from asl_cards.parse import PdfParser , AslCard
from _test_case_base import TestCaseBase
from asl_cards.parse import AslCard
# ---------------------------------------------------------------------
class TestRealData( unittest.TestCase ) :
class TestRealData( TestCaseBase ) :
"""Run tests using the real "ASL Cards" PDF files."""
def _test_pdf_parser( self , fname , expected_cards ) :
# parse the specified PDF
fname2 = os.path.join( base_dir , os.path.join("real-data",fname) )
if not os.path.isfile( fname2 ) :
raise RuntimeError( "Missing data file: {}".format( fname2 ) )
pdf_parser = PdfParser(
#progress = lambda _,msg: print( msg , file=sys.stderr , flush=True )
)
cards = pdf_parser.parse( fname2 , images=False )
# check the results
if len(cards) != len(expected_cards) :
raise RuntimeError( "{}: got {} cards, expected {}.".format( fname , len(cards) , len(expected_cards) ) )
# get the attributes we're interested in
card = expected_cards[0]
attrs = [ a for a in dir(card) if not a.startswith("_") and not callable(getattr(card,a)) ]
attrs.remove( "card_image" ) # this is messing things up :-/
# compare the extracted cards with the expected results
for i in range(0,len(cards)) :
if not all( getattr(cards[i],a) == getattr(expected_cards[i],a) for a in attrs ) :
raise RuntimeError( "{}: Card mismatch ({}): got {}, expected {}.".format( fname , i , cards[i] , expected_cards[i] ) )
"""Test the PDF parser."""
super()._test_pdf_parser( os.path.join("real-data",fname) , expected_cards )
def test_italian_ordnance( self ) :
self._test_pdf_parser( "ItalianOrdnance.pdf" , [

@ -0,0 +1,67 @@
#!/usr/bin/env python3
import sys
import os
import unittest
from pdfminer.pdfparser import PDFSyntaxError
from _test_case_base import TestCaseBase
from asl_cards.parse import AslCard
# ---------------------------------------------------------------------
class TestSyntheticData( TestCaseBase ) :
    """Run tests using the synthetic PDF files.

    The real "ASL Cards" files need to be purchased, so we can't keep them in source control;
    these tests use some generated test files instead.
    """

    def _test_pdf_parser( self , fname , expected_cards ) :
        """Run the base-class parser check on a file in the synthetic-data directory."""
        rel_path = os.path.join( "synthetic-data" , fname )
        super()._test_pdf_parser( rel_path , expected_cards )

    def test_null_file( self ) :
        # a zero-byte file should be rejected with a PDF syntax error
        with self.assertRaises( PDFSyntaxError ) :
            self._test_pdf_parser( "null.pdf" , None )

    def test_empty_file( self ) :
        # an empty file should produce no cards
        no_cards = []
        self._test_pdf_parser( "empty.pdf" , no_cards )

    def test_1card_file( self ) :
        # a file with 1 card
        big_tank = AslCard( card_tag="Vehicle #1" , nationality="Moldovia" , name="Big Tank" , page_id=1 , page_pos=0 )
        self._test_pdf_parser( "1-card.pdf" , [ big_tank ] )

    def test_2card_file( self ) :
        # a file with 2 cards (both on the same page)
        big_tank = AslCard( card_tag="Vehicle #1" , nationality="Moldovia" , name="Big Tank" , page_id=1 , page_pos=0 )
        little_tank = AslCard( card_tag="Vehicle #2" , nationality="Moldovia" , name="Little Tank" , page_id=1 , page_pos=1 )
        self._test_pdf_parser( "2-cards.pdf" , [ big_tank , little_tank ] )

    def test_3card_file( self ) :
        # a file with 3 cards (the third one is on a second page)
        big_tank = AslCard( card_tag="Vehicle #1" , nationality="Moldovia" , name="Big Tank" , page_id=1 , page_pos=0 )
        little_tank = AslCard( card_tag="Vehicle #2" , nationality="Moldovia" , name="Little Tank" , page_id=1 , page_pos=1 )
        big_gun = AslCard( card_tag="Ordnance #1" , nationality="Moldovia" , name="Big Gun" , page_id=2 , page_pos=0 )
        self._test_pdf_parser( "3-cards.pdf" , [ big_tank , little_tank , big_gun ] )

    def test_bad_spacing( self ) :
        # cards with bad spacing: the missing fields are expected to come back as empty strings
        tag_only = AslCard( card_tag="Vehicle #1" , nationality="" , name="" , page_id=1 , page_pos=0 )
        no_name = AslCard( card_tag="Vehicle #2" , nationality="Moldovia" , name="" , page_id=1 , page_pos=1 )
        self._test_pdf_parser( "bad-spacing.pdf" , [ tag_only , no_name ] )
# ---------------------------------------------------------------------
if __name__ == "__main__" :
    # run the tests in this module when it is executed directly as a script
    unittest.main()
Loading…
Cancel
Save