Moves card dimensional validation to the normalize.py module.

Tests pass for all card and non-card samples. Made the original punchcard module use logging instead of a debug flag. Fixes some rounding errors related to casting to int. The cropped image produced in normalize.py is no longer passed to the PunchCard class; instead, we only use it to determine the orientation/rotation transposes (corner cut detection). Fixes the debugging image output of punchcard.py.
AI-Collaboratory · Aug 20, 2019 · 0c010d3 · 0c010d3
1 parent 724d502
commit 0c010d3
Show file tree

Hide file tree

Showing 9 changed files with 115 additions and 728 deletions.
diff --git a/images/Key-card.jpg → Key-card.jpg b/images/Key-card.jpg → Key-card.jpg
diff --git a/images/Example of programming analysis.html b/images/Example of programming analysis.html
diff --git a/images/not_punchcards/graph.png b/images/not_punchcards/graph.png
diff --git a/images/not_punchcards/graph2.png b/images/not_punchcards/graph2.png
diff --git a/punchcards/__init__.py b/punchcards/__init__.py
@@ -1,2 +1,3 @@
 from punchcards.punchcard import PunchCard
 from punchcards.normalize import find_card
+from punchcards import cli
diff --git a/punchcards/cli.py b/punchcards/cli.py
@@ -1,8 +1,19 @@
 from .punchcard import PunchCard
+from .normalize import find_card
+from PIL import Image
 from docopt import docopt
+import logging
+
+__doc_opt__ = """Punchcard Command Line Interface.
+
+Usage:
+  punchcard <image-file> ...
+  punchcard (-h | --help)
+  punchcard --version
+"""
 
 if __name__ == '__main__':
-    args = docopt(__doc__, version='Punch Card Reader 1.0')
+    args = docopt(__doc_opt__, version='Punch Card Reader 1.0')
 
     logger = logging.getLogger('punchcard')
     logger.setLevel(logging.WARN)
@@ -20,4 +31,4 @@
         image = find_card(image)
         image.show()
         card = PunchCard(image, bright=127) # using neutral gray as threshold color
-        return card.text
+        print(card.text)
diff --git a/punchcards/normalize.py b/punchcards/normalize.py
@@ -17,6 +17,12 @@
 import logging
 from .punchcard import PunchCard
 
+CARD_WIDTH = 7.0 + 3.0/8.0 # Inches
+CARD_HEIGHT = 3.25 # Inches
+CARD_SPEC_TOLERANCE = .15  # inches, adjust as needed
+CARD_W_TO_H_RATIO_HIGH = (CARD_WIDTH + CARD_SPEC_TOLERANCE) / CARD_HEIGHT
+CARD_W_TO_H_RATIO_LOW = (CARD_WIDTH - CARD_SPEC_TOLERANCE) / CARD_HEIGHT
+
 logger = logging.getLogger('punchcard')
 
 def example():
@@ -33,22 +39,27 @@ def example():
     print("Card Text:"+card.text)
 
 # Identify the lightest corner and place it top-left
-def normalizeFlip(orig_img):
+def normalizeFlip(orig_img, cropped_img):
     testbox = (0,0,20,20)
     brightest = -1
-    test_img = orig_img
-    best_flip = test_img
+    test_img = cropped_img
+    best_flip = (0,0)
     for (x,y), value in numpy.ndenumerate(numpy.zeros((2,2))):
         if x:
-            test_img = orig_img.transpose(Image.FLIP_LEFT_RIGHT)
+            test_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT)
         if y:
             test_img = test_img.transpose(Image.FLIP_TOP_BOTTOM)
         # test_img.show()
         b = brightness(test_img.crop(testbox))
         if b > brightest:
             brightest = b
-            best_flip = test_img
-    return best_flip
+            best_flip = (x,y)
+    result = orig_img
+    if best_flip[0]:
+        result = result.transpose(Image.FLIP_LEFT_RIGHT)
+    if best_flip[1]:
+        result = result.transpose(Image.FLIP_TOP_BOTTOM)
+    return result
 
 def cropCard(im):
     # crop along X axis
@@ -62,13 +73,12 @@ def cropCard(im):
     top, bottom = findMargins(x_cropped, axis=1)
     y_crop_box = (0, top, x_cropped.size[0]-1, bottom)
     result = x_cropped.crop(y_crop_box)
-    result.show()
     return result
 
 # Find the index values where dark region begins and ends
 def findMargins(im, axis=0, threshold=.2):
     pix = numpy.array(im)
-    max = im.size[axis]*255
+    max = pix.shape[axis]*255
     max = max - int(max*threshold)
     vector = numpy.sum(pix, axis=axis)
     first = 0
@@ -103,16 +113,41 @@ def brightness( im ):
     stat = ImageStat.Stat(im)
     return stat.mean[0]
 
+def combine_images( imgs ):
+    imgs = [i.convert(mode="RGB") for i in imgs]
+    max_width = sorted( [(i.size[0]) for i in imgs])[-1]
+    diagnostic = numpy.vstack( (numpy.asarray( i.resize( (max_width, i.size[1]*max_width/i.size[0])) ) for i in imgs ) )
+    return Image.fromarray( diagnostic, mode="RGB" )
+
+def is_card_dimensions(image):
+    card_ratio = float(image.size[0]) / float(image.size[1])
+    print(str(card_ratio))
+    print(str(CARD_W_TO_H_RATIO_HIGH) + " to " + str(CARD_W_TO_H_RATIO_LOW))
+    return card_ratio <= CARD_W_TO_H_RATIO_HIGH and card_ratio > CARD_W_TO_H_RATIO_LOW
+
 def find_card(image):
-    image = image.convert(mode="L")
-    if(isnotbacklit(image)):
-        image = ImageOps.invert(image)
-    image = cropCard(image)
+    image2 = image.convert(mode="L")
+    diag = combine_images([image, image2])
+    image3 = image2
+    if(isnotbacklit(image2)):
+        image3 = ImageOps.invert(image2)
+    diag = combine_images([diag, image3])
+    cropped = cropCard(image3)
+    diag = combine_images([diag, cropped])
     #image.show()
-    if(image.size[1] > image.size[0]):
-        image = image.transpose(Image.ROTATE_90)
-    image = normalizeFlip(image)
-    return image
+    image4 = image3
+    if(cropped.size[1] > cropped.size[0]):
+        print('rotate 90')
+        image4 = image3.transpose(Image.ROTATE_90)
+        cropped = cropped.transpose(Image.ROTATE_90)
+    if not is_card_dimensions(cropped):
+        return None
+    diag = combine_images([diag, image4])
+    image5 = normalizeFlip(image4, cropped)
+    diag = combine_images([diag, image5])
+    if logger.isEnabledFor(logging.DEBUG):
+        diag.show()
+    return image5
 
 if __name__ == '__main__':
     example()
diff --git a/punchcards/punchcard.py b/punchcards/punchcard.py
@@ -15,6 +15,8 @@ class PunchCard(object):
 
     logger = logging.getLogger('punchcard')
 
+    SPEC_IBM_MODEL_029 = "IBM Model 029 Punch Card"  # only one for now
+
     CARD_COLUMNS = 80
     CARD_ROWS = 12
 
@@ -70,7 +72,7 @@ def create_card_map(self):
                 self.translate[tuple(v[1:])] = v[0]
 
     # generate a range of floats
-    def drange(start, stop, step=1.0):
+    def drange(self, start, stop, step=1.0):
         r = start
         while (step >= 0.0 and r < stop) or (step < 0.0 and r > stop):
             yield r
@@ -82,7 +84,6 @@ def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, y
         self.text = ''
         self.decoded = []
         self.surface = []
-        self.debug = debug
         self.threshold = 0
         self.ymin = ystart
         self.ymax = ystop
@@ -92,6 +93,10 @@ def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, y
         self.image = image
         self.pix = image.load()
         self._crop()
+        if debug:
+            self.logger.setLevel(logging.DEBUG)
+        else:
+            self.logger.setLevel(logging.WARN)
         self._scan(bright)
 
     # Brightness is the average of RGB values
@@ -111,8 +116,8 @@ def _crop(self):
             self.xmax = self.xsize
         if self.ymax == 0:
             self.ymax = self.ysize
-        self.midx = int(self.xmin + (self.xmax - self.xmin) / 2 + self.xadjust)
-        self.midy = int(self.ymin + (self.ymax - self.ymin) / 2)
+        self.midx = self.xmin + (self.xmax - self.xmin) / 2 + self.xadjust
+        self.midy = self.ymin + (self.ymax - self.ymin) / 2
 
     # heuristic for finding a reasonable cutoff brightness
     def _find_threshold_brightness(self):
@@ -173,9 +178,9 @@ def _find_data_horiz_dimensions(self, probe_y):
         col_width = width * self.CARD_COL_WIDTH_RATIO
         hole_width = width * self.CARD_HOLE_WIDTH_RATIO
         #print col_width
-        if self.debug:
+        if self.logger.isEnabledFor(logging.DEBUG):
             # mark left and right edges on the copy
-            for y in range(int(probe_y) - int(self.ysize/100), int(probe_y) + int(self.ysize/100)):
+            for y in range(int(probe_y - self.ysize/100), int(probe_y + self.ysize/100)):
                 self.debug_pix[left_border if left_border > 0 else 0,y] = 255
                 self.debug_pix[right_border if right_border < self.xmax else self.xmax - 1,y] = 255
             for x in range(1, int((self.xmax - self.xmin) / 200)):
@@ -203,14 +208,14 @@ def _find_data_vert_dimensions(self):
         hole_height = int(card_height * self.CARD_HOLE_HEIGHT_RATIO)
         data_top_y = data_begins + hole_height / 2
         col_height = int(card_height * self.CARD_ROW_HEIGHT_RATIO)
-        if self.debug:
+        if self.logger.isEnabledFor(logging.DEBUG):
             # mark up the copy with the edges
             for x in range(self.xmin, self.xmax-1):
                 self.debug_pix[x,top_border] = 255
                 self.debug_pix[x,bottom_border] = 255
-        if self.debug:
+        if self.logger.isEnabledFor(logging.DEBUG):
             # mark search parameters
-            for x in range(self.midx - int(self.xsize/20), self.midx + int(self.xsize/20)):
+            for x in range(self.midx - self.xsize/20, self.midx + self.xsize/20):
                self.debug_pix[x,self.ymin] = 255
                self.debug_pix[x,self.ymax - 1] = 255
             for y in range(0, self.ymin):
@@ -220,7 +225,7 @@ def _find_data_vert_dimensions(self):
         return data_top_y, data_top_y + col_height * 11, col_height, hole_height
 
     def _scan(self, bright=-1):
-        if self.debug:
+        if self.logger.isEnabledFor(logging.DEBUG):
             # if debugging make a copy we can draw on
             self.debug_image = self.image.copy()
             self.debug_pix = self.debug_image.load()
@@ -243,7 +248,7 @@ def _scan(self, bright=-1):
                 if val >= self.threshold:
                     if left_edge == -1:
                         left_edge = x
-                    if self.debug:
+                    if self.logger.isEnabledFor(logging.DEBUG):
                         self.debug_pix[x,y] = self._flip(self.pix[x,y])
                 else:
                     if left_edge > -1:
@@ -252,22 +257,22 @@ def _scan(self, bright=-1):
                             col_num = int((left_edge + hole_length / 2.0 - x_data_left) / col_width + 0.25)
                             data[(col_num, row_num)] = hole_length
                         left_edge = -1
-            if (self.debug):
+            if self.logger.isEnabledFor(logging.DEBUG):
                 # Plot where holes might be on this row
                 expected_top_edge = y - hole_height / 2
                 expected_bottom_edge = y + hole_height / 2
                 blue = 255 * 256 * 256
-                for expected_left_edge in drange(x_data_left, x_data_right - 1, col_width):
-                    for y_plot in drange(expected_top_edge, expected_bottom_edge, 2):
+                for expected_left_edge in self.drange(x_data_left, x_data_right - 1, col_width):
+                    for y_plot in self.drange(expected_top_edge, expected_bottom_edge, 2):
                         self.debug_pix[expected_left_edge,y_plot] = blue
                         #self.debug_pix[x + hole_width/2,yline] = 255 * 256 * 256
                         self.debug_pix[expected_left_edge + hole_width,y_plot] = blue
-                    for x_plot in drange(expected_left_edge, expected_left_edge + hole_width):
+                    for x_plot in self.drange(expected_left_edge, expected_left_edge + hole_width):
                         self.debug_pix[x_plot, expected_top_edge] = blue
                         self.debug_pix[x_plot, expected_bottom_edge] = blue
             y += col_height
 
-        if self.debug:
+        if self.logger.isEnabledFor(logging.DEBUG):
             self.debug_image.show()
             # prevent run-a-way debug shows causing my desktop to run out of memory
             raw_input("Press Enter to continue...")

diff --git a/tests/test_punchcards.py b/tests/test_punchcards.py
@@ -1,7 +1,30 @@
+from PIL import Image
+from punchcards.normalize import find_card
+from punchcards.punchcard import PunchCard
+from os import listdir
+from os.path import isfile, join
+
+def testcard( filepath ):
+  print(filepath)
+  im = Image.open(filepath)
+  im = find_card(im)
+  if im is not None:
+    card = PunchCard(im, bright=127, debug=False)
+    print(card.text)
+  else:
+    print('Not a punchcard.')
+
+def test_cards():
+    card_images = [f for f in listdir('images/') if isfile(join('images/', f))]
+    for name in card_images:
+        testcard(join('images/', name))
+
+def test_noncards():
+    non_card_images = [f for f in listdir('images/not_punchcards/') if isfile(join('images/not_punchcards/', f))]
+    for name in non_card_images:
+        testcard(join('images/not_punchcards/', name))
 
 def test_examples():
-    from PIL import Image
-    from punchcards import read_card
     files = [
         'images/C04D01L-0001.tif',
         'images/C04D01L-0001-90.tif',
@@ -17,3 +40,7 @@ def test_examples():
             image = Image.open(file)
             image.show()
         assert result == text
+
+if __name__ == "__main__":
+    test_cards()
+    test_noncards()