Skip to content

Commit

Permalink
Moves card dimensional validation to the normalize.py module.
Browse files Browse the repository at this point in the history
Tests passing for all card and non-card samples.
Made original punchcard module use logging instead of debug flag.
Fixes some rounding errors related to casting to int.
Cropping done in normalize.py is no longer passed to the Punchcard class; instead
  we only use it to perform orientation/rotation transposes (corner cut detection).
fixes for the debugging image output of punchcard.py
  • Loading branch information
gregjan committed Aug 20, 2019
1 parent 724d502 commit 0c010d3
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 728 deletions.
File renamed without changes
692 changes: 0 additions & 692 deletions images/Example of programming analysis.html

This file was deleted.

Binary file added images/not_punchcards/graph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/not_punchcards/graph2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions punchcards/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from punchcards.punchcard import PunchCard
from punchcards.normalize import find_card
from punchcards import cli
15 changes: 13 additions & 2 deletions punchcards/cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
from .punchcard import PunchCard
from .normalize import find_card
from PIL import Image
from docopt import docopt
import logging

__doc_opt__ = """Punchcard Command Line Interface.
Usage:
punchcard <image-file> ...
punchcard (-h | --help)
punchcard --version
"""

if __name__ == '__main__':
args = docopt(__doc__, version='Punch Card Reader 1.0')
args = docopt(__doc_opt__, version='Punch Card Reader 1.0')

logger = logging.getLogger('punchcard')
logger.setLevel(logging.WARN)
Expand All @@ -20,4 +31,4 @@
image = find_card(image)
image.show()
card = PunchCard(image, bright=127) # using neutral gray as threshold color
return card.text
print(card.text)
67 changes: 51 additions & 16 deletions punchcards/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
import logging
from .punchcard import PunchCard

# Nominal card dimensions and the width/height ratio window derived from them.
# is_card_dimensions() accepts an image only when its ratio falls in this window.
CARD_WIDTH = 7.0 + 3.0/8.0 # Inches
CARD_HEIGHT = 3.25 # Inches
CARD_SPEC_TOLERANCE = .15 # inches, adjust as needed
# The tolerance is applied to the width only; the height stays at its nominal value.
CARD_W_TO_H_RATIO_HIGH = (CARD_WIDTH + CARD_SPEC_TOLERANCE) / CARD_HEIGHT
CARD_W_TO_H_RATIO_LOW = (CARD_WIDTH - CARD_SPEC_TOLERANCE) / CARD_HEIGHT

logger = logging.getLogger('punchcard')

def example():
Expand All @@ -33,22 +39,27 @@ def example():
print("Card Text:"+card.text)

# Identify the lightest corner and place it top-left
def normalizeFlip(orig_img):
def normalizeFlip(orig_img, cropped_img):
testbox = (0,0,20,20)
brightest = -1
test_img = orig_img
best_flip = test_img
test_img = cropped_img
best_flip = (0,0)
for (x,y), value in numpy.ndenumerate(numpy.zeros((2,2))):
if x:
test_img = orig_img.transpose(Image.FLIP_LEFT_RIGHT)
test_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT)
if y:
test_img = test_img.transpose(Image.FLIP_TOP_BOTTOM)
# test_img.show()
b = brightness(test_img.crop(testbox))
if b > brightest:
brightest = b
best_flip = test_img
return best_flip
best_flip = (x,y)
result = orig_img
if best_flip[0]:
result = result.transpose(Image.FLIP_LEFT_RIGHT)
if best_flip[1]:
result = result.transpose(Image.FLIP_TOP_BOTTOM)
return result

def cropCard(im):
# crop along X axis
Expand All @@ -62,13 +73,12 @@ def cropCard(im):
top, bottom = findMargins(x_cropped, axis=1)
y_crop_box = (0, top, x_cropped.size[0]-1, bottom)
result = x_cropped.crop(y_crop_box)
result.show()
return result

# Find the index values where dark region begins and ends
def findMargins(im, axis=0, threshold=.2):
pix = numpy.array(im)
max = im.size[axis]*255
max = pix.shape[axis]*255
max = max - int(max*threshold)
vector = numpy.sum(pix, axis=axis)
first = 0
Expand Down Expand Up @@ -103,16 +113,41 @@ def brightness( im ):
stat = ImageStat.Stat(im)
return stat.mean[0]

def combine_images( imgs ):
    """Stack several images vertically into one RGB diagnostic image.

    Every image is converted to RGB and resized to the width of the widest
    image, scaling its height by the same factor, then the results are
    stacked top to bottom in the order given.

    :param imgs: non-empty sequence of PIL images.
    :returns: a new RGB PIL image containing all inputs stacked vertically.
    :raises ValueError: if ``imgs`` is empty.
    """
    rgb_imgs = [i.convert(mode="RGB") for i in imgs]
    max_width = max(i.size[0] for i in rgb_imgs)
    rows = []
    for i in rgb_imgs:
        # resize() needs integer pixel sizes: floor division keeps the
        # Python 2 result and avoids a float under Python 3.  Clamp to one
        # pixel so a very wide, very short image cannot collapse to height 0.
        scaled_height = max(1, i.size[1] * max_width // i.size[0])
        rows.append(numpy.asarray(i.resize((max_width, scaled_height))))
    # vstack expects a real sequence of arrays; passing a generator is
    # deprecated in NumPy and rejected by newer releases.
    diagnostic = numpy.vstack(rows)
    return Image.fromarray( diagnostic, mode="RGB" )

def is_card_dimensions(image):
    """Tell whether an image's width/height ratio fits a punch card.

    The ratio must be greater than CARD_W_TO_H_RATIO_LOW and no greater than
    CARD_W_TO_H_RATIO_HIGH.

    :param image: object exposing ``size`` as ``(width, height)``; the caller
        is expected to have rotated it to landscape already.
    :returns: True when the ratio is inside the accepted window, else False.
        An image with zero height is never a card.
    """
    width, height = image.size[0], image.size[1]
    if height == 0:
        # A degenerate crop has no meaningful ratio; avoid ZeroDivisionError.
        return False
    card_ratio = float(width) / float(height)
    # Diagnostics go through the module logger instead of stdout so library
    # callers only see them when debug logging is enabled.
    logger.debug("card ratio %s (accepted range %s to %s)",
                 card_ratio, CARD_W_TO_H_RATIO_LOW, CARD_W_TO_H_RATIO_HIGH)
    return CARD_W_TO_H_RATIO_LOW < card_ratio <= CARD_W_TO_H_RATIO_HIGH

def find_card(image):
image = image.convert(mode="L")
if(isnotbacklit(image)):
image = ImageOps.invert(image)
image = cropCard(image)
image2 = image.convert(mode="L")
diag = combine_images([image, image2])
image3 = image2
if(isnotbacklit(image2)):
image3 = ImageOps.invert(image2)
diag = combine_images([diag, image3])
cropped = cropCard(image3)
diag = combine_images([diag, cropped])
#image.show()
if(image.size[1] > image.size[0]):
image = image.transpose(Image.ROTATE_90)
image = normalizeFlip(image)
return image
image4 = image3
if(cropped.size[1] > cropped.size[0]):
print('rotate 90')
image4 = image3.transpose(Image.ROTATE_90)
cropped = cropped.transpose(Image.ROTATE_90)
if not is_card_dimensions(cropped):
return None
diag = combine_images([diag, image4])
image5 = normalizeFlip(image4, cropped)
diag = combine_images([diag, image5])
if logger.isEnabledFor(logging.DEBUG):
diag.show()
return image5

if __name__ == '__main__':
example()
37 changes: 21 additions & 16 deletions punchcards/punchcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class PunchCard(object):

logger = logging.getLogger('punchcard')

SPEC_IBM_MODEL_029 = "IBM Model 029 Punch Card" # only one for now

CARD_COLUMNS = 80
CARD_ROWS = 12

Expand Down Expand Up @@ -70,7 +72,7 @@ def create_card_map(self):
self.translate[tuple(v[1:])] = v[0]

# generate a range of floats
def drange(start, stop, step=1.0):
def drange(self, start, stop, step=1.0):
r = start
while (step >= 0.0 and r < stop) or (step < 0.0 and r > stop):
yield r
Expand All @@ -82,7 +84,6 @@ def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, y
self.text = ''
self.decoded = []
self.surface = []
self.debug = debug
self.threshold = 0
self.ymin = ystart
self.ymax = ystop
Expand All @@ -92,6 +93,10 @@ def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, y
self.image = image
self.pix = image.load()
self._crop()
if debug:
self.logger.setLevel(logging.DEBUG)
else:
self.logger.setLevel(logging.WARN)
self._scan(bright)

# Brightness is the average of RGB values
Expand All @@ -111,8 +116,8 @@ def _crop(self):
self.xmax = self.xsize
if self.ymax == 0:
self.ymax = self.ysize
self.midx = int(self.xmin + (self.xmax - self.xmin) / 2 + self.xadjust)
self.midy = int(self.ymin + (self.ymax - self.ymin) / 2)
self.midx = self.xmin + (self.xmax - self.xmin) / 2 + self.xadjust
self.midy = self.ymin + (self.ymax - self.ymin) / 2

# heuristic for finding a reasonable cutoff brightness
def _find_threshold_brightness(self):
Expand Down Expand Up @@ -173,9 +178,9 @@ def _find_data_horiz_dimensions(self, probe_y):
col_width = width * self.CARD_COL_WIDTH_RATIO
hole_width = width * self.CARD_HOLE_WIDTH_RATIO
#print col_width
if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
# mark left and right edges on the copy
for y in range(int(probe_y) - int(self.ysize/100), int(probe_y) + int(self.ysize/100)):
for y in range(int(probe_y - self.ysize/100), int(probe_y + self.ysize/100)):
self.debug_pix[left_border if left_border > 0 else 0,y] = 255
self.debug_pix[right_border if right_border < self.xmax else self.xmax - 1,y] = 255
for x in range(1, int((self.xmax - self.xmin) / 200)):
Expand Down Expand Up @@ -203,14 +208,14 @@ def _find_data_vert_dimensions(self):
hole_height = int(card_height * self.CARD_HOLE_HEIGHT_RATIO)
data_top_y = data_begins + hole_height / 2
col_height = int(card_height * self.CARD_ROW_HEIGHT_RATIO)
if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
# mark up the copy with the edges
for x in range(self.xmin, self.xmax-1):
self.debug_pix[x,top_border] = 255
self.debug_pix[x,bottom_border] = 255
if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
# mark search parameters
for x in range(self.midx - int(self.xsize/20), self.midx + int(self.xsize/20)):
for x in range(self.midx - self.xsize/20, self.midx + self.xsize/20):
self.debug_pix[x,self.ymin] = 255
self.debug_pix[x,self.ymax - 1] = 255
for y in range(0, self.ymin):
Expand All @@ -220,7 +225,7 @@ def _find_data_vert_dimensions(self):
return data_top_y, data_top_y + col_height * 11, col_height, hole_height

def _scan(self, bright=-1):
if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
# if debugging make a copy we can draw on
self.debug_image = self.image.copy()
self.debug_pix = self.debug_image.load()
Expand All @@ -243,7 +248,7 @@ def _scan(self, bright=-1):
if val >= self.threshold:
if left_edge == -1:
left_edge = x
if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
self.debug_pix[x,y] = self._flip(self.pix[x,y])
else:
if left_edge > -1:
Expand All @@ -252,22 +257,22 @@ def _scan(self, bright=-1):
col_num = int((left_edge + hole_length / 2.0 - x_data_left) / col_width + 0.25)
data[(col_num, row_num)] = hole_length
left_edge = -1
if (self.debug):
if self.logger.isEnabledFor(logging.DEBUG):
# Plot where holes might be on this row
expected_top_edge = y - hole_height / 2
expected_bottom_edge = y + hole_height / 2
blue = 255 * 256 * 256
for expected_left_edge in drange(x_data_left, x_data_right - 1, col_width):
for y_plot in drange(expected_top_edge, expected_bottom_edge, 2):
for expected_left_edge in self.drange(x_data_left, x_data_right - 1, col_width):
for y_plot in self.drange(expected_top_edge, expected_bottom_edge, 2):
self.debug_pix[expected_left_edge,y_plot] = blue
#self.debug_pix[x + hole_width/2,yline] = 255 * 256 * 256
self.debug_pix[expected_left_edge + hole_width,y_plot] = blue
for x_plot in drange(expected_left_edge, expected_left_edge + hole_width):
for x_plot in self.drange(expected_left_edge, expected_left_edge + hole_width):
self.debug_pix[x_plot, expected_top_edge] = blue
self.debug_pix[x_plot, expected_bottom_edge] = blue
y += col_height

if self.debug:
if self.logger.isEnabledFor(logging.DEBUG):
self.debug_image.show()
# prevent run-a-way debug shows causing my desktop to run out of memory
raw_input("Press Enter to continue...")
Expand Down
31 changes: 29 additions & 2 deletions tests/test_punchcards.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
from PIL import Image
from punchcards.normalize import find_card
from punchcards.punchcard import PunchCard
from os import listdir
from os.path import isfile, join

def testcard( filepath ):
    """Open one image file, try to locate a punch card in it, and print the result.

    Prints the file path first, then either the decoded card text or
    'Not a punchcard.' when find_card() returns None.

    :param filepath: path of the image file to process.
    """
    print(filepath)
    # The context manager releases the underlying file handle even when
    # processing raises, so looping over many images does not leak handles.
    with Image.open(filepath) as im:
        card_image = find_card(im)
        if card_image is None:
            print('Not a punchcard.')
            return
        card = PunchCard(card_image, bright=127, debug=False)
        print(card.text)

def test_cards():
    """Run testcard() on every regular file directly inside images/."""
    folder = 'images/'
    # Collect the file names first, then process them in listing order.
    names = []
    for entry in listdir(folder):
        if isfile(join(folder, entry)):
            names.append(entry)
    for entry in names:
        testcard(join(folder, entry))

def test_noncards():
    """Run testcard() on every regular file directly inside images/not_punchcards/."""
    folder = 'images/not_punchcards/'
    # Collect the file names first, then process them in listing order.
    names = []
    for entry in listdir(folder):
        if isfile(join(folder, entry)):
            names.append(entry)
    for entry in names:
        testcard(join(folder, entry))

def test_examples():
from PIL import Image
from punchcards import read_card
files = [
'images/C04D01L-0001.tif',
'images/C04D01L-0001-90.tif',
Expand All @@ -17,3 +40,7 @@ def test_examples():
image = Image.open(file)
image.show()
assert result == text

if __name__ == "__main__":
test_cards()
test_noncards()

0 comments on commit 0c010d3

Please sign in to comment.