forked from Shreeshrii/ocr-evaluation-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtessaccsummary
executable file
·56 lines (47 loc) · 1.57 KB
/
tessaccsummary
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/sh
# Copyright 2013 Nick White <[email protected]>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# Command-line handling.
#
# Synopsis: tessaccsummary [-w] imgdir traineddata
# Sets:
#   accprog   ocrevalutf8 sub-command used for scoring: "accuracy"
#             (character level, the default) or "wordacc" (with -w)
#   imgdir    directory that is searched for .png images
#   training  path of the .traineddata model under evaluation
# Exits with status 1 and a message on stderr when the arguments are unusable.
usage="$0 [-w] imgdir traineddata
-w test word accuracy, rather than character accuracy
imgdir directory containing .png images with accompanying
<name>-gt.txt ground-truth text files
traineddata .traineddata file"

# -w is only recognised as the very first argument.
accprog=accuracy
if test "$1" = "-w"; then
  accprog=wordacc
  shift
fi

# Exactly two operands must remain; usage errors belong on stderr so they
# never end up in captured or piped results.
if test $# -ne 2; then
  echo "Usage: $usage" >&2
  exit 1
fi

imgdir="$1"
training="$2"

# Fail early with a clear message instead of letting every later step fail
# and the run end without any result.
if test ! -d "$imgdir"; then
  printf '%s: %s: not a directory\n' "$0" "$imgdir" >&2
  exit 1
fi
if test ! -f "$training"; then
  printf '%s: %s: no such traineddata file\n' "$0" "$training" >&2
  exit 1
fi
# Copy training to temporary directory and point Tesseract to it.
#
# Reads: training (path of the .traineddata file)
# Sets:
#   t            scratch directory; also receives Tesseract's output files
#   tessdatadir  directory handed to tesseract --tessdata-dir (same as $t)
#   lang         language code passed to tesseract -l
t=$(mktemp -d) || {
  echo "$0: cannot create temporary directory" >&2
  exit 1
}
# Remove the scratch directory on every exit path. The signal trap turns
# HUP/INT/TERM into a normal exit so that the EXIT trap also runs in shells
# that do not fire it on signals.
trap 'rm -rf "$t"' EXIT
trap 'exit 1' HUP INT TERM
tessdatadir="$t"
# Language code = file name up to its first dot ("deu.traineddata" -> "deu").
lang=$(basename "$training")
lang=${lang%%.*}
# Without the model nothing below can work, so stop here (cp reports why).
cp "$training" "$t/$lang.traineddata" || exit 1
# tesseract seems to complain without a eng.traineddata file available.
# When the model itself is "eng" the file already exists and a link onto its
# own path would only fail with "File exists".
if test "$lang" != eng; then
  ln -s "$t/$lang.traineddata" "$t/eng.traineddata"
fi
# average_accuracy
#
# Runs tesseract on every .png below $imgdir (sorted by path), scores each
# result with "ocrevalutf8 $accprog" against "<image path without
# extension>-gt.txt", and prints the mean of the collected accuracy figures.
#
# Globals read:    imgdir, t, tessdatadir, lang, accprog, training
# Globals written: sum, n, avg (plus the loop scratch variables)
# Stdout: "<image name>: " for every image (no newline), then one line
#         "<training> - Average Accuracy: <mean>%" if at least one image
#         was scored.
# Stderr: one warning per image that could not be OCRed or scored; such
#         images are left out of the mean.
# Limitation: paths containing a newline are not supported.
average_accuracy() {
  sum=0
  n=0
  pngs=$(find "$imgdir" -type f -name '*.png' | sort)
  # The list is read line by line so that paths containing spaces or glob
  # characters stay intact. Feeding it through a here-document (rather than
  # a pipe) keeps the loop in the current shell, so $sum and $n survive it.
  while IFS= read -r i; do
    # An empty list still yields one blank line from the here-document.
    test -n "$i" || continue
    b=$(basename "$i" .png)
    name=${i%.*}
    printf "%s: " "$b"
    # stdin is redirected so the tools cannot consume the file list.
    if ! err=$(tesseract "$i" "$t/$b" --tessdata-dir "$tessdatadir" \
      --oem 1 --psm 6 -l "$lang" 2>&1 </dev/null); then
      printf '%s: tesseract failed: %s\n' "$i" "$err" >&2
      continue
    fi
    acc=$(ocrevalutf8 "$accprog" "$name-gt.txt" "$t/$b.txt" </dev/null \
      | awk '/Accuracy$/ {print $1; exit}' | sed 's/%//')
    # Only a (possibly negative) decimal number may reach bc; anything else
    # would turn $sum into garbage and distort the mean.
    case ${acc#-} in
      '' | *[!0-9.]*)
        printf '%s: no accuracy figure from ocrevalutf8, skipped\n' "$i" >&2
        continue
        ;;
    esac
    n=$((n + 1))
    sum=$(echo "$sum + $acc" | bc -l)
    # printf "%.2f%%\n" "$acc"
  done <<EOF
$pngs
EOF
  if test "$n" -ne 0; then
    avg=$(echo "$sum / $n" | bc -l)
    # $training is passed as an argument, never as part of the format, so a
    # '%' in the path cannot be misread as a conversion.
    printf '%s - Average Accuracy: %.2f%%\n' "$training" "$avg"
  fi
}

average_accuracy
rm -rf "$t"