-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathenglish.go
137 lines (119 loc) · 2.91 KB
/
english.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package cryptopals
/*
## Cryptopals Solutions by Mohit Muthanna Cheppudira 2020.
This file consists of text analysis functions to detect the validity of
plain-text blocks.
*/
import (
"math"
"strings"
)
// getExpectedFreqForChar returns the approximate expected frequency of char in
// English text, on a percentage-like scale (e.g. 12.02 for 'E'). Letters are
// matched case-insensitively. The table is not exhaustive: any byte it does
// not cover gets a tiny non-zero default, which keeps callers that divide by
// the result from dividing by zero.
func getExpectedFreqForChar(char byte) float64 {
	// Fold ASCII lower-case letters to upper case; every other byte is looked
	// up unchanged.
	if 'a' <= char && char <= 'z' {
		char -= 'a' - 'A'
	}

	// A switch on constants needs no per-call allocation or setup.
	switch char {
	case ' ':
		return 10
	case '\'', '\n', ',', '.':
		return 0.1
	case 'E':
		return 12.02
	case 'T':
		return 9.1
	case 'A':
		return 8.12
	case 'O':
		return 7.68
	case 'I':
		return 7.31
	case 'N':
		return 6.95
	case 'S':
		return 6.28
	case 'R':
		return 6.02
	case 'H':
		return 5.92
	case 'D':
		return 4.32
	case 'L':
		return 3.98
	case 'U':
		return 2.88
	case 'C':
		return 2.71
	case 'M':
		return 2.61
	case 'F':
		return 2.3
	case 'Y':
		return 2.11
	case 'W':
		return 2.09
	case 'G':
		return 2.03
	case 'P':
		return 1.82
	case 'B':
		return 1.49
	case 'V':
		return 1.11
	case 'K':
		return 0.69
	case 'X':
		return 0.17
	case 'Q':
		return 0.11
	case 'J', 'Z':
		return 0.1
	case '1':
		return 0.2
	case '0', '2', '3', '4', '5', '6', '7', '8', '9':
		return 0.1
	}

	// Default value for bytes outside the table (helps prevent divide-by-zero).
	return 0.00001
}
// calcStringCost estimates how unlikely it is that str is English text, using
// a chi-squared style comparison of the observed character counts against the
// expected frequencies from getExpectedFreqForChar. A lower cost means a
// higher likelihood. Only characters that actually occur in str contribute to
// the statistic, and its square root is returned. An empty str costs 0.
func calcStringCost(str []byte) float64 {
	// Tally occurrences per case-folded key. Each byte is converted to a
	// one-rune string before upper-casing, so for bytes >= 0x80 the key is the
	// first UTF-8 byte of the upper-cased rune rather than the byte itself.
	var counts [256]int
	for _, char := range str {
		counts[strings.ToUpper(string(char))[0]]++
	}

	// Walk the tally in ascending key order so the floating-point sum is
	// accumulated in the same order on every call; ranging over a map would
	// visit keys in random order and let the low bits of the result vary.
	total := float64(len(str))
	cost := 0.0
	for key := range counts {
		if counts[key] == 0 {
			continue
		}
		expected := (getExpectedFreqForChar(byte(key)) / 100) * total
		diff := expected - float64(counts[key])
		cost += diff * diff / expected
	}
	return math.Sqrt(cost)
}
// Calculates the liklihood of str being an English string using correlation. Higher score
// means higher liklihood.
func calcStringScore(str []byte) float64 {
score := float64(0)
for _, char := range str {
c := strings.ToUpper(string(char))[0]
score += getExpectedFreqForChar(c)
}
return score
}
// crackXORByteCost tries to crack single-byte-XOR-encrypted cipherText by
// decrypting it with all 256 possible byte keys and keeping the candidate that
// is most English-like according to calcStringCost (lower is better). It
// returns the winning key, its cost, and the decrypted text. When several keys
// share the lowest cost, the smallest key wins. This is used in challenges in
// sets 1 and 3.
func crackXORByteCost(cipherText []byte) (key byte, cost float64, plainText string) {
	// Start from +Inf so the best candidate is always reported, no matter how
	// large its cost is; a finite starting bound could reject every candidate
	// for short or very un-English inputs.
	cost = math.Inf(1)

	// One scratch buffer serves every key; string(candidate) copies it when a
	// new best is recorded, so later overwrites cannot corrupt the result.
	candidate := make([]byte, len(cipherText))
	for k := 0; k < 256; k++ {
		// Decrypt with XOR.
		for i, c := range cipherText {
			candidate[i] = c ^ byte(k)
		}

		// Keep track of the lowest cost. The strict comparison keeps the
		// earliest key on ties.
		if candidateCost := calcStringCost(candidate); candidateCost < cost {
			key, cost, plainText = byte(k), candidateCost, string(candidate)
		}
	}
	return key, cost, plainText
}