"""Unit tests for the PII finder helpers imported from pii_team_frost and pii_detect."""
import unittest
from pii_team_frost import *
from pii_detect import find_city_state, find_account_number, anonymize_pii
class TeamFrostTests(unittest.TestCase):
    """Unit tests for the Team Frost PII finder functions.

    Every finder is called with a sentence; the assertions expect the
    result to be an indexable collection of the matched substrings, in the
    order they appear in the sentence, and falsy when nothing matches.
    """

    def test_find_us_phone_number(self):
        """Dashed US phone numbers are found anywhere in the text."""
        # test phone number at the end of a string
        results_list = find_us_phone_number('My phone number is 301-526-4113')
        self.assertEqual(results_list[0], '301-526-4113')

        # test phone number at the beginning of a string
        results_list = find_us_phone_number('123-456-7890 is my phone number')
        self.assertEqual(results_list[0], '123-456-7890')

        # test phone number in the middle of a string
        results_list = find_us_phone_number('You can reach me at 333-402-7890. That is my number')
        self.assertEqual(results_list[0], '333-402-7890')

        # test multiple phone numbers
        results_list = find_us_phone_number('123-456-7890 is my phone number. Her number is 987-654-3210')
        self.assertEqual(results_list[0], '123-456-7890')
        self.assertEqual(results_list[1], '987-654-3210')

        # test an invalid phone number (digits without dashes)
        results_list = find_us_phone_number('1234567890 is my phone number')
        self.assertFalse(results_list)

    def test_find_visa_mastercard(self):
        """Dashed 16-digit card numbers are found; malformed ones are not."""
        # card number at the end of a string
        results_list = find_visa_mastercard('My credit card number is 1234-5678-9012-3456')
        self.assertEqual(results_list[0], '1234-5678-9012-3456')

        # card number at the beginning of a string
        results_list = find_visa_mastercard('1234-5678-9012-3456 is my credit card number')
        self.assertEqual(results_list[0], '1234-5678-9012-3456')

        # more than one card number
        results_list = find_visa_mastercard('I have 2 cards. one number is 1234-5678-9012-3456 '
                                            'the other is 5489-1304-2985-7529')
        self.assertEqual(results_list[0], '1234-5678-9012-3456')
        self.assertEqual(results_list[1], '5489-1304-2985-7529')

        # wrong format - credit card number incomplete
        results_list = find_visa_mastercard('My credit card number is 1234-5678-3456')
        self.assertFalse(results_list)

        # with a letter
        results_list = find_visa_mastercard('My credit card number is 1AB4-5678-9012-3456')
        self.assertFalse(results_list)

    def test_find_us_ssn(self):
        """US social security numbers are returned as a list of strings."""
        results_list = find_us_ssn('My social security number is 123-45-6789')
        lst = ["123-45-6789"]
        message = " - US SSN Lists Dont Match"
        self.assertEqual(results_list, lst, message)

        # Test for multiple US SSN within a given sentence
        results_list = find_us_ssn('My friends social security is 245-57-8359, '
                                   'and my other friends social security is 678-52-4878')
        lst2 = ["245-57-8359", "678-52-4878"]
        message = " - US SSN Lists Dont Match"
        self.assertEqual(results_list, lst2, message)

        # Test for invalid case where SSN was not in right format
        results_list = find_us_ssn('My Social Security is 23-56-4576')
        message = " - Invalid SSN Format"
        self.assertFalse(results_list, message)

    def test_find_email(self):
        """Email addresses of several shapes are found verbatim.

        NOTE(review): every address in this test used to be the literal
        '[email protected]' placeholder, so all six cases were identical and
        contained no '@' at all. The addresses below are representative
        examples rebuilt from the per-case comments (reserved example.com
        domain); confirm them against the intended fixtures.
        """
        # Test an alphanumeric email
        results_list = find_email('My email address is johndoe123@example.com')
        self.assertEqual(results_list[0], 'johndoe123@example.com')

        # Test a case-insensitive email
        results_list = find_email('My email address is JohnDoe@Example.com')
        self.assertEqual(results_list[0], 'JohnDoe@Example.com')

        # Test an email with a period in the username
        results_list = find_email('My email address is john.doe@example.com')
        self.assertEqual(results_list[0], 'john.doe@example.com')

        # Test an email with an underscore in the username
        results_list = find_email('My email address is john_doe@example.com')
        self.assertEqual(results_list[0], 'john_doe@example.com')

        # Test an email with a second-level domain
        results_list = find_email('My email address is johndoe@example.co.uk')
        self.assertEqual(results_list[0], 'johndoe@example.co.uk')

        # Test an email with subaddressing
        results_list = find_email('My email address is johndoe+news@example.com')
        self.assertEqual(results_list[0], 'johndoe+news@example.com')

    def test_find_instagram_handle(self):
        """Instagram handles are found; '@' inside other tokens is ignored."""
        # Test an alphabetical token
        result_list = find_instagram_handle('My instagram handle is @masterchief')
        self.assertEqual(result_list[0], '@masterchief')

        # Test a case-insensitive token
        result_list = find_instagram_handle('My instagram handle is @MasterChief')
        self.assertEqual(result_list[0], '@MasterChief')

        # Test an alphanumeric token
        result_list = find_instagram_handle('My instagram handle is @m4st3rch1ef')
        self.assertEqual(result_list[0], '@m4st3rch1ef')

        # Test token with an enclosed underscore
        result_list = find_instagram_handle('My instagram handle is @master_chief')
        self.assertEqual(result_list[0], '@master_chief')

        # Test token with leading/trailing underscore(s)
        result_list = find_instagram_handle('My instagram handle is @_masterchief_')
        self.assertEqual(result_list[0], '@_masterchief_')

        # Test token with an enclosed period
        result_list = find_instagram_handle('My instagram handle is @master.chief')
        self.assertEqual(result_list[0], '@master.chief')

        # Test token with leading/trailing period(s)
        result_list = find_instagram_handle('My instagram handle is @.masterchief.')
        self.assertEqual(result_list[0], '@.masterchief.')

        # Test token with mixed leading/trailing underscores and periods
        result_list = find_instagram_handle('My instagram handle is @_.masterchief._')
        self.assertEqual(result_list[0], '@_.masterchief._')

        # Test that emails do not match
        # NOTE(review): the address here used to be the '[email protected]'
        # placeholder, which has no '@' and so never exercised this case;
        # confirm the replacement against the intended fixture.
        result_list = find_instagram_handle('My email is masterchief@outlook.com')
        self.assertFalse(result_list)

        # Test that no character can precede the handle
        result_list = find_instagram_handle('My email is:@outlook.com')
        self.assertFalse(result_list)

    def test_find_amex(self):
        """Only 15-digit 4-6-5 numbers starting with 34 or 37 are accepted."""
        # Test that number with under 15 digits is denied
        results_list = find_amex('My credit card number is 1234-567890-1234')
        self.assertEqual(results_list, [])

        # Test that number with over 15 digits is denied
        results_list = find_amex('My credit card number is 1234-567890-123456')
        self.assertEqual(results_list, [])

        # Test that number starting with 34 is accepted
        results_list = find_amex('My credit card number is 3412-567890-12345')
        self.assertEqual(results_list[0], '3412-567890-12345')

        # Test that number starting with 37 is accepted
        results_list = find_amex('My credit card number is 3712-567890-12345')
        self.assertEqual(results_list[0], '3712-567890-12345')
class Comp410TestPII(unittest.TestCase):
    """Tests for find_city_state, find_account_number and anonymize_pii."""

    def test_find_city_state(self):
        """Check 'City, ST' detection at various positions in the text."""
        # Each entry: (input sentence, matches expected at index 0, 1, ...).
        # An empty tuple means the finder must return something falsy.
        scenarios = (
            # a single city and state
            ('I live in Houston, TX', ('Houston, TX',)),
            # two cities and states
            ('I have lived in Houston, TX and Dallas, TX', ('Houston, TX', 'Dallas, TX')),
            # beginning of string
            ('Houston, TX is a great city', ('Houston, TX',)),
            # middle of string
            ('I lived in Houston, TX for 10 years', ('Houston, TX',)),
            # invalid: the city name is not capitalized
            ('I live in houston, TX', ()),
            # a two-word city
            ('I live in New York, NY', ('New York, NY',)),
        )
        for sentence, wanted in scenarios:
            found = find_city_state(sentence)
            if not wanted:
                self.assertFalse(found)
                continue
            for position, match in enumerate(wanted):
                self.assertEqual(found[position], match)
        # TODO - rejecting an invalid state abbreviation (for example
        # 'I live in Houston, AA') is currently not a requirement, so that
        # case is not asserted here.

    def test_find_account_number(self):
        """Check detection of undashed 10-digit account numbers."""
        # Same table layout as above: (sentence, expected matches by index).
        scenarios = (
            # a single account number
            ('My account number is 1234567890', ('1234567890',)),
            # account number at start of string
            ('1234567890 is my account number', ('1234567890',)),
            # account number in middle of string
            ('My account 1234567890 is not active', ('1234567890',)),
            # account number at end of string
            ('My account number is 1234567890', ('1234567890',)),
            # multiple account numbers
            ('My account numbers are 1234567890 and 0987654321', ('1234567890', '0987654321')),
            # dashes are not supported
            ('My account number is 123-456-7890', ()),
        )
        for sentence, wanted in scenarios:
            found = find_account_number(sentence)
            if not wanted:
                self.assertFalse(found)
                continue
            for position, match in enumerate(wanted):
                self.assertEqual(found[position], match)

    def _check_anonymized(self, original, redacted):
        """Assert that anonymize_pii(original).text equals *redacted*."""
        outcome = anonymize_pii(original)
        self.assertEqual(redacted, outcome.text)

    def test_replace_name(self):
        """A person's name is replaced by the <PERSON> tag."""
        self._check_anonymized('My name is John Edwards',
                               'My name is <PERSON>')

    def test_replace_account_number(self):
        """Each dashed account number is replaced by <ACCOUNT_NUMBER>."""
        self._check_anonymized('My account numbers are 123-12345 and 1234-12345',
                               'My account numbers are <ACCOUNT_NUMBER> and <ACCOUNT_NUMBER>')

    def test_replace_credit_card(self):
        """A credit card number is replaced by <CREDIT_CARD>."""
        self._check_anonymized('My cc is 4095-3434-2424-1414',
                               'My cc is <CREDIT_CARD>')

    def test_replace_nothing(self):
        """Text without PII comes back unchanged."""
        self._check_anonymized('I am not going to tell you what my name is',
                               'I am not going to tell you what my name is')

    def test_replace_multiple(self):
        """Several kinds of PII in one string are each replaced."""
        self._check_anonymized('750-12-1234 and 4095-3434-2424-1414 and 919-555-1212',
                               '<US_SSN> and <CREDIT_CARD> and <PHONE_NUMBER>')
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()