-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_scrub.py
49 lines (32 loc) · 1.53 KB
/
test_scrub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
# coding: utf-8
import scrub, unittest
import numpy as np
class TestScrub(unittest.TestCase):
    """Unit tests for the ``clean``, ``tokenize``, ``represent`` and
    ``relevant_segment_index`` functions of the ``scrub`` module."""

    def test_clean(self):
        """``scrub.clean`` applied to the sample fixture equals the clean fixture."""
        # Fixture paths are relative to the current working directory.
        with open('static/sample_text.txt') as f:
            sample = f.read()
        with open('static/clean_sample.txt') as f:
            clean_sample = f.read()
        self.assertEqual(scrub.clean(sample), clean_sample)

    def test_tokenize(self):
        """``scrub.tokenize`` on cleaned text yields the expected token list."""
        text = "He's not the Messiah. He's a very naughty boy! Now, piss off!"
        self.assertListEqual(
            scrub.tokenize(scrub.clean(text)),
            ['hes', 'messiah', 'hes', 'naughty', 'boy', 'piss'])

    def test_represent(self):
        """Row sums of the matrix returned by ``scrub.represent`` match expectations."""
        text = ['foo', 'bar', 'name', 'baz', 'nose color', 'colour', 'moo',
                'heh', 'finish', 'tasty', 'taste finish']
        _, V = scrub.represent(text, 'name')
        expected = np.array([0., 0., 1., 0., 2., 1., 0., 0., 1., 0., 2.])
        # assert_array_equal reports the mismatching elements on failure,
        # unlike assertTrue(np.array_equal(...)) which only says "False".
        np.testing.assert_array_equal(V.sum(1), expected)

        # Remove the 'name' entry and run again. The expected vector still has
        # 11 entries and now starts with 1 -- presumably represent() inserts
        # the given name as the first segment when it is missing; confirm
        # against scrub.represent.
        text.pop(2)
        _, V = scrub.represent(text, 'name')
        expected = np.array([1., 0., 0., 0., 2., 1., 0., 0., 1., 0., 2.])
        np.testing.assert_array_equal(V.sum(1), expected)

    def test_relevant_segment_index(self):
        """``scrub.relevant_segment_index`` selects the expected segments."""
        text = ['foo', 'bar', 'name', 'baz', 'nose color', 'colour', 'moo',
                'heh', 'finish', 'tasty', 'taste finish']
        _, V = scrub.represent(text, 'name')
        idx = scrub.relevant_segment_index(V)
        # Map the returned indices back onto the input segments.
        new_text = [text[m] for m in idx]
        self.assertListEqual(
            new_text,
            ['name', 'nose color', 'colour', 'finish', 'taste finish'])