forked from bjbroder/Rivet-Labs-Internship-2017
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExisting CSVs -> Bit Vectors
36 lines (32 loc) · 1.06 KB
/
Existing CSVs -> Bit Vectors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import csv
import pickle
from BitVector import *
def vectorize(name, amount, table_dir=None):
    """Encode the header row of each numbered CSV table as a bit vector.

    The tables ``<name>0.csv`` .. ``<name><amount-1>.csv`` are read from
    *table_dir*.  Every distinct column name found in any header is given an
    index in first-seen order (scanning tables in numeric order, columns left
    to right).  Each table then gets one ``BitVector`` whose length is the
    number of distinct columns and whose bit *i* is 1 when that table's header
    contains the column with index *i*.

    The resulting list is also pickled to ``<name>.pkl`` in the current
    working directory; it can be read back with
    ``pickle.load(open(name + '.pkl', 'rb'))`` (only unpickle files you trust).

    Args:
        name: Common file-name prefix of the tables (a string).
        amount: Number of tables to read; table numbers run from 0 to
            ``amount - 1``.
        table_dir: Directory containing the CSV files.  When omitted, the
            original hard-coded Windows location is used unchanged.

    Returns:
        A list with one ``BitVector`` per table, in table-number order.

    Raises:
        OSError: If a table file cannot be opened or the pickle cannot be
            written.
    """
    import os  # function-scope import: only needed for the table_dir branch

    file_names = [name + str(i) + ".csv" for i in range(amount)]
    if table_dir is None:
        # Same string the original literal produced, but with every backslash
        # escaped explicitly ("\B", "\D", "\i", "\T" are invalid escape
        # sequences and trigger a SyntaxWarning on recent Python versions).
        default_prefix = "C:\\Users\\Brielle\\Documents\\internship\\Tables\\"
        table_paths = [default_prefix + file_name for file_name in file_names]
    else:
        table_paths = [os.path.join(table_dir, file_name) for file_name in file_names]

    # Read each header exactly once and reuse it for both passes below.
    headers = []
    for path in table_paths:
        # newline="" is what the csv module documents for files it parses.
        with open(path, "r", newline="") as handle:
            # An empty file has no header row; treat it as having no columns
            # instead of letting StopIteration escape from next().
            headers.append(next(csv.reader(handle), []))

    # Pass 1: assign every distinct column name a stable index.
    column_index = {}
    for header in headers:
        for column in header:
            if column not in column_index:
                column_index[column] = len(column_index)

    # Pass 2: one bit vector per table over the full column universe.
    all_bvs = []
    for header in headers:
        bv = BitVector(size=len(column_index))
        for column in header:
            bv[column_index[column]] = 1
        all_bvs.append(bv)

    # Context manager guarantees the file is closed even if dump() raises.
    with open(name + '.pkl', 'wb') as output:
        pickle.dump(all_bvs, output)

    return all_bvs
#universalBV = BitVector( size = len(universalColumns))
# Script entry: vectorize table0.csv .. table99.csv and print the resulting list.
print(vectorize(name="table", amount=100))