forked from jaimin-shah/classification-using-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexclude0.py
108 lines (90 loc) · 2.78 KB
/
exclude0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import csv
# Zero-imputation script.
#
# Reads 'diabetes.csv' (nine comma-separated numeric columns, no header row),
# replaces every 0 in columns 0-7 with that column's mean over its non-zero
# entries, and writes the result to 'exclude0.csv'.  Column 8 is copied
# through unchanged.
# NOTE(review): zeros are presumably placeholders for missing measurements,
# and column 8 is presumably the class label - confirm against the dataset.


def _nonzero_mean(values):
    """Return the mean of the non-zero entries of *values*.

    Zeros add nothing to the sum, so dividing the plain sum by the number of
    non-zero entries yields the mean over the non-zero entries only.

    Returns 0.0 when *values* has no non-zero entry (including when it is
    empty): there is nothing to impute from, and this avoids a
    ZeroDivisionError.
    """
    nonzero_count = sum(1 for value in values if value != 0)
    if nonzero_count == 0:
        return 0.0
    return sum(values) / nonzero_count


def _fill_zeros(values, fill):
    """Return a copy of *values* with every 0 replaced by *fill*."""
    return [fill if value == 0 else value for value in values]


# Parser applied to each input column, by position: columns 5 and 6 hold
# floats, every other column holds integers.
_COLUMN_PARSERS = (int, int, int, int, int, float, float, int, int)

# newline='' is what the csv module asks for on files handed to reader/writer.
with open('diabetes.csv', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',')
    # Index each row explicitly (rather than zip) so a row with fewer than
    # nine fields still fails loudly with IndexError; blank lines are skipped.
    parsed_rows = [
        [parse(row[position]) for position, parse in enumerate(_COLUMN_PARSERS)]
        for row in spamreader
        if row
    ]

# Transpose the parsed rows into one list per column (raw values).
_raw_columns = [
    [record[position] for record in parsed_rows]
    for position in range(len(_COLUMN_PARSERS))
]
lis0, lis1, lis2, lis3, lis4, lis5, lis6, lis7, lis8 = _raw_columns

# avg[k] is the non-zero mean of column k, for the eight imputed columns.
# Each mean is computed from its own column; previously avg[0] mistakenly
# used the sum of column 1.
avg = [_nonzero_mean(column) for column in _raw_columns[:8]]

# ch0..ch7 are the imputed columns: zeros replaced by the column mean.
# zip stops after the eight means, so column 8 is not imputed.
ch0, ch1, ch2, ch3, ch4, ch5, ch6, ch7 = [
    _fill_zeros(column, mean) for column, mean in zip(_raw_columns, avg)
]

# newline='' keeps the explicit '\n' line terminator from being translated
# by the platform's text layer.
with open('exclude0.csv', 'w', newline='') as csvopfile:
    colname = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8']
    writer = csv.DictWriter(csvopfile, colname, lineterminator='\n')
    # No header row is written: writeheader() is never called, so the output
    # contains data rows only.
    for record in zip(ch0, ch1, ch2, ch3, ch4, ch5, ch6, ch7, lis8):
        writer.writerow(dict(zip(colname, record)))