-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUtil.py
108 lines (99 loc) · 4.54 KB
/
Util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
import time
class Util(object):
    '''
    Purpose: Static helpers for datasets of zeros and ones: loading,
             (weighted) marginal and pairwise counting, normalization, and
             the mutual-information edge weights used by the Chow-Liu
             algorithm.
    Usage: dataset=Util.load_dataset(filename)
    '''

    @staticmethod
    def load_dataset(filename):
        '''
        Purpose: Load a comma-separated dataset as an unsigned-integer array.
        Usage: dataset=Util.load_dataset(filename)
        Returns: 2-D np.uint32 array (one row per example, one column per
                 variable).
        '''
        # ndmin=2 keeps a one-line file as shape (1, nvariables); without it
        # np.loadtxt returns a 1-D array and dataset.shape[1] fails in the
        # counting helpers below.
        return np.loadtxt(filename, delimiter=',', dtype=np.uint32, ndmin=2)

    @staticmethod
    def compute_xycounts(dataset):
        '''
        Purpose: Take a dataset having zeros and ones and return all pairwise
                 counts.
        Returns: float array of shape (nvariables, nvariables, 2, 2) where
                 entry [j, k, a, b] is the number of rows with column j == a
                 and column k == b.
        Alert: No Laplace correction is performed. Values other than 0 and 1
               are not counted.
        '''
        nvariables = dataset.shape[1]
        # Build each indicator matrix once instead of once per output cell.
        indicators = ((dataset == 0).astype(int), (dataset == 1).astype(int))
        counts_xy = np.zeros((nvariables, nvariables, 2, 2))
        for a, left in enumerate(indicators):
            for b, right in enumerate(indicators):
                counts_xy[:, :, a, b] = np.einsum('ij,ik->jk', left, right)
        return counts_xy

    @staticmethod
    def compute_xcounts(dataset):
        '''
        Purpose: Take a dataset having zeros and ones and return marginal
                 counts.
        Returns: float array of shape (nvariables, 2) where entry [j, a] is
                 the number of rows with column j == a.
        Alert: No Laplace correction is performed.
        '''
        nvariables = dataset.shape[1]
        counts_x = np.zeros((nvariables, 2))
        for value in (0, 1):
            counts_x[:, value] = np.einsum('ij->j', (dataset == value).astype(int))
        return counts_x

    @staticmethod
    def compute_weighted_xycounts(dataset, weights):
        '''
        Purpose: Take a weighted dataset having zeros and ones and return all
                 pairwise counts, each row contributing its weight instead
                 of 1.
        Args: dataset - 2-D array of zeros and ones.
              weights - one weight per dataset row.
        Returns: float array of shape (nvariables, nvariables, 2, 2) where
                 entry [j, k, a, b] is the summed weight of rows with
                 column j == a and column k == b.
        Alert: No Laplace correction is performed.
        '''
        nvariables = dataset.shape[1]
        # Column vector so each row's weight broadcasts across its variables.
        weights_col = np.asarray(weights)[:, np.newaxis]
        indicators = ((dataset == 0).astype(int), (dataset == 1).astype(int))
        weighted = tuple(indicator * weights_col for indicator in indicators)
        counts_xy = np.zeros((nvariables, nvariables, 2, 2))
        for a, left in enumerate(weighted):
            for b, right in enumerate(indicators):
                counts_xy[:, :, a, b] = np.einsum('ij,ik->jk', left, right)
        return counts_xy

    @staticmethod
    def compute_weighted_xcounts(dataset, weights):
        '''
        Purpose: Take a weighted dataset having zeros and ones and return
                 marginal counts, each row contributing its weight instead
                 of 1.
        Args: dataset - 2-D array of zeros and ones.
              weights - one weight per dataset row.
        Returns: float array of shape (nvariables, 2) where entry [j, a] is
                 the summed weight of rows with column j == a.
        Alert: No Laplace correction is performed.
        '''
        nvariables = dataset.shape[1]
        weights_col = np.asarray(weights)[:, np.newaxis]
        counts_x = np.zeros((nvariables, 2))
        for value in (0, 1):
            counts_x[:, value] = np.einsum(
                'ij->j', (dataset == value).astype(int) * weights_col)
        return counts_x

    @staticmethod
    def normalize2d(xycounts):
        '''
        Purpose: Normalize a Bivariate Distribution.
        Args: xycounts - array of shape (nvariables, nvariables, 2, 2).
        Returns: float64 array of the same shape in which every 2x2 table
                 [j, k, :, :] is divided by its own total.
        Alert: A table whose total is zero is divided by zero.
        '''
        xycountsf = xycounts.astype(np.float64)
        # One normalizing constant per (j, k) pair of variables.
        norm_const = np.einsum('ijkl->ij', xycountsf)
        return xycountsf / norm_const[:, :, np.newaxis, np.newaxis]

    @staticmethod
    def normalize1d(xcounts):
        '''
        Purpose: Normalize a Univariate Distribution.
        Args: xcounts - array of shape (nvariables, 2).
        Returns: float64 array of the same shape in which every row is
                 divided by its own total.
        Alert: A row whose total is zero is divided by zero.
        '''
        xcountsf = xcounts.astype(np.float64)
        norm_const = np.einsum('ij->i', xcountsf)
        return xcountsf / norm_const[:, np.newaxis]

    @staticmethod
    def normalize(weights):
        '''
        Purpose: Normalize the weights so that they sum to one.
        Returns: weights divided by their total.
        '''
        norm_const = np.sum(weights)
        return weights / norm_const

    @staticmethod
    def compute_MI_prob(p_xy, p_x):
        '''
        Purpose: Compute the edge weights (pairwise mutual information) for
                 the Chow-Liu Algorithm.
        Args: p_xy - pairwise probabilities, shape
                     (nvariables, nvariables, 2, 2).
              p_x  - marginal probabilities, shape (nvariables, 2).
        Returns: float array of shape (nvariables, nvariables) where entry
                 [j, k] is
                 sum over a, b of p_xy[j,k,a,b] * log(p_xy[j,k,a,b] / (p_x[j,a] * p_x[k,b])).
        Note: Terms whose joint probability is exactly zero contribute 0
              (the usual 0*log(0) = 0 convention) instead of NaN, so
              un-smoothed probabilities still give finite edge weights.
        '''
        # Cast up front: np.reciprocal on an integer array truncates.
        p_xy = np.asarray(p_xy, dtype=np.float64)
        p_x = np.asarray(p_x, dtype=np.float64)
        nvariables = p_x.shape[0]
        sum_xy = np.zeros((nvariables, nvariables))
        # Zero probabilities legitimately produce inf/NaN intermediates here;
        # they are masked out below, so silence the floating-point warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            p_x_r = np.reciprocal(p_x)
            for a in (0, 1):
                for b in (0, 1):
                    joint = p_xy[:, :, a, b]
                    ratio = np.einsum('ij,i,j->ij', joint, p_x_r[:, a], p_x_r[:, b])
                    term = joint * np.log(ratio)
                    sum_xy += np.where(joint == 0, 0.0, term)
        return sum_xy