-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIsomap-Pac.py
157 lines (130 loc) · 5.38 KB
/
Isomap-Pac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#%matplotlib inline
plt.figure(figsize=(6,6))
# display multiple outputs within a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all";
np.random.seed(8888)
# spiral dataset
n = np.sqrt(np.random.rand(200,1)) * 1440 * (2*np.pi)/360
d1x = -np.cos(n)*n + np.random.rand(200,1) * 0.8
d1y = np.sin(n)*n + np.random.rand(200,1) * 0.8
plt.scatter(d1x, d1y, c = 'black', s=40)
plt.title('Spiral Dataset')
X = np.hstack((d1x,d1y));
# compute pairwise distance matrix to find k nearest neighbors for each x_i in X
from sklearn.metrics import pairwise_distances
dist_matrix = pairwise_distances(X)
print("Distance matrix shape: ", dist_matrix.shape)
'''
# function that outputs N x k matrix with k nearest neighbors for each observation in X
def nearest_neighbors(X, k):
# we use k+1 here since Xi will have the shortest distance to itself
knn_matrix = np.zeros((len(X), k))
# compute pairwise distances
dist_matrix = pairwise_distances(X)
# for each row find indices of k nearest neighbors
for i in range(len(X)):
knn_matrix[i] = dist_matrix[i,:].argsort()[1:k+1]
return knn_matrix
# set number of neighbors and find neighborhood matrix
k = 10
X_neighbors = nearest_neighbors(X, k)
# loop through each data point and draw lines to nearest neighbors
plt.figure(figsize=(6,6))
plt.scatter(X[:,0], X[:,1], alpha=0.3, c='black', s=50);
for i in range(len(X)):
neighbors = X_neighbors[i]
for j in range(len(neighbors)):
plt.plot(X[[i, neighbors.astype('int')[j]], 0], X[[i, neighbors.astype('int')[j]], 1], c='gray')
plt.title('Data with Nearest Neighbors k=' + str(k))
plt.scatter(X[:,0], X[:,1], c='black', s=60);
# neighbors for a given xi (first element in X)
neighbors = X_neighbors[0] # indices of neighbors
plt.figure(figsize=(6,6))
plt.scatter(X[0, 0], X[0, 1], c='blue', s=50, alpha=0.8)
plt.text(X[0, 0]-3, X[0, 1] - 3, s='$X_i$', size=20)
plt.scatter(X[neighbors.astype('int'), 0], X[neighbors.astype('int'), 1], c='red', alpha = 0.8, s=50)
for i in range(len(neighbors)):
plt.plot(X[[0, neighbors.astype('int')[i]], 0], X[[0, neighbors.astype('int')[i]], 1], c='gray')
plt.scatter(X[:,0], X[:,1], alpha=0.3, c='black', s=50)
plt.title('Nearest neighbors of data point xi');
# get coordinates for the neighborhood of xi
xi_nn = []
for i in range(len(neighbors)):
xi_nn.append(X[neighbors.astype('int')[i]])
xi_nn = np.array(xi_nn)
# zoom in on xi with nearest neighbor edges
j = k-1
plt.scatter(X[0, 0], X[0, 1], c='blue', s=50, alpha=0.8)
plt.text(X[0, 0] - 1, X[0, 1] - 2, s='$X_i$', size=20)
plt.text(xi_nn[j][0]-1.0, xi_nn[j][1] + 0.5, s='$X_j$', size=20)
plt.scatter(X[neighbors.astype('int'), 0], X[neighbors.astype('int'), 1], c='red', alpha = 0.8, s=50)
for i in range(len(neighbors)):
plt.plot(X[[0, neighbors.astype('int')[i]], 0], X[[0, neighbors.astype('int')[i]], 1], c='gray')
plt.scatter(X[:,0], X[:,1], alpha=0.2, c='black', s=50);
plt.scatter(X[neighbors.astype('int'), 0], X[neighbors.astype('int'), 1], c='red', alpha = 0.8, s=50)
plt.title('k-NN for some Xi')
plt.xlim(np.min(xi_nn[:,0])-3,np.max(xi_nn[:,0])+3)
plt.ylim(np.min(xi_nn[:,1]-3),np.max(xi_nn[:,1])+3);
# Example - Graph Geodesics on the Swiss Roll
# generate data
n = 1000
x = np.random.rand(2,n)
# swiss roll transformation
v = 3*np.pi/2*(.1 + 2*x[0,:])
X = np.zeros([3,n])
X[1,:] = 20*x[1,:]
X[0,:] = - np.cos(v)*v
X[2,:] = np.sin(v)*v
from mpl_toolkits.mplot3d import Axes3D
# plot swiss roll
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(X[0,:], X[1,:], X[2,:], c=plt.cm.jet((X[0,:]**2+X[2,:]**2)/100), s=200, lw=0, alpha=1)
ax.set_xlim(np.min(X[0,:]),np.max(X[0,:]))
ax.set_ylim(np.min(X[1,:]),np.max(X[1,:]))
ax.set_zlim(np.min(X[2,:]),np.max(X[2,:]))
ax.axis("off");
# format X as (n_samples, n_features)
X = np.transpose(X)
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111, projection="3d")
# plot original swiss roll
ax.scatter(X[:,0], X[:,1], X[:,2], c=plt.cm.jet((X[:,0]**2+X[:,2]**2)/100), s=200, lw=0, alpha=1);
# loop through each data point and plot lines connecting nearest neighbors
k = 6 # number of nearest neighbors
knn = nearest_neighbors(X, k)
for i in range(len(X)):
neighbors = knn[i]
for j in range(len(neighbors)):
ax.plot(X[[i, neighbors.astype('int')[j]], 0],
X[[i, neighbors.astype('int')[j]], 1],
X[[i, neighbors.astype('int')[j]], 2], color='black');
# configure axis settings
ax.axis("off")
ax.set_xlim(np.min(X[:,0]),np.max(X[:,0]))
ax.set_ylim(np.min(X[:,1]),np.max(X[:,1]))
ax.set_zlim(np.min(X[:,2]),np.max(X[:,2]))
plt.show();
'''
from sklearn.manifold import Isomap
data = X
# apply isomap with k = 6 and output dimension = 2
model = Isomap(n_components=2, n_neighbors=6)
proj = model.fit_transform(data)
print(proj)
# plot the isomap projection
plt.figure(figsize=(14,10))
plt.scatter(proj[:, 0], proj[:, 1], c=plt.cm.jet((X[:,0]**2+X[:,2]**2)/100), s=200, lw=0, alpha=1)
# plot lines connecting the same neighboring points from our original data
for i in range(len(X)):
neighbors = knn[i]
for j in range(len(neighbors)):
plt.plot(proj[[i, neighbors.astype('int')[j]], 0],
proj[[i, neighbors.astype('int')[j]], 1], color='black');
plt.title('Isomap with k-Nearest Neighbors = ' + str(k), size=30)
plt.axis("off")
plt.show();