From 8a51649f2435b50381d4b11fc4718a63861be0ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 14 Feb 2019 01:00:40 +0100 Subject: [PATCH] NNGraph: clean and doc (PR #21) --- README.rst | 2 +- pygsp/graphs/nngraphs/bunny.py | 4 +- pygsp/graphs/nngraphs/cube.py | 2 +- pygsp/graphs/nngraphs/imgpatches.py | 8 +- pygsp/graphs/nngraphs/nngraph.py | 507 ++++++++++++++-------------- pygsp/graphs/nngraphs/sensor.py | 2 +- pygsp/graphs/nngraphs/sphere.py | 2 +- pygsp/graphs/nngraphs/twomoons.py | 30 +- pygsp/tests/test_graphs.py | 146 ++++---- pygsp/tests/test_plotting.py | 4 +- setup.py | 3 +- 11 files changed, 359 insertions(+), 351 deletions(-) diff --git a/README.rst b/README.rst index a768428a..7f0b8776 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ The documentation is available on `Read the Docs `_ and development takes place on `GitHub `_. -(A (mostly unmaintained) `Matlab version `_ exists.) +A (mostly unmaintained) `Matlab version `_ exists. The PyGSP facilitates a wide variety of operations on graphs, like computing their Fourier basis, filtering or interpolating signals, plotting graphs, diff --git a/pygsp/graphs/nngraphs/bunny.py b/pygsp/graphs/nngraphs/bunny.py index d9dac407..21729823 100644 --- a/pygsp/graphs/nngraphs/bunny.py +++ b/pygsp/graphs/nngraphs/bunny.py @@ -34,7 +34,7 @@ def __init__(self, **kwargs): 'distance': 8, } - super(Bunny, self).__init__(Xin=data['bunny'], - epsilon=0.02, NNtype='radius', + super(Bunny, self).__init__(data['bunny'], center=False, rescale=False, + kind='radius', radius=0.02, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/cube.py b/pygsp/graphs/nngraphs/cube.py index 820e401c..85f51347 100644 --- a/pygsp/graphs/nngraphs/cube.py +++ b/pygsp/graphs/nngraphs/cube.py @@ -89,7 +89,7 @@ def __init__(self, 'distance': 9, } - super(Cube, self).__init__(Xin=pts, k=10, + super(Cube, self).__init__(pts, k=10, center=False, rescale=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/imgpatches.py b/pygsp/graphs/nngraphs/imgpatches.py index ea11be06..95ce9149 100644 --- a/pygsp/graphs/nngraphs/imgpatches.py +++ b/pygsp/graphs/nngraphs/imgpatches.py @@ -10,7 +10,8 @@ class ImgPatches(NNGraph): Extract a feature vector in the form of a patch for every pixel of an image, then construct a nearest-neighbor graph between these feature - vectors. The feature matrix, i.e. the patches, can be found in :attr:`Xin`. + vectors. The feature matrix, i.e., the patches, can be found in + :attr:`features`. Parameters ---------- @@ -35,9 +36,10 @@ class ImgPatches(NNGraph): >>> from skimage import data, img_as_float >>> img = img_as_float(data.camera()[::64, ::64]) >>> G = graphs.ImgPatches(img, patch_shape=(3, 3)) - >>> print('{} nodes ({} x {} pixels)'.format(G.Xin.shape[0], *img.shape)) + >>> N, d = G.features.shape + >>> print('{} nodes ({} x {} pixels)'.format(N, *img.shape)) 64 nodes (8 x 8 pixels) - >>> print('{} features per node'.format(G.Xin.shape[1])) + >>> print('{} features per node'.format(d)) 9 features per node >>> G.set_coordinates(kind='spring', seed=42) >>> fig, axes = plt.subplots(1, 2) diff --git a/pygsp/graphs/nngraphs/nngraph.py b/pygsp/graphs/nngraphs/nngraph.py index c099a546..85b7ab43 100644 --- a/pygsp/graphs/nngraphs/nngraph.py +++ b/pygsp/graphs/nngraphs/nngraph.py @@ -1,44 +1,51 @@ # -*- coding: utf-8 -*- +from __future__ import division + +import multiprocessing import traceback import numpy as np -import numpy.matlib -from scipy import sparse -import scipy.spatial as sps -import scipy.spatial.distance as spsd -import multiprocessing +from scipy import sparse, spatial from pygsp import utils from pygsp.graphs import Graph # prevent circular import in Python < 3.5 -_logger = utils.build_logger(__name__) # conversion between the FLANN conventions and the various backend functions -_dist_translation = { - 'scipy-kdtree': { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf - }, - 'scipy-ckdtree': { - 'euclidean': 2, - 'manhattan': 1, - 'max_dist': np.inf - }, - 'scipy-pdist' : { - 'euclidean': 'euclidean', - 'manhattan': 'cityblock', - 'max_dist': 'chebyshev', - 'minkowski': 'minkowski' - }, - 'nmslib' : { - 'euclidean': 'l2', - 'manhattan': 'l1', - 'max_dist': 'linf', - 'minkowski': 'lp' - } - } +_metrics = { + 'scipy-pdist': { + 'euclidean': 'euclidean', + 'manhattan': 'cityblock', + 'max_dist': 'chebyshev', + 'minkowski': 'minkowski', + }, + 'scipy-kdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': 0, + }, + 'scipy-ckdtree': { + 'euclidean': 2, + 'manhattan': 1, + 'max_dist': np.inf, + 'minkowski': 0, + }, + 'flann': { + 'euclidean': 'euclidean', + 'manhattan': 'manhattan', +# 'max_dist': 'max_dist', # produces incorrect results + 'minkowski': 'minkowski', + }, + 'nmslib': { + 'euclidean': 'l2', + 'manhattan': 'l1', + 'max_dist': 'linf', +# 'minkowski': 'lp', # unsupported + } +} + def _import_cfl(): try: @@ -50,6 +57,7 @@ def _import_cfl(): 'Original exception: {}'.format(e)) return cfl + def _import_nmslib(): try: import nmslib as nms @@ -58,58 +66,55 @@ def _import_nmslib(): 'neighbors method or try to install it with ' 'pip (or conda) install nmslib.') return nms - -def _knn_sp_kdtree(X, num_neighbors, dist_type, order=0): - kdt = sps.KDTree(X) - D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-kdtree'][dist_type]) + + +def _knn_sp_kdtree(features, num_neighbors, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] + kdt = spatial.KDTree(features) + D, NN = kdt.query(features, k=(num_neighbors + 1), p=p) return NN, D -def _knn_sp_ckdtree(X, num_neighbors, dist_type, order=0): - kdt = sps.cKDTree(X) - D, NN = kdt.query(X, k=(num_neighbors + 1), - p=_dist_translation['scipy-ckdtree'][dist_type], - n_jobs=-1) + +def _knn_sp_ckdtree(features, num_neighbors, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] + kdt = spatial.cKDTree(features) + D, NN = kdt.query(features, k=(num_neighbors + 1), p=p, n_jobs=-1) return NN, D -def _knn_flann(X, num_neighbors, dist_type, order): - # the combination FLANN + max_dist produces incorrect results - # do not allow it - if dist_type == 'max_dist': - raise ValueError('FLANN and max_dist is not supported') - +def _knn_flann(features, num_neighbors, metric, order): cfl = _import_cfl() - cfl.set_distance_type(dist_type, order=order) + cfl.set_distance_type(metric, order=order) c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(X) + c.build_index(features) # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). - NN, D = c.nn_index(X, num_neighbors + 1) + NN, D = c.nn_index(features, num_neighbors + 1) c.free_index() - if dist_type == 'euclidean': # flann returns squared distances + if metric == 'euclidean': # flann returns squared distances return NN, np.sqrt(D) return NN, D -def _radius_sp_kdtree(X, epsilon, dist_type, order=0): - kdt = sps.KDTree(X) - D, NN = kdt.query(X, k=None, distance_upper_bound=epsilon, - p=_dist_translation['scipy-kdtree'][dist_type]) + +def _radius_sp_kdtree(features, radius, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-kdtree'][metric] + kdt = spatial.KDTree(features) + D, NN = kdt.query(features, k=None, distance_upper_bound=radius, p=p) return NN, D -def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): - N, dim = np.shape(X) - kdt = sps.cKDTree(X) - nn = kdt.query_ball_point(X, r=epsilon, - p=_dist_translation['scipy-ckdtree'][dist_type], - n_jobs=-1) + +def _radius_sp_ckdtree(features, radius, metric, order): + p = order if metric == 'minkowski' else _metrics['scipy-ckdtree'][metric] + n_vertices, _ = features.shape + kdt = spatial.cKDTree(features) + nn = kdt.query_ball_point(features, r=radius, p=p, n_jobs=-1) D = [] NN = [] - for k in range(N): - x = np.matlib.repmat(X[k, :], len(nn[k]), 1) - d = np.linalg.norm(x - X[nn[k], :], - ord=_dist_translation['scipy-ckdtree'][dist_type], + for k in range(n_vertices): + x = np.tile(features[k, :], (len(nn[k]), 1)) + d = np.linalg.norm(x - features[nn[k], :], + ord=_metrics['scipy-ckdtree'][metric], axis=1) nidx = d.argsort() NN.append(np.take(nn[k], nidx)) @@ -117,250 +122,260 @@ def _radius_sp_ckdtree(X, epsilon, dist_type, order=0): return NN, D -def _knn_sp_pdist(X, num_neighbors, dist_type, order): - if dist_type == 'minkowski': - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], - p=order) - else: - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) - pd = spsd.squareform(p) - pds = np.sort(pd)[:, 0:num_neighbors+1] - pdi = pd.argsort()[:, 0:num_neighbors+1] +def _knn_sp_pdist(features, num_neighbors, metric, order): + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + pd = spatial.distance.squareform(p) + pds = np.sort(pd)[:, :num_neighbors+1] + pdi = pd.argsort()[:, :num_neighbors+1] return pdi, pds - -def _knn_nmslib(X, num_neighbors, dist_type, order): - N, dim = np.shape(X) + + +def _knn_nmslib(features, num_neighbors, metric, _): + n_vertices, _ = features.shape ncpu = multiprocessing.cpu_count() - if dist_type == 'minkowski': - raise ValueError('unsupported distance type (lp) for nmslib') nms = _import_nmslib() - nmsidx = nms.init(space=_dist_translation['nmslib'][dist_type]) - nmsidx.addDataPointBatch(X) + nmsidx = nms.init(space=_metrics['nmslib'][metric]) + nmsidx.addDataPointBatch(features) nmsidx.createIndex() - q = nmsidx.knnQueryBatch(X, k=num_neighbors+1, num_threads=int(ncpu/2)) + q = nmsidx.knnQueryBatch(features, k=num_neighbors+1, + num_threads=int(ncpu/2)) nn, d = zip(*q) - D = np.concatenate(d).reshape(N, num_neighbors+1) - NN = np.concatenate(nn).reshape(N, num_neighbors+1) + D = np.concatenate(d).reshape(n_vertices, num_neighbors+1) + NN = np.concatenate(nn).reshape(n_vertices, num_neighbors+1) return NN, D - -def _radius_sp_pdist(X, epsilon, dist_type, order): - N, dim = np.shape(X) - if dist_type == 'minkowski': - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type], - p=order) - else: - p = spsd.pdist(X, metric=_dist_translation['scipy-pdist'][dist_type]) - pd = spsd.squareform(p) - pdf = pd < epsilon + + +def _radius_sp_pdist(features, radius, metric, order): + n_vertices, _ = np.shape(features) + p = spatial.distance.pdist(features, + metric=_metrics['scipy-pdist'][metric], + p=order) + pd = spatial.distance.squareform(p) + pdf = pd < radius D = [] NN = [] - for k in range(N): + for k in range(n_vertices): v = pd[k, pdf[k, :]] d = pd[k, :].argsort() # use the same conventions as in scipy.distance.kdtree NN.append(d[0:len(v)]) D.append(np.sort(v)) - + return NN, D -def _radius_flann(X, epsilon, dist_type, order=0): - N, dim = np.shape(X) - # the combination FLANN + max_dist produces incorrect results - # do not allow it - if dist_type == 'max_dist': - raise ValueError('FLANN and max_dist is not supported') - + +def _radius_flann(features, radius, metric, order): + n_vertices, _ = features.shape cfl = _import_cfl() - cfl.set_distance_type(dist_type, order=order) + cfl.set_distance_type(metric, order=order) c = cfl.FLANNIndex(algorithm='kdtree') - c.build_index(X) + c.build_index(features) D = [] NN = [] - for k in range(N): - nn, d = c.nn_radius(X[k, :], epsilon*epsilon) + for k in range(n_vertices): + nn, d = c.nn_radius(features[k, :], radius**2) D.append(d) NN.append(nn) c.free_index() - if dist_type == 'euclidean': # flann returns squared distances - return NN, list(map(np.sqrt, D)) + if metric == 'euclidean': + # Flann returns squared distances. + D = list(map(np.sqrt, D)) return NN, D -def _radius_nmslib(X, epsilon, dist_type, order=0): - raise ValueError('nmslib does not support (yet?) range queries') -def center_input(X, N): - return X - np.kron(np.ones((N, 1)), np.mean(X, axis=0)) +_nn_functions = { + 'knn': { + 'scipy-pdist': _knn_sp_pdist, + 'scipy-kdtree': _knn_sp_kdtree, + 'scipy-ckdtree': _knn_sp_ckdtree, + 'flann': _knn_flann, + 'nmslib': _knn_nmslib, + }, + 'radius': { + 'scipy-pdist': _radius_sp_pdist, + 'scipy-kdtree': _radius_sp_kdtree, + 'scipy-ckdtree': _radius_sp_ckdtree, + 'flann': _radius_flann, + }, +} + + +def center_features(features): + n_vertices, _ = features.shape + return features - np.kron(np.ones((n_vertices, 1)), np.mean(features, 0)) + + +def rescale_features(features): + n_vertices, dimensionality = features.shape + bounding_radius = 0.5 * np.linalg.norm(np.amax(features, axis=0) - + np.amin(features, axis=0), 2) + scale = np.power(n_vertices, 1 / min(dimensionality, 3)) / 10 + return features * scale / bounding_radius -def rescale_input(X, N, d): - bounding_radius = 0.5 * np.linalg.norm(np.amax(X, axis=0) - - np.amin(X, axis=0), 2) - scale = np.power(N, 1. / float(min(d, 3))) / 10. - return X * scale / bounding_radius class NNGraph(Graph): - r"""Nearest-neighbor graph from given point cloud. + r"""Nearest-neighbor graph. + + The nearest-neighbor graph is built from a set of features, where the edge + weight between vertices :math:`v_i` and :math:`v_j` is given by + + .. math:: A(i,j) = \exp \left( -\frac{d^2(v_i, v_j)}{\sigma^2} \right), + + where :math:`d(v_i, v_j)` is a distance measure between some representation + (the features) of :math:`v_i` and :math:`v_j`. For example, the features + might be the 3D coordinates of points in a point cloud. Then, if + ``metric='euclidean'``, :math:`d(v_i, v_j) = \| x_i - x_j \|_2`, where + :math:`x_i` is the 3D position of vertex :math:`v_i`. + + The similarity matrix :math:`A` is sparsified by either keeping the ``k`` + closest vertices for each vertex (if ``type='knn'``), or by setting to zero + any distance greater than ``radius`` (if ``type='radius'``). Parameters ---------- - Xin : ndarray - Input points, Should be an `N`-by-`d` matrix, where `N` is the number - of nodes in the graph and `d` is the dimension of the feature space. - NNtype : string, optional - Type of nearest neighbor graph to create. The options are 'knn' for - k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default - is 'knn'). - backend : {'scipy-kdtree', 'scipy-pdist', 'flann'} - Type of the backend for graph construction. - - 'scipy-kdtree' will use scipy.spatial.KDTree - - 'scipy-ckdtree'(default) will use scipy.spatial.cKDTree - - 'scipy-pdist' will use scipy.spatial.distance.pdist (slowest but exact) - - 'flann' use Fast Library for Approximate Nearest Neighbors (FLANN) - - 'nmslib' use nmslib for approximate nearest neighbors (faster in high-dimensional spaces) + features : ndarray + An `N`-by-`d` matrix, where `N` is the number of nodes in the graph and + `d` is the number of features. center : bool, optional - Center the data so that it has zero mean (default is True) + Whether to center the features to have zero mean. rescale : bool, optional - Rescale the data so that it lies in a l2-sphere (default is True) - k : int, optional - Number of neighbors for knn (default is 10) - sigma : float, optional - Width of the similarity kernel. - By default, it is set to the average of the nearest neighbor distance. - epsilon : float, optional - Radius for the epsilon-neighborhood search (default is 0.01) - plotting : dict, optional - Dictionary of plotting parameters. See :obj:`pygsp.plotting`. - (default is {}) - symmetrize_type : string, optional - Type of symmetrization to use for the adjacency matrix. See - :func:`pygsp.utils.symmetrization` for the options. - (default is 'average') - dist_type : string, optional - Type of distance to compute. See - :func:`pyflann.index.set_distance_type` for possible options. - (default is 'euclidean') + Whether to scale the features so that they lie in an l2-ball. + metric : {'euclidean', 'manhattan', 'minkowski', 'max_dist'}, optional + Metric used to compute pairwise distances. order : float, optional - Only used if dist_type is 'minkowski'; represents the order of the - Minkowski distance. (default is 0) + The order of the Minkowski distance for ``metric='minkowski'``. + kind : {'knn', 'radius'}, optional + Kind of nearest neighbor graph to create. Either ``'knn'`` for + k-nearest neighbors or ``'radius'`` for epsilon-nearest neighbors. + k : int, optional + Number of neighbors considered when building a k-NN graph with + ``type='knn'``. + radius : float, optional + Radius of the ball when building a radius graph with ``type='radius'``. + kernel_width : float, optional + Width of the Gaussian kernel. By default, it is set to the average of + the distances of neighboring vertices. + backend : string, optional + * ``'scipy-pdist'`` uses :func:`scipy.spatial.distance.pdist` to + compute pairwise distances. The method is brute force and computes + all distances. That is the slowest method. + * ``'scipy-kdtree'`` uses :class:`scipy.spatial.KDTree`. The method + builds a k-d tree to prune the number of pairwise distances it has to + compute. + * ``'scipy-ckdtree'`` uses :class:`scipy.spatial.cKDTree`. The same as + ``'scipy-kdtree'`` but with C bindings, which should be faster. + * ``'flann'`` uses the `Fast Library for Approximate Nearest Neighbors + (FLANN) `_. That method is an + approximation. + * ``'nmslib'`` uses the `Non-Metric Space Library (NMSLIB) + `_. That method is an + approximation. It should be the fastest in high-dimensional spaces. Examples -------- >>> import matplotlib.pyplot as plt - >>> X = np.random.RandomState(42).uniform(size=(30, 2)) - >>> G = graphs.NNGraph(X) + >>> features = np.random.RandomState(42).uniform(size=(30, 2)) + >>> G = graphs.NNGraph(features) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> _ = G.plot(ax=axes[1]) """ - def __init__(self, Xin, NNtype='knn', backend='scipy-ckdtree', center=True, - rescale=True, k=10, sigma=None, epsilon=0.01, - plotting={}, symmetrize_type='average', dist_type='euclidean', - order=0, **kwargs): + def __init__(self, features, *, center=True, rescale=True, + metric='euclidean', order=0, + kind='knn', k=10, radius=0.01, + kernel_width=None, + backend='scipy-ckdtree', + **kwargs): - self.Xin = Xin - self.NNtype = NNtype - self.backend = backend + self.features = features # stored in coords, but scaled and centered self.center = center self.rescale = rescale - self.k = k - self.sigma = sigma - self.epsilon = epsilon - - _dist_translation['scipy-kdtree']['minkowski'] = order - _dist_translation['scipy-ckdtree']['minkowski'] = order - - self._nn_functions = { - 'knn': { - 'scipy-kdtree': _knn_sp_kdtree, - 'scipy-ckdtree': _knn_sp_ckdtree, - 'scipy-pdist': _knn_sp_pdist, - 'flann': _knn_flann, - 'nmslib': _knn_nmslib - }, - 'radius': { - 'scipy-kdtree': _radius_sp_kdtree, - 'scipy-ckdtree': _radius_sp_ckdtree, - 'scipy-pdist': _radius_sp_pdist, - 'flann': _radius_flann, - 'nmslib': _radius_nmslib - }, - } - - - - self.symmetrize_type = symmetrize_type - self.dist_type = dist_type + self.metric = metric self.order = order + self.kind = kind + self.k = k + self.radius = radius + self.kernel_width = kernel_width + self.backend = backend + + N, d = np.shape(features) + + if _nn_functions.get(kind) is None: + raise ValueError('Invalid kind "{}".'.format(kind)) - N, d = np.shape(self.Xin) - Xout = self.Xin + if backend not in _metrics.keys(): + raise ValueError('Invalid backend "{}".'.format(backend)) - if k >= N: + if _metrics['scipy-pdist'].get(metric) is None: + raise ValueError('Invalid metric "{}".'.format(metric)) + + if _nn_functions[kind].get(backend) is None: + raise ValueError('{} does not support kind "{}".'.format( + backend, kind)) + + if _metrics[backend].get(metric) is None: + raise ValueError('{} does not support the {} metric.'.format( + backend, metric)) + + if kind == 'knn' and k >= N: raise ValueError('The number of neighbors (k={}) must be smaller ' - 'than the number of nodes ({}).'.format(k, N)) - - if self.center: - Xout = center_input(Xout, N) - - if self.rescale: - Xout = rescale_input(Xout, N, d) - - if self._nn_functions.get(NNtype) == None: - raise ValueError('Invalid NNtype {}'.format(self.NNtype)) - - if self._nn_functions[NNtype].get(backend) == None: - raise ValueError('Invalid backend {} for type {}'.format(backend, - self.NNtype)) - - if self.NNtype == 'knn': - NN, D = self._nn_functions[NNtype][backend](Xout, k, - dist_type, order) - if self.sigma is None: - self.sigma = np.mean(D[:, 1:]) # Discard distance to self. - - elif self.NNtype == 'radius': - NN, D = self._nn_functions[NNtype][backend](Xout, epsilon, - dist_type, order) - if self.sigma is None: + 'than the number of vertices ({}).'.format(k, N)) + + if center: + features = center_features(features) + + if rescale: + features = rescale_features(features) + + if kind == 'knn': + NN, D = _nn_functions[kind][backend](features, k, metric, order) + if self.kernel_width is None: + # Discard distance to self. + self.kernel_width = np.mean(D[:, 1:]) + + elif kind == 'radius': + NN, D = _nn_functions[kind][backend](features, radius, + metric, order) + if self.kernel_width is None: # Discard distance to self. - self.sigma = np.mean([np.mean(d[1:]) for d in D]) + self.kernel_width = np.mean([np.mean(d[1:]) for d in D]) + countV = list(map(len, NN)) - count = sum(countV) + count = sum(countV) spi = np.zeros((count)) spj = np.zeros((count)) spv = np.zeros((count)) start = 0 for i in range(N): - leng = countV[i] - 1 - spi[start:start + leng] = np.kron(np.ones((leng)), i) - spj[start:start + leng] = NN[i][1:] - spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / - float(self.sigma)) - start = start + leng + length = countV[i] - 1 + distance = np.power(D[i][1:], 2) + spi[start:start + length] = np.kron(np.ones((length)), i) + spj[start:start + length] = NN[i][1:] + spv[start:start + length] = np.exp(-distance / self.kernel_width) + start = start + length W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) - # Sanity check - if np.shape(W)[0] != np.shape(W)[1]: - raise ValueError('Weight matrix W is not square') - - # Enforce symmetry. Note that checking symmetry with - # np.abs(W - W.T).sum() is as costly as the symmetrization itself. - W = utils.symmetrize(W, method=symmetrize_type) + # Enforce symmetry. May have been broken by k-NN. Checking symmetry + # with np.abs(W - W.T).sum() is as costly as the symmetrization itself. + W = utils.symmetrize(W, method='average') - super(NNGraph, self).__init__(W=W, plotting=plotting, - coords=Xout, **kwargs) + super(NNGraph, self).__init__(W=W, coords=features, **kwargs) def _get_extra_repr(self): - return {'NNtype': self.NNtype, - 'backend': self.backend, - 'center': self.center, - 'rescale': self.rescale, - 'k': self.k, - 'sigma': '{:.2f}'.format(self.sigma), - 'epsilon': '{:.2f}'.format(self.epsilon), - 'symmetrize_type': self.symmetrize_type, - 'dist_type': self.dist_type, - 'order': self.order} + return { + 'center': self.center, + 'rescale': self.rescale, + 'metric': self.metric, + 'order': self.order, + 'kind': self.kind, + 'k': self.k, + 'radius': '{:.2f}'.format(self.radius), + 'kernel_width': '{:.2f}'.format(self.kernel_width), + 'backend': self.backend, + } diff --git a/pygsp/graphs/nngraphs/sensor.py b/pygsp/graphs/nngraphs/sensor.py index ac623a50..5491e0c7 100644 --- a/pygsp/graphs/nngraphs/sensor.py +++ b/pygsp/graphs/nngraphs/sensor.py @@ -75,7 +75,7 @@ def __init__(self, N=64, k=6, distributed=False, seed=None, **kwargs): coords = rs.uniform(0, 1, (N, 2)) - super(Sensor, self).__init__(Xin=coords, k=k, + super(Sensor, self).__init__(coords, k=k, rescale=False, center=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/sphere.py b/pygsp/graphs/nngraphs/sphere.py index e375b5b4..2ad9c233 100644 --- a/pygsp/graphs/nngraphs/sphere.py +++ b/pygsp/graphs/nngraphs/sphere.py @@ -64,7 +64,7 @@ def __init__(self, 'vertex_size': 80, } - super(Sphere, self).__init__(Xin=pts, k=10, + super(Sphere, self).__init__(pts, k=10, center=False, rescale=False, plotting=plotting, **kwargs) diff --git a/pygsp/graphs/nngraphs/twomoons.py b/pygsp/graphs/nngraphs/twomoons.py index af87d0d8..6da8ca95 100644 --- a/pygsp/graphs/nngraphs/twomoons.py +++ b/pygsp/graphs/nngraphs/twomoons.py @@ -16,7 +16,7 @@ class TwoMoons(NNGraph): two_moons graph or a synthesized one (default is 'standard'). 'standard' : Create a two_moons graph from a based graph. 'synthesized' : Create a synthesized two_moon - sigmag : float + kernel_width : float Variance of the distance kernel (default = 0.05) dim : int The dimensionality of the points (default = 2). @@ -24,7 +24,7 @@ class TwoMoons(NNGraph): N : int Number of vertices (default = 2000) Only valid for moontype == 'synthesized'. - sigmad : float + variance : float Variance of the data (do not set it too high or you won't see anything) (default = 0.05) Only valid for moontype == 'synthesized'. @@ -44,11 +44,11 @@ class TwoMoons(NNGraph): """ - def _create_arc_moon(self, N, sigmad, distance, number, seed): + def _create_arc_moon(self, N, variance, distance, number, seed): rs = np.random.RandomState(seed) phi = rs.rand(N, 1) * np.pi r = 1 - rb = sigmad * rs.normal(size=(N, 1)) + rb = variance * rs.normal(size=(N, 1)) ab = rs.rand(N, 1) * 2 * np.pi b = rb * np.exp(1j * ab) bx = np.real(b) @@ -63,29 +63,29 @@ def _create_arc_moon(self, N, sigmad, distance, number, seed): return np.concatenate((moonx, moony), axis=1) - def __init__(self, moontype='standard', dim=2, sigmag=0.05, - N=400, sigmad=0.07, distance=0.5, seed=None, **kwargs): + def __init__(self, moontype='standard', dim=2, kernel_width=0.05, + N=400, variance=0.07, distance=0.5, seed=None, **kwargs): self.moontype = moontype self.dim = dim - self.sigmag = sigmag - self.sigmad = sigmad + self.kernel_width = kernel_width + self.variance = variance self.distance = distance self.seed = seed if moontype == 'standard': N1, N2 = 1000, 1000 data = utils.loadmat('pointclouds/two_moons') - Xin = data['features'][:dim].T + coords = data['features'][:dim].T elif moontype == 'synthesized': N1 = N // 2 N2 = N - N1 - coords1 = self._create_arc_moon(N1, sigmad, distance, 1, seed) - coords2 = self._create_arc_moon(N2, sigmad, distance, 2, seed) + coords1 = self._create_arc_moon(N1, variance, distance, 1, seed) + coords2 = self._create_arc_moon(N2, variance, distance, 2, seed) - Xin = np.concatenate((coords1, coords2)) + coords = np.concatenate((coords1, coords2)) else: raise ValueError('Unknown moontype {}'.format(moontype)) @@ -96,15 +96,15 @@ def __init__(self, moontype='standard', dim=2, sigmag=0.05, 'vertex_size': 30, } - super(TwoMoons, self).__init__(Xin=Xin, sigma=sigmag, k=5, + super(TwoMoons, self).__init__(coords, kernel_width=kernel_width, k=5, center=False, rescale=False, plotting=plotting, **kwargs) def _get_extra_repr(self): attrs = {'moontype': self.moontype, 'dim': self.dim, - 'sigmag': '{:.2f}'.format(self.sigmag), - 'sigmad': '{:.2f}'.format(self.sigmad), + 'kernel_width': '{:.2f}'.format(self.kernel_width), + 'variance': '{:.2f}'.format(self.variance), 'distance': '{:.2f}'.format(self.distance), 'seed': self.seed} attrs.update(super(TwoMoons, self)._get_extra_repr()) diff --git a/pygsp/tests/test_graphs.py b/pygsp/tests/test_graphs.py index df23a0db..c516a8a7 100644 --- a/pygsp/tests/test_graphs.py +++ b/pygsp/tests/test_graphs.py @@ -8,7 +8,6 @@ from __future__ import division import unittest -import os import numpy as np import scipy.linalg from scipy import sparse @@ -349,96 +348,89 @@ def test_set_coordinates(self): self.assertRaises(ValueError, G.set_coordinates, 'invalid') def test_nngraph(self, n_vertices=30): - rs = np.random.RandomState(42) - Xin = rs.normal(size=(n_vertices, 3)) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib'] - if os.name != 'nt': - backends.append('flann') - order=3 # for minkowski, FLANN only accepts integer orders - - for cur_backend in backends: - for dist_type in dist_types: - #print("backend={} dist={}".format(cur_backend, dist_type)) - if (cur_backend == 'flann' and - dist_type == 'max_dist') or (cur_backend == 'nmslib' and - dist_type == 'minkowski'): - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='knn', backend=cur_backend, - dist_type=dist_type) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='radius', backend=cur_backend, - dist_type=dist_type) + features = np.random.RandomState(42).normal(size=(n_vertices, 3)) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'scipy-pdist', 'nmslib', + 'flann'] + order = 3 # for minkowski, FLANN only accepts integer orders + + for backend in backends: + for metric in metrics: + if ((backend == 'flann' and metric == 'max_dist') or + (backend == 'nmslib' and metric == 'minkowski')): + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', backend=backend, + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='radius', backend=backend, + metric=metric) else: - if cur_backend == 'nmslib': - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='radius', backend=cur_backend, - dist_type=dist_type, order=order) + if backend == 'nmslib': + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='radius', backend=backend, + metric=metric, order=order) else: - graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, - dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='radius', + backend=backend, + metric=metric, order=order) + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order) + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, center=False) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, rescale=False) - graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, - dist_type=dist_type, order=order, + graphs.NNGraph(features, kind='knn', + backend=backend, + metric=metric, order=order, rescale=False, center=False) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='badtype', backend=cur_backend, - dist_type=dist_type) - self.assertRaises(ValueError, graphs.NNGraph, Xin, - NNtype='knn', backend='badtype', - dist_type=dist_type) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='invalid', backend=backend, + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', backend='invalid', + metric=metric) + self.assertRaises(ValueError, graphs.NNGraph, features, + kind='knn', k=n_vertices+1) def test_nngraph_consistency(self): - Xin = np.arange(90).reshape(30, 3) - dist_types = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] - backends = ['scipy-kdtree', 'scipy-ckdtree', 'nmslib'] - if os.name != 'nt': - backends.append('flann') - num_neighbors=4 - epsilon=0.1 - - # use pdist as ground truth - G = graphs.NNGraph(Xin, NNtype='knn', + features = np.arange(90).reshape(30, 3) + metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski'] + backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib'] + num_neighbors = 4 + radius = 0.1 + + G = graphs.NNGraph(features, kind='knn', backend='scipy-pdist', k=num_neighbors) - for cur_backend in backends: - for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + for backend in backends: + for metric in metrics: + if backend == 'flann' and metric == 'max_dist': continue - if cur_backend == 'nmslib' and dist_type == 'minkowski': + if backend == 'nmslib' and metric == 'minkowski': continue - #print("backend={} dist={}".format(cur_backend, dist_type)) - Gt = graphs.NNGraph(Xin, NNtype='knn', - backend=cur_backend, k=num_neighbors) + Gt = graphs.NNGraph(features, kind='knn', + backend=backend, k=num_neighbors) d = sparse.linalg.norm(G.W - Gt.W) - self.assertTrue(d < 0.01, 'Graphs (knn {}/{}) are not identical error={}'.format(cur_backend, dist_type, d)) - - G = graphs.NNGraph(Xin, NNtype='radius', - backend='scipy-pdist', epsilon=epsilon) - for cur_backend in backends: - for dist_type in dist_types: - if cur_backend == 'flann' and dist_type == 'max_dist': + self.assertTrue(d < 0.01, 'Graphs (knn {}/{}) are not identical error={}'.format(backend, metric, d)) + + G = graphs.NNGraph(features, kind='radius', + backend='scipy-pdist', radius=radius) + for backend in backends: + for metric in metrics: + if backend == 'flann' and metric == 'max_dist': continue - if cur_backend == 'nmslib': #unsupported + if backend == 'nmslib': continue - #print("backend={} dist={}".format(cur_backend, dist_type)) - Gt = graphs.NNGraph(Xin, NNtype='radius', - backend=cur_backend, epsilon=epsilon) + Gt = graphs.NNGraph(features, kind='radius', + backend=backend, radius=radius) d = sparse.linalg.norm(G.W - Gt.W, ord=1) - self.assertTrue(d < 0.01, - 'Graphs (radius {}/{}) are not identical error={}'.format(cur_backend, dist_type, d)) - + self.assertTrue(d < 0.01, + 'Graphs (radius {}/{}) are not identical error={}'.format(backend, metric, d)) + def test_bunny(self): graphs.Bunny() diff --git a/pygsp/tests/test_plotting.py b/pygsp/tests/test_plotting.py index 0aa29d44..a28ae0c3 100644 --- a/pygsp/tests/test_plotting.py +++ b/pygsp/tests/test_plotting.py @@ -50,8 +50,8 @@ def test_plot_graphs(self): # Classes who require parameters. if classname == 'NNGraph': - Xin = np.arange(90).reshape(30, 3) - Gs.append(Graph(Xin)) + features = np.random.RandomState(42).normal(size=(30, 3)) + Gs.append(Graph(features)) elif classname in ['ImgPatches', 'Grid2dImgPatches']: Gs.append(Graph(img=self._img, patch_shape=(3, 3))) else: diff --git a/setup.py b/setup.py index b49ade95..6bdd0b57 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,6 @@ 'scikit-image', # Approximate nearest neighbors for kNN graphs. 'cyflann', - 'pybind11', 'nmslib', # Convex optimization on graph. 'pyunlocbox', @@ -69,7 +68,7 @@ # Dependencies to build and upload packages. 'pkg': [ 'wheel', - 'twine' + 'twine', ], }, license="BSD",