-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathparis.py
121 lines (110 loc) · 3.57 KB
/
paris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 by
# Thomas Bonald <[email protected]>
# Bertrand Charpentier <[email protected]>
# All rights reserved.
# BSD license.
import numpy as np
import networkx as nx
def paris(G, copy_graph = True):
n = G.number_of_nodes()
if copy_graph:
F = G.copy()
else:
F = G
# index nodes from 0 to n - 1
if set(F.nodes()) != set(range(n)):
F = nx.convert_node_labels_to_integers(F)
# node weights
w = {u: 0 for u in range(n)}
wtot = 0
for (u,v) in F.edges():
if 'weight' not in F[u][v]:
F[u][v]['weight'] = 1
weight = F[u][v]['weight']
w[u] += weight
w[v] += weight
wtot += weight
if u != v:
wtot += weight
# cluster sizes
s = {u: 1 for u in range(n)}
# connected components
cc = []
# dendrogram as list of merges
D = []
# cluster index
u = n
while n > 0:
# nearest-neighbor chain
chain = [list(F.nodes())[0]]
while chain != []:
a = chain.pop()
# nearest neighbor
dmin = float("inf")
b = -1
for v in F.neighbors(a):
if v != a:
d = w[v] * w[a] / float(F[a][v]['weight']) / float(wtot)
if d < dmin:
b = v
dmin = d
elif d == dmin:
b = min(b,v)
d = dmin
if chain != []:
c = chain.pop()
if b == c:
# merge a,b
D.append([a,b,d,s[a] + s[b]])
# update graph
F.add_node(u)
neighbors_a = list(F.neighbors(a))
neighbors_b = list(F.neighbors(b))
for v in neighbors_a:
F.add_edge(u,v,weight = F[a][v]['weight'])
for v in neighbors_b:
if F.has_edge(u,v):
F[u][v]['weight'] += F[b][v]['weight']
else:
F.add_edge(u,v,weight = F[b][v]['weight'])
F.remove_node(a)
F.remove_node(b)
n -= 1
# update weight and size
w[u] = w.pop(a) + w.pop(b)
s[u] = s.pop(a) + s.pop(b)
# change cluster index
u += 1
else:
chain.append(c)
chain.append(a)
chain.append(b)
elif b >= 0:
chain.append(a)
chain.append(b)
else:
# remove the connected component
cc.append((a,s[a]))
F.remove_node(a)
w.pop(a)
s.pop(a)
n -= 1
# add connected components to the dendrogram
a,s = cc.pop()
for b,t in cc:
s += t
D.append([a,b,float("inf"),s])
a = u
u += 1
return reorder_dendrogram(np.array(D))
def reorder_dendrogram(D):
n = np.shape(D)[0] + 1
order = np.zeros((2,n - 1),float)
order[0] = range(n - 1)
order[1] = np.array(D)[:,2]
index = np.lexsort(order)
nindex = {i:i for i in range(n)}
nindex.update({n + index[t]:n + t for t in range(n - 1)})
return np.array([[nindex[int(D[t][0])],nindex[int(D[t][1])],D[t][2],D[t][3]] for t in range(n - 1)])[index,:]