-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrbc_npy2Perseus.py
500 lines (466 loc) · 16.6 KB
/
rbc_npy2Perseus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
"""
Module to convert RBC frames (npy files) to input required by Perseus software (for computing topological persistence)
created by kel 5/23/2012
New Cells: 110125, 130125, 140125, 40125, 50125
Old Cells: 100125, 120125, 50125, 90125
NOTE: There's been found to be discrepancies between sparse and dense cubical
formats
- Sparse returns persistence intervals more in line with intuition
"""
import numpy
import matplotlib.pylab as plt
import re
import os
import pickle as pkl
from scipy.spatial import distance
def write_file ( fname, output ):
"""
----DENSE FORMAT---
- Write contents of single frame to output file
e.g. new cell name on simplex:
'/data/jberwald/wyss/data/Cells_Jesse/New/frames/new_110125/new_110125-concatenated-ASCII_324.npy'
e.g. old cell name on simplex:
'/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_100125/old_100125-concatenated-ASCII_1525.npy'
Perseus input requires following format:
#Dimension of cubical grid
#Number of cubes in first dimension
#Number of cubes in second dimension
#Number of cubes in ... dimension
#Birth time of cube anchored at (0,0,...,0)
#Birth time of cube anchored at (0,0,...,1)
#Note: Output is file name
"""
#Load npy file
data = numpy.load(fname)
size = data.shape#(203,198)
dim = len ( size )
#Open file
text = open (output + ".txt", "w")
#Write number of dimensions
text . write (str(dim) + "\n")
#Write resolutions
for i in xrange(len(size)):
text . write (str(size[i]) + "\n")
#Write strictly 2D file
for i in xrange(size[0]):#203
for j in xrange(size[1]):#198
if int(data[i][j]) == 0:
text . write (str(-1) + "\n")
else:
text . write (str(int(data[i][j])) + "\n")
text . close()
def write_sparse_array( arr, output, ndim=2 ):
"""
Write an array to *sparse* Perseus format.
arr -- numpy ndarray
output -- name (full path) of output file
NOTE --
"""
with open( output, 'w' ) as fh:
fh.write( str( arr.ndim )+'\n' )
for i in xrange( arr.shape[0] ):
for j in xrange( arr.shape[1] ):
if int( arr[i,j] ) != 0:
pos = str(i) + ' ' + str(j) + ' '
fh.write (pos + str(int( arr[i,j] )) + "\n")
def write_sparse_file ( fname, output=None ):
"""
fname -- filename without extension. Extension must be .txt or .npy.
output -- name of output file. If None, '_pers' is apended to fname, yielding <fname>_pers.txt.
- Write sparse See above arguments
"""
#dimension array
numDim = 2
#Open file
if output == None:
output = fname + '_pers'
text = open (output + ".txt", "w")
#Write number of dimensions
text . write (str(numDim) + "\n")
# now open the array file
if fname.endswith( 'npy' ):
data = numpy.load(fname)
else:
data = numpy.loadtxt(fname)
size = data.shape
for i in range(0,size[0]):
for j in range(0,size[1]):
if int(data[i][j]) != 0:
pos = str(i) + ' ' + str(j) + ' '
text . write (pos + str(int(data[i][j])) + "\n")
text . close()
def write_double_file ( fname, output ):
"""
- Write contents of single frame to output file
e.g. new cell name on simplex:
'/data/jberwald/wyss/data/Cells_Jesse/New/frames/new_110125/new_110125-concatenated-ASCII_324.npy'
e.g. old cell name on simplex:
'/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_100125/old_100125-concatenated-ASCII_1525.npy'
Perseus input requires following format:
#Dimension of cubical grid
#Number of cubes in first dimension
#Number of cubes in second dimension
#Number of cubes in ... dimension
#Birth time of cube anchored at (0,0,...,0)
#Birth time of cube anchored at (0,0,...,1)
#Note: Output is file name
"""
#Load npy file
data = numpy.load(fname)
size = data.shape
dim = len ( size )
#Write to output
#Open file
text = open (output + ".txt", "w")
#Write number of dimensions
text . write (str(dim) + "\n")
#Write resolutions
for i in size:
text . write (str(i) + "\n")
#Write strictly 2D file
for i in range(0,size[0]):
for j in range(0,size[1]):
if data[i][j] == 0:
text . write (str(-1) + "\n")
else:
text . write (str(data[i][j]) + "\n")
text . close()
def write_distance_mat( data, output=None, g=0.1, stepsize=0.2,
nsteps=10, dcap=None, metric='euclidean' ):
"""
For use with
perseus distmat <path to distance matrix file> <output string>
"""
# load from file if data is not an array of points
if not hasattr( data, '__index__' ):
# .npy or .txt
try:
data = numpy.load( data )
except IOError:
data = numpy.loadtxt( data )
dist = distance.pdist( data, metric=metric )
if output:
dist = distance.squareform( dist )
space = " "
# num points x dimension
nx, ny = data.shape
if not dcap:
dcap = ny
if not output.endswith( 'txt' ):
output += '.txt'
with open( output, 'w' ) as fh:
# nx x nx distance matrix
fh.write( str( nx )+'\n' )
# initial threshold, step size, num steps, dimension cap==ny
params = [ str( g ), str( stepsize ), str( nsteps ), str( dcap ) ]
params = space.join( params )
params += '\n'
fh.write( params )
for row in dist:
r = [ str( x ) for x in row ]
r = space.join( r )
r += '\n'
fh.write( r )
return dist
else:
return distance.squareform( dist )
def plot_frame( frame ):
"""
Use pylab.imshow() to plot a frame (npy array).
Note: The boundary should be removed, thus there will not be a halo
"""
data = numpy.load(frame)
fig = plt.figure()
plt.title("RBC frame")
plt.imshow(data)
plt.colorbar()
plt.show()
def plot_block( files, m, n ):
"""
- files is directory for cell
- m starting index, n ending index
Use pylab.imshow() to plot a frame (npy array).
Note: The boundary should be removed, thus there will not be a halo
"""
#Get all frames
if os.path.isdir (files):
fdir = files + '/'
dlist = os.listdir(fdir)
frames = []
for f in dlist:
if f.endswith('npy') and not os.path.isdir(fdir+f):
frames.append(f)
else:
print 'Error - input is not a directory'
return
#Sort frames
frames . sort(key=natural_key)
stack = []
#load npy arrays
for ind in xrange(len(frames)):
if ind >= m and ind <= n:
stack.append(numpy.load(files + frames[ind]))
#Create 3d array
d = numpy.dstack(stack)
d = numpy.rollaxis(d,-1)
fig = plt.figure()
plt.title("RBC Stack")
plt.imshow(d[1])
plt.colorbar()
plt.show()
def write_All_Cells ( dir, output):
"""
Collects all cells in same directory (say '../New/frames') and writes perseus output (currently writes sparse output)
Note: dir, output need to end with '/'
Ex dir: '/data/rbc-Diff-Frames/New/frames/10-diff/'
ex output: '/home/kellys/Persistence/Perseus/Formatted_Cells/frames/New/diff-10/'
"""
cellFolders = []
cellOutput = []
dlist = os.listdir(dir)
for f in dlist:
if os.path.isdir(dir+f) and (f.startswith('new') or f.startswith('old')):
cellFolders.append(dir+f+'/')
cellOutput.append(output+f+'/')
while cellFolders: #if cellF not empty
write_Cell (cellFolders.pop(0), cellOutput.pop(0) )#call write Cell
def write_Cell ( files, output ):
"""
- Using write_file for cell's stack of frames
- output is location of desired output directory
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
output = '/home/kellys/Dropbox/rbc_shared/'
- warning: careful use of output (probably not in Dropbox)
"""
#grab files, skip directories
if os.path.isdir (files):
fdir = files + '/'
dlist = os.listdir(fdir)
frames = []
for f in dlist:
if f.endswith('npy') and not os.path.isdir(fdir+f):
frames.append(f)
else:
print 'error'
#sort frames
frames.sort(key=natural_key)
#for all frames call write_file
for frame in frames:
write_sparse_file ( fdir+frame, output+frame.rstrip('.npy'))
def write_Comp_Cell ( compArray, output ):
"""
- WRITE COMPLIMENT CELL
- Using write_file for cell's stack of frames
- output is location of desired output directory
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
output = '/home/kellys/Dropbox/rbc_shared/'
- warning: careful use of output (probably not in Dropbox)
"""
data = numpy.load(compArray)
#dimension array
numDim = 2
#Write to output
for ind in xrange(data.shape[0]):
#Open file
text = open (output +'_' + str(ind) + ".txt", "w")
#Write number of dimensions
text . write (str(numDim) + "\n")
#Write strictly 2D file
size = data[ind].shape
for i in range(0,size[0]):
for j in range(0,size[1]):
if int(data[ind][i][j]) != 0:
pos = str(i) + ' ' + str(j) + ' '
text . write (pos + str(int(data[ind][i][j])) + "\n")
text . close()
def write_Cell_Block ( cAbbr ):
"""
- WRITE 3D NUMPY FILE INTO PERSEUS FORMAT
- NOTE: cnames is a dictionary of paths to numpy files
fnames is a dictionary of paths to output file (perseus format)
- Using write_file for cell's stack of frames
- output is location of desired output directory
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
output = '/home/kellys/Dropbox/rbc_shared/'
- warning: careful use of output (probably not in Dropbox)
"""
cnames = pkl.load(open('moreCellNames.pkl','r'))
fnames = pkl.load(open('moreFileNames.pkl','r'))
data = numpy.load(cnames[cAbbr])
output = fnames[cAbbr]
#dimension array
numDim = 2
#Write to output
for ind in xrange(data.shape[-1]):
#Open file
name = output.split('/')[-2] + '-concatenated-ASCII_'+str(ind)
text = open (output + name + ".txt", "w")
#Write number of dimensions
text . write (str(numDim) + "\n")
#Write strictly 2D file
size = data[:,:,ind].shape
for i in range(0,size[0]):
for j in range(0,size[1]):
if int(data[i][j][ind]) != 0:
pos = str(i) + ' ' + str(j) + ' '
text . write (pos + str(int(data[i][j][ind])) + "\n")
text . close()
def write_block_dense ( files, output, m, n ):
"""
- Write contents of 3D stack of frames to output
required by perseus
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
- DENSE CUBICAL TOPLEX FORMAT
- m is index of frame to start from (index start at 0)
- n is index of frame to end at
"""
#grab files, skip directories
if os.path.isdir (files):
fdir = files + '/'
dlist = os.listdir(fdir)
frames = []
for f in dlist:
if f.endswith('npy') and not os.path.isdir(fdir+f):
frames.append(f)
else:
print 'Error - First argument not a directory!!'
return
#Get necessary dimensions
numDim = 3
dataDims = []
dataDims . append(n-m+1)
imgSize = numpy.load(files + frames[0]).shape
for size in imgSize:
dataDims . append (size)
#Error handling for block bounds
if n > m:
if m < 0:
print 'Starting index out of bounds'
return
if n > (len(frames)-1):
print 'End index out of bounds'
return
#Sort frames
frames.sort(key=natural_key)
#Open file ( and write dimensions )
text = open (output + ".txt", "w")
text . write (str(numDim) + "\n")
for i in xrange(len(dataDims)):
#text . write (str(dataDims[i]) + "\n") #Iterate forward
text . write (str(dataDims[-1-i]) + "\n")#Iterate backward
#Write data
for ind, frame in zip(xrange(len(frames)),frames):
if ind >= m and ind <= n: #if within block
data = numpy.load(files+frame)
for i in xrange(imgSize[0]):
for j in xrange(imgSize[1]):
if int(data[i][j]) == 0:
text . write (str(-1) + "\n")
else:
text . write (str(int(data[i][j])) + "\n")
text . close()
def write_sparse ( files, output, m ):
"""
- Write contents of frames to output
required by perseus
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
- Currently in beta
- m is index of frame to start from (index start at 0)
- n is index of frame to end at
"""
#grab files, skip directories
if os.path.isdir (files):
fdir = files + '/'
dlist = os.listdir(fdir)
frames = []
for f in dlist:
if f.endswith('npy') and not os.path.isdir(fdir+f):
frames.append(f)
else:
print 'Error - First argument not a directory!!'
return
#dimension array
numDim = 2
nFrames = len(frames)
dim = []
#Error handling for bounds
if m < 0 or m > (nFrames-1):
print 'index out of bounds'
return
#append number of frames, Perseus doesn't use this
#dim.append( nFrames )
size = numpy.load(files + frames[0]).shape
#append sizes of frame, Perseus doesn't use this
#dim.append(size[0])
#dim.append(size[1])
#Sort frames
frames.sort(key=natural_key)
#Write to output
#Open file
text = open (output + ".txt", "w")
#Write number of dimensions
text . write (str(numDim) + "\n")
#Write strictly 2D file
data = numpy.load(files + frames[m])
for i in xrange(size[0]):
for j in xrange(size[1]):
if int(data[i][j]) != 0:
pos = str(i) + ' ' + str(j) + ' '
text . write (pos + str(int(data[i][j])) + "\n")
text . close()
def write_block_sparse ( files, output, m, n ):
"""
- Write contents of 3D stack of frames to output
required by perseus
e.g. files = '/data/jberwald/wyss/data/Cells_Jesse/Old/frames/old_120125/'
- Currently in beta
- m is index of frame to start from (index start at 0)
- n is index of frame to end at
"""
#grab files, skip directories
if os.path.isdir (files):
fdir = files + '/'
dlist = os.listdir(fdir)
frames = []
for f in dlist:
if f.endswith('npy') and not os.path.isdir(fdir+f):
frames.append(f)
else:
print 'Error - First argument not a directory!!'
return
#dimension array
numDim = 3
nFrames = len(frames)
dim = []
#Error handling for block bounds
if n > m:
if m < 0:
print 'Starting index out of bounds'
return
if n > (nFrames-1):
print 'End index out of bounds'
return
#dim.append( nFrames )
size = numpy.load(files + frames[0]).shape
#Sort frames
frames.sort(key=natural_key)
#Write to output
#Open file
text = open (output + ".txt", "w")
#Write number of dimensions
text . write (str(numDim) + "\n")
#Write strictly 2D file
for ind, frame in zip(xrange(len(frames)),frames):
if ind >= m and ind <= n: #if within block
data = numpy.load(files + frame)
for i in xrange(size[0]):
for j in xrange(size[1]):
if int(data[i][j]) != 0:
pos = str(ind) + ' ' + str(i) + ' ' + str(j) + ' '
text . write ( pos + str(int(data[i][j])) + "\n")
text . close()
def natural_key(string_):
"""
Use with frames.sort(key=natural_key)
"""
return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)]