# generate_geojson.py
import sys
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import datetime
import pandas as pd
import numpy as np
import diplib as dip
import geojson
# Folder holding the input data and the 'analysis' subfolder; can be overridden
# with the -data= command line argument. Falls back to /home/pipex/data (the
# default advertised in the usage text) when PIPEX_DATA is not set, so that
# later os.path.join calls never receive None.
data_folder = os.environ.get('PIPEX_DATA', '/home/pipex/data')
# Marker names to export; when left empty the marker columns detected in
# cell_data.csv are used instead.
included_markers = []
# Name of the cell_data.csv column exported as each cell's classification name
# (empty string: no classification is written).
cluster_id = ""
# Name of the cell_data.csv column exported as each cell's classification
# color (only read when cluster_id is set).
cluster_color = ""
def options(argv):
    """Parse the command line parameters and store them in module globals.

    Each argument is a single ``-name=value`` token:

    * ``-data=`` sets ``data_folder``.
    * ``-included_markers=`` sets ``included_markers`` to the comma separated
      names, ignoring empty entries.
    * ``-cluster_id=`` sets ``cluster_id``.
    * ``-cluster_color=`` sets ``cluster_color``.

    The usage text is printed and the process exits through ``sys.exit()``
    when ``argv`` is empty or when an argument starts with ``-help``.
    Arguments that match none of the prefixes above are ignored.

    argv: list of command line arguments, without the program name.
    """
    global data_folder, included_markers, cluster_id, cluster_color

    usage = (
        'generate_geojson.py arguments:'
        '\n\t-data=<optional /path/to/images/folder, defaults to /home/pipex/data> : example -> -data=/lab/projectX/images'
        '\n\t-included_markers=<optional, list of present specific markers to include> : example -> -included_markers=AMY2A,SST,GORASP2'
        '\n\t-cluster_id=<optional, name of the column to add as cluster id information from cell_data.csv> : example -> -cluster_id=kmeans'
        '\n\t-cluster_color=<optional, name of the column to add as cluster information color from cell_data.csv> : example -> -cluster_color=kmeans_color'
    )

    if len(argv) == 0:
        print(usage, flush=True)
        sys.exit()

    for arg in argv:
        if arg.startswith('-help'):
            print(usage, flush=True)
            sys.exit()
        elif arg.startswith('-data='):
            data_folder = arg[len('-data='):]
        elif arg.startswith('-included_markers='):
            # Drop empty names (empty value, doubled or trailing commas): an
            # empty marker name can never match a cell_data.csv column.
            names = arg[len('-included_markers='):].split(",")
            included_markers = [name for name in names if name != '']
        elif arg.startswith('-cluster_id='):
            cluster_id = arg[len('-cluster_id='):]
        elif arg.startswith('-cluster_color='):
            cluster_color = arg[len('-cluster_color='):]
# Script entry point: converts the labelled segmentation stored in
# <data_folder>/analysis/segmentation_data.npy into one polygon feature per
# cell, attaches the marker values (and optionally a classification) read from
# <data_folder>/analysis/cell_data.csv, and writes the result to
# <data_folder>/analysis/cell_segmentation_geo.json.
if __name__ == '__main__':
    options(sys.argv[1:])

    # Record this process id in a RUNNING file; it is placed under ./work when
    # the PIPEX_WORK environment variable is defined.
    pidfile_filename = './RUNNING'
    if "PIPEX_WORK" in os.environ:
        pidfile_filename = './work/RUNNING'
    with open(pidfile_filename, 'w', encoding='utf-8') as f:
        f.write(str(os.getpid()))

    # Keep a trace of the start time and of the exact command line used.
    with open(os.path.join(data_folder, 'log_settings_geojson.txt'), 'w+', encoding='utf-8') as f:
        f.write(">>> Start time geojson = " + datetime.datetime.now().strftime(" %H:%M:%S_%d/%m/%Y") + "\n")
        f.write(' '.join(sys.argv))

    print(">>> Start time generate_geojson =", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"), flush=True)

    # Load segmentation data in numpy array format.
    # NOTE(review): allow_pickle=True lets a crafted .npy file execute
    # arbitrary code on load; only use this on trusted segmentation files.
    labels = np.load(os.path.join(data_folder, 'analysis', 'segmentation_data.npy'), allow_pickle=True)
    df = pd.read_csv(os.path.join(data_folder, 'analysis', 'cell_data.csv'))

    # Getting the list of marker names: every column located after "y".
    column_names = list(df.columns.values)
    markers = column_names[(df.columns.get_loc("y") + 1):]

    # Safeguard if analysis.py has been executed before and cluster columns
    # already exist: keep only the columns placed before the first cluster
    # column found, checking the candidates in this priority order.
    for first_cluster_column in ('cluster_id', 'leiden', 'kmeans'):
        if first_cluster_column in markers:
            markers = markers[:markers.index(first_cluster_column)]
            break

    # If a specific list of markers is informed, we use it.
    if len(included_markers) > 0:
        markers = included_markers

    # Calculate segmentation polygons via fast chaincodes from diplib.
    chaincodes = dip.GetImageChainCodes(labels.astype('uint32'))
    borders = {}
    for chaincode in chaincodes:
        borders[chaincode.objectID] = np.array(chaincode.Polygon()).tolist()

    print(">>> Labelled regions to approximate polygons conversion finished =", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"), flush=True)

    # Map each cell id to the position of its first row in cell_data.csv so
    # that every lookup below is O(1) instead of a scan of the whole table.
    row_of_cell = {}
    for position, cid in enumerate(df['cell_id'].tolist()):
        row_of_cell.setdefault(cid, position)

    # Column arrays are fetched once, outside the per-cell loop. An unknown
    # marker or cluster column name raises KeyError here, before any output
    # is produced.
    marker_columns = [(marker, df[marker].values) for marker in markers]
    cluster_names = None
    cluster_colors = None
    if cluster_id != '':
        cluster_names = df[cluster_id].values
        if cluster_color != '':
            cluster_colors = df[cluster_color].values

    # Generating geojson data to import in qupath.
    GEOdata = []
    for label in borders:
        # Skip segmented regions that have no row in cell_data.csv.
        if label not in row_of_cell:
            continue
        row = row_of_cell[label]

        # Close the polygon ring by repeating its first vertex at the end.
        final_coords = borders[label]
        final_coords.append(final_coords[0])

        cell_data = {
            "id": "cell" + str(label),
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": [final_coords],
            },
            "properties": {
                "name": "cell" + str(label),
                "object_type": "detection",
                "isLocked": "false",
                "collectionIndex": 0,
                "measurements": [
                    {"name": marker, "value": str(values[row])}
                    for marker, values in marker_columns
                ],
            },
        }

        # If cluster_id parameter is selected, add cluster name and color
        # (the color is an empty string when no color column was given).
        if cluster_names is not None:
            cell_data["properties"]["classification"] = {
                "name": str(cluster_names[row]),
                "colorRGB": str(cluster_colors[row]) if cluster_colors is not None else '',
            }

        GEOdata.append(cell_data)

    # Dump GEOdata variable to json file.
    with open(os.path.join(data_folder, 'analysis', 'cell_segmentation_geo.json'), 'w', encoding='utf-8') as outfile:
        geojson.dump(GEOdata, outfile)

    print(">>> End time generate_geojson =", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"), flush=True)