# tree_parse.py

# -*- coding: utf-8 -*-
"""
Divide the airway tree into branches

"""

import numpy as np
import os
import json
from scipy import ndimage
import skimage.measure as measure
from skimage.morphology import skeletonize_3d
from typing import Tuple, Any, Dict
import SimpleITK as sitk
from atm_evaluation import branch_detected_calculation, tree_length_calculation
import glob
from multiprocessing import Queue, Process, cpu_count, Manager


def find_bb_3D(label):
    """Return a padded bounding box around the non-zero region of a 3-D array.

    For every axis the tight extent of the non-zero projection is enlarged by
    8 voxels in total (4 on each side), capped at the size of the volume, and
    shifted if necessary so that the window stays inside the volume.

    Parameters
    ----------
    label : np.ndarray
        3-D array; positions whose projection sums are non-zero are treated
        as foreground.

    Returns
    -------
    tuple
        ``(xl, xr, yl, yr, zl, zr)`` as plain ints, usable as half-open slice
        bounds ``label[xl:xr, yl:yr, zl:zr]``.

    Raises
    ------
    ValueError
        If ``label`` is not 3-D or contains no non-zero voxels.
    """
    if len(label.shape) != 3:
        # The previous implementation called os._exit() without its required
        # status argument, which only produced an accidental TypeError.
        raise ValueError("The dimension of input is not 3!")

    padding = 8  # total extra voxels per axis (4 on each side)
    bounds = []
    for axis in range(3):
        other_axes = tuple(a for a in range(3) if a != axis)
        occupied = np.where(np.sum(label, axis=other_axes))[0]
        if occupied.size == 0:
            raise ValueError("Cannot compute a bounding box: input has no non-zero voxels")

        first = int(occupied.min())
        extent = int(occupied.max()) - first + 1
        axis_size = label.shape[axis]

        # Plain Python ints are used throughout so large volumes cannot
        # overflow a fixed-width dtype.
        window = min(extent + padding, axis_size)
        low = first - (window - extent) // 2
        # Shift the window back inside [0, axis_size] without changing its size.
        low = min(max(low, 0), axis_size - window)
        bounds.extend((low, low + window))

    return tuple(bounds)

def large_connected_domain(label):
    """Keep the largest connected component of ``label`` and fill its holes.

    Components are found with ``skimage.measure.label`` using
    ``connectivity=1``. The component with the most voxels is kept, passed
    through ``ndimage.binary_fill_holes`` and returned as a ``uint8`` mask.
    """
    components, count = measure.label(label, return_num=True, connectivity=1)

    # Voxel count of every component (component ids start at 1).
    sizes = np.array(
        [np.count_nonzero(components == comp_id) for comp_id in range(1, count + 1)],
        dtype=np.float64,
    )
    biggest_id = np.argsort(sizes)[-1] + 1

    biggest_mask = (components == biggest_id).astype(np.uint8)
    filled = ndimage.binary_fill_holes(biggest_mask)
    return filled.astype(np.uint8)

def skeleton_parsing(skeleton):
    """Split a 3-D skeleton into separate branch segments.

    Skeleton voxels whose 3x3x3 neighbourhood (the voxel itself included)
    holds more than 3 skeleton voxels are removed, which disconnects the
    skeleton at its junctions. The remaining pieces are labelled with
    26-connectivity, pieces shorter than 5 voxels are discarded, and the
    result is labelled again.

    Parameters
    ----------
    skeleton : np.ndarray
        3-D skeleton; any non-zero value counts as a skeleton voxel.

    Returns
    -------
    skeleton_parse : np.ndarray
        Copy of ``skeleton`` (same dtype) with junction voxels and short
        pieces set to 0.
    cd : np.ndarray
        Integer label map of the remaining pieces (0 is background).
    num : int
        Number of labelled pieces.
    """
    full_neighborhood = ndimage.generate_binary_structure(3, 3)

    # Count neighbours on an explicit 0/1 uint8 mask: convolving in the input
    # dtype saturates for boolean skeletons and wraps around for skeletons
    # stored as 0/255, either of which hides the junctions.
    occupied = (skeleton > 0).astype(np.uint8)
    neighbor_count = ndimage.convolve(occupied, full_neighborhood) * occupied

    skeleton_parse = skeleton.copy()
    skeleton_parse[neighbor_count > 3] = 0
    cd, num = ndimage.label(skeleton_parse, structure=full_neighborhood)

    # Remove small branches. One bincount over the label map replaces a full
    # scan of the volume per label.
    piece_sizes = np.bincount(cd.ravel(), minlength=num + 1)
    too_small = piece_sizes < 5
    too_small[0] = False  # label 0 is background, never a branch
    skeleton_parse[too_small[cd]] = 0

    cd, num = ndimage.label(skeleton_parse, structure=full_neighborhood)
    return skeleton_parse, cd, num

def tree_parsing_func(skeleton_parse, label, cd):
    """Assign every voxel of ``label`` the branch id of its nearest skeleton voxel.

    Parameters
    ----------
    skeleton_parse : np.ndarray
        3-D skeleton with junctions removed; non-zero marks skeleton voxels.
    label : np.ndarray
        3-D mask; the result is multiplied by it, so voxels where ``label``
        is 0 stay 0.
    cd : np.ndarray
        Label map of the skeleton pieces, same shape as ``skeleton_parse``.

    Returns
    -------
    np.ndarray
        ``cd`` sampled at the nearest skeleton voxel of each position,
        multiplied by ``label``.
    """
    # Only the index map of the nearest skeleton voxel is needed, so the
    # distance map itself is not requested.
    nearest = ndimage.distance_transform_edt(
        skeleton_parse == 0, return_distances=False, return_indices=True
    )
    return cd[nearest[0, ...], nearest[1, ...], nearest[2, ...]] * label

def loc_trachea(tree_parsing, num):
    """Return the label (1-based) of the branch with the largest voxel count.

    The largest branch is taken to be the trachea.

    Parameters
    ----------
    tree_parsing : np.ndarray
        Integer label map with branch labels ``1..num`` (0 is background).
    num : int
        Number of branch labels.

    Returns
    -------
    Label of the branch with the most voxels.
    """
    # A single bincount replaces one full-volume comparison per label.
    counts = np.bincount(np.ravel(tree_parsing), minlength=num + 1)[1:num + 1]
    # Sorting the same float64 volumes as before keeps the choice among
    # equally sized branches unchanged.
    volume = counts.astype(np.float64)
    volume_sort = np.argsort(volume)
    trachea = (volume_sort[-1] + 1)
    return trachea

def adjacent_map(tree_parsing, num):
    """Build the branch adjacency matrix of a labelled tree.

    For each branch label ``1..num`` the branch mask is cropped to its padded
    bounding box and dilated by one voxel with a face-connected structuring
    element. Every other label found in that one-voxel shell is recorded as
    adjacent.

    Returns a ``(num, num)`` uint8 matrix where entry ``[i, j]`` is 1 when
    branch ``j + 1`` touches branch ``i + 1``.
    """
    ad_matric = np.zeros((num, num), dtype=np.uint8)
    face_structure = ndimage.generate_binary_structure(3, 1)

    for branch in range(num):
        mask = (tree_parsing == (branch + 1)).astype(np.uint8)
        xl, xr, yl, yr, zl, zr = find_bb_3D(mask)
        window = (slice(xl, xr), slice(yl, yr), slice(zl, zr))
        mask = mask[window]

        # One-voxel shell just outside the branch.
        grown = ndimage.binary_dilation(mask, structure=face_structure).astype(mask.dtype)
        shell = grown - mask

        touching = shell * tree_parsing[window]
        for neighbour in np.unique(touching[touching > 0]):
            ad_matric[branch, neighbour - 1] = 1
    return ad_matric

def parent_children_map(ad_matric, trachea, num):
    """Derive parent/child relations by walking the adjacency matrix level by level.

    The walk starts at the trachea (1-based label ``trachea``), which is
    marked as its own parent and has generation 0. A neighbour without any
    parent becomes a child of the current branch, one generation deeper. A
    neighbour that already has a parent also gets the current branch as a
    parent when it lies exactly one generation below it.

    Returns ``(parent_map, children_map, generation)``:
    ``parent_map[c, p] == 1`` when ``p`` is a parent of ``c``,
    ``children_map[p, c] == 1`` when ``c`` is a child of ``p``, and
    ``generation[i]`` is the depth of branch ``i``; all are uint8.
    """
    parent_map = np.zeros((num, num), dtype=np.uint8)
    children_map = np.zeros((num, num), dtype=np.uint8)
    generation = np.zeros((num), dtype=np.uint8)

    root = trachea - 1
    parent_map[root, root] = 1

    frontier = [root]
    while frontier:
        next_frontier = []
        # Visit the current level from its last entry to its first.
        for node in reversed(frontier):
            for neighbour in np.where(ad_matric[node, :] > 0)[0]:
                if parent_map[neighbour, :].sum() != 0:
                    # Already reached: add an extra parent only when the
                    # neighbour sits exactly one level below this node.
                    if generation[node] + 1 == generation[neighbour]:
                        parent_map[neighbour, node] = 1
                        children_map[node, neighbour] = 1
                    continue
                parent_map[neighbour, node] = 1
                children_map[node, neighbour] = 1
                generation[neighbour] = generation[node] + 1
                next_frontier.append(neighbour)
        frontier = next_frontier
    return parent_map, children_map, generation

def tree_refinement(parent_map, children_map, tree_parsing, num, trachea):
    """Merge problematic branches and renumber the remaining labels compactly.

    Two clean-up steps are applied, in this order:

    1. For every branch with more than one parent, all of its parents are
       fused into the parent with the smallest index.
    2. Every branch with exactly one child absorbs that child.

    Afterwards the surviving labels are shifted down so that they are again
    contiguous.

    Parameters
    ----------
    parent_map, children_map : np.ndarray
        ``(num, num)`` relation matrices as produced by
        ``parent_children_map``.
    tree_parsing : np.ndarray
        Integer label map with labels ``1..num``; it is updated in place.
    num : int
        Number of branch labels before refinement.
    trachea : int
        Unused; kept for interface compatibility.

    Returns
    -------
    tree_parsing : np.ndarray
        The same array object, relabelled.
    num : int
        Number of labels after refinement.
    """
    multi_parent_nodes = np.where(np.sum(parent_map, axis=1) > 1)[0]
    single_child_nodes = np.where(np.sum(children_map, axis=1) == 1)[0]

    # All relabelling is composed in a small lookup table and applied to the
    # volume once at the end, instead of rescanning the volume per merge.
    max_label = num
    if tree_parsing.size:
        max_label = max(max_label, int(tree_parsing.max()))
    lut = np.arange(max_label + 1)

    merged_into = {}  # deleted branch index -> index it was merged into

    def _merge(source, target):
        lut[lut == source + 1] = target + 1
        merged_into[source] = target

    # First, fuse the parents of one child.
    for node in multi_parent_nodes:
        parents = [int(p) for p in np.where(parent_map[node, :] > 0)[0]]
        # The first parent may itself have been fused into another branch
        # already; follow the chain so voxels never land on a deleted label.
        target = parents[0]
        while target in merged_into:
            target = merged_into[target]
        for other in parents[1:]:
            if other not in merged_into:
                _merge(other, target)

    # Second, delete the only child.
    for cur_loc in single_child_nodes:
        cur_loc = int(cur_loc)
        if cur_loc in merged_into:
            continue
        cur_child = int(np.where(children_map[cur_loc, :] == 1)[0][0])
        if cur_child not in merged_into:
            _merge(cur_child, cur_loc)

    # NOTE: a third step was once sketched here as commented-out code: it
    # merged into the trachea those trachea children whose mean coordinate
    # along the first axis is greater than the trachea's. It is not active.

    # Delete the problematic blocks from the tree: shift every surviving
    # label down by the number of deleted indices below it.
    shift = 0
    for i in range(num):
        if i in merged_into:
            shift += 1
        elif shift:
            lut[lut == i + 1] = i + 1 - shift

    tree_parsing[...] = lut.astype(tree_parsing.dtype)[tree_parsing]
    num = num - len(merged_into)

    return tree_parsing, num

def whether_refinement(parent_map, children_map, tree_parsing, num, trachea):
    """Tell whether ``tree_refinement`` would merge at least one branch.

    Refinement has work to do when some branch has more than one parent or
    some branch has exactly one child. Either condition guarantees that at
    least one branch gets merged, so this check is equivalent to running the
    merge steps and testing whether anything was deleted.

    This is a pure predicate: ``tree_parsing`` is no longer relabelled here
    as a side effect. ``tree_parsing``, ``num`` and ``trachea`` are unused
    and kept only for interface compatibility.

    Returns
    -------
    bool
        A plain Python ``bool``, since ``tree_parse`` compares the result
        with ``is True``.
    """
    has_multi_parent = bool((np.sum(parent_map, axis=1) > 1).any())
    has_single_child = bool((np.sum(children_map, axis=1) == 1).any())
    return has_multi_parent or has_single_child


def tree_parse(label: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Divide a segmented airway tree into labelled branches.

    The mask is binarised, reduced to its largest connected component and
    skeletonised. The skeleton is split into segments, every mask voxel is
    assigned to a segment, and the branch tree is refined repeatedly until
    ``whether_refinement`` reports nothing left to merge.

    Returns ``(tree_parsing, skeleton)``: the branch label map and the
    skeleton of the cleaned mask.
    """
    airway = large_connected_domain((label > 0).astype(np.uint8))
    skeleton = skeletonize_3d(airway)
    skeleton_parse, cd, num = skeleton_parsing(skeleton)
    tree_parsing = tree_parsing_func(skeleton_parse, airway, cd)

    while True:
        # Rebuild the tree topology for the current labelling.
        trachea = loc_trachea(tree_parsing, num)
        ad_matric = adjacent_map(tree_parsing, num)
        parent_map, children_map, _ = parent_children_map(ad_matric, trachea, num)

        needs_refinement = whether_refinement(parent_map, children_map, tree_parsing, num, trachea)
        if needs_refinement is not True:
            break
        tree_parsing, num = tree_refinement(parent_map, children_map, tree_parsing, num, trachea)

    return tree_parsing, skeleton


def compute_bd_td(pred: np.ndarray, label: np.ndarray) -> Tuple[float, float]:
    """Compute the branch-detected ratio and the tree length for one case.

    ``label`` is parsed into branches and a skeleton with ``tree_parse``;
    both metrics are then evaluated for ``pred`` by the ``atm_evaluation``
    helpers.

    Returns ``(detected_branch_ratio, tree_length)``.
    """
    branch_labels, centerline = tree_parse(label)

    # Only the ratio (third value) of the branch statistics is reported.
    _, _, detected_branch_ratio = branch_detected_calculation(pred, branch_labels, centerline)
    tree_length = tree_length_calculation(pred, centerline)

    return detected_branch_ratio, tree_length


def worker_compute_bd_td(queue: Queue, 
                         output_dict: Dict[str, Dict[str, Any]]):
    """Worker loop: evaluate (prediction, label) file pairs taken from ``queue``.

    Items are consumed until the string ``"quit"`` is received. For each
    pair both images are read with SimpleITK, the metrics are computed and
    stored in ``output_dict`` under the label file path.
    """
    # iter(callable, sentinel) keeps calling queue.get() until "quit" arrives.
    for pred_file, label_file in iter(queue.get, "quit"):
        print(f"Processing: {pred_file}/{label_file}")
        pred = sitk.GetArrayFromImage(sitk.ReadImage(pred_file)).astype(np.uint8)
        label = sitk.GetArrayFromImage(sitk.ReadImage(label_file)).astype(np.uint8)

        bd, td = compute_bd_td(pred, label)
        results = {
            label_file: {
                "pred_file": pred_file,
                "bd": bd,
                "td": td,
            }
        }
        print(f"Results: {results}")

        output_dict.update(results)


def multiprocess_compute_bd_td_file_list(pred_folder:str, 
                                         label_folder: str, 
                                         nworkers: int = cpu_count()):
    '''
    Compute BD and TD for every ``*.nii.gz`` label file using worker processes.

    Assumes files in pred_folder and label_folder have the same name.

    The per-case values and their mean/std are printed and written to
    ``tree_parse_metrics.json`` inside ``pred_folder``.

    Raises AssertionError when a label file has no prediction file of the
    same name in ``pred_folder``.
    '''
    # Pair up and validate all inputs BEFORE starting any worker: failing
    # after the workers were started would leave them blocked on the queue
    # forever and keep the interpreter from exiting.
    jobs = []
    for label_file in glob.glob(os.path.join(label_folder, "*.nii.gz")):
        # A direct path check avoids treating characters such as "[" in the
        # file name as glob wildcards.
        pred_file = os.path.join(pred_folder, os.path.basename(label_file))
        if not os.path.exists(pred_file):
            # AssertionError is kept for compatibility with the previous
            # assert-based check; raising it explicitly survives ``python -O``.
            raise AssertionError(f"Found no prediction file {pred_file} for {label_file}")
        jobs.append((pred_file, label_file))

    queue: Queue = Queue()
    manager = Manager()
    output_dict: Dict[str, Dict[str, Any]] = manager.dict()
    ps = [Process(target=worker_compute_bd_td, args=(queue, output_dict)) for _ in range(nworkers)]
    for p in ps:
        p.start()

    for job in jobs:
        queue.put(job)

    # One "quit" sentinel per worker so that every worker loop terminates.
    for _ in range(nworkers):
        queue.put("quit")
    for p in ps:
        p.join()

    pred_IDs = []
    IDs = []
    bds = []
    tds = []
    for label_file, metrics in output_dict.items():
        IDs.append(os.path.basename(label_file))
        pred_IDs.append(os.path.basename(metrics["pred_file"]))
        bds.append(metrics["bd"])
        tds.append(metrics["td"])
    td_mean = np.array(tds).mean()
    td_std = np.array(tds).std()
    bd_mean = np.array(bds).mean()
    bd_std = np.array(bds).std()
    output_data = {"IDs": IDs,
                   "pred_IDs": pred_IDs,
                   "BDS": bds,
                   "TDS": tds,
                   "BD": {"mean": bd_mean, 
                          "std": bd_std},
                   "TD": {"mean": td_mean,
                          "std": td_std}}
    for k, v in output_data.items():
        print(f"{k}: {v}")

    with open(os.path.join(pred_folder, "tree_parse_metrics.json"), 'w') as output_file:
        json.dump(output_data, output_file)
    

if __name__=='__main__':
    # Command-line entry point. Two modes are supported:
    #   --pred FILE --label FILE                  evaluate a single case
    #   --pred_folder DIR --label_folder DIR      evaluate every case in a folder
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("--pred", type=str, default=None)
    parser.add_argument("--label", type=str, default=None)
    parser.add_argument("--pred_folder", type=str, default=None)
    parser.add_argument("--label_folder", type=str, default=None)
    parser.add_argument("--nworkers", type=int, default=cpu_count())
    args = parser.parse_args()
    if args.pred is not None and args.label is not None:
        pred = sitk.GetArrayFromImage(sitk.ReadImage(args.pred)).astype(np.uint8)
        label = sitk.GetArrayFromImage(sitk.ReadImage(args.label)).astype(np.uint8)
        bd, td = compute_bd_td(pred, label)
        print(args)
        print(f"BD: {bd}")
        print(f"TD: {td}")
    elif args.pred_folder is not None and args.label_folder is not None:
        multiprocess_compute_bd_td_file_list(args.pred_folder, args.label_folder, args.nworkers)
    else:
        # An incomplete argument combination used to exit silently without
        # doing anything; report it as a usage error instead.
        parser.error("provide either --pred and --label, or --pred_folder and --label_folder")