Merge PR #381 | Add tests for and fix nuclei-assisted post-processing

kreshuklab · Jan 22, 2025 · 220cc99 · 220cc99
2 parents fcadb5a + 9e93736
commit 220cc99
Show file tree

Hide file tree

Showing 8 changed files with 300 additions and 114 deletions.
diff --git a/plantseg/functionals/dataprocessing/advanced_dataprocessing.py b/plantseg/functionals/dataprocessing/advanced_dataprocessing.py
@@ -256,46 +256,53 @@ def fix_over_segmentation(
 def fix_over_under_segmentation_from_nuclei(
     cell_seg: np.ndarray,
     nuclei_seg: np.ndarray,
-    threshold_merge: float = 0.33,
-    threshold_split: float = 0.66,
-    quantiles_nuclei: tuple[float, float] = (0.3, 0.99),
+    threshold_merge: float,
+    threshold_split: float,
+    quantile_min: float,
+    quantile_max: float,
     boundary: np.ndarray | None = None,
 ) -> np.ndarray:
     """
-    Corrects over-segmentation and under-segmentation of cells based on a trusted nuclei segmentation.
+    Correct over-segmentation and under-segmentation of cells based on nuclei information.
 
     This function uses information from nuclei segmentation to refine cell segmentation by first identifying
     over-segmented cells (cells mistakenly split into multiple segments) and merging them. It then corrects
     under-segmented cells (multiple nuclei within a single cell) by splitting them based on nuclei position
     and optional boundary information.
 
     Args:
-        cell_seg (np.ndarray): A 2D or 3D array representing segmented cell instances.
-        nuclei_seg (np.ndarray): A 2D or 3D array representing segmented nuclei instances, with the same shape as `cell_seg`.
-        threshold_merge (float, optional): Threshold for identifying over-segmentation, based on the ratio of nuclei overlap.
-            Cells with overlap below this threshold will be merged. Default is 0.33.
-        threshold_split (float, optional): Threshold for identifying under-segmentation, based on the ratio of nuclei overlap.
-            Cells with overlap above this threshold will be split. Default is 0.66.
-        quantiles_nuclei (tuple[float, float], optional): Quantile range for filtering nuclei based on size, helping to ignore
-            outliers such as very small or very large nuclei. Default is (0.3, 0.99).
-        boundary (np.ndarray | None, optional): An optional boundary probability map for the cells. If None, a constant map
-            is used to treat all regions equally. This can help refine under-segmentation correction.
+        cell_seg (np.ndarray): A 2D or 3D array of segmented cells, where each integer represents a unique cell.
+        nuclei_seg (np.ndarray): A 2D or 3D array of segmented nuclei, matching the shape of `cell_seg`.
+            Used to guide merging and splitting.
+        threshold_merge (float): A value between 0 and 1. Cells with less than this fraction of nuclei overlap
+            are considered over-segmented and will be merged. Required; the former default was 0.33.
+        threshold_split (float): A value between 0 and 1. Cells with more than this fraction of nuclei overlap
+            are considered under-segmented and will be split. Required; the former default was 0.66.
+        quantile_min (float): The lower size limit for nuclei, as a fraction (0-1). Nuclei smaller than this
+            quantile are ignored. Required; the former default was 0.3.
+        quantile_max (float): The upper size limit for nuclei, as a fraction (0-1). Nuclei larger than this
+            quantile are ignored. Required; the former default was 0.99.
+        boundary (np.ndarray | None, optional): Optional boundary map of the same shape as `cell_seg`. High values
+            indicate cell boundaries and help refine splitting. If None, all regions are treated equally.
 
     Returns:
-        np.ndarray: The corrected cell segmentation array, of the same shape as the input `cell_seg`.
+        np.ndarray: Corrected cell segmentation array.
     """
+    # Find overlaps between cells and nuclei
     cell_counts, nuclei_counts, cell_nuclei_counts = numba_find_overlaps(cell_seg, nuclei_seg)
-    nuclei_assignments = find_potential_over_seg(nuclei_counts, cell_nuclei_counts, threshold=threshold_merge)
 
+    # Identify over-segmentation and correct it
+    nuclei_assignments = find_potential_over_seg(nuclei_counts, cell_nuclei_counts, threshold=threshold_merge)
     corrected_seg = fix_over_segmentation(cell_seg, nuclei_assignments)
 
+    # Identify under-segmentation and correct it
     cell_counts, nuclei_counts, cell_nuclei_counts = numba_find_overlaps(corrected_seg, nuclei_seg)
     cell_assignments = find_potential_under_seg(
         nuclei_counts,
         cell_counts,
         cell_nuclei_counts,
         threshold=threshold_split,
-        quantiles_clip=quantiles_nuclei,
+        quantiles_clip=(quantile_min, quantile_max),
     )
 
     boundary_pmap = np.ones_like(cell_seg) if boundary is None else boundary

diff --git a/plantseg/tasks/dataprocessing_tasks.py b/plantseg/tasks/dataprocessing_tasks.py
@@ -232,42 +232,37 @@ def remove_false_positives_by_foreground_probability_task(
 def fix_over_under_segmentation_from_nuclei_task(
     cell_seg: PlantSegImage,
     nuclei_seg: PlantSegImage,
-    threshold_merge: float = 0.33,
-    threshold_split: float = 0.66,
-    quantiles_nuclei: tuple[float, float] = (0.3, 0.99),
+    threshold_merge: float,
+    threshold_split: float,
+    quantile_min: float,
+    quantile_max: float,
     boundary: PlantSegImage | None = None,
 ) -> PlantSegImage:
     """
-    Task function to fix over- and under-segmentation in cell segmentation based on nuclear segmentation.
-
-    This function is used to run the over- and under-segmentation correction within a task management system.
-    It uses the segmentation arrays and nuclear information to merge and split cell regions. This task ensures
-    that the provided `cell_seg` and `nuclei_seg` have matching shapes and processes the data accordingly.
+    Task to fix over- and under-segmentation of cells based on nuclear segmentation.
 
     Args:
-        cell_seg (PlantSegImage): Input cell segmentation as a `PlantSegImage` object.
-        nuclei_seg (PlantSegImage): Input nuclear segmentation as a `PlantSegImage` object.
-        threshold_merge (float, optional): Threshold for merging cells based on the overlap with nuclei. Default is 0.33.
-        threshold_split (float, optional): Threshold for splitting cells based on the overlap with nuclei. Default is 0.66.
-        quantiles_nuclei (tuple[float, float], optional): Quantiles used to filter nuclei by size. Default is (0.3, 0.99).
-        boundary (PlantSegImage | None, optional): Optional boundary probability map. If not provided, a constant map is used.
+        cell_seg (PlantSegImage): Input cell segmentation as a PlantSegImage object.
+        nuclei_seg (PlantSegImage): Input nuclear segmentation as a PlantSegImage object.
+        threshold_merge (float): Threshold for merging cells, as a fraction (0-1).
+        threshold_split (float): Threshold for splitting cells, as a fraction (0-1).
+        quantile_min (float): Minimum quantile for filtering nuclei sizes, as a fraction (0-1).
+        quantile_max (float): Maximum quantile for filtering nuclei sizes, as a fraction (0-1).
+        boundary (PlantSegImage | None, optional): Optional boundary probability map for segmentation refinement.
 
     Returns:
-        PlantSegImage: A new `PlantSegImage` object containing the corrected cell segmentation.
+        PlantSegImage: Corrected cell segmentation as a PlantSegImage object.
     """
-    if cell_seg.shape != nuclei_seg.shape:
-        raise ValueError("Cell and nuclei segmentation must have the same shape.")
-
-    out_data = fix_over_under_segmentation_from_nuclei(
+    corrected_data = fix_over_under_segmentation_from_nuclei(
         cell_seg.get_data(),
         nuclei_seg.get_data(),
         threshold_merge=threshold_merge,
         threshold_split=threshold_split,
-        quantiles_nuclei=quantiles_nuclei,
+        quantile_min=quantile_min,
+        quantile_max=quantile_max,
         boundary=boundary.get_data() if boundary else None,
     )
-    new_image = cell_seg.derive_new(out_data, name=f"{cell_seg.name}_nuc_fixed")
-    return new_image
+    return cell_seg.derive_new(corrected_data, name=f"{cell_seg.name}_nuc_fixed")
 
 
 @task_tracker

diff --git a/plantseg/viewer_napari/widgets/dataprocessing.py b/plantseg/viewer_napari/widgets/dataprocessing.py
@@ -483,16 +483,19 @@ def widget_remove_false_positives_by_foreground(
     segmentation_nuclei={'label': 'Nuclear instances'},
     boundary_pmaps={'label': 'Boundary image'},
     threshold={
-        'label': 'Boundary threshold',
-        'tooltip': 'Threshold range for merging (first value) and splitting (second value) cells. ',
+        'label': 'Boundary Threshold (%)',
+        'tooltip': 'Set the percentage range for merging (first value) and splitting (second value) cells. '
+        'For example, "33" means cells with less than 33% overlap with nuclei are merged, and '
+        '"66" means cells with more than 66% overlap are split.',
         'widget_type': 'FloatRangeSlider',
         'max': 100,
         'min': 0,
         'step': 0.1,
     },
     quantile={
-        'label': 'Nuclei size filter',
-        'tooltip': 'Quantile range to filter nuclei size, ignoring outliers.',
+        'label': 'Nuclei Size Filter (%)',
+        'tooltip': 'Set the size range to filter nuclei, represented as percentages. '
+        'For example, "0.3" excludes the smallest 0.3%, and "99.9" excludes the largest 0.1% of nuclei.',
         'widget_type': 'FloatRangeSlider',
         'max': 100,
         'min': 0,
@@ -507,31 +510,37 @@ def widget_fix_over_under_segmentation_from_nuclei(
     quantile=(0.3, 99.9),
 ) -> None:
     """
-    Widget interface for correcting over- and under-segmentation of cells based on nuclei segmentation.
+    Widget for correcting over- and under-segmentation of cells based on nuclei segmentation.
 
-    This GUI interface allows the user to specify the input cell and nuclear segmentations, along with optional boundary
-    probability maps. The user can control the merging and splitting thresholds, and define quantiles to filter out
-    irregular nuclei. The widget schedules the correction task in the background and updates the displayed results accordingly.
+    This widget allows users to adjust cell segmentation by leveraging nuclei segmentation. It supports
+    merging over-segmented cells and splitting under-segmented cells, with optional boundary refinement.
 
     Args:
-        cell_segmentation (Labels): Input label layer for cell segmentation.
-        nuclei_segmentation (Labels): Input label layer for nuclei segmentation.
-        boundary_pmaps (Image | None, optional): Optional boundary probability map or image to assist in segmentation refinement.
-        threshold (tuple[float, float], optional): Threshold range for merging (first value) and splitting (second value) cells.
-            The values should be between 0 and 100, corresponding to 0%-100% overlap. Default is (33, 66).
-        quantile (tuple[float, float], optional): Quantile range to filter nuclei size, ignoring outliers.
-            Values should be between 0 and 100. Default is (0.3, 99.9).
+        segmentation_cells (Labels): Input layer representing segmented cell instances.
+        segmentation_nuclei (Labels): Input layer representing segmented nuclei instances.
+        boundary_pmaps (Image | None, optional): Optional boundary probability map (same shape as input layers).
+            Higher values indicate probable cell boundaries, used to refine segmentation.
+        threshold (tuple[float, float], optional): Merge and split thresholds as percentages (0-100).
+            - The first value is the merge threshold: cells with nuclei overlap below this value are merged.
+            - The second value is the split threshold: cells with nuclei overlap above this value are split.
+            Default is (33, 66).
+        quantile (tuple[float, float], optional): Minimum and maximum quantile values for filtering nuclei sizes (0-100).
+            - The first value excludes the smallest nuclei (e.g., "0.3" excludes the smallest 0.3%).
+            - The second value excludes the largest nuclei (e.g., "99.9" excludes the largest 0.1%).
+            Default is (0.3, 99.9).
 
     Returns:
-        Future[LayerDataTuple]: A future object that contains the corrected segmentation layer once the task completes.
+        None
     """
     ps_seg_cel = PlantSegImage.from_napari_layer(segmentation_cells)
     ps_seg_nuc = PlantSegImage.from_napari_layer(segmentation_nuclei)
-    if boundary_pmaps:
-        ps_pmap_cell_boundary = PlantSegImage.from_napari_layer(boundary_pmaps)
-    else:
-        ps_pmap_cell_boundary = None
-    threshold_merge, threshold_split = threshold[0] / 100, threshold[1] / 100
+    ps_pmap_cell_boundary = PlantSegImage.from_napari_layer(boundary_pmaps) if boundary_pmaps else None
+
+    # Normalize percentages to fractions
+    threshold_merge = threshold[0] / 100
+    threshold_split = threshold[1] / 100
+    quantile_min = quantile[0] / 100
+    quantile_max = quantile[1] / 100
 
     return schedule_task(
         fix_over_under_segmentation_from_nuclei_task,
@@ -540,7 +549,8 @@ def widget_fix_over_under_segmentation_from_nuclei(
             'nuclei_seg': ps_seg_nuc,
             'threshold_merge': threshold_merge,
             'threshold_split': threshold_split,
-            'quantiles_nuclei': quantile,
+            'quantile_min': quantile_min,
+            'quantile_max': quantile_max,
             'boundary': ps_pmap_cell_boundary,
         },
         widgets_to_update=[],

diff --git a/plantseg/viewer_napari/widgets/utils.py b/plantseg/viewer_napari/widgets/utils.py
@@ -66,9 +66,6 @@ def schedule_task(task: Callable, task_kwargs: dict, widgets_to_update: list[Wid
             and return a PlantSegImage or a tuple/list of PlantSegImage, or None.
         task_kwargs (dict): Keyword arguments for the function.
         widgets_to_update (list[Widget] | None, optional): Widgets to be updated with the result. Defaults to None.
-
-    Returns:
-        Future: A Future object representing the asynchronous execution of the task.
     """
 
     if hasattr(task, '__plantseg_task__'):

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -106,3 +106,42 @@ def prediction_config(tmpdir):
     # Add the temporary path to the config
     config["path"] = str(tmp_path)  # Ensure the path is a string
     return config
+
+
+@pytest.fixture
+def complex_test_data():
+    """
+    Generates a complex 3D dataset with both under-segmented and over-segmented cells.
+
+    Returns:
+        tuple[np.ndarray, np.ndarray, np.ndarray]: cell segmentation, nuclei segmentation, and boundary probability map.
+    """
+    # Create a 3D grid of zeros
+    cell_seg = np.zeros((10, 10, 10), dtype=np.uint16)
+    nuclei_seg = np.zeros_like(cell_seg, dtype=np.uint16)
+
+    # Define cells with under-segmentation (multiple nuclei in one cell)
+    # Cell 1: covers (2, 2, 2) to (5, 5, 5), contains two nuclei
+    cell_seg[2:6, 2:6, 2:6] = 1
+    nuclei_seg[2:4, 2:3, 2:3] = 1
+    nuclei_seg[4:6, 5:6, 5:6] = 2
+
+    # Define cells with over-segmentation (one nucleus split into multiple cells)
+    # Cells 2 and 3: together cover (6, 6, 6) to (9, 9, 9), with one nucleus overlapping both cells
+    cell_seg[6:8, 6:10, 6:10] = 2
+    cell_seg[8:10, 6:10, 6:10] = 3
+    nuclei_seg[7:9, 7:9, 7:9] = 3
+
+    # Define another under-segmented region with a large cell and multiple nuclei
+    # Cell 4: covers (1, 1, 6) to (3, 3, 8), contains two nuclei
+    cell_seg[1:4, 1:4, 6:9] = 4
+    nuclei_seg[1:2, 1:2, 6:7] = 4
+    nuclei_seg[3:4, 3:4, 8:9] = 5
+
+    # Generate a boundary probability map with higher values on the edges of the cells
+    boundary_pmap = np.ones_like(cell_seg, dtype=np.float32)
+    boundary_pmap[2:6, 2:6, 2:6] = 0.2
+    boundary_pmap[6:8, 6:8, 6:8] = 0.2
+    boundary_pmap[1:4, 1:4, 6:9] = 0.2
+
+    return cell_seg, nuclei_seg, boundary_pmap
diff --git a/tests/functionals/dataprocessing/test_advanced.py b/tests/functionals/dataprocessing/test_advanced.py
@@ -1,51 +1,11 @@
 import numpy as np
-import pytest
 
 from plantseg.functionals.dataprocessing.advanced_dataprocessing import (
     fix_over_under_segmentation_from_nuclei,
     remove_false_positives_by_foreground_probability,
 )
 
 
-@pytest.fixture
-def complex_test_data():
-    """
-    Generates a complex 3D dataset with both under-segmented and over-segmented cells.
-
-    Returns:
-        tuple[np.ndarray, np.ndarray, np.ndarray]: cell segmentation, nuclei segmentation, and boundary probability map.
-    """
-    # Create a 3D grid of zeros
-    cell_seg = np.zeros((10, 10, 10), dtype=np.uint16)
-    nuclei_seg = np.zeros_like(cell_seg, dtype=np.uint16)
-
-    # Define cells with under-segmentation (multiple nuclei in one cell)
-    # Cell 1: covers (2, 2, 2) to (5, 5, 5), contains two nuclei
-    cell_seg[2:6, 2:6, 2:6] = 1
-    nuclei_seg[2:4, 2:3, 2:3] = 1
-    nuclei_seg[4:6, 5:6, 5:6] = 2
-
-    # Define cells with over-segmentation (one nucleus split into multiple cells)
-    # Cell 2 and 3: cover (6, 6, 6) to (8, 8, 8), with one nucleus overlapping both cells
-    cell_seg[6:8, 6:10, 6:10] = 2
-    cell_seg[8:10, 6:10, 6:10] = 3
-    nuclei_seg[7:9, 7:9, 7:9] = 3
-
-    # Define another under-segmented region with a large cell and multiple nuclei
-    # Cell 4: covers (1, 1, 6) to (3, 3, 8), contains two nuclei
-    cell_seg[1:4, 1:4, 6:9] = 4
-    nuclei_seg[1:2, 1:2, 6:7] = 4
-    nuclei_seg[3:4, 3:4, 8:9] = 5
-
-    # Generate a boundary probability map with higher values on the edges of the cells
-    boundary_pmap = np.ones_like(cell_seg, dtype=np.float32)
-    boundary_pmap[2:6, 2:6, 2:6] = 0.2
-    boundary_pmap[6:8, 6:8, 6:8] = 0.2
-    boundary_pmap[1:4, 1:4, 6:9] = 0.2
-
-    return cell_seg, nuclei_seg, boundary_pmap
-
-
 def test_remove_false_positives_by_foreground_probability():
     seg = np.ones((10, 10, 10), dtype=np.uint16)
     seg[2:8, 2:8, 2:8] += 20
@@ -64,28 +24,38 @@ def test_remove_false_positives_by_foreground_probability():
 
 
 def test_fix_over_under_segmentation_from_nuclei(complex_test_data):
+    """
+    Test the fix_over_under_segmentation_from_nuclei function with complex input data.
+
+    Args:
+        complex_test_data (tuple): A tuple containing cell segmentation array,
+                                   nuclei segmentation array, and boundary probability map.
+
+    Tests:
+        - Verifies the initial state of the input data.
+        - Ensures under-segmented regions are split correctly.
+        - Ensures over-segmented regions are merged correctly.
+    """
     cell_seg, nuclei_seg, boundary_pmap = complex_test_data
 
     # Check that the input data is as expected
-    assert len(np.unique(cell_seg[2:6, 2:6, 2:6])) == 1
-    assert len(np.unique(cell_seg[1:4, 1:4, 6:9])) == 1
-    assert len(np.unique(cell_seg[6:10, 6:10, 6:10])) == 2
+    assert len(np.unique(cell_seg[2:6, 2:6, 2:6])) == 1, "Initial region should have 1 unique label."
+    assert len(np.unique(cell_seg[1:4, 1:4, 6:9])) == 1, "Initial region should have 1 unique label."
+    assert len(np.unique(cell_seg[6:10, 6:10, 6:10])) == 2, "Initial region should have 2 unique labels."
 
     corrected_seg = fix_over_under_segmentation_from_nuclei(
         cell_seg=cell_seg,
         nuclei_seg=nuclei_seg,
         threshold_merge=0.3,
         threshold_split=0.6,
-        quantiles_nuclei=(0.1, 0.9),
+        quantile_min=0.1,
+        quantile_max=0.9,
         boundary=boundary_pmap,
     )
 
     # Check under-segmented regions are split
-    # Check that there are two unique labels in cell_seg[2:6, 2:6, 2:6]
-    # Check that there are two unique labels in cell_seg[1:4, 1:4, 6:9]
-    assert len(np.unique(corrected_seg[2:6, 2:6, 2:6])) == 2, "Undersegmentation not split."
-    assert len(np.unique(corrected_seg[1:4, 1:4, 6:9])) == 2, "Undersegmentation not split."
+    assert len(np.unique(corrected_seg[2:6, 2:6, 2:6])) == 2, "Undersegmentation not split as expected."
+    assert len(np.unique(corrected_seg[1:4, 1:4, 6:9])) == 2, "Undersegmentation not split as expected."
 
     # Check over-segmented regions are merged
-    # Check that there are 1 unique labels in cell_seg[6:8, 6:10, 6:10]
-    assert len(np.unique(corrected_seg[6:10, 6:10, 6:10])) == 1, "Oversegmentation not merged."
+    assert len(np.unique(corrected_seg[6:10, 6:10, 6:10])) == 1, "Oversegmentation not merged as expected."