Commit
add new object detection model based on AutoAdapt POC paper
Showing 15 changed files with 40,686 additions and 0 deletions.
473 changes: 473 additions & 0 deletions
object_detection_2024/Extract_individual_test_zones.ipynb
Large diffs are not rendered by default.
37,776 changes: 37,776 additions & 0 deletions
object_detection_2024/Segmentation_Training.ipynb
Large diffs are not rendered by default.
34 changes: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
DataSettings:
  data_dir: 'data_segmentation'
  output_dir: 'output'
  split_ratio: 0.80
  background_id: 0
  background_color: [255, 0, 0]
  classes: ['kit', 'membrane']
  class_ids: [1, 2]
  class_colors: [[0, 0, 255], [0, 255, 0]]
  resize_height: 800

TransformationParameters:
  rotate_limit: 80
  rotate_p: 0.8
  horizontal_flip_p: 0.5
  blur_limit: 5
  blur_p: 0.8
  color_jitter_brightness: 0.1
  color_jitter_contrast: 0.1
  color_jitter_saturation: 0.1
  color_jitter_p: 0.5

TrainingParameters:
  save_path: 'saved_models'
  train_validation_ratio: 0.8
  num_workers: 0
  batch_size: 4
  seed: 42
  num_epochs: 10
  num_classes: 3
  hidden_size: 256
  learning_rate: 5e-5
  score_thresholds: [0.85, 0.85]
  mask_thresholds: [0.85, 0.85]
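For orientation, this is standard YAML, so it loads with PyYAML into the nested dictionary that the code below indexes as config_file['DataSettings'] and so on. A minimal sketch; the on-disk filename is not shown in this diff, so 'config_segmentation.yaml' is a placeholder:

import yaml

# Placeholder filename: the actual config filename is not visible in this diff
with open('config_segmentation.yaml') as f:
    config_file = yaml.safe_load(f)

print(config_file['DataSettings']['classes'])           # ['kit', 'membrane']
print(config_file['TrainingParameters']['batch_size'])  # 4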
79 changes: 79 additions & 0 deletions
dataset_segmentation.py
@@ -0,0 +1,79 @@
""" | ||
File containg the main Dataset class for image segmentation. | ||
""" | ||
|
||
import os | ||
import numpy as np | ||
import torch | ||
import cv2 | ||
from torchvision.transforms import functional as F | ||
|
||
# Custom packages | ||
from utils_segmentation.utils_dataset import load_valid_filepaths, build_target_from_mask | ||
from transformations_segmentation import resize_image | ||
|
||
class LFASegmentationDataset: | ||
def __init__(self, config, kit_id, dataset, filenames=None, transforms=None): | ||
|
||
# Configuration file and relevant features | ||
self.config = config | ||
self.data_dir = self.config['data_dir'] | ||
self.resize_h = self.config['resize_height'] | ||
self.kit_id = kit_id | ||
self.dataset = dataset | ||
assert self.dataset in ['train', 'test'], "dataset must be 'train' or 'test!" | ||
|
||
# Transformations | ||
self.transforms = transforms | ||
|
||
# If filenames not specified, load all filenames in folder | ||
if filenames is None: | ||
images_path = os.path.join(self.data_dir, f'{kit_id}_{dataset}_images') | ||
self.filenames = sorted([path.replace('.jpg', '') for path in os.listdir(images_path)]) | ||
else: | ||
self.filenames = filenames | ||
|
||
# Load image, and mask full filepaths | ||
self.image_paths, self.mask_paths = load_valid_filepaths(self.kit_id, self.dataset, self.filenames) | ||
|
||
def __len__(self): | ||
|
||
return len(self.filenames) | ||
|
||
def __getitem__(self, idx): | ||
|
||
# Get corresponding image and mask path | ||
image_path = self.image_paths[idx] | ||
mask_path = self.mask_paths[idx] | ||
|
||
# Read image and mask as NumPy arrays | ||
image = cv2.imread(image_path) | ||
mask = cv2.imread(mask_path) | ||
|
||
# Resize image (excluded from transformations because it is mandatory for efficiency) | ||
image = resize_image(image, self.resize_h) | ||
mask = resize_image(mask, self.resize_h) | ||
|
||
# Check that image and masks have the same dimensions | ||
assert image.shape[:2] == mask.shape[:2], "Image and Masks have different dimensions!" | ||
|
||
# Apply transforms if applicable | ||
if self.transforms is not None: | ||
image, mask = self.transforms(image, mask) | ||
|
||
# Build category mask, bounding boxes, and labels from RGB mask | ||
masks_cat, boxes, labels = build_target_from_mask(mask) | ||
|
||
# Convert everything to a torch.Tensor | ||
image_t = F.to_tensor(image) # Also scales to [0, 1] range and brings channel to first position! | ||
masks_t = torch.as_tensor(masks_cat, dtype=torch.uint8) | ||
boxes_t = torch.as_tensor(boxes, dtype=torch.float32) | ||
labels_t = torch.as_tensor(labels, dtype=torch.int64) | ||
|
||
# Build target with ground-truths and image information | ||
target = {'masks': masks_t, | ||
'boxes': boxes_t, | ||
'labels': labels_t} | ||
|
||
return image_t, target | ||
|
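A usage sketch for the class above. The kit identifier is hypothetical, and the collate function mirrors the collate_fn imported from utils_segmentation.utils in the inference module below (not part of this diff): detection batches hold variable-sized images and targets, so samples are zipped into tuples rather than stacked into one tensor.

from torch.utils.data import DataLoader

# Hypothetical kit identifier; real IDs follow the '{kit_id}_{dataset}_images' folder naming
dataset = LFASegmentationDataset(config_file['DataSettings'], kit_id='kitA', dataset='train')
image_t, target = dataset[0]
print(image_t.shape, target['boxes'].shape)  # e.g. torch.Size([3, 800, W]), torch.Size([2, 4])

# Common collate function for detection-style datasets (sketch of the
# helper imported from utils_segmentation.utils, which is not in this diff)
def collate_fn(batch):
    return tuple(zip(*batch))

loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)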
264 changes: 264 additions & 0 deletions
@@ -0,0 +1,264 @@
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

# Custom packages
from utils_segmentation.utils import collate_fn, compute_iou_mask, compute_iou_box
from utils_segmentation.visualization import show_images
from dataset_segmentation import LFASegmentationDataset


@torch.no_grad()
def run_inference_loader(loader, model, config_file, device=None):
    """
    Make predictions for all batches in the loader.
    Returns:
        predictions_list: list of dictionaries of the form {masks, boxes} for each image in the loader
        metrics_list: list of dictionaries of the form {scores, iou_masks, iou_boxes} for each image in the loader
    """

    # Align input device with the model's device
    if device is None:
        device = next(model.parameters()).device
    else:
        model.to(device)

    # Lists of prediction and metric dictionaries for the whole dataset in the loader
    predictions_list = []
    metrics_list = []

    for images, targets in loader:

        # Send images and targets to the same device as the model
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Inference step over a single batch
        predictions_list_batch, metrics_list_batch = run_inference_batch(images, targets, model, config_file)

        # Update full output
        predictions_list += predictions_list_batch
        metrics_list += metrics_list_batch

    return predictions_list, metrics_list

@torch.no_grad()
def run_inference_batch(images, targets, model, config_file):
    """
    Make predictions for a batch of images, and select the masks and boxes with the best scores for each class
    and image. The predicted quantities are then compared with the targets to extract the mask and box IoU
    for each class.
    Returns:
        predictions_list: list of dictionaries of the form {masks, boxes} for each image
        metrics_list: list of dictionaries of the form {scores, iou_masks, iou_boxes} for each image
    """

    # Parameters from the configuration file
    classes = config_file['DataSettings']['classes']
    n_classes = len(classes)
    class_ids = config_file['DataSettings']['class_ids']
    mask_thresholds = config_file['TrainingParameters']['mask_thresholds']

    # Lists of prediction and metric dictionaries for the whole batch
    predictions_list = []
    metrics_list = []

    # Inference step
    predictions = model(images)

    for target, pred in zip(targets, predictions):

        # Get labels and scores, which are aligned
        labels = pred['labels'].tolist()
        scores = pred['scores'].tolist()
        assert len(labels) == len(scores)

        # Get boxes and masks, which are also naturally aligned
        boxes = pred['boxes']
        masks = pred['masks']
        assert boxes.shape[0] == masks.shape[0] == len(labels)

        # Keep track of the best-score mask and box predictions for each image
        pred_dict = {'masks': torch.zeros_like(target['masks']),
                     'boxes': torch.zeros_like(target['boxes'])}

        # Keep track of the best scores and IoUs for each image
        metrics_dict = {'scores': np.zeros(n_classes),
                        'iou_masks': np.zeros(n_classes),
                        'iou_boxes': np.zeros(n_classes)}

        # Loop over classes (i.e. kit and membrane)
        for i, cls in enumerate(classes):
            if class_ids[i] in labels:  # Check whether there is at least one prediction for that class

                # Get the maximum-confidence location for the class; the predictions are
                # score-sorted, so the first occurrence in the list is the best one
                class_loc = labels.index(class_ids[i])

                # Get the best class score
                class_score = scores[class_loc]

                # Get the best class box and mask
                class_box = boxes[class_loc]
                class_mask = masks[class_loc, 0]

                # Binarize the mask
                class_mask = (class_mask >= mask_thresholds[i]).to(torch.uint8)

                # Compute IoU
                class_iou_mask = compute_iou_mask(class_mask, target['masks'][i])
                class_iou_box = compute_iou_box(class_box, target['boxes'][i])

                # Update dictionaries
                pred_dict['masks'][i] = class_mask
                pred_dict['boxes'][i] = class_box

                metrics_dict['scores'][i] = class_score
                metrics_dict['iou_masks'][i] = class_iou_mask
                metrics_dict['iou_boxes'][i] = class_iou_box

            else:  # If there is no prediction, we leave all zeros in the dictionary
                print(f'{cls} is missing from the prediction!')

        # Send masks and boxes to the CPU
        pred_dict['masks'] = pred_dict['masks'].to('cpu')
        pred_dict['boxes'] = pred_dict['boxes'].to('cpu')

        # Update lists
        predictions_list.append(pred_dict)
        metrics_list.append(metrics_dict)

    return predictions_list, metrics_list

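compute_iou_mask and compute_iou_box come from utils_segmentation.utils, which is not part of this commit. A plausible minimal sketch of what they compute, i.e. intersection-over-union for binary masks and for [x1, y1, x2, y2] boxes:

import torch

def compute_iou_mask(pred_mask, target_mask):
    # Both masks are binary {0, 1} tensors with the same spatial size
    pred = pred_mask.bool()
    target = target_mask.bool()
    intersection = (pred & target).sum().item()
    union = (pred | target).sum().item()
    return intersection / union if union > 0 else 0.0

def compute_iou_box(pred_box, target_box):
    # Boxes are [x1, y1, x2, y2] tensors; clamp handles non-overlapping boxes
    x1 = torch.max(pred_box[0], target_box[0])
    y1 = torch.max(pred_box[1], target_box[1])
    x2 = torch.min(pred_box[2], target_box[2])
    y2 = torch.min(pred_box[3], target_box[3])
    intersection = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_pred = (pred_box[2] - pred_box[0]) * (pred_box[3] - pred_box[1])
    area_target = (target_box[2] - target_box[0]) * (target_box[3] - target_box[1])
    union = area_pred + area_target - intersection
    return (intersection / union).item() if union > 0 else 0.0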
def get_metrics(predictions, image_names):
    """
    Return metrics as a pandas DataFrame with image_names as the index.
    """

    metrics_dict = {}

    for key in ['scores', 'iou_masks', 'iou_boxes']:
        for i, cls in enumerate(['kit', 'membrane']):
            metrics_dict[f'{key}_{cls}'] = [pred[key][i] for pred in predictions]

    metrics_df = pd.DataFrame.from_dict(metrics_dict)
    metrics_df.index = pd.Index(image_names, name='image_names')

    return metrics_df

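The resulting frame has one row per image and six columns, ordered by metric and then by class; for illustration:

# Illustration: metrics_test comes from run_inference_loader above
metrics_df = get_metrics(metrics_test, image_names=dataset_test.filenames)
print(metrics_df.columns.tolist())
# ['scores_kit', 'scores_membrane', 'iou_masks_kit', 'iou_masks_membrane',
#  'iou_boxes_kit', 'iou_boxes_membrane']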
def predict_testset(kit_id, config_file, model, save_filename=None, show_bool=True):
    """
    Run inference on all images in the kit_id test set and save all scores and IoUs to a CSV file.
    Returns:
        images_test: list of images in the test set, as they come out of the Dataset class (i.e. after transformations)
        predictions_test: list of dictionaries containing the best-score masks and boxes
        metrics_test: list of dictionaries of the form {scores, iou_masks, iou_boxes} for each image
        metrics_df: pandas DataFrame containing the scores and IoUs for each image
    """

    data_settings = config_file['DataSettings']
    training_parameters = config_file['TrainingParameters']

    model.eval()

    # Dataset, dataloader and all images
    dataset_test = LFASegmentationDataset(data_settings, kit_id, dataset='test', transforms=None)

    loader_test = DataLoader(dataset=dataset_test,
                             batch_size=training_parameters['batch_size'],
                             shuffle=False,
                             num_workers=training_parameters['num_workers'],
                             collate_fn=collate_fn,
                             pin_memory=True)

    images_test = [img.to('cpu') for img, _ in dataset_test]

    # Run inference on all test data; extract predictions and metrics
    predictions_test, metrics_test = run_inference_loader(loader_test, model, config_file)

    # Show all test images and their predictions
    if show_bool:
        show_images(images_test, predictions_test, metrics_test)

    # Format metrics as a DataFrame
    metrics_df = get_metrics(metrics_test, image_names=dataset_test.filenames)

    # Save metrics to a CSV file
    if save_filename is not None:
        metrics_df.to_csv(os.path.join(data_settings['output_dir'], save_filename))

    return images_test, predictions_test, metrics_test, metrics_df

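A usage sketch for predict_testset. The checkpoint path and kit identifier are hypothetical, and loading a full model object with torch.load assumes it was saved that way (the training notebook is not rendered in this diff):

import torch
import yaml

with open('config_segmentation.yaml') as f:       # placeholder filename
    config_file = yaml.safe_load(f)

model = torch.load('saved_models/model_kitA.pt',  # hypothetical checkpoint path
                   map_location='cpu')

images, predictions, metrics, metrics_df = predict_testset(
    kit_id='kitA',                                # hypothetical kit identifier
    config_file=config_file,
    model=model,
    save_filename='metrics_kitA_test.csv',
    show_bool=False)
print(metrics_df.mean())  # average score and IoU per column over the test set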
@torch.no_grad()
def run_inference(image, model, config_file):
    """
    Make a prediction for a single image tensor. No ground truth required.
    Returns:
        best_masks: uint8 array of shape [n_classes, H, W] with the binarized best-score mask per class
        best_boxes: array of shape [n_classes, 4] with the best-score box per class
        best_scores: array of shape [n_classes] with the best confidence score per class
    """

    # Parameters from the configuration file
    classes = config_file['DataSettings']['classes']
    n_classes = len(classes)
    class_ids = config_file['DataSettings']['class_ids']
    mask_thresholds = config_file['TrainingParameters']['mask_thresholds']

    # Inference step (add the input's batch dimension)
    prediction = model(image.unsqueeze(0))[0]

    # Get labels and scores, which are aligned
    labels = prediction['labels'].cpu().numpy().tolist()
    scores = prediction['scores'].cpu().numpy().tolist()
    assert len(labels) == len(scores)

    # Get boxes and masks, which are also naturally aligned
    boxes = prediction['boxes'].cpu().numpy()
    masks = prediction['masks'].cpu().numpy()
    assert boxes.shape[0] == masks.shape[0] == len(labels)

    # Store the best score and the corresponding mask and box prediction per class
    best_masks = np.zeros([n_classes, masks.shape[2], masks.shape[3]], dtype='uint8')
    best_boxes = np.zeros([n_classes, 4])
    best_scores = np.zeros(n_classes)

    # Loop over classes (i.e. kit and membrane)
    for i, cls in enumerate(classes):
        if class_ids[i] in labels:  # Check whether there is at least one prediction for that class

            # Get the maximum-confidence location for the class (i.e. the first occurrence in the list)
            class_loc = labels.index(class_ids[i])

            # Get the best class score
            class_score = scores[class_loc]

            # Get the best class box and mask
            class_box = boxes[class_loc]
            class_mask = masks[class_loc, 0]

            # Binarize the mask
            class_mask = (class_mask >= mask_thresholds[i]).astype(np.uint8)

            # Store
            best_masks[i] = class_mask
            best_boxes[i] = class_box
            best_scores[i] = class_score

        else:  # If there is no prediction, we leave all zeros in the arrays
            print(f'{cls} is missing from the prediction!')

    return best_masks, best_boxes, best_scores
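Finally, a sketch of single-image inference with the function above. The input path is hypothetical, and the resize-plus-to_tensor preparation mirrors what the Dataset class does at training time:

import cv2
from torchvision.transforms import functional as F
from transformations_segmentation import resize_image

# Hypothetical test image; prepare it the same way the Dataset class does
image = cv2.imread('data_segmentation/kitA_test_images/example.jpg')
image = resize_image(image, config_file['DataSettings']['resize_height'])
image_t = F.to_tensor(image)

model.eval()
best_masks, best_boxes, best_scores = run_inference(image_t, model, config_file)
print(best_scores)  # one confidence per class; zeros where a class was not detected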