Skip to content

cocodataset

Semantic segmentation model framework, using segmentation-models-pytorch (smp) models

COCOSegmentationDataset

Bases: Dataset

Image dataset for semantic segmentation tasks.

Source code in src/tcd_pipeline/data/cocodataset.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class COCOSegmentationDataset(Dataset):
    """Image dataset for semantic segmentation tasks.

    Pairs each image listed in a COCO annotation file with a PNG mask of
    the same base filename stored in a separate mask directory. Images
    with no corresponding mask on disk are skipped at construction time.
    """

    def __init__(
        self,
        data_root: str,
        annotation_path: str,
        transform: Union[Callable, Any] = None,
        tile_size: int = 2048,
        image_dirname: str = "images",
        mask_dirname: str = "masks",
        binary_labels: bool = True,
    ):
        """
        Initialise the dataset

        This dataset is designed to work with a COCO annotation file,
        and assumes that the images and masks are stored in the
        supplied image_dirname and mask_dirname folders.

        If a tile_size is provided, the dataset will return a
        random absolute crop of the desired size.

        If you provide a custom transform, ensure that it returns image
        and mask tensors. This will also override the tile_size.

        Args:
            data_root (str): Path to the data directory
            annotation_path (str): Path to the annotation JSON file
            transform (Union[Callable, Any]): Optional transforms to be applied
            tile_size (int, optional): Tile size to return, defaults to 2048
            image_dirname (str): Folder within data_root containing images
            mask_dirname (str): Folder within data_root containing image masks
            binary_labels (bool): If True, masks are loaded as single-channel
                greyscale ("L") images. Defaults to True.
        """

        self.data_root = data_root
        self.image_path = os.path.join(data_root, image_dirname)
        self.mask_path = os.path.join(data_root, mask_dirname)
        self.binary_labels = binary_labels

        logger.info(f"Looking for images in {self.image_path}")
        logger.info(f"Looking for masks in {self.mask_path}")
        logger.info(f"Loading annotations from: {annotation_path}")

        # TODO: Use MS-COCO tooling (e.g. pycocotools) instead of raw JSON
        with open(
            annotation_path,
            "r",
            encoding="utf-8",
        ) as file:
            self.metadata = json.load(file)

        # Default pipeline: random square crop + tensor conversion. A
        # user-supplied transform replaces this entirely (and with it the
        # tile_size behaviour).
        self.transform = transform
        if self.transform is None:
            self.transform = A.Compose(
                [A.RandomCrop(width=tile_size, height=tile_size), ToTensorV2()]
            )

        # Keep only images that have a matching <basename>.png mask on disk.
        self.images = []
        for image in tqdm(self.metadata["images"]):
            base = os.path.splitext(image["file_name"])[0]
            mask_path = os.path.join(self.mask_path, base + ".png")
            if os.path.exists(mask_path):
                self.images.append(image)
            else:
                logger.debug(f"Mask not found for {image['file_name']}")

        logger.info(
            "Found {} valid images in {}".format(len(self.images), annotation_path)
        )

    def __len__(self) -> int:
        """Return the number of image/mask pairs in the dataset."""
        return len(self.images)

    def __getitem__(self, idx: int) -> dict:
        """Returns a dataset sample

        Args:
            idx (int): Index of the sample to return

        Returns:
            dict: containing "image" and "mask" tensors, plus the image's
            absolute path ("image_path") and file name ("image_name")
        """

        annotation = self.images[idx]

        img_name = annotation["file_name"]

        img_path = os.path.abspath(os.path.join(self.image_path, img_name))
        base = os.path.splitext(img_name)[0]

        mask_file = os.path.join(self.mask_path, base + ".png")
        if self.binary_labels:
            # Greyscale conversion collapses any palette/colour mask to one band
            mask = np.array(Image.open(mask_file).convert("L"), dtype=int)
        else:
            mask = np.array(Image.open(mask_file), dtype=int)

        # Albumentations handles conversion to torch tensor
        image = Image.open(img_path)

        # Normalise to 3-channel RGB (greyscale, palette, RGBA inputs).
        # The previous guard (`mode != "RGB" or len(getbands()) != 2`) was
        # always true since RGB has 3 bands; checking the mode alone gives
        # identical pixel output while skipping a redundant copy.
        if image.mode != "RGB":
            image = image.convert("RGB")

        image = np.array(image)

        transformed = self.transform(image=image, mask=mask)
        image = transformed["image"].float()

        # Hack for transformer models where the ground truth shouldn't be
        # empty: flip a single pixel if the crop is all-zeros or all-ones.
        if torch.all(transformed["mask"] == 0):
            transformed["mask"][0, 0] = 1
        elif torch.all(transformed["mask"] == 1):
            transformed["mask"][0, 0] = 0

        # Binarise: any non-zero class id becomes foreground (1)
        mask = (transformed["mask"] > 0).long()

        return {
            "image": image,
            "mask": mask,
            "image_path": img_path,
            "image_name": img_name,
        }

__getitem__(idx)

Returns a dataset sample

Parameters:

Name Type Description Default
idx int

Index of the sample to return

required

Returns:

Name Type Description
dict dict

containing "image" and "mask" tensors

Source code in src/tcd_pipeline/data/cocodataset.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def __getitem__(self, idx: int) -> dict:
    """Returns a dataset sample

    Args:
        idx (int): Index of the sample to return

    Returns:
        dict: containing "image" and "mask" tensors, plus the image's
        absolute path ("image_path") and file name ("image_name")
    """

    annotation = self.images[idx]

    img_name = annotation["file_name"]

    img_path = os.path.abspath(os.path.join(self.image_path, img_name))
    base = os.path.splitext(img_name)[0]

    mask_file = os.path.join(self.mask_path, base + ".png")
    if self.binary_labels:
        # Greyscale conversion collapses any palette/colour mask to one band
        mask = np.array(Image.open(mask_file).convert("L"), dtype=int)
    else:
        mask = np.array(Image.open(mask_file), dtype=int)

    # Albumentations handles conversion to torch tensor
    image = Image.open(img_path)

    # Normalise to 3-channel RGB (greyscale, palette, RGBA inputs).
    # The previous guard (`mode != "RGB" or len(getbands()) != 2`) was
    # always true since RGB has 3 bands; checking the mode alone gives
    # identical pixel output while skipping a redundant copy.
    if image.mode != "RGB":
        image = image.convert("RGB")

    image = np.array(image)

    transformed = self.transform(image=image, mask=mask)
    image = transformed["image"].float()

    # Hack for transformer models where the ground truth shouldn't be
    # empty: flip a single pixel if the crop is all-zeros or all-ones.
    if torch.all(transformed["mask"] == 0):
        transformed["mask"][0, 0] = 1
    elif torch.all(transformed["mask"] == 1):
        transformed["mask"][0, 0] = 0

    # Binarise: any non-zero class id becomes foreground (1)
    mask = (transformed["mask"] > 0).long()

    return {
        "image": image,
        "mask": mask,
        "image_path": img_path,
        "image_name": img_name,
    }

__init__(data_root, annotation_path, transform=None, tile_size=2048, image_dirname='images', mask_dirname='masks', binary_labels=True)

Initialise the dataset

This dataset is designed to work with a COCO annotation file, and assumes that the images and masks are stored in the supplied image_dirname and mask_dirname folders.

If a tile_size is provided, the dataset will return a random absolute crop of the desired size.

If you provide a custom transform, ensure that it returns an image tensor and a mask tensor. Supplying a transform also overrides the tile_size.

Parameters:

Name Type Description Default
data_root str

Path to the data directory

required
annotation_path str

Path to the annotation JSON file

required
image_dirname str

Path to a folder containing images in the dataset

'images'
mask_dirname str

Path to a folder containing image masks.

'masks'
transform Union[Callable, Any]

Optional transforms to be applied

None
tile_size int

Tile size to return, default to 2048

2048
Source code in src/tcd_pipeline/data/cocodataset.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def __init__(
    self,
    data_root: str,
    annotation_path: str,
    transform: Union[Callable, Any] = None,
    tile_size: int = 2048,
    image_dirname: str = "images",
    mask_dirname: str = "masks",
    binary_labels: bool = True,
):
    """
    Initialise the dataset

    This dataset is designed to work with a COCO annotation file,
    and assumes that the images and masks are stored in the
    supplied image_dirname and mask_dirname folders. Masks are
    expected as PNG files named after each image's base filename;
    images without a matching mask on disk are skipped.

    If a tile_size is provided, the dataset will return a
    random absolute crop of the desired size.

    If you provide a custom transform, ensure that it returns image
    and mask tensors. This will also override the tile_size.

    Args:
        data_root (str): Path to the data directory
        annotation_path (str): Path to the annotation JSON file
        image_dirname (str): Path to a folder containing images in the dataset
        mask_dirname (str): Path to a folder containing image masks.
        transform (Union[Callable, Any]): Optional transforms to be applied
        tile_size (int, optional): Tile size to return, default to 2048
        binary_labels (bool): If True, masks are later loaded as
            single-channel greyscale images. Defaults to True.
    """

    self.data_root = data_root
    self.image_path = os.path.join(data_root, image_dirname)
    self.mask_path = os.path.join(data_root, mask_dirname)
    self.binary_labels = binary_labels

    logger.info(f"Looking for images in {self.image_path}")
    logger.info(f"Looking for masks in {self.mask_path}")
    logger.info(f"Loading annotations from: {annotation_path}")

    # TODO: Use MS-COCO
    with open(
        annotation_path,
        "r",
        encoding="utf-8",
    ) as file:
        self.metadata = json.load(file)

    # Fall back to a default pipeline (random square crop + tensor
    # conversion) when no custom transform is supplied.
    self.transform = transform
    if self.transform is None:
        self.transform = A.Compose(
            [A.RandomCrop(width=tile_size, height=tile_size), ToTensorV2()]
        )

    # Keep only images that have a matching <basename>.png mask on disk.
    self.images = []
    for image in tqdm(self.metadata["images"]):
        # Check if mask exists:
        base = os.path.splitext(image["file_name"])[0]
        mask_path = os.path.join(self.mask_path, base + ".png")
        if os.path.exists(mask_path):
            self.images.append(image)
        else:
            logger.debug(f"Mask not found for {image['file_name']}")

    logger.info(
        "Found {} valid images in {}".format(len(self.images), annotation_path)
    )

__len__()

Return the length of the dataset.

Source code in src/tcd_pipeline/data/cocodataset.py
92
93
94
def __len__(self) -> int:
    """Return the length of the dataset."""
    return len(self.images)