Skip to content

util

Vegetation

Bases: IntEnum

Classes of vegetation represented by each number

Source code in src/tcd_pipeline/util.py
128
129
130
131
132
133
134
135
class Vegetation(IntEnum):
    """
    Integer codes identifying each vegetation class.

    Being an ``IntEnum``, members compare equal to their plain integer
    values and can be used wherever an ``int`` class index is expected.
    """

    # Canopy class
    CANOPY = 0
    # Tree class
    TREE = 1
    # Canopy class, superpixel variant ("sp" abbreviates "superpixel")
    CANOPY_SP = 2

convert_to_projected(path, output_path=None, temp_name=None, inplace=False, resample=False, target_gsd_m=0.1, dst_crs='EPSG:3395', use_vrt=True)

Convert an input image to projected coordinates and optionally resample

Parameters:

Name Type Description Default
path str

Path to image (typically a GeoTiff)

required
output_path str

Path to the new stored image

None
temp_name str

Optional temporary filename when processing. Defaults to None.

None
inplace bool

Process input file in place - will overwrite your image! Defaults to False.

False
resample bool

Resample the input image. Defaults to False.

False
target_gsd_m float

Target ground sample distance in metres. Defaults to 0.1.

0.1
use_vrt bool

Use WarpedVRT instead of calling GDAL directly; this should handle masking but may be slower

True
Source code in src/tcd_pipeline/util.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def convert_to_projected(
    path: str,
    output_path: Optional[str] = None,
    temp_name: Optional[str] = None,
    inplace: Optional[bool] = False,
    resample: Optional[bool] = False,
    target_gsd_m: float = 0.1,
    dst_crs: str = "EPSG:3395",
    use_vrt: bool = True,
) -> None:
    """Convert an input image to projected coordinates and optionally resample

    Two code paths exist:

    * ``use_vrt=True`` (default): the image is warped through rasterio's
      ``WarpedVRT`` and always rescaled to ``target_gsd_m``; ``resample`` and
      ``temp_name`` are not consulted. At most the first three bands are
      written, as a tiled JPEG-compressed GeoTIFF with ``nodata=0``.
    * ``use_vrt=False``: ``gdalwarp`` and ``gdal_translate`` are run as
      subprocesses. The reprojected image is stored next to the input as
      ``<name>_proj<ext>`` (or over the input when ``inplace`` is set). If
      ``resample`` is set, a second ``gdalwarp`` pass writes the resampled
      image to ``output_path``.

    Args:
        path (str): Path to image (typically a GeoTiff)
        output_path (str, optional): Path to the new stored image. With
            ``use_vrt`` it defaults to ``<path>_proj_<gsd>`` plus the original
            extension; without it, it is only used for the resampled image and
            defaults to ``<reprojected path>_<gsd>``, where ``<gsd>`` is
            ``int(target_gsd_m * 100)``.
        temp_name (str, optional): Optional temporary filename when processing. Defaults to None.
        inplace (bool, optional): Process input file in place - will overwrite your image! Defaults to False.
        resample (bool, optional): Resample the input image. Defaults to False.
        target_gsd_m (float): Target ground sample distance in metres. Defaults to 0.1.
        dst_crs (str): Target coordinate reference system. Defaults to "EPSG:3395".
        use_vrt (bool): Use WarpedVRT instead of calling GDAL directly; this
            should handle masking but may be slower. Defaults to True.

    Raises:
        subprocess.CalledProcessError: If one of the GDAL command line tools
            exits with a non-zero status (``use_vrt=False`` only).
    """

    if use_vrt:
        with rasterio.Env(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_INTERNAL_MASK=True):
            with rasterio.open(path) as src:
                with WarpedVRT(
                    src,
                    crs=dst_crs,
                    resampling=Resampling.bilinear,
                    warp_mem_limit=256,
                    warp_extras={"NUM_THREADS": "ALL_CPUS"},
                ) as vrt:
                    scale, dst_transform = scale_transform(vrt, target_gsd_m)

                    height = int(round(vrt.height * scale))
                    width = int(round(vrt.width * scale))

                    # NOTE(review): this window is expressed in the source
                    # image's pixel grid, while it is applied to the VRT and
                    # the output size/transform are derived from the VRT.
                    # Confirm this is intended when the source CRS differs
                    # from dst_crs.
                    dst_window = from_bounds(*src.bounds, src.transform)

                    # rasterio expects out_shape as (bands, rows, cols), i.e.
                    # height before width.
                    data = vrt.read(
                        window=dst_window, out_shape=(src.count, height, width)
                    )

                # Keep at most three bands and size the output accordingly so
                # sources with fewer than three bands can still be written.
                bands = data[:3]

                profile = src.profile.copy()
                profile.update(
                    {
                        "count": bands.shape[0],
                        "crs": dst_crs,
                        "transform": dst_transform,
                        "width": width,
                        "height": height,
                        "nodata": 0,
                        "dtype": "uint8",
                        "driver": "GTiff",
                        "compress": "JPEG",
                        "tiled": True,
                        "blockxsize": 512,
                        "blockysize": 512,
                    }
                )

                if output_path is None:
                    base, ext = os.path.splitext(path)
                    output_path = base + f"_proj_{int(target_gsd_m*100)}" + ext

                with rasterio.open(output_path, "w", **profile) as dst:
                    dst.write(bands)

            # The source dataset is closed at this point, so it is safe to
            # replace the file on disk.
            if inplace:
                shutil.move(output_path, path)

        return

    # Only the band count is needed from the source; close it again before
    # any file is moved over it.
    with rasterio.open(path) as img:
        band_count = img.count

    working_dir = os.path.dirname(path)
    filename, ext = os.path.splitext(os.path.basename(path))

    if temp_name is None:
        temp_name = "_" + filename

    temporary_vrt = os.path.join(working_dir, f"{temp_name}_m.vrt")
    temporary_tif = os.path.join(working_dir, f"{temp_name}_m.tif")

    # Convert to a VRT for speed
    # NOTE(review): the "-co" options below are passed while writing a VRT,
    # and SRC_METHOD=NO_GEOTRANSFORM is given as a creation option - confirm
    # these have the intended effect.
    warp_args = [
        "gdalwarp",
        "-multi",
        "-wo",
        "NUM_THREADS=ALL_CPUS",
        "-co",
        "TILED=YES",
        "-co",
        "BLOCKXSIZE=512",
        "-co",
        "BLOCKYSIZE=512",
        "-co",
        "COMPRESS=NONE",
        "-co",
        "SRC_METHOD=NO_GEOTRANSFORM",
        "-r",
        "lanczos",
        "-t_srs",
        dst_crs,
        "-ot",
        "Byte",
        "-overwrite",
        "-of",
        "vrt",
        f"{path}",
        temporary_vrt,
    ]

    # Then compress
    translate_args = [
        "gdal_translate",
        "-co",
        "NUM_THREADS=ALL_CPUS",
        "-co",
        "BIGTIFF=IF_SAFER",
        "-co",
        "COMPRESS=JPEG",
        "-co",
        "TILED=YES",
        "-co",
        "BLOCKXSIZE=512",
        "-co",
        "BLOCKYSIZE=512",
        "-r",
        "lanczos",
    ]

    if band_count != 1:
        translate_args.extend(["-co", "PHOTOMETRIC=YCBCR"])

    if band_count > 3:
        translate_args.extend(["-b", "1", "-b", "2", "-b", "3", "-mask", "4"])

    translate_args.append(temporary_vrt)
    translate_args.append(temporary_tif)

    if inplace:
        new_path = path
    else:
        new_path = os.path.join(working_dir, f"{filename}_proj{ext}")

    try:
        for message, command in (
            (f"Converting {temp_name} to projected CRS", warp_args),
            (f"Compressing {temp_name}", translate_args),
        ):
            logger.info(message)
            try:
                subprocess.check_output(command)
            except subprocess.CalledProcessError as error:
                # Log what GDAL printed, then stop: continuing would only fail
                # later with a less helpful missing-file error.
                logger.error(error.output)
                raise

        shutil.move(temporary_tif, new_path)
    finally:
        # The VRT is only an intermediate; never leave it behind.
        if os.path.exists(temporary_vrt):
            os.remove(temporary_vrt)

    if resample:
        args = [
            "gdalwarp",
            "-multi",
            "-wo",
            "NUM_THREADS=ALL_CPUS",
            "-co",
            "BIGTIFF=IF_SAFER",
            "-co",
            "COMPRESS=JPEG",
            "-co",
            "TILED=YES",
            "-co",
            "BLOCKXSIZE=512",
            "-co",
            "BLOCKYSIZE=512",
            "-r",
            "lanczos",
            "-t_srs",
            dst_crs,
            "-tr",
            f"{target_gsd_m}",
            f"{target_gsd_m}",
            "-overwrite",
        ]

        base, ext = os.path.splitext(new_path)

        if output_path is None:
            output_path = base + f"_{int(target_gsd_m*100)}" + ext

        args.extend([new_path, output_path])

        logger.info("Running {}".format(" ".join(args)))
        subprocess.check_output(args)

        if inplace:
            shutil.move(output_path, path)

image_to_tensor(image)

Converts the input into a float tensor in CHW order. If you pass a Tensor in, no transpose operation will be performed.

Source code in src/tcd_pipeline/util.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def image_to_tensor(
    image: Union[str, np.ndarray, torch.Tensor, DatasetReader]
) -> torch.Tensor:
    """
    Converts the input into a float tensor in CHW order. If you pass a Tensor in,
    no transpose operation will be performed.

    Args:
        image: One of

            * a path (str), opened with PIL and treated as HWC,
            * a numpy array, treated as HWC (a 2D array is treated as a
              single-channel image),
            * a rasterio DatasetReader, whose ``read()`` result is already
              band-first and is therefore not transposed,
            * a torch Tensor, which is only cast to float.

    Returns:
        torch.Tensor: float tensor holding the image.

    Raises:
        NotImplementedError: If the input type is not supported.
    """
    # Tracks whether the array already has the channel axis first.
    channels_first = False

    # Load image if needed
    if isinstance(image, str):
        image = np.array(Image.open(image))
    elif isinstance(image, DatasetReader):
        # rasterio returns (bands, rows, cols), which is already CHW.
        image = image.read()
        channels_first = True
    elif not isinstance(image, (np.ndarray, torch.Tensor)):
        logger.error("Provided image of type %s which is not supported.", type(image))
        raise NotImplementedError

    # Format conversion
    if isinstance(image, torch.Tensor):
        return image.float()

    if image.ndim == 2:
        # Greyscale input: add an explicit trailing channel axis so the
        # HWC -> CHW transpose below is well defined.
        image = image[:, :, np.newaxis]

    if not channels_first:
        image = image.transpose((2, 0, 1))

    return torch.from_numpy(image).float()

mask_to_polygon(mask, tolerance=1)

Converts the mask of an object to a MultiPolygon

Parameters:

Name Type Description Default
mask np.array(bool)

Boolean mask of the segmented object

required

Returns:

Name Type Description
MultiPolygon MultiPolygon

Shapely MultiPolygon describing the object

Source code in src/tcd_pipeline/util.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def mask_to_polygon(
    mask: npt.NDArray[np.bool_], tolerance: float = 1
) -> shapely.geometry.MultiPolygon:
    """Converts the mask of an object to a MultiPolygon

    Args:
        mask (np.array(bool)): Boolean mask of the segmented object
        tolerance (float): Tolerance passed to ``simplify``. Values of zero or
            below skip simplification. Defaults to 1.

    Returns:
        MultiPolygon: Shapely MultiPolygon describing the object
    """

    # Only pixels selected by the mask are traced; each resulting shape
    # becomes one polygon.
    polygons = [
        shapely.geometry.shape(geometry)
        for geometry, _ in features.shapes(mask.astype(np.int16), mask=mask)
    ]

    multipolygon = shapely.geometry.MultiPolygon(polygons)
    if not multipolygon.is_valid:
        multipolygon = multipolygon.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout. `geom_type` is used because the
        # `type` attribute is deprecated in Shapely 2.0.
        if multipolygon.geom_type == "Polygon":
            multipolygon = shapely.geometry.MultiPolygon([multipolygon])

    if tolerance > 0:
        return multipolygon.simplify(tolerance)

    return multipolygon

paste_array(dst, src, offset, merge='max')

Paste src array into dst array at specified offset, handling negative offsets and ensuring src does not extend beyond the bounds of dst.

Parameters:

Name Type Description Default
dst ndarray

Destination array where src is to be pasted.

required
src ndarray

Source array to be pasted into dst.

required
offset tuple

(xmin, ymin) offset at which to paste src into dst.

required

Returns:

Type Description
NDArray

np.ndarray: dst array with src pasted into it.

Source code in src/tcd_pipeline/util.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
def paste_array(
    dst: npt.NDArray, src: npt.NDArray, offset: tuple, merge="max"
) -> npt.NDArray:
    """
    Paste src array into dst array at specified offset, handling negative offsets
    and ensuring src does not extend beyond the bounds of dst.

    Both arrays must be two-dimensional. dst is modified in place and also
    returned.

    Args:
        dst (np.ndarray): Destination array where src is to be pasted.
        src (np.ndarray): Source array to be pasted into dst.
        offset (tuple): (xmin, ymin) offset at which to paste src into dst.
        merge (str): How overlapping values are combined: "max", "min" or
            "mean" of the existing and pasted values. Defaults to "max".

    Returns:
        np.ndarray: dst array with src pasted into it.

    Raises:
        NotImplementedError: If merge is not one of "max", "min" or "mean".
    """
    xmin, ymin = offset

    src_height, src_width = src.shape
    dst_height, dst_width = dst.shape

    # Drop the rows/columns that would land above or left of dst.
    src = src[max(0, -ymin) :, max(0, -xmin) :]

    # Drop the columns/rows that would land beyond the right/bottom edge.
    x_overlap = dst_width - (xmin + src_width)
    if x_overlap < 0:
        src = src[:, :x_overlap]

    y_overlap = dst_height - (ymin + src_height)
    if y_overlap < 0:
        src = src[:y_overlap, :]

    crop_height, crop_width = src.shape

    dst_xmin = max(0, xmin)
    dst_ymin = max(0, ymin)
    rows = slice(dst_ymin, dst_ymin + crop_height)
    cols = slice(dst_xmin, dst_xmin + crop_width)

    current = dst[rows, cols]

    if merge == "max":
        dst[rows, cols] = np.maximum(src, current)
    elif merge == "min":
        dst[rows, cols] = np.minimum(src, current)
    elif merge == "mean":
        # Sum in at least float64 so small integer dtypes (e.g. uint8) cannot
        # wrap around before the division.
        wide = np.result_type(src.dtype, current.dtype, np.float64)
        dst[rows, cols] = np.add(src, current, dtype=wide) / 2
    else:
        raise NotImplementedError("Currently array merging supports min/max/mean")

    return dst

polygon_to_mask(polygon, shape)

Rasterise a polygon to a mask

Parameters:

Name Type Description Default
polygon Polygon

Shapely Polygon describing the object

required

Returns: np.array(np.bool_): Boolean mask of the segmented object

Source code in src/tcd_pipeline/util.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def polygon_to_mask(
    polygon: shapely.geometry.Polygon, shape: tuple[int, int]
) -> npt.NDArray:
    """Rasterise a polygon to a mask

    Args:
        polygon: Shapely Polygon describing the object
        shape: Size of the output mask; the first two entries are cast to int
            and passed to the rasteriser as ``out_shape``.
    Returns:
        np.array: Mask of the segmented object, as returned by
        ``features.rasterize``.
        NOTE(review): the dtype is whatever ``features.rasterize`` produces by
        default and may not be ``np.bool_`` - confirm against callers.
    """

    # Coerce the dimensions (e.g. numpy integers or floats) to plain ints.
    first_dim, second_dim = int(shape[0]), int(shape[1])

    return features.rasterize([polygon], out_shape=(first_dim, second_dim))

resample_image(input_path, output_path, target_gsd_m=0.1)

Resample an image to a target GSD in metres

Parameters:

Name Type Description Default
input_path str

Path to input image

required
output_path str

Path to output image

required
target_gsd_m float

Target GSD in metres. Defaults to 0.1.

0.1
Source code in src/tcd_pipeline/util.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def resample_image(input_path: str, output_path: str, target_gsd_m: float = 0.1):
    """Resample an image to a target GSD in metres

    The image is read with bilinear resampling at the reduced size and written
    to ``output_path`` with the source metadata, an updated transform and size,
    and ``nodata`` set to 0. The scale factor is asserted to be below 1, so the
    target GSD must be coarser than the source resolution.

    Args:
        input_path (str): Path to input image
        output_path (str): Path to output image
        target_gsd_m (float, optional): Target GSD in metres. Defaults to 0.1.

    """

    with rasterio.Env(
        GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_INTERNAL_MASK=True
    ), rasterio.open(input_path) as src:
        scale, dst_transform = scale_transform(src, target_gsd_m)

        new_height = int(round(src.height * scale))
        new_width = int(round(src.width * scale))

        assert scale < 1

        resampled = src.read(
            out_shape=(src.count, new_height, new_width),
            resampling=Resampling.bilinear,
        )

        # Start from the source metadata and override what resampling changed.
        out_meta = src.meta.copy()
        out_meta["transform"] = dst_transform
        out_meta["width"] = resampled.shape[-1]
        out_meta["height"] = resampled.shape[-2]
        out_meta["nodata"] = 0

        with rasterio.open(output_path, "w", **out_meta) as dst:
            dst.write(resampled)

scale_transform(src, target_gsd_m)

Get the scale factor and scaled transform given a target resolution.

Input dataset must use a metric CRS.

Args:

src (rasterio.DatasetReader): dataset to scale
target_gsd_m (float): desired GSD

Returns:

scale (float): scale factor
transform (Affine): scaled transform
Source code in src/tcd_pipeline/util.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def scale_transform(src: rasterio.DatasetReader, target_gsd_m: float):
    """Compute the scale factor and rescaled transform for a target resolution.

    Input dataset must use a metric CRS. The pixel size is asserted to be
    (close to) equal along both axes.

    Args:

        src (rasterio.DatasetReader): dataset to scale
        target_gsd_m (float): desired GSD

    Returns:

        scale (float): scale factor, the source resolution divided by
            target_gsd_m
        transform (Affine): source transform with its two pixel-size terms
            divided by the scale factor
    """
    affine = src.transform

    x_res, y_res = src.res
    assert np.allclose(x_res, y_res)

    scale = x_res / target_gsd_m

    # Only the pixel-size terms (a, e) change; rotation terms and the origin
    # are carried over untouched.
    transform = Affine(
        affine.a / scale,
        affine.b,
        affine.c,
        affine.d,
        affine.e / scale,
        affine.f,
    )

    return scale, transform