Source code for segments.utils

from __future__ import annotations

import copy
import json
import logging
import os
import random
import re
from collections import defaultdict
from io import BytesIO
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
from urllib.parse import urlparse

import numpy as np
import numpy.typing as npt
import requests
from PIL import ExifTags, Image
from typing_extensions import Literal

# https://adamj.eu/tech/2021/05/13/python-type-hints-how-to-fix-circular-imports/
if TYPE_CHECKING:
    from segments.dataset import SegmentsDataset
    from segments.typing import Release


#############
# Variables #
#############
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(max_retries=3)
session.mount("http://", adapter)
session.mount("https://", adapter)
logger = logging.getLogger(__name__)


def bitmap2file(
    bitmap: npt.NDArray[np.uint32],
    is_segmentation_bitmap: bool = True,
) -> BytesIO:
    """Convert a label bitmap to a file with the proper format.

    Args:
        bitmap: A :class:`numpy.ndarray` with :class:`numpy.uint32` dtype where each unique value represents an instance id.
        is_segmentation_bitmap: If this is a segmentation bitmap. Defaults to :obj:`True`.

    Returns:
        A file object.

    Raises:
        :exc:`ValueError`: If the ``dtype`` is not :class:`np.uint32` or :class:`np.uint8`.
        :exc:`ValueError`: If the bitmap is not a segmentation bitmap.
    """
    # Convert bitmap to np.uint32, if it is not already
    if bitmap.dtype == "uint32":
        pass
    elif bitmap.dtype == "uint8":
        bitmap = np.uint32(bitmap)
    else:
        raise ValueError("Only np.ndarrays with np.uint32 dtype can be used.")

    if is_segmentation_bitmap:
        bitmap2 = np.copy(bitmap)
        bitmap2 = bitmap2[:, :, None].view(np.uint8)
        bitmap2[:, :, 3] = 255
    else:
        raise ValueError("Only segmentation bitmaps can be used.")

    f = BytesIO()
    Image.fromarray(bitmap2).save(f, "PNG")
    f.seek(0)

    return f

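# Usage sketch for bitmap2file (not part of the library; the array shape and
# instance ids below are made up for illustration):
#
#     import numpy as np
#     from segments.utils import bitmap2file
#
#     instance_bitmap = np.zeros((64, 64), dtype=np.uint32)
#     instance_bitmap[10:20, 10:20] = 1  # pixels belonging to instance id 1
#     f = bitmap2file(instance_bitmap)
#     with open("label.png", "wb") as out:  # hypothetical output path
#         out.write(f.read())
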
def get_semantic_bitmap(
    instance_bitmap: Optional[npt.NDArray[np.uint32]] = None,
    annotations: Optional[Dict[str, Any]] = None,
    id_increment: int = 0,
) -> Optional[npt.NDArray[np.uint32]]:
    """Convert an instance bitmap and annotations dict into a segmentation bitmap.

    Args:
        instance_bitmap: A :class:`numpy.ndarray` with :class:`numpy.uint32` ``dtype`` where each unique value represents an instance id. Defaults to :obj:`None`.
        annotations: An annotations dictionary. Defaults to :obj:`None`.
        id_increment: Increment the category ids with this number. Defaults to ``0``.

    Returns:
        An array where each unique value represents a category id.
    """

    if instance_bitmap is None or annotations is None:
        return None

    instance2semantic = [0] * (max([a["id"] for a in annotations], default=0) + 1)
    for annotation in annotations:
        instance2semantic[annotation["id"]] = annotation["category_id"] + id_increment
    instance2semantic = np.array(instance2semantic)

    semantic_label = instance2semantic[np.array(instance_bitmap, np.uint32)]

    return semantic_label

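# Usage sketch for get_semantic_bitmap (not part of the library). The function
# iterates over the annotations as a list of dicts with "id" and "category_id"
# keys, as in a sample's ``annotations``; the values below are illustrative:
#
#     import numpy as np
#     from segments.utils import get_semantic_bitmap
#
#     instance_bitmap = np.array([[0, 1], [1, 2]], dtype=np.uint32)
#     annotations = [
#         {"id": 1, "category_id": 3},
#         {"id": 2, "category_id": 5},
#     ]
#     semantic = get_semantic_bitmap(instance_bitmap, annotations)
#     # semantic == [[0, 3], [3, 5]]
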
def export_dataset(
    dataset: SegmentsDataset,
    export_folder: str = ".",
    export_format: Literal[
        "coco-panoptic",
        "coco-instance",
        "yolo",
        "instance",
        "instance-color",
        "semantic",
        "semantic-color",
        "polygon",
    ] = "coco-panoptic",
    id_increment: int = 0,
    **kwargs: Any,
) -> Optional[Union[Tuple[str, Optional[str]], Optional[str]]]:
    """Export a dataset to a different format.

    +------------------+----------------------------------------------------------------------------------------------------+
    | Export format    | Supported dataset type                                                                             |
    +==================+====================================================================================================+
    | COCO panoptic    | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | COCO instance    | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | YOLO             | ``segmentation-bitmap``, ``segmentation-bitmap-highres``, ``vector``, ``bboxes`` and ``keypoints`` |
    +------------------+----------------------------------------------------------------------------------------------------+
    | Instance         | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | Colored instance | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | Semantic         | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | Colored semantic | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+
    | Polygon          | ``segmentation-bitmap`` and ``segmentation-bitmap-highres``                                        |
    +------------------+----------------------------------------------------------------------------------------------------+

    Example:

    .. code-block:: python

        # pip install segments-ai
        from segments import SegmentsClient, SegmentsDataset
        from segments.utils import export_dataset

        # Initialize a SegmentsDataset from the release file
        client = SegmentsClient('YOUR_API_KEY')
        release = client.get_release('jane/flowers', 'v1.0')  # Alternatively: release = 'flowers-v1.0.json'
        dataset = SegmentsDataset(release, labelset='ground-truth', filter_by=['labeled', 'reviewed'])

        # Export to COCO panoptic format
        export_dataset(dataset, export_format='coco-panoptic')

    Alternatively, you can use the initialized :class:`.SegmentsDataset` to loop through the samples and labels, and visualize or process them in any way you please:

    .. code-block:: python

        import matplotlib.pyplot as plt
        from segments.utils import get_semantic_bitmap

        for sample in dataset:
            # Print the sample name and list of labeled objects
            print(sample['name'])
            print(sample['annotations'])

            # Show the image
            plt.imshow(sample['image'])
            plt.show()

            # Show the instance segmentation label
            plt.imshow(sample['segmentation_bitmap'])
            plt.show()

            # Show the semantic segmentation label
            semantic_bitmap = get_semantic_bitmap(sample['segmentation_bitmap'], sample['annotations'])
            plt.imshow(semantic_bitmap)
            plt.show()

    Args:
        dataset: A :class:`.SegmentsDataset`.
        export_folder: The folder to export the dataset to. Defaults to ``.``.
        export_format: The destination format. Defaults to ``coco-panoptic``.
        id_increment: Increment the category ids with this number. Defaults to ``0``. Ignored unless ``export_format`` is ``semantic`` or ``semantic-color``.

    Returns:
        Returns the file name and the image directory name (for COCO panoptic, COCO instance, YOLO and polygon), or returns the export folder name (for (colored) instance and (colored) semantic).

    Raises:
        :exc:`ImportError`: If scikit-image is not installed (to install run ``pip install scikit-image``).
        :exc:`ValueError`: If an invalid ``export_format`` is used.
    """

    try:
        import skimage  # noqa: F401
    except ImportError as e:
        logger.error("Please install scikit-image first: pip install scikit-image.")
        raise e

    print("Exporting dataset. This may take a while...")

    if export_format == "coco-panoptic":
        if dataset.task_type not in [
            "segmentation-bitmap",
            "segmentation-bitmap-highres",
        ]:
            raise ValueError(
                "Only datasets of type 'segmentation-bitmap' and 'segmentation-bitmap-highres' can be exported to this format."
            )
        from .export import export_coco_panoptic

        return export_coco_panoptic(dataset, export_folder)
    elif export_format == "coco-instance":
        if dataset.task_type not in [
            "segmentation-bitmap",
            "segmentation-bitmap-highres",
        ]:
            raise ValueError(
                "Only datasets of type 'segmentation-bitmap' and 'segmentation-bitmap-highres' can be exported to this format."
            )
        from .export import export_coco_instance

        return export_coco_instance(dataset, export_folder)
    elif export_format == "yolo":
        if dataset.task_type not in [
            "segmentation-bitmap",
            "segmentation-bitmap-highres",
            "vector",
            "bboxes",
            "keypoints",
        ]:
            raise ValueError(
                'Only datasets of type "segmentation-bitmap", "segmentation-bitmap-highres", "vector", "bboxes" and "keypoints" can be exported to this format.'
            )
        from .export import export_yolo

        return export_yolo(
            dataset,
            export_folder,
            image_width=kwargs.get("image_width", None),
            image_height=kwargs.get("image_height", None),
        )
    elif export_format in ["semantic-color", "instance-color", "semantic", "instance"]:
        if dataset.task_type not in [
            "segmentation-bitmap",
            "segmentation-bitmap-highres",
        ]:
            raise ValueError(
                "Only datasets of type 'segmentation-bitmap' and 'segmentation-bitmap-highres' can be exported to this format."
            )
        from .export import export_image

        return export_image(dataset, export_folder, export_format, id_increment)
    elif export_format == "polygon":
        if dataset.task_type not in [
            "segmentation-bitmap",
            "segmentation-bitmap-highres",
        ]:
            raise ValueError(
                'Only datasets of type "segmentation-bitmap" and "segmentation-bitmap-highres" can be exported to this format.'
            )
        from .export import export_polygon

        return export_polygon(dataset, export_folder)
    else:
        raise ValueError("Please choose a valid export_format.")

def load_image_from_url(
    url: str, save_filename: Optional[str] = None, s3_client: Optional[Any] = None
) -> Image.Image:
    """Load an image from a url.

    Args:
        url: The image url.
        save_filename: The filename to save to.
        s3_client: A boto3 S3 client, e.g. ``s3_client = boto3.client("s3")``. Needs to be provided if your images are in a private S3 bucket. Defaults to :obj:`None`.

    Returns:
        A PIL image.
    """
    if s3_client is not None:
        url_parsed = urlparse(url)

        regex = re.search(
            r"(.+).(s3|s3-accelerate).(.+).amazonaws.com", url_parsed.netloc
        )
        if regex:
            bucket = regex.group(1)
            if bucket == "segmentsai-prod":
                image = Image.open(BytesIO(session.get(url).content))
            else:
                # region_name = regex.group(2)
                key = url_parsed.path.lstrip("/")
                file_byte_string = s3_client.get_object(Bucket=bucket, Key=key)[
                    "Body"
                ].read()
                image = Image.open(BytesIO(file_byte_string))
    else:
        image = Image.open(BytesIO(session.get(url).content))
        # urllib.request.urlretrieve(url, save_filename)

    if save_filename is not None:
        if "exif" in image.info:
            image.save(save_filename, exif=image.info["exif"])
        else:
            image.save(save_filename)

    return image

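# Usage sketch for load_image_from_url (not part of the library; the url and
# filename are placeholders, and a boto3 client is only needed for private S3 buckets):
#
#     from segments.utils import load_image_from_url
#
#     image = load_image_from_url(
#         "https://example.com/path/to/image.jpg",
#         save_filename="image.jpg",
#     )
#     print(image.size)
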
def load_label_bitmap_from_url(
    url: str, save_filename: Optional[str] = None
) -> npt.NDArray[np.uint32]:
    """Load a label bitmap from url.

    Args:
        url: The label bitmap url.
        save_filename: The filename to save to.
    """

    def extract_bitmap(
        bitmap: Image.Image,
    ) -> npt.NDArray[np.uint32]:
        bitmap_array = np.array(bitmap)
        bitmap_array[:, :, 3] = 0
        bitmap_array = bitmap_array.view(np.uint32).squeeze(2)
        return bitmap_array

    bitmap = Image.open(BytesIO(session.get(url).content))
    bitmap_array = extract_bitmap(bitmap)

    if save_filename:
        Image.fromarray(bitmap_array).save(save_filename)

    return bitmap_array

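# Usage sketch for load_label_bitmap_from_url (not part of the library; the url
# is a placeholder, and the result is a np.uint32 array of instance ids):
#
#     import numpy as np
#     from segments.utils import load_label_bitmap_from_url
#
#     label = load_label_bitmap_from_url("https://example.com/path/to/label.png")
#     print(label.dtype, np.unique(label))
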
def load_release(release: Release) -> Any:
    """Load JSON from Segments release.

    Args:
        release: A Segments release.

    Returns:
        A JSON with the release labels.
    """
    release_file = cast(str, release.attributes.url)  # TODO Fix in the backend.
    content = requests.get(release_file)

    return json.loads(content.content)

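# Usage sketch for load_release (not part of the library; assumes a valid API key,
# and 'jane/flowers' / 'v1.0' are placeholder dataset and release names):
#
#     from segments import SegmentsClient
#     from segments.utils import load_release
#
#     client = SegmentsClient('YOUR_API_KEY')
#     release = client.get_release('jane/flowers', 'v1.0')
#     release_json = load_release(release)
#     print(release_json.keys())
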
def handle_exif_rotation(image: Image.Image) -> Image.Image:
    """Handle the exif rotation of a PIL image.

    Args:
        image: A PIL image.

    Returns:
        A rotated PIL image.
    """

    def get_key_by_value(dictionary: Mapping[int, str], value: str) -> int:
        for k, v in dictionary.items():
            if v == value:
                return k
        raise ValueError(f"No such value {value}.")

    try:
        orientation = get_key_by_value(ExifTags.TAGS, "Orientation")
        exif = dict(image.getexif().items())
        if exif[orientation] == 3:
            image = image.transpose(Image.ROTATE_180)
        elif exif[orientation] == 6:
            image = image.transpose(Image.ROTATE_270)
        elif exif[orientation] == 8:
            image = image.transpose(Image.ROTATE_90)
        return image
    except (AttributeError, KeyError, IndexError, ValueError):
        return image

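# Usage sketch for handle_exif_rotation (not part of the library; "photo.jpg" is a
# placeholder, and images without an EXIF Orientation tag are returned unchanged):
#
#     from PIL import Image
#     from segments.utils import handle_exif_rotation
#
#     image = Image.open("photo.jpg")
#     image = handle_exif_rotation(image)
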
def show_polygons(
    image_directory_path: str,
    image_id: int,
    exported_polygons_path: str,
    seed: int = 0,
    output_path: Optional[str] = None,
) -> None:
    """Show the exported contours of a segmented image (i.e., resulting from :func:`.export_dataset` with polygon export format).

    Args:
        image_directory_path: The image directory path.
        image_id: The image id (this can be found in the exported polygons JSON file).
        exported_polygons_path: The exported polygons path.
        seed: The seed used to generate random colors. Defaults to ``0``.
        output_path: The directory to save the plot to. Defaults to :obj:`None`.

    Raises:
        :exc:`ImportError`: If matplotlib is not installed.
    """
    try:
        from matplotlib import image
        from matplotlib import pyplot as plt
        from matplotlib.patches import Polygon
    except ImportError as e:
        logger.error("Please install matplotlib first: pip install matplotlib.")
        raise e

    def find_image_name(images: List[Dict[str, Any]], image_id: int) -> str:
        for image in images:
            if image["id"] == image_id:
                return cast(str, image["file_name"])
        raise KeyError("Cannot find the image id. Please provide a valid id.")

    def get_random_color() -> Tuple[float, float, float]:
        return (random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))

    def normalize(color: List[int]) -> Tuple[float, float, float]:
        """Transform a color from 0-255 range to 0-1 range and from a list to a tuple, e.g., [255, 0, 123] to (1, 0, 0.5)."""
        return (color[0] / 255, color[1] / 255, color[2] / 255)

    random.seed(seed)

    with open(exported_polygons_path, "r") as f:
        polygons = json.load(f)

    image_name = find_image_name(polygons["images"], image_id)
    image = image.imread(f"{image_directory_path}/{image_name}")

    # {category id: (category name, color)}
    categories = {
        category["id"]: (
            category["name"],
            normalize(category["color"]) if category["color"] else get_random_color(),
        )
        for category in polygons["categories"]
    }

    # {category id: polygons}
    annotations = defaultdict(list)
    filtered_annotations = filter(
        lambda dictionary: dictionary["image_id"] == image_id, polygons["annotations"]
    )
    for annotation in filtered_annotations:
        annotations[annotation["category_id"]].extend(annotation["polygons"])

    # {category name: (polygons, color)}
    category_name_polygons_with_annotations = {
        category_name: (annotations[category_id], category_color)
        for category_id, (category_name, category_color) in categories.items()
        if annotations[category_id]
    }

    fig, (ax1, ax2, ax3) = plt.subplots(
        nrows=1, ncols=3, sharex=True, sharey=True, figsize=(25, 10)
    )

    used_category_names = set()
    for category_name, (
        polygons,
        color,
    ) in category_name_polygons_with_annotations.items():
        for p in polygons:
            polygon = Polygon(
                xy=np.asarray(p).reshape(-1, 2),
                facecolor=color,
                edgecolor=color,
                label=category_name
                if category_name not in used_category_names
                else None,
                closed=True,
                alpha=0.5,
            )
            used_category_names.add(category_name)
            polygon_copy = copy.deepcopy(polygon)
            polygon_copy.set_label(None)
            # An Artist, container or primitive, cannot be contained in multiple
            # containers, which is consistent with the fact that each Artist holds
            # the parent container as a bare object, not in a list.
            ax1.add_patch(polygon)
            ax2.add_patch(polygon_copy)

    # Ax 2
    # ax2.axis("off")
    ax2.set_title("Both")
    ax2.imshow(image)
    ax2.set_xlabel("Width (pixels)")

    # Ax 1 (uses the aspect ratio of the image in axes 2)
    # ax1.axis("off")
    ax1.set_title("Label")
    # ax1.imshow(image)
    # https://stackoverflow.com/a/44655020
    aspect = np.diff(ax1.get_xlim())[0] / np.diff(ax1.get_ylim())[0]
    aspect /= np.diff(ax2.get_xlim())[0] / np.diff(ax2.get_ylim())[0]
    aspect = np.abs(aspect)
    ax1.set_aspect(aspect)
    ax1.set_xlabel("Width (pixels)")
    ax1.set_ylabel("Height (pixels)")

    # Ax 3
    # ax3.axis("off")
    ax3.set_title("Image")
    ax3.imshow(image)
    ax3.set_xlabel("Width (pixels)")

    fig.legend()

    if output_path:
        path = os.path.join(
            output_path, f"exported_polygons_from_image_id_{image_id:04d}"
        )
        plt.savefig(path, bbox_inches="tight")

    plt.show()

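# Usage sketch for show_polygons (not part of the library; the directory, file
# name, and image id are hypothetical placeholders for the outputs produced by
# export_dataset with the polygon export format):
#
#     from segments.utils import show_polygons
#
#     show_polygons(
#         image_directory_path="path/to/exported/images",
#         image_id=0,
#         exported_polygons_path="path/to/exported_polygons.json",
#         seed=0,
#     )
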