# encoding: utf-8
# ------------------------------------------------------------------------
# Copyright 2020 All Histolab Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------
import math
import ntpath
import os
import pathlib
from typing import TYPE_CHECKING, Iterator, List, Tuple, Union
import numpy as np
import openslide
import PIL
from skimage.measure import find_contours
from .exceptions import (
HistolabException,
LevelError,
MayNeedLargeImageError,
SlidePropertyError,
TileSizeOrCoordinatesError,
)
from .filters.compositions import FiltersComposition
from .tile import Tile
from .types import CoordinatePair
from .util import lazyproperty
if TYPE_CHECKING:
from .masks import BinaryMask
try:
from io import BytesIO
import large_image
LARGEIMAGE_IS_INSTALLED = True
except (ModuleNotFoundError, ImportError): # pragma: no cover
LARGEIMAGE_IS_INSTALLED = False
IMG_EXT = "png"
IMG_UPSAMPLE_MODE = PIL.Image.BICUBIC
IMG_DOWNSAMPLE_MODE = PIL.Image.LANCZOS
TILE_SIZE_PIXEL_TOLERANCE = 5
[docs]class Slide:
"""Provide Slide objects and expose property and methods.
Arguments
---------
path : Union[str, pathlib.Path]
Path where the WSI is saved.
processed_path : Union[str, pathlib.Path]
Path where the tiles will be saved to.
use_largeimage : bool, optional
Whether or not to use the `large_image` package for accessing the
slide and extracting or calculating various metadata. If this is
`False`, `openslide` is used. If it is `True`, `large_image` will try
from the various installed tile sources. For example, if you installed
it using `large_image[all]`, it will try `openslide` first, then `PIL`,
and so on, depending on the slide format and metadata. `large_image`
also handles internal logic to enable fetching exact micron-per-pixel
resolution tiles by interpolating between the internal levels of the
slide. If you don't mind installing an extra dependency,
we recommend setting this to True and fetching Tiles at exact
resolutions as opposed to levels. Different scanners have different
specifications, and the same level may not always encode the same
magnification in different scanners and slide formats.
Raises
------
TypeError
If the processed path is not specified.
ModuleNotFoundError
when `use_largeimage` is set to True and `large_image` module is not
installed.
"""
def __init__(
self,
path: Union[str, pathlib.Path],
processed_path: Union[str, pathlib.Path],
use_largeimage: bool = False,
) -> None:
self._path = str(path) if isinstance(path, pathlib.Path) else path
if processed_path is None:
raise TypeError("processed_path cannot be None.")
self._processed_path = processed_path
if use_largeimage and not LARGEIMAGE_IS_INSTALLED: # pragma: no cover
raise ModuleNotFoundError(
"Setting use_large_image to True requires installation "
"of the large_image module. Please visit: "
"https://github.com/girder/large_image for instructions."
)
self._use_largeimage = use_largeimage
def __repr__(self):
return (
self.__class__.__name__
+ f"(path={self._path}, processed_path={self._processed_path})"
)
# ---public interface methods and properties---
@lazyproperty
def base_mpp(self) -> float:
"""Get microns-per-pixel resolution at scan magnification.
Returns
-------
float
Microns-per-pixel resolution at scan (base) magnification.
Raises
------
ValueError
If `large_image` cannot detemine the slide magnification.
MayNeedLargeImageError
If `use_largeimage` was set to False when slide was initialized,
and we cannot determine the magnification otherwise.
"""
if self._use_largeimage:
if self._metadata.get("mm_x") is not None:
return self._metadata["mm_x"] * (10**3)
raise ValueError(
"Unknown scan resolution! This slide is missing metadata "
"needed for calculating the scanning resolution. Without "
"this information, you can only ask for a tile by level, "
"not mpp resolution."
)
if "openslide.mpp-x" in self.properties:
return float(self.properties["openslide.mpp-x"])
if "aperio.MPP" in self.properties:
return float(self.properties["aperio.MPP"])
if (
"tiff.XResolution" in self.properties
and self.properties.get("tiff.ResolutionUnit") == "centimeter"
):
return 1e4 / float(self.properties["tiff.XResolution"])
raise MayNeedLargeImageError(
"Unknown scan magnification! This slide format may be best "
"handled using the large_image module. Consider setting "
"use_largeimage to True when instantiating this Slide."
)
@lazyproperty
def dimensions(self) -> Tuple[int, int]:
"""Slide dimensions (w,h) at level 0.
Returns
-------
dimensions : Tuple[int, int]
Slide dimensions (width, height)
"""
if self._use_largeimage:
return self._metadata["sizeX"], self._metadata["sizeY"]
return self._wsi.dimensions
[docs] def level_dimensions(self, level: int = 0) -> Tuple[int, int]:
"""Return the slide dimensions (w,h) at the specified level
Parameters
---------
level : int
The level which dimensions are requested, default is 0.
Returns
-------
dimensions : Tuple[int, int]
Slide dimensions at the specified level (width, height)
Raises
------
LevelError
If the specified level is not available
"""
level = level if level >= 0 else self._remap_level(level)
try:
return self._wsi.level_dimensions[level]
except IndexError:
raise LevelError(
f"Level {level} not available. Number of available levels: "
f"{len(self._wsi.level_dimensions)}"
)
[docs] def level_magnification_factor(self, level: int = 0) -> str:
"""Return the magnification factor at the specified level.
Notice that the conversion level-magnification can be computed only
if the native magnification is available in the slide metadata.
Parameters
---------
level : int
The level which magnification factor is requested, default is 0.
Returns
-------
magnification factor : str
Magnification factor at speficied level
Raises
------
LevelError
If the specified level is not available.
SlidePropertyError
If the slide's native magnification or the downsample factor for the
specified level are not available in the file's metadata.
"""
level = level if level >= 0 else self._remap_level(level)
properties = self.properties
if level not in self.levels:
raise LevelError(
f"Level {level} not available. Number of available levels: "
f"{len(self.levels)}"
)
if level > 0 and f"openslide.level[{level}].downsample" not in properties:
raise SlidePropertyError(
f"Downsample factor for level {level} not available. "
f"Available slide properties: {list(self.properties.keys())}"
)
if "openslide.objective-power" not in properties:
raise SlidePropertyError(
f"Native magnification not available. Available slide properties: "
f"{list(self.properties.keys())}"
)
downsample_factor = (
round(float(properties[f"openslide.level[{level}].downsample"]))
if level != 0
else 1
)
level_magnification = (
int(properties["openslide.objective-power"]) / downsample_factor
)
return f"{level_magnification}X"
@lazyproperty
def levels(self) -> List[int]:
"""Slide's available levels
Returns
-------
List[int]
The levels available
"""
return list(range(len(self._wsi.level_dimensions)))
[docs] def locate_mask(
self,
binary_mask: "BinaryMask",
scale_factor: int = 32,
tissue_mask: bool = False,
alpha: int = 128,
outline: str = "red",
) -> PIL.Image.Image:
"""Draw binary mask contours on a rescaled version of the slide
Parameters
----------
binary_mask : BinaryMask
Binary Mask object
scale_factor : int
Scaling factor for the returned image. Default is 32.
tissue_mask : bool, optional
Whether to draw the contours on the binary tissue mask instead of
the rescaled version of the slide. Default is False.
alpha : int
The alpha level to be applied to the rescaled slide, default to 128.
outline : str
The outline color for the annotation, default to 'red'.
Returns
-------
PIL.Image.Image
PIL Image of the rescaled slide with the binary mask contours outlined.
"""
img = self.scaled_image(scale_factor)
mask = binary_mask(self)
resized_mask = np.array(
PIL.Image.fromarray(mask).resize(img.size, PIL.Image.ANTIALIAS)
)
if tissue_mask:
filters = FiltersComposition(Slide).tissue_mask_filters
img_tissue_mask = filters(img)
img = PIL.Image.fromarray(img_tissue_mask).convert("RGB")
else:
img.putalpha(alpha)
# pad the mask to have closed contours along the edges
padded_mask = np.pad(resized_mask, pad_width=1, mode="constant")
contours = [
cont - 1 for cont in find_contours(padded_mask, level=0.5)
] # unpad countours
for contour in contours:
contour = np.ceil(contour)
contour = np.vstack([contour[:, 1], contour[:, 0]]).T
PIL.ImageDraw.Draw(img).polygon(contour.ravel().tolist(), outline=outline)
return img
@lazyproperty
def name(self) -> str:
"""Slide name without extension.
Returns
-------
name : str
"""
bname = ntpath.basename(self._path)
return bname[: bname.rfind(".")]
@lazyproperty
def processed_path(self) -> str:
"""Path to store the tiles generated from the slide.
Returns
-------
str
Path to store the tiles generated from the slide
"""
return self._processed_path
@lazyproperty
def properties(self) -> dict:
"""Whole Slide Image properties.
Returns
-------
dict
WSI complete properties.
"""
return dict(self._wsi.properties)
[docs] def resampled_array(self, scale_factor: int = 32) -> np.array:
"""Return the resampled array from the original slide
Parameters
----------
scale_factor : int, optional
Image scaling factor. Default is 32.
Returns
----------
resampled_array: np.ndarray
Resampled array
"""
return self._resample(scale_factor)[1]
[docs] def scaled_image(self, scale_factor: int = 32) -> PIL.Image.Image:
"""Return a scaled image of the slide.
Parameters
----------
scale_factor : int, optional
Image scaling factor. Default is 32.
Returns
-------
PIL.Image.Image
A scaled image of the slide.
"""
return self._resample(scale_factor)[0]
[docs] def show(self) -> None:
"""Display the slide thumbnail.
NOTE: A new window of your OS image viewer will be opened.
"""
try:
thumbnail = self.thumbnail
thumbnail.show() # pragma: no cover
except FileNotFoundError as error:
raise FileNotFoundError(f"Cannot display the slide thumbnail: {error}")
@lazyproperty
def thumbnail(self) -> PIL.Image.Image:
"""Slide thumbnail.
Returns
-------
PIL.Image.Image
The slide thumbnail.
"""
if self._use_largeimage:
thumb_bytes, _ = self._tile_source.getThumbnail(
encoding="PNG",
width=self._thumbnail_size[0],
height=self._thumbnail_size[1],
)
thumbnail = self._bytes2pil(thumb_bytes).convert("RGB")
return thumbnail
return self._wsi.get_thumbnail(self._thumbnail_size)
# ------- implementation helpers -------
@staticmethod
def _bytes2pil(bytesim: bytearray):
"""Convert a bytes image to a PIL image object.
Parameters
----------
bytesim : bytearray
A bytes object representation of an image.
Returns
-------
PIL.Image.Image
A PIL Image object converted from the Bytes input.
"""
image_content = BytesIO(bytesim)
image_content.seek(0)
return PIL.Image.open(image_content)
def _has_valid_coords(self, coords: CoordinatePair) -> bool:
"""Check if ``coords`` are valid 0-level coordinates.
Parameters
----------
coords : CoordinatePair
Coordinates at level 0 to check
Returns
-------
bool
True if the coordinates are valid, False otherwise
"""
return (
0 <= coords.x_ul < self.dimensions[0]
and 0 <= coords.x_br < self.dimensions[0]
and 0 <= coords.y_ul < self.dimensions[1]
and 0 <= coords.y_br < self.dimensions[1]
)
@lazyproperty
def _metadata(self) -> dict:
"""Get metadata about this slide, including magnification.
Returns
-------
dict
This function is a wrapper. Please read the documentation for
``large_image.TileSource.getMetadata()`` for details on the return
keys and data types.
"""
return self._tile_source.getMetadata()
def _remap_level(self, level: int) -> int:
"""Remap negative index for the given level onto a positive one.
Parameters
----------
level : int
The level index to remap
Returns
-------
level : int
Positive level index
Raises
------
LevelError
when the abs(level) is greater than the number of the levels.
"""
if len(self.levels) - abs(level) < 0:
raise LevelError(
f"Level {level} not available. Number of available levels: "
f"{len(self._wsi.level_dimensions)}"
)
return len(self.levels) - abs(level)
def _resample(self, scale_factor: int = 32) -> Tuple[PIL.Image.Image, np.array]:
"""Convert a slide to a scaled-down PIL image.
The PIL image is also converted to array.
image is the scaled-down PIL image, original width and original height
are the width and height of the slide, new width and new height are the
dimensions of the PIL image.
Parameters
----------
scale_factor : int, 32 by default
Image scaling factor
Returns
-------
PIL.Image.Image
The resampled image
np.ndarray
The resampled image converted to array
"""
_, _, new_w, new_h = self._resampled_dimensions(scale_factor)
if self._use_largeimage:
kwargs = (
{
"scale": {
"magnification": self._metadata["magnification"] / scale_factor
}
}
if self._metadata["magnification"] is not None
else {}
)
wsi_image, _ = self._tile_source.getRegion(
format=large_image.tilesource.TILE_FORMAT_PIL,
**kwargs,
)
else:
level = self._wsi.get_best_level_for_downsample(scale_factor)
wsi_image = self._wsi.read_region(
(0, 0), level, self._wsi.level_dimensions[level]
)
# ---converts openslide read_region to an actual RGBA image---
wsi_image = wsi_image.convert("RGB")
img = wsi_image.resize(
(new_w, new_h),
IMG_UPSAMPLE_MODE if new_w >= wsi_image.size[0] else IMG_DOWNSAMPLE_MODE,
)
arr_img = np.asarray(img)
return img, arr_img
def _resampled_dimensions(
self, scale_factor: int = 32
) -> Tuple[int, int, int, int]:
"""Scale the slide dimensions of a specified factor.
Parameters
---------
scale_factor : int, 32 by default
Image scaling factor
Returns
-------
Tuple[int, int, int, int]
Original slide dimensions and scaled dimensions (original w, original h,
resampled w, resampled h).
"""
large_w, large_h = self.dimensions
new_w = math.floor(large_w / scale_factor)
new_h = math.floor(large_h / scale_factor)
return large_w, large_h, new_w, new_h
@lazyproperty
def _thumbnail_size(self) -> Tuple[int, int]:
r"""Compute the thumbnail size proportionally to the slide dimensions.
If the size of the slide is (v, m) where v has magnitude w and m has magnitude
n, that is,
.. math::
\left\lceil{\\log_{10}(v)}\right\rceil = w
and
.. math::
\left\lceil{\log_{10}(m)}\right\rceil = n
then the thumbnail size is computed as:
.. math::
\big(\frac{v}{10^{w-2}},\frac{v}{10^{n-2}}\big)
Returns
-------
Tuple[int, int]
Thumbnail size
"""
return tuple(
[
int(s / np.power(10, math.ceil(math.log10(s)) - 3))
for s in self.dimensions
]
)
@lazyproperty
def _tile_source(self) -> Union[openslide.OpenSlide, openslide.ImageSlide]:
"""Open the slide and returns a large_image tile source object
Returns
-------
source : large_image TileSource object
An TileSource object representing a whole-slide image.
Raises
------
MayNeedLargeImageError
If `use_largeimage` was set to False when slide was initialized.
"""
if not self._use_largeimage:
raise MayNeedLargeImageError(
"This property uses the large_image module. Please set "
"use_largeimage to True when instantiating this Slide."
)
source = large_image.getTileSource(self._path)
return source
@lazyproperty
def _wsi(self) -> Union[openslide.OpenSlide, openslide.ImageSlide]:
"""Open the slide and returns an openslide object
Returns
-------
slide : OpenSlide object
An OpenSlide object representing a whole-slide image.
"""
bad_format_error = (
"This slide may be corrupted or have a non-standard format not "
"handled by the openslide and PIL libraries. Consider setting "
"use_largeimage to True when instantiating this Slide."
)
try:
slide = openslide.open_slide(self._path)
except PIL.UnidentifiedImageError:
raise PIL.UnidentifiedImageError(bad_format_error)
except FileNotFoundError:
raise FileNotFoundError(
f"The wsi path resource doesn't exist: {self._path}"
)
except Exception as other_error:
raise HistolabException(other_error.__repr__() + f". {bad_format_error}")
return slide
[docs]class SlideSet:
"""Slideset object. It is considered a collection of Slides."""
def __init__(
self,
slides_path: str,
processed_path: str,
valid_extensions: List[str],
keep_slides: List[str] = None,
slide_kwargs: dict = None,
) -> None:
self._slides_path = slides_path
self._processed_path = processed_path
self._valid_extensions = valid_extensions
self._keep_slides = keep_slides
self._slide_kwargs = slide_kwargs if slide_kwargs is not None else {}
def __iter__(self) -> Iterator[Slide]:
"""Slides of the slideset
Returns
-------
generator of `Slide` objects.
"""
slide_names = [
name
for name in os.listdir(self._slides_path)
if (os.path.splitext(name)[1] in self._valid_extensions)
]
if self._keep_slides is not None:
slide_names = [name for name in slide_names if name in self._keep_slides]
return iter(
[
Slide(
os.path.join(self._slides_path, name),
self._processed_path,
**self._slide_kwargs,
)
for name in slide_names
]
)
def __getitem__(self, slide_id: int) -> Slide:
"""Slide object given the correspondent id"""
return list(self.__iter__())[slide_id]
def __len__(self) -> int:
"""Total number of the slides of this Slideset
Returns
-------
int
number of the Slides.
"""
return len(list(self.__iter__()))
# ---public interface methods and properties---
[docs] def scaled_images(
self, scale_factor: int = 32, n: int = 0
) -> List[PIL.Image.Image]:
"""Return rescaled images of the slides.
Parameters
----------
scale_factor : int, optional
Image scaling factor. Default is 32.
n : int, optional
First n slides in dataset folder to rescale. Default is 0, meaning that all
the slides will be returned.
Returns
-------
List[PIL.Image.Image]
List of rescaled images of the slides.
"""
n = self.total_slides if (n > self.total_slides or n == 0) else n
rescaled_imgs = []
for slide in list(self.__iter__())[:n]:
rescaled_imgs.append(slide.scaled_image(scale_factor))
return rescaled_imgs
[docs] def thumbnails(self, n: int = 0) -> List[PIL.Image.Image]:
"""Return slides thumbnails
Parameters
----------
n : int, optional
First n slides in dataset folder. Default is 0, meaning that the thumbnails
of all the slides will be returned.
Returns
-------
List[PIL.Image.Image]
List of slides thumbnails
"""
n = self.total_slides if (n > self.total_slides or n == 0) else n
thumbnails = []
for slide in list(self.__iter__())[:n]:
thumbnails.append(slide.thumbnail)
return thumbnails
@lazyproperty
def slides_stats(self) -> dict:
"""Statistics for the WSI collection, namely the number of available
slides; the slide with the maximum/minimum width; the slide with the
maximum/minimum height; the slide with the maximum/minimum size; the average
width/height/size of the slides.
Returns
----------
basic_stats: dict of slides stats e.g. min_size, avg_size, etc...
"""
return {
"no_of_slides": self.total_slides,
"max_width": self._max_width_slide,
"max_height": self._max_height_slide,
"max_size": self._max_size_slide,
"min_width": self._min_width_slide,
"min_height": self._min_height_slide,
"min_size": self._min_size_slide,
"avg_width": self._avg_width_slide,
"avg_height": self._avg_height_slide,
"avg_size": self._avg_size_slide,
}
@lazyproperty
def total_slides(self) -> int:
"""Number of slides within the slideset.
Returns
----------
n: int
Number of slides.
"""
return self.__len__()
# ---private interface methods and properties---
@lazyproperty
def _avg_width_slide(self) -> float:
return sum(d["width"] for d in self._slides_dimensions) / self.total_slides
@lazyproperty
def _avg_height_slide(self) -> float:
return sum(d["height"] for d in self._slides_dimensions) / self.total_slides
@lazyproperty
def _avg_size_slide(self) -> float:
return sum(d["size"] for d in self._slides_dimensions) / self.total_slides
@lazyproperty
def _max_height_slide(self) -> dict:
max_height = max(self._slides_dimensions, key=lambda x: x["height"])
return {"slide": max_height["slide"], "height": max_height["height"]}
@lazyproperty
def _max_size_slide(self) -> dict:
max_size = max(self._slides_dimensions, key=lambda x: x["size"])
return {"slide": max_size["slide"], "size": max_size["size"]}
@lazyproperty
def _max_width_slide(self) -> dict:
max_width = max(self._slides_dimensions, key=lambda x: x["width"])
return {"slide": max_width["slide"], "width": max_width["width"]}
@lazyproperty
def _min_width_slide(self) -> dict:
min_width = min(self._slides_dimensions, key=lambda x: x["width"])
return {"slide": min_width["slide"], "width": min_width["width"]}
@lazyproperty
def _min_height_slide(self) -> dict:
min_height = min(self._slides_dimensions, key=lambda x: x["height"])
return {"slide": min_height["slide"], "height": min_height["height"]}
@lazyproperty
def _min_size_slide(self) -> dict:
min_size = min(self._slides_dimensions, key=lambda x: x["size"])
return {"slide": min_size["slide"], "size": min_size["size"]}
@lazyproperty
def _slides_dimensions(self) -> List[dict]:
return [
{
"slide": slide.name,
"width": slide.dimensions[0],
"height": slide.dimensions[1],
"size": slide.dimensions[0] * slide.dimensions[1],
}
for slide in list(self.__iter__())
]
@lazyproperty
def _slides_dimensions_list(self):
return [slide.dimensions for slide in list(self.__iter__())]