Source code for cellmap_segmentation_challenge.utils.crops

from dataclasses import dataclass
import os

import fsspec
from upath import UPath
from typing_extensions import Self
from yarl import URL


# Manifest locations. Each one can be overridden through the named
# environment variable; otherwise the copy on the repository's main branch
# is used.
TEST_CROP_MANIFEST_URL = os.getenv(
    "CSC_TEST_CROP_MANIFEST_URL",
    default="https://raw.githubusercontent.com/janelia-cellmap/cellmap-segmentation-challenge/refs/heads/main/src/cellmap_segmentation_challenge/utils/test_crop_manifest.csv",
)

MANIFEST_URL = os.getenv(
    "CSC_FETCH_DATA_MANIFEST_URL",
    default="https://raw.githubusercontent.com/janelia-cellmap/cellmap-segmentation-challenge/refs/heads/main/src/cellmap_segmentation_challenge/utils/manifest.csv",
)

ZIP_MANIFEST_URL = os.getenv(
    "CSC_FETCH_ZIP_DATA_MANIFEST_URL",
    default="https://raw.githubusercontent.com/janelia-cellmap/cellmap-segmentation-challenge/refs/heads/main/src/cellmap_segmentation_challenge/utils/zip_manifest.csv",
)


def fetch_manifest(
    url: str | URL,
    file_name: str,
    object: Self,
) -> tuple[str, ...]:
    """
    Download a CSV manifest, cache it next to this module, and parse its rows.

    The manifest is fetched from ``url`` and written to ``file_name`` in the
    directory containing this module. If the download (or the write) fails
    and a previously saved copy exists, that copy is used instead.

    Parameters
    ----------
    url : str or yarl.URL
        The URL to the manifest file.
    file_name : str
        Name of the local copy, relative to this module's directory.
    object : type
        Row class; must provide a ``from_csv_row(row: str)`` constructor.
        (The name shadows the builtin but is kept for caller compatibility.)

    Returns
    -------
    tuple
        One ``object.from_csv_row(row)`` result per non-blank data row. The
        first line of the file is treated as a header and skipped.

    Raises
    ------
    FileNotFoundError
        If the download fails and no local copy exists.
    """
    local_path = UPath(__file__).parent / file_name

    # Attempt to download the manifest file
    try:
        # Get the filesystem and path
        fs, path = fsspec.url_to_fs(str(url))

        # Read the whole remote file BEFORE opening the local copy for
        # writing, so a failed or partial download cannot truncate the cached
        # manifest that the fallback below relies on.
        with fs.open(path, "rb") as src:
            payload = src.read()
        with open(local_path, "wb") as dst:
            dst.write(payload)
    except Exception as err:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate.
        if local_path.exists():
            print(
                f"Failed to download manifest file from {url}, using local file {local_path}."
            )
        else:
            raise FileNotFoundError(
                f"Failed to download manifest file from {url} and no local file exists."
            ) from err

    fs, path = fsspec.url_to_fs(str(local_path))
    _header, *rows = fs.cat_file(path).decode().splitlines()
    # Skip blank lines so stray empty rows do not break the row parser.
    return tuple(object.from_csv_row(row) for row in rows if row.strip())
[docs] @dataclass class TestCropRow: """A dataclass representing a row in the test crop manifest file.""" id: int dataset: str class_label: str voxel_size: tuple[float, ...] translation: tuple[float, ...] shape: tuple[int, ...]
[docs] @classmethod def from_csv_row(cls, row: str) -> Self: """Create a CropRow object from a CSV row.""" id, dataset, class_label, voxel_size, translation, shape = row.split(",") return cls( int(id), dataset, class_label, tuple(map(float, voxel_size.strip("[]").split(";"))), tuple(map(float, translation.strip("[]").split(";"))), tuple(map(int, shape.strip("[]").split(";"))), )
def fetch_test_crop_manifest(
    url: str | URL = TEST_CROP_MANIFEST_URL,
) -> tuple[TestCropRow, ...]:
    """
    Load the test crop manifest and parse it into TestCropRow objects.

    Parameters
    ----------
    url : str or yarl.URL
        Location of the manifest file. Defaults to ``TEST_CROP_MANIFEST_URL``.

    Returns
    -------
    tuple[TestCropRow, ...]
        One TestCropRow per data row of the manifest.
    """
    return fetch_manifest(
        url=url,
        file_name="test_crop_manifest.csv",
        object=TestCropRow,
    )
@dataclass
class ZipDatasetRow:
    """
    One parsed row of the zip dataset manifest file.

    Attributes
    ----------
    all_res : bool
        Flag parsed from the first column of the row.
    padding : int
        Integer parsed from the second column of the row.
    name : str
        Third column of the row, kept as text.
    url : yarl.URL
        URL parsed from the fourth column of the row.
    """

    all_res: bool
    padding: int
    name: str
    url: URL

    @classmethod
    def from_csv_row(cls, row: str) -> Self:
        """
        Build a ZipDatasetRow from one comma-separated manifest line.

        The line must hold exactly four fields. ``all_res`` is true only
        when the first field is the exact text ``True``.
        """
        all_res_text, padding_text, name, url_text = row.split(",")
        return cls(
            all_res=(all_res_text == "True"),
            padding=int(padding_text),
            name=name,
            url=URL(url_text),
        )
def fetch_zip_manifest(url: str | URL = ZIP_MANIFEST_URL) -> tuple[ZipDatasetRow, ...]:
    """
    Load the zip dataset manifest and parse it into ZipDatasetRow objects.

    Parameters
    ----------
    url : str or yarl.URL
        Location of the manifest file. Defaults to ``ZIP_MANIFEST_URL``.

    Returns
    -------
    tuple[ZipDatasetRow, ...]
        One ZipDatasetRow per data row of the manifest.
    """
    return fetch_manifest(
        url=url,
        file_name="zip_manifest.csv",
        object=ZipDatasetRow,
    )
@dataclass
class CropRow:
    """
    One parsed row of the crop manifest file.

    Attributes
    ----------
    id : int
        Crop identifier.
    dataset : str
        Name of the dataset the crop belongs to.
    alignment : str
        Alignment column of the row, kept as text.
    gt_source : yarl.URL or TestCropRow
        A URL when the row is parsed from CSV; the annotation also allows a
        TestCropRow.
    em_url : yarl.URL
        URL parsed from the last column of the row.
    """

    id: int
    dataset: str
    alignment: str
    gt_source: URL | TestCropRow
    em_url: URL

    @classmethod
    def from_csv_row(cls, row: str) -> Self:
        """
        Build a CropRow from one comma-separated manifest line.

        The line must hold exactly five fields; the last two are parsed as
        URLs.
        """
        crop_id, dataset, alignment, gt_text, em_text = row.split(",")
        return cls(
            id=int(crop_id),
            dataset=dataset,
            alignment=alignment,
            gt_source=URL(gt_text),
            em_url=URL(em_text),
        )
def fetch_crop_manifest(url: str | URL = MANIFEST_URL) -> tuple[CropRow, ...]:
    """
    Load the crop manifest and parse it into CropRow objects.

    Parameters
    ----------
    url : str or yarl.URL
        Location of the manifest file. Defaults to ``MANIFEST_URL``.

    Returns
    -------
    tuple[CropRow, ...]
        One CropRow per data row of the manifest.
    """
    return fetch_manifest(
        url=url,
        file_name="manifest.csv",
        object=CropRow,
    )
# Test crop manifest, fetched once when this module is imported.
TEST_CROPS = fetch_test_crop_manifest()

# The same rows, indexed by (crop id, class label).
TEST_CROPS_DICT = dict(
    ((test_row.id, test_row.class_label), test_row) for test_row in TEST_CROPS
)
def get_test_crops() -> tuple[CropRow, ...]:
    """
    Build one CropRow per test crop id from the test and crop manifests.

    Both manifests are fetched on every call. Rows of the test crop manifest
    that share an id are merged into a single TestCropRow with class label
    ``"test"`` that keeps, per axis, the smallest voxel size (highest
    resolution), the smallest translation, and the largest shape. An id that
    appears only once keeps its manifest row unchanged. The alignment and EM
    URL of each result are looked up by dataset name in the crop manifest.

    Returns
    -------
    tuple[CropRow, ...]
        One CropRow per distinct test crop id, in order of first appearance,
        with the (possibly merged) TestCropRow stored as ``gt_source``.

    Raises
    ------
    KeyError
        If a test crop's dataset has no entry in the crop manifest.
    """
    # Per-dataset EM location and alignment, taken from the crop manifest.
    dataset_em_meta = {
        crop.dataset: {"em_url": crop.em_url, "alignment": crop.alignment}
        for crop in fetch_crop_manifest()
    }

    merged_by_id: dict[int, TestCropRow] = {}
    for test_crop in fetch_test_crop_manifest():
        listed = merged_by_id.get(test_crop.id)
        if listed is None:
            merged_by_id[test_crop.id] = test_crop
            continue
        # Make sure metadata for highest resolution, smallest offset, and
        # largest shape is kept.
        merged_by_id[test_crop.id] = TestCropRow(
            test_crop.id,
            test_crop.dataset,
            "test",
            tuple(map(min, test_crop.voxel_size, listed.voxel_size)),
            tuple(map(min, test_crop.translation, listed.translation)),
            tuple(map(max, test_crop.shape, listed.shape)),
        )

    return tuple(
        CropRow(
            crop_id,
            merged.dataset,
            dataset_em_meta[merged.dataset]["alignment"],
            merged,
            dataset_em_meta[merged.dataset]["em_url"],
        )
        for crop_id, merged in merged_by_id.items()
    )