Skip to content

Commit

Permalink
Merge pull request #223 from fractal-analytics-platform/multiplexing_…
Browse files Browse the repository at this point in the history
…workflows

MIP multiplexing workflows
  • Loading branch information
tcompa authored Nov 24, 2022
2 parents 2c20a2a + 368c8f2 commit 0fa3e02
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 97 deletions.
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Other changes
~~~~~~~~~~~~~
* Skip image files if filename is not parsable (#219).
* Preserve order of ``input_paths`` for multiplexing subfolders (#222).
* Major refactor of ``replicate_zarr_structure``, also enabling support for zarr files with multiple images (#223).


0.4.5
Expand Down
186 changes: 89 additions & 97 deletions fractal_tasks_core/replicate_zarr_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
Task that copies the structure of an OME-NGFF zarr array to a new one
"""
import json
import logging
from glob import glob
from pathlib import Path
from typing import Any
from typing import Dict
Expand Down Expand Up @@ -45,23 +43,44 @@ def replicate_zarr_structure(
output_path: Path,
metadata: Dict[str, Any],
project_to_2D: bool = True,
suffix: str = None,
suffix: Optional[str] = None,
ROI_table_names: Optional[Sequence[str]] = None,
) -> Dict[str, Any]:

"""
Duplicate an input zarr structure to a new path.
If project_to_2D=True, adapt it to host a maximum-intensity projection
(that is, with a single Z layer).
Examples
input_paths[0] = /tmp/out/*.zarr (Path)
output_path = /tmp/out_mip/*.zarr (Path)
More detailed description:
1. For each plate, create a new zarr group with the same attributes as the
original one.
2. For each well (in each plate), create a new zarr subgroup with the same
attributes as the original one.
3. For each image (in each well), create a new zarr subgroup with the same
attributes as the original one.
4. For each image (in each well), copy the relevant AnnData tables from the
original source.
Note: this task makes use of methods from the ``Attributes`` class, see
https://zarr.readthedocs.io/en/stable/api/attrs.html.
Examples of some inputs::
input_paths = ["/tmp/out/*.zarr"]
output_path = "/tmp/out_mip/*.zarr"
:param input_paths: TBD
:param output_path: TBD
:param metadata: TBD
:param project_to_2D: TBD
:param suffix: TBD
:param project_to_2D: If ``True``, apply a 3D->2D projection to the ROI
tables that are copied to the new zarr.
:param suffix: The suffix that is used to transform ``plate.zarr`` into
``plate_suffix.zarr``. Note that ``None`` is not currently
supported.
:param ROI_table_names: List of ROI-table names to be copied. If ``None``,
it is replaced by ``["FOV_ROI_table",
"well_ROI_table"]``. Note: copying non-ROI tables
may fail if ``project_to_2D=True``.
"""

# Preliminary check
Expand All @@ -71,12 +90,15 @@ def replicate_zarr_structure(
# FIXME create a standard suffix (with timestamp)
raise NotImplementedError

if ROI_table_names is None:
ROI_table_names = ["FOV_ROI_table", "well_ROI_table"]

# List all plates
in_path = input_paths[0]
list_plates = [
p.as_posix() for p in in_path.parent.resolve().glob(in_path.name)
]
logger.info("{list_plates=}")
logger.info(f"{list_plates=}")

meta_update: Dict[str, Any] = {"replicate_zarr": {}}
meta_update["replicate_zarr"]["suffix"] = suffix
Expand All @@ -95,96 +117,65 @@ def replicate_zarr_structure(
logger.info(f"{zarrurl_new=}")
logger.info(f"{meta_update=}")

# Identify properties of input zarr file
well_rows_columns = sorted(
[rc.split("/")[-2:] for rc in glob(zarrurl_old + "/*/*")]
)
row_list = [
well_row_column[0] for well_row_column in well_rows_columns
]
col_list = [
well_row_column[1] for well_row_column in well_rows_columns
# Replicate plate attrs
old_plate_group = zarr.open_group(zarrurl_old, mode="r")
new_plate_group = zarr.open(zarrurl_new)
new_plate_group.attrs.put(old_plate_group.attrs.asdict())

well_paths = [
well["path"] for well in new_plate_group.attrs["plate"]["wells"]
]
row_list = sorted(list(set(row_list)))
col_list = sorted(list(set(col_list)))

group_plate = zarr.group(zarrurl_new)
plate = zarrurl_old.replace(".zarr", "").split("/")[-1]
logger.info(f"{plate=}")
group_plate.attrs["plate"] = {
"acquisitions": [{"id": 0, "name": plate}],
"columns": [{"name": col} for col in col_list],
"rows": [{"name": row} for row in row_list],
"wells": [
{
"path": well_row_column[0] + "/" + well_row_column[1],
"rowIndex": row_list.index(well_row_column[0]),
"columnIndex": col_list.index(well_row_column[1]),
}
for well_row_column in well_rows_columns
],
}

for row, column in well_rows_columns:

# Find FOVs in COL/ROW/.zattrs
path_well_zattrs = f"{zarrurl_old}/{row}/{column}/.zattrs"
with open(path_well_zattrs) as well_zattrs_file:
well_zattrs = json.load(well_zattrs_file)
well_images = well_zattrs["well"]["images"]
list_FOVs = sorted([img["path"] for img in well_images])

# Create well group
group_well = group_plate.create_group(f"{row}/{column}/")
group_well.attrs["well"] = {
"images": well_images,
"version": __OME_NGFF_VERSION__,
}

# Check that only the 0-th FOV exists
FOV = 0
if len(list_FOVs) > 1:
raise Exception(
"ERROR: we are in a single-merged-FOV scheme, "
f"but there are {len(list_FOVs)} FOVs."
)
logger.info(f"{well_paths=}")
for well_path in well_paths:

# Create FOV group
group_FOV = group_well.create_group(f"{FOV}/")

# Copy .zattrs file at the COL/ROW/FOV level
path_FOV_zattrs = f"{zarrurl_old}/{row}/{column}/{FOV}/.zattrs"
with open(path_FOV_zattrs) as FOV_zattrs_file:
FOV_zattrs = json.load(FOV_zattrs_file)
for key in FOV_zattrs.keys():
group_FOV.attrs[key] = FOV_zattrs[key]

# Read FOV ROI table
FOV_ROI_table = ad.read_zarr(
f"{zarrurl_old}/{row}/{column}/0/tables/FOV_ROI_table"
)
well_ROI_table = ad.read_zarr(
f"{zarrurl_old}/{row}/{column}/0/tables/well_ROI_table"
)

# Convert 3D FOVs to 2D
if project_to_2D:
# Read pixel sizes from zattrs file
pxl_sizes_zyx = extract_zyx_pixel_sizes(
path_FOV_zattrs, level=0
)
pxl_size_z = pxl_sizes_zyx[0]
FOV_ROI_table = convert_ROIs_from_3D_to_2D(
FOV_ROI_table, pxl_size_z
# Replicate well attrs
old_well_group = zarr.open_group(f"{zarrurl_old}/{well_path}")
new_well_group = zarr.group(f"{zarrurl_new}/{well_path}")
new_well_group.attrs.put(old_well_group.attrs.asdict())

image_paths = [
image["path"]
for image in new_well_group.attrs["well"]["images"]
]
logger.info(f"{image_paths=}")

for image_path in image_paths:

# Replicate image attrs
old_image_group = zarr.open_group(
f"{zarrurl_old}/{well_path}/{image_path}"
)
well_ROI_table = convert_ROIs_from_3D_to_2D(
well_ROI_table, pxl_size_z
new_image_group = zarr.group(
f"{zarrurl_new}/{well_path}/{image_path}"
)

# Create table group and write new table
group_tables = group_FOV.create_group("tables/")
write_elem(group_tables, "FOV_ROI_table", FOV_ROI_table)
write_elem(group_tables, "well_ROI_table", well_ROI_table)
new_image_group.attrs.put(old_image_group.attrs.asdict())

# Extract pixel sizes, if needed
if ROI_table_names:

group_tables = new_image_group.create_group("tables/")
if project_to_2D:
path_FOV_zattrs = (
f"{zarrurl_old}/{well_path}/{image_path}/.zattrs"
)
pxl_sizes_zyx = extract_zyx_pixel_sizes(
path_FOV_zattrs, level=0
)
pxl_size_z = pxl_sizes_zyx[0]

# Copy the tables in ROI_table_names
for ROI_table_name in ROI_table_names:
ROI_table = ad.read_zarr(
f"{zarrurl_old}/{well_path}/{image_path}/"
f"tables/{ROI_table_name}"
)
# Convert 3D FOVs to 2D
if project_to_2D:
ROI_table = convert_ROIs_from_3D_to_2D(
ROI_table, pxl_size_z
)
# Write new table
write_elem(group_tables, ROI_table_name, ROI_table)

for key in ["plate", "well", "image"]:
meta_update[key] = [
Expand All @@ -205,6 +196,7 @@ class TaskArguments(BaseModel):
metadata: Dict[str, Any]
project_to_2D: bool = True
suffix: Optional[str] = None
ROI_table_names: Optional[Sequence[str]] = None

run_fractal_task(
task_function=replicate_zarr_structure,
Expand Down
75 changes: 75 additions & 0 deletions tests/test_workflows_multiplexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
from fractal_tasks_core.create_zarr_structure_multiplex import (
create_zarr_structure_multiplex,
)
from fractal_tasks_core.maximum_intensity_projection import (
maximum_intensity_projection,
) # noqa
from fractal_tasks_core.replicate_zarr_structure import (
replicate_zarr_structure,
) # noqa
from fractal_tasks_core.yokogawa_to_zarr import yokogawa_to_zarr


Expand Down Expand Up @@ -99,3 +105,72 @@ def test_workflow_multiplexing(

check_file_number(zarr_path=image_zarr_0)
check_file_number(zarr_path=image_zarr_1)


def test_workflow_multiplexing_MIP(
    tmp_path: Path, zenodo_images_multiplex: Sequence[Path]
):
    """
    Run the multiplexing workflow with a maximum-intensity-projection step.

    The steps are: create the multiplexing zarr structure, convert the
    Yokogawa images into it, replicate the structure (2D projection of ROI
    tables, ``suffix="mip"``), run the MIP task on every image component,
    and finally validate the resulting image/well/plate groups and check
    their file numbers.

    :param tmp_path: Temporary directory hosting both the original and the
                     MIP zarr outputs.
    :param zenodo_images_multiplex: Folders with the input images, one per
                                    multiplexing cycle.
    """

    # Input globs (one per cycle folder) and output zarr patterns
    img_paths = [folder / "*.png" for folder in zenodo_images_multiplex]
    zarr_path = tmp_path / "tmp_out/*.zarr"
    zarr_path_mip = tmp_path / "tmp_out_mip/*.zarr"
    metadata = {}

    # Step 1: create the zarr structure
    debug(img_paths)
    creation_update = create_zarr_structure_multiplex(
        input_paths=img_paths,
        output_path=zarr_path,
        channel_parameters=channel_parameters,
        num_levels=num_levels,
        coarsening_xy=coarsening_xy,
        metadata_table="mrf_mlf",
    )
    metadata.update(creation_update)
    debug(metadata)

    # Step 2: convert the Yokogawa images, one image component at a time
    for component in metadata["image"]:
        yokogawa_to_zarr(
            input_paths=[zarr_path],
            output_path=zarr_path,
            metadata=metadata,
            component=component,
        )
    debug(metadata)

    # Step 3: replicate the zarr structure into the MIP output
    replication_update = replicate_zarr_structure(
        input_paths=[zarr_path],
        output_path=zarr_path_mip,
        metadata=metadata,
        project_to_2D=True,
        suffix="mip",
    )
    metadata.update(replication_update)
    debug(metadata)

    # Step 4: maximum-intensity projection of every image component
    for component in metadata["image"]:
        maximum_intensity_projection(
            input_paths=[zarr_path_mip],
            output_path=zarr_path_mip,
            metadata=metadata,
            component=component,
        )

    # Step 5: OME-NGFF JSON validation of the first two images, their well
    # and their plate (well/plate are derived from the first image path)
    mip_root = zarr_path_mip.parent
    image_zarr_0 = Path(mip_root / metadata["image"][0])
    image_zarr_1 = Path(mip_root / metadata["image"][1])
    well_zarr = image_zarr_0.parent
    plate_zarr = image_zarr_0.parents[2]
    targets = (
        (image_zarr_0, "image"),
        (image_zarr_1, "image"),
        (well_zarr, "well"),
        (plate_zarr, "plate"),
    )
    for zarr_group, schema_type in targets:
        validate_schema(path=str(zarr_group), type=schema_type)

    # Step 6: check the number of files in both image groups
    for image_zarr in (image_zarr_0, image_zarr_1):
        check_file_number(zarr_path=image_zarr)

0 comments on commit 0fa3e02

Please sign in to comment.