From df7f62bc695b9951f27f9a1cdcf3cd398ec0fc1c Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 12:39:31 +0100 Subject: [PATCH 1/8] BROKEN add test for multiplexing beyond-parsing workflow (ref #199) --- tests/test_workflows_multiplexing.py | 79 ++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/test_workflows_multiplexing.py b/tests/test_workflows_multiplexing.py index 6781141b0..cfe9e9e51 100644 --- a/tests/test_workflows_multiplexing.py +++ b/tests/test_workflows_multiplexing.py @@ -21,8 +21,15 @@ from fractal_tasks_core.create_zarr_structure_multiplex import ( create_zarr_structure_multiplex, ) +from fractal_tasks_core.replicate_zarr_structure import ( + replicate_zarr_structure, +) # noqa from fractal_tasks_core.yokogawa_to_zarr import yokogawa_to_zarr +# from fractal_tasks_core.maximum_intensity_projection import ( +# maximum_intensity_projection, +# ) # noqa + single_cycle_channel_parameters = { "A01_C01": { @@ -99,3 +106,75 @@ def test_workflow_multiplexing( check_file_number(zarr_path=image_zarr_0) check_file_number(zarr_path=image_zarr_1) + + +def test_workflow_multiplexing_MIP( + tmp_path: Path, zenodo_images_multiplex: Sequence[Path] +): + + # Init + img_paths = [ + cycle_folder / "*.png" for cycle_folder in zenodo_images_multiplex + ] + zarr_path = tmp_path / "tmp_out/*.zarr" + zarr_path_mip = tmp_path / "tmp_out_mip/*.zarr" + metadata = {} + + # Create zarr structure + debug(img_paths) + metadata_update = create_zarr_structure_multiplex( + input_paths=img_paths, + output_path=zarr_path, + channel_parameters=channel_parameters, + num_levels=num_levels, + coarsening_xy=coarsening_xy, + metadata_table="mrf_mlf", + ) + metadata.update(metadata_update) + debug(metadata) + + # Yokogawa to zarr + for component in metadata["image"]: + yokogawa_to_zarr( + input_paths=[zarr_path], + output_path=zarr_path, + metadata=metadata, + component=component, + ) + debug(metadata) 
+ + # Replicate + metadata_update = replicate_zarr_structure( + input_paths=[zarr_path], + output_path=zarr_path_mip, + metadata=metadata, + project_to_2D=True, + suffix="mip", + ) + metadata.update(metadata_update) + debug(metadata) + + """ + # MIP + for component in metadata["image"]: + maximum_intensity_projection( + input_paths=[zarr_path_mip], + output_path=zarr_path_mip, + metadata=metadata, + component=component, + ) + + + # OME-NGFF JSON validation + image_zarr_0 = Path(zarr_path.parent / metadata["image"][0]) + image_zarr_1 = Path(zarr_path.parent / metadata["image"][1]) + well_zarr = image_zarr_0.parent + plate_zarr = image_zarr_0.parents[2] + validate_schema(path=str(image_zarr_0), type="image") + validate_schema(path=str(image_zarr_1), type="image") + validate_schema(path=str(well_zarr), type="well") + validate_schema(path=str(plate_zarr), type="plate") + + check_file_number(zarr_path=image_zarr_0) + check_file_number(zarr_path=image_zarr_1) + """ From 27b933238b9ab9406e9b50be10908c1b1cb46978 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 12:41:18 +0100 Subject: [PATCH 2/8] Add f to f-string --- fractal_tasks_core/replicate_zarr_structure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fractal_tasks_core/replicate_zarr_structure.py b/fractal_tasks_core/replicate_zarr_structure.py index c589e7d73..b4e856746 100644 --- a/fractal_tasks_core/replicate_zarr_structure.py +++ b/fractal_tasks_core/replicate_zarr_structure.py @@ -76,7 +76,7 @@ def replicate_zarr_structure( list_plates = [ p.as_posix() for p in in_path.parent.resolve().glob(in_path.name) ] - logger.info("{list_plates=}") + logger.info(f"{list_plates=}") meta_update: Dict[str, Any] = {"replicate_zarr": {}} meta_update["replicate_zarr"]["suffix"] = suffix From 84fe50e7e2fac03430a778a0a0adbd36b687cab5 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 
15:23:10 +0100 Subject: [PATCH 3/8] Refactor replicate_task_structure using zarr attributes (close #224) --- .../replicate_zarr_structure.py | 168 ++++++++---------- 1 file changed, 77 insertions(+), 91 deletions(-) diff --git a/fractal_tasks_core/replicate_zarr_structure.py b/fractal_tasks_core/replicate_zarr_structure.py index b4e856746..ca1073740 100644 --- a/fractal_tasks_core/replicate_zarr_structure.py +++ b/fractal_tasks_core/replicate_zarr_structure.py @@ -14,9 +14,7 @@ Task that copies the structure of an OME-NGFF zarr array to a new one """ -import json import logging -from glob import glob from pathlib import Path from typing import Any from typing import Dict @@ -45,13 +43,27 @@ def replicate_zarr_structure( output_path: Path, metadata: Dict[str, Any], project_to_2D: bool = True, - suffix: str = None, + suffix: Optional[str] = None, + ROI_table_names: Optional[Sequence[str]] = None, ) -> Dict[str, Any]: """ Duplicate an input zarr structure to a new path. - If project_to_2D=True, adapt it to host a maximum-intensity projection - (that is, with a single Z layer). + + + If ``project_to_2D=True``, adapt the new to host a maximum-intensity + projection (that is, with a single Z layer). + + + More detailed description (TODO) + 1. For each plate zarr, create a new one. + 2. For each well (in each plate), copy its zattrs over (to the new zarr). + 3. For each image (in each well), copy its zattrs over. + 4. Re-create relevant FOV/well ROI tables. 
+ + + + Ref for Attributes https://zarr.readthedocs.io/en/stable/api/attrs.html Examples input_paths[0] = /tmp/out/*.zarr (Path) @@ -62,6 +74,7 @@ def replicate_zarr_structure( :param metadata: TBD :param project_to_2D: TBD :param suffix: TBD + :param ROI_table_names: TBD """ # Preliminary check @@ -71,6 +84,9 @@ def replicate_zarr_structure( # FIXME create a standard suffix (with timestamp) raise NotImplementedError + if ROI_table_names is None: + ROI_table_names = ["FOV_ROI_table", "well_ROI_table"] + # List all plates in_path = input_paths[0] list_plates = [ @@ -95,96 +111,65 @@ def replicate_zarr_structure( logger.info(f"{zarrurl_new=}") logger.info(f"{meta_update=}") - # Identify properties of input zarr file - well_rows_columns = sorted( - [rc.split("/")[-2:] for rc in glob(zarrurl_old + "/*/*")] - ) - row_list = [ - well_row_column[0] for well_row_column in well_rows_columns - ] - col_list = [ - well_row_column[1] for well_row_column in well_rows_columns + # Replicate plate attrs + old_plate_group = zarr.open_group(zarrurl_old, mode="r") + new_plate_group = zarr.open(zarrurl_new) + new_plate_group.attrs.put(old_plate_group.attrs.asdict()) + + well_paths = [ + well["path"] for well in new_plate_group.attrs["plate"]["wells"] ] - row_list = sorted(list(set(row_list))) - col_list = sorted(list(set(col_list))) - - group_plate = zarr.group(zarrurl_new) - plate = zarrurl_old.replace(".zarr", "").split("/")[-1] - logger.info(f"{plate=}") - group_plate.attrs["plate"] = { - "acquisitions": [{"id": 0, "name": plate}], - "columns": [{"name": col} for col in col_list], - "rows": [{"name": row} for row in row_list], - "wells": [ - { - "path": well_row_column[0] + "/" + well_row_column[1], - "rowIndex": row_list.index(well_row_column[0]), - "columnIndex": col_list.index(well_row_column[1]), - } - for well_row_column in well_rows_columns - ], - } - - for row, column in well_rows_columns: - - # Find FOVs in COL/ROW/.zattrs - path_well_zattrs = 
f"{zarrurl_old}/{row}/{column}/.zattrs" - with open(path_well_zattrs) as well_zattrs_file: - well_zattrs = json.load(well_zattrs_file) - well_images = well_zattrs["well"]["images"] - list_FOVs = sorted([img["path"] for img in well_images]) - - # Create well group - group_well = group_plate.create_group(f"{row}/{column}/") - group_well.attrs["well"] = { - "images": well_images, - "version": __OME_NGFF_VERSION__, - } - - # Check that only the 0-th FOV exists - FOV = 0 - if len(list_FOVs) > 1: - raise Exception( - "ERROR: we are in a single-merged-FOV scheme, " - f"but there are {len(list_FOVs)} FOVs." - ) + logger.info(f"{well_paths=}") + for well_path in well_paths: - # Create FOV group - group_FOV = group_well.create_group(f"{FOV}/") - - # Copy .zattrs file at the COL/ROW/FOV level - path_FOV_zattrs = f"{zarrurl_old}/{row}/{column}/{FOV}/.zattrs" - with open(path_FOV_zattrs) as FOV_zattrs_file: - FOV_zattrs = json.load(FOV_zattrs_file) - for key in FOV_zattrs.keys(): - group_FOV.attrs[key] = FOV_zattrs[key] - - # Read FOV ROI table - FOV_ROI_table = ad.read_zarr( - f"{zarrurl_old}/{row}/{column}/0/tables/FOV_ROI_table" - ) - well_ROI_table = ad.read_zarr( - f"{zarrurl_old}/{row}/{column}/0/tables/well_ROI_table" - ) - - # Convert 3D FOVs to 2D - if project_to_2D: - # Read pixel sizes from zattrs file - pxl_sizes_zyx = extract_zyx_pixel_sizes( - path_FOV_zattrs, level=0 - ) - pxl_size_z = pxl_sizes_zyx[0] - FOV_ROI_table = convert_ROIs_from_3D_to_2D( - FOV_ROI_table, pxl_size_z + # Replicate well attrs + old_well_group = zarr.open_group(f"{zarrurl_old}/{well_path}") + new_well_group = zarr.group(f"{zarrurl_new}/{well_path}") + new_well_group.attrs.put(old_well_group.attrs.asdict()) + + image_paths = [ + image["path"] + for image in new_well_group.attrs["well"]["images"] + ] + logger.info(f"{image_paths=}") + + for image_path in image_paths: + + # Replicate image attrs + old_image_group = zarr.open_group( + f"{zarrurl_old}/{well_path}/{image_path}" ) - well_ROI_table 
= convert_ROIs_from_3D_to_2D( - well_ROI_table, pxl_size_z + new_image_group = zarr.group( + f"{zarrurl_new}/{well_path}/{image_path}" ) - - # Create table group and write new table - group_tables = group_FOV.create_group("tables/") - write_elem(group_tables, "FOV_ROI_table", FOV_ROI_table) - write_elem(group_tables, "well_ROI_table", well_ROI_table) + new_image_group.attrs.put(old_image_group.attrs.asdict()) + + # Extract pixel sizes, if needed + if ROI_table_names: + + group_tables = new_image_group.create_group("tables/") + if project_to_2D: + path_FOV_zattrs = ( + f"{zarrurl_old}/{well_path}/{image_path}/.zattrs" + ) + pxl_sizes_zyx = extract_zyx_pixel_sizes( + path_FOV_zattrs, level=0 + ) + pxl_size_z = pxl_sizes_zyx[0] + + # Copy the tables in ROI_table_names + for ROI_table_name in ROI_table_names: + ROI_table = ad.read_zarr( + f"{zarrurl_old}/{well_path}/{image_path}/" + f"tables/{ROI_table_name}" + ) + # Convert 3D FOVs to 2D + if project_to_2D: + ROI_table = convert_ROIs_from_3D_to_2D( + ROI_table, pxl_size_z + ) + # Write new table + write_elem(group_tables, ROI_table_name, ROI_table) for key in ["plate", "well", "image"]: meta_update[key] = [ @@ -205,6 +190,7 @@ class TaskArguments(BaseModel): metadata: Dict[str, Any] project_to_2D: bool = True suffix: Optional[str] = None + ROI_table_names: Optional[Sequence[str]] = None run_fractal_task( task_function=replicate_zarr_structure, From 22fa2e112dd14007d744016c6be3895312e8b217 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:41:26 +0100 Subject: [PATCH 4/8] Improve docstring --- fractal_tasks_core/replicate_zarr_structure.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fractal_tasks_core/replicate_zarr_structure.py b/fractal_tasks_core/replicate_zarr_structure.py index ca1073740..754724052 100644 --- a/fractal_tasks_core/replicate_zarr_structure.py +++ b/fractal_tasks_core/replicate_zarr_structure.py @@ -61,8 
+61,6 @@ def replicate_zarr_structure( 3. For each image (in each well), copy its zattrs over. 4. Re-create relevant FOV/well ROI tables. - - Ref for Attributes https://zarr.readthedocs.io/en/stable/api/attrs.html Examples @@ -74,8 +72,9 @@ def replicate_zarr_structure( :param metadata: TBD :param project_to_2D: TBD :param suffix: TBD - :param ROI_table_names: TBD - """ + :param ROI_table_names: List of ROI-table names to be copied. Note: this + may fail for non-ROI tables, if + ``project_to_2D=True``.""" # Preliminary check if len(input_paths) > 1: From 612412fa584ecd71533f4732198f32fea9a7097c Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:44:11 +0100 Subject: [PATCH 5/8] Clean up test_workflow_multiplexing_MIP --- tests/test_workflows_multiplexing.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/test_workflows_multiplexing.py b/tests/test_workflows_multiplexing.py index cfe9e9e51..55d80f06a 100644 --- a/tests/test_workflows_multiplexing.py +++ b/tests/test_workflows_multiplexing.py @@ -21,15 +21,14 @@ from fractal_tasks_core.create_zarr_structure_multiplex import ( create_zarr_structure_multiplex, ) +from fractal_tasks_core.maximum_intensity_projection import ( + maximum_intensity_projection, +) # noqa from fractal_tasks_core.replicate_zarr_structure import ( replicate_zarr_structure, ) # noqa from fractal_tasks_core.yokogawa_to_zarr import yokogawa_to_zarr -# from fractal_tasks_core.maximum_intensity_projection import ( -# maximum_intensity_projection, -# ) # noqa - single_cycle_channel_parameters = { "A01_C01": { @@ -154,7 +153,6 @@ def test_workflow_multiplexing_MIP( metadata.update(metadata_update) debug(metadata) - """ # MIP for component in metadata["image"]: maximum_intensity_projection( @@ -164,10 +162,9 @@ def test_workflow_multiplexing_MIP( component=component, ) - # OME-NGFF JSON validation - image_zarr_0 = Path(zarr_path.parent / 
metadata["image"][0]) - image_zarr_1 = Path(zarr_path.parent / metadata["image"][1]) + image_zarr_0 = Path(zarr_path_mip.parent / metadata["image"][0]) + image_zarr_1 = Path(zarr_path_mip.parent / metadata["image"][1]) well_zarr = image_zarr_0.parent plate_zarr = image_zarr_0.parents[2] validate_schema(path=str(image_zarr_0), type="image") @@ -177,4 +174,3 @@ def test_workflow_multiplexing_MIP( check_file_number(zarr_path=image_zarr_0) check_file_number(zarr_path=image_zarr_1) - """ From 9ea2974fab8f38a0d97f2768372de6e322a6503f Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 15:45:19 +0100 Subject: [PATCH 6/8] Update changelog --- docs/source/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index e2ec63544..1e5ee798b 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -13,6 +13,7 @@ Other changes ~~~~~~~~~~~~~ * Skip image files if filename is not parsable (#219). * Preserve order of ``input_paths`` for multiplexing subfolders (#222). +* Major refactor of ``replicate_zarr_structure``, to support multiplexing zarr files (#223). 0.4.5 From dd162b7b404055f8ce5353581a4397d1d01b81d4 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 16:26:33 +0100 Subject: [PATCH 7/8] Fix detail in changelog --- docs/source/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 1e5ee798b..19e03e422 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -13,7 +13,7 @@ Other changes ~~~~~~~~~~~~~ * Skip image files if filename is not parsable (#219). * Preserve order of ``input_paths`` for multiplexing subfolders (#222). -* Major refactor of ``replicate_zarr_structure``, to support multiplexing zarr files (#223). 
+* Major refactor of ``replicate_zarr_structure``, also enabling support for zarr files with multiple images (#223). 0.4.5 From 368c8f2d57582f49a1e81246bb3aa7520f3c78e0 Mon Sep 17 00:00:00 2001 From: Tommaso Comparin <3862206+tcompa@users.noreply.github.com> Date: Thu, 24 Nov 2022 16:29:19 +0100 Subject: [PATCH 8/8] Improve docstring --- .../replicate_zarr_structure.py | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/fractal_tasks_core/replicate_zarr_structure.py b/fractal_tasks_core/replicate_zarr_structure.py index 754724052..6b1922564 100644 --- a/fractal_tasks_core/replicate_zarr_structure.py +++ b/fractal_tasks_core/replicate_zarr_structure.py @@ -50,31 +50,38 @@ def replicate_zarr_structure( """ Duplicate an input zarr structure to a new path. + More detailed description: - If ``project_to_2D=True``, adapt the new to host a maximum-intensity - projection (that is, with a single Z layer). + 1. For each plate, create a new zarr group with the same attributes as the + original one. + 2. For each well (in each plate), create a new zarr subgroup with the same + attributes as the original one. + 3. For each image (in each well), create a new zarr subgroup with the same + attributes as the original one. + 4. For each image (in each well), copy the relevant AnnData tables from the + original source. + Note: this task makes use of methods from the ``Attributes`` class, see + https://zarr.readthedocs.io/en/stable/api/attrs.html. - More detailed description (TODO) - 1. For each plate zarr, create a new one. - 2. For each well (in each plate), copy its zattrs over (to the new zarr). - 3. For each image (in each well), copy its zattrs over. - 4. Re-create relevant FOV/well ROI tables. 
- - Ref for Attributes https://zarr.readthedocs.io/en/stable/api/attrs.html - - Examples - input_paths[0] = /tmp/out/*.zarr (Path) - output_path = /tmp/out_mip/*.zarr (Path) + Examples of some inputs:: + input_paths = ["/tmp/out/*.zarr"] + output_path = "/tmp/out_mip/*.zarr" :param input_paths: TBD :param output_path: TBD :param metadata: TBD - :param project_to_2D: TBD - :param suffix: TBD - :param ROI_table_names: List of ROI-table names to be copied. Note: this - may fail for non-ROI tables, if - ``project_to_2D=True``.""" + :param project_to_2D: If ``True``, apply a 3D->2D projection to the ROI + tables that are copied to the new zarr. + :param suffix: The suffix that is used to transform ``plate.zarr`` into + ``plate_suffix.zarr``. Note that ``None`` is not currently + supported. + + :param ROI_table_names: List of ROI-table names to be copied. If ``None``, + it is replaced by ``["FOV_ROI_table", + "well_ROI_table"]``. Note: copying non-ROI tables + may fail if ``project_to_2D=True``. + """ # Preliminary check if len(input_paths) > 1: