From 0aaa505c6de9f7d3d59a9bbf565c26425152a8b7 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 2 Dec 2024 12:00:56 +0100 Subject: [PATCH 1/2] Use collection hids for collection inputs in dataset names --- lib/galaxy/tools/actions/__init__.py | 23 +++++++++++++++++--- lib/galaxy/tools/actions/model_operations.py | 2 +- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index 841eea988d49..db4d3f9dd2ba 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -453,7 +453,7 @@ def execute( ) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info) assert history # tell type system we've set history and it is no longer optional # Build name for output datasets based on tool name and input names - on_text = self._get_on_text(inp_data) + on_text = self._get_on_text(inp_data, inp_dataset_collections) # format='input" previously would give you a random extension from # the input extensions, now it should just give "input" as the output @@ -883,12 +883,29 @@ def _wrapped_params(self, trans, tool, incoming, input_datasets=None): wrapped_params = WrappedParameters(trans, tool, incoming, input_datasets=input_datasets) return wrapped_params - def _get_on_text(self, inp_data): + def _get_on_text(self, inp_data, inp_dataset_collections): input_names = [] + collection_hda_ids = set() + # output collection id and store included hda ids (to avoid extra inclusion in the list of datasets) + # two for loops because: + # - inp_dataset_collections stores the collections per input parameter in a dict + # - the values contain a list of pairs of DatasetCollectionsAssociations and a bool + for collections in inp_dataset_collections.values(): + for dataset_collection, _ in collections: + if getattr(dataset_collection, "hid", None): + collection_names.append(f"{dataset_collection.hid}") + if ( + getattr(dataset_collection, "collection", None) is None + or getattr(dataset_collection.collection, "elements", None) is None + ): + continue + for element in dataset_collection.collection.elements: + collection_hda_ids.add(element.hda_id) for data in reversed(list(inp_data.values())): + if getattr(data, "id", None) is None or data.id in collection_hda_ids: + continue if getattr(data, "hid", None): input_names.append(f"data {data.hid}") - return on_text_for_names(input_names) def _new_job_for_session(self, trans, tool, history) -> Tuple[model.Job, Optional[model.GalaxySession]]: diff --git a/lib/galaxy/tools/actions/model_operations.py b/lib/galaxy/tools/actions/model_operations.py index 9e78e122fc81..579897e08c64 100644 --- a/lib/galaxy/tools/actions/model_operations.py +++ b/lib/galaxy/tools/actions/model_operations.py @@ -83,7 +83,7 @@ def execute( ) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info) # Build name for output datasets based on tool name and input names - on_text = self._get_on_text(inp_data) + on_text = self._get_on_text(inp_data, inp_dataset_collections) # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed wrapped_params = self._wrapped_params(trans, tool, incoming) From d70fe5c74aab76c2e62e4f99aac78a50d214382d Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 2 Dec 2024 13:16:23 +0100 Subject: [PATCH 2/2] shorten datasets names output "data 1, 2, and 3" instead of "data 1, data 2, and data 3" --- lib/galaxy/tools/actions/__init__.py | 7 ++++--- lib/galaxy/tools/execute.py | 6 +++--- lib/galaxy/tools/execution_helpers.py | 23 ++++++++++++++++------ lib/galaxy_test/api/test_workflows.py | 6 +++--- lib/galaxy_test/selenium/test_tool_form.py | 2 +- test/unit/app/tools/test_actions.py | 14 ++++++------- 6 files changed, 35 insertions(+), 23 deletions(-) diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index db4d3f9dd2ba..6ca267931bd2 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -50,7 +50,7 @@ ) from galaxy.tools.execution_helpers import ( filter_output, - on_text_for_names, + on_text_for_dataset_and_collections, ToolExecutionCache, ) from galaxy.tools.parameters import update_dataset_ids @@ -885,6 +885,7 @@ def _wrapped_params(self, trans, tool, incoming, input_datasets=None): def _get_on_text(self, inp_data, inp_dataset_collections): input_names = [] + collection_names = [] collection_hda_ids = set() # output collection id and store included hda ids (to avoid extra inclusion in the list of datasets) # two for loops because: @@ -905,8 +906,8 @@ def _get_on_text(self, inp_data, inp_dataset_collections): if getattr(data, "id", None) is None or data.id in collection_hda_ids: continue if getattr(data, "hid", None): - input_names.append(f"data {data.hid}") - return on_text_for_names(input_names) + input_names.append(f"{data.hid}") + return on_text_for_dataset_and_collections(dataset_names=input_names, collection_names=collection_names) def _new_job_for_session(self, trans, tool, history) -> Tuple[model.Job, Optional[model.GalaxySession]]: job = trans.app.model.Job() diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py index 31369c948052..2d449581f340 100644 --- a/lib/galaxy/tools/execute.py +++ b/lib/galaxy/tools/execute.py @@ -33,7 +33,7 @@ from galaxy.tool_util.parser import ToolOutputCollectionPart from galaxy.tools.execution_helpers import ( filter_output, - on_text_for_names, + on_text_for_dataset_and_collections, ToolExecutionCache, ) from galaxy.tools.parameters.workflow_utils import is_runtime_value @@ -325,8 +325,8 @@ def record_error(self, error): def on_text(self) -> Optional[str]: collection_info = self.collection_info if self._on_text is None and collection_info is not None: - collection_names = ["collection %d" % c.hid for c in collection_info.collections.values()] - self._on_text = on_text_for_names(collection_names) + collection_names = ["%d" % c.hid for c in collection_info.collections.values()] + self._on_text = on_text_for_dataset_and_collections(collection_names=collection_names) return self._on_text diff --git a/lib/galaxy/tools/execution_helpers.py b/lib/galaxy/tools/execution_helpers.py index 76413a5f6370..94d4fd970b30 100644 --- a/lib/galaxy/tools/execution_helpers.py +++ b/lib/galaxy/tools/execution_helpers.py @@ -5,7 +5,10 @@ """ import logging -from typing import Collection +from typing import ( + Collection, + Optional, +) log = logging.getLogger(__name__) @@ -48,7 +51,9 @@ def filter_output(tool, output, incoming): return False -def on_text_for_names(input_names: Collection[str]) -> str: +def on_text_for_names(input_names: Optional[Collection[str]], prefix) -> str: + if input_names is None: + return "" # input_names may contain duplicates... this is because the first value in # multiple input dataset parameters will appear twice once as param_name # and once as param_name1. @@ -62,11 +67,17 @@ def on_text_for_names(input_names: Collection[str]) -> str: if len(input_names) == 0: on_text = "" elif len(input_names) == 1: - on_text = input_names[0] + on_text = prefix + " " + input_names[0] elif len(input_names) == 2: - on_text = "{} and {}".format(*input_names) + on_text = prefix + "s {} and {}".format(*input_names) elif len(input_names) == 3: - on_text = "{}, {}, and {}".format(*input_names) + on_text = prefix + "s {}, {}, and {}".format(*input_names) else: - on_text = "{}, {}, and others".format(*input_names[:2]) + on_text = prefix + "s {}, {}, and others".format(*input_names[:2]) return on_text + + +def on_text_for_dataset_and_collections( + dataset_names: Optional[Collection[str]] = None, collection_names: Optional[Collection[str]] = None +) -> str: + return on_text_for_names(collection_names, "collection") + on_text_for_names(dataset_names, "dataset") diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index f18cdd8a760d..ea8170a65b8d 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -5871,7 +5871,7 @@ def test_run_build_list_hide_collection_output(self): history_id, hid=3, wait=True, assert_ok=True ) assert details1["elements"][0]["object"]["visible"] is False - assert details1["name"] == "data 1 (as list)", details1 + assert details1["name"] == "dataset 1 (as list)", details1 assert details1["visible"] is False @skip_without_tool("__BUILD_LIST__") @@ -5906,7 +5906,7 @@ def test_run_build_list_delete_intermediate_collection_output(self): history_id, hid=3, wait=True, assert_ok=True ) assert details1["elements"][0]["object"]["visible"] is False - assert details1["name"] == "data 1 (as list)", details1 + assert details1["name"] == "dataset 1 (as list)", details1 # FIXME: this doesn't work because the workflow is still being scheduled # TODO: Implement a way to run PJAs that couldn't be run during/after the job # after the workflow has run to completion @@ -5951,7 +5951,7 @@ def test_run_build_list_change_datatype_collection_output(self): details1 = self.dataset_populator.get_history_collection_details( history_id, hid=3, wait=True, assert_ok=True ) - assert details1["name"] == "data 1 (as list)", details1 + assert details1["name"] == "dataset 1 (as list)", details1 assert details1["elements"][0]["object"]["visible"] is False assert details1["elements"][0]["object"]["file_ext"] == "txt" details2 = self.dataset_populator.get_history_collection_details( diff --git a/lib/galaxy_test/selenium/test_tool_form.py b/lib/galaxy_test/selenium/test_tool_form.py index 2d653ef04c4e..85de2d2fc25d 100644 --- a/lib/galaxy_test/selenium/test_tool_form.py +++ b/lib/galaxy_test/selenium/test_tool_form.py @@ -269,7 +269,7 @@ def test_run_data(self): latest_hda = self.latest_history_item() assert latest_hda["hid"] == 3 - assert latest_hda["name"] == "Select first on data 1" + assert latest_hda["name"] == "Select first on dataset 1" @selenium_test def test_bibtex_rendering(self): diff --git a/test/unit/app/tools/test_actions.py b/test/unit/app/tools/test_actions.py index 36056fbc61d9..72f36eea4496 100644 --- a/test/unit/app/tools/test_actions.py +++ b/test/unit/app/tools/test_actions.py @@ -52,15 +52,15 @@ def test_on_text_for_names(): def assert_on_text_is(expected, *names): - on_text = on_text_for_names(names) + on_text = on_text_for_names(names, "dataset") assert on_text == expected, f"Wrong on text value {on_text}, expected {expected}" - assert_on_text_is("data 1", "data 1") - assert_on_text_is("data 1 and data 2", "data 1", "data 2") - assert_on_text_is("data 1, data 2, and data 3", "data 1", "data 2", "data 3") - assert_on_text_is("data 1, data 2, and others", "data 1", "data 2", "data 3", "data 4") + assert_on_text_is("dataset 1", "1") + assert_on_text_is("datasets 1 and 2", "1", "2") + assert_on_text_is("datasets 1, 2, and 3", "1", "2", "3") + assert_on_text_is("datasets 1, 2, and others", "1", "2", "3", "4") - assert_on_text_is("data 1 and data 2", "data 1", "data 1", "data 2") + assert_on_text_is("datasets 1 and 2", "1", "1", "2") class TestDefaultToolAction(TestCase, tools_support.UsesTools): @@ -99,7 +99,7 @@ def test_output_label_data(self): tools_support.SIMPLE_CAT_TOOL_CONTENTS, incoming, ) - assert output["out1"].name == "Test Tool on data 2 and data 1" + assert output["out1"].name == "Test Tool on datasets 2 and 1" def test_object_store_ids(self): _, output = self._simple_execute(contents=TWO_OUTPUTS)