galaxyproject · bernt-matthias · Dec 2, 2024 · Dec 2, 2024
diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py
@@ -50,7 +50,7 @@
 )
 from galaxy.tools.execution_helpers import (
     filter_output,
-    on_text_for_names,
+    on_text_for_dataset_and_collections,
     ToolExecutionCache,
 )
 from galaxy.tools.parameters import update_dataset_ids
@@ -453,7 +453,7 @@ def execute(
         ) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)
         assert history  # tell type system we've set history and it is no longer optional
         # Build name for output datasets based on tool name and input names
-        on_text = self._get_on_text(inp_data)
+        on_text = self._get_on_text(inp_data, inp_dataset_collections)
 
         # format='input" previously would give you a random extension from
         # the input extensions, now it should just give "input" as the output
@@ -883,13 +883,31 @@ def _wrapped_params(self, trans, tool, incoming, input_datasets=None):
         wrapped_params = WrappedParameters(trans, tool, incoming, input_datasets=input_datasets)
         return wrapped_params
 
-    def _get_on_text(self, inp_data):
+    def _get_on_text(self, inp_data, inp_dataset_collections):
         input_names = []
+        collection_names = []
+        collection_hda_ids = set()
+        # output collection id and store included hda ids (to avoid extra inclusion in the list of datasets)
+        # two for loops because:
+        # - inp_dataset_collections stores the collections per input parameter in a dict
+        # - the values contain a list of pairs of DatasetCollectionsAssociations and a bool
+        for collections in inp_dataset_collections.values():
+            for dataset_collection, _ in collections:
+                if getattr(dataset_collection, "hid", None):
+                    collection_names.append(f"{dataset_collection.hid}")
+                if (
+                    getattr(dataset_collection, "collection", None) is None
+                    or getattr(dataset_collection.collection, "elements", None) is None
+                ):
+                    continue
+                for element in dataset_collection.collection.elements:
+                    collection_hda_ids.add(element.hda_id)
         for data in reversed(list(inp_data.values())):
+            if getattr(data, "id", None) is None or data.id in collection_hda_ids:
+                continue
             if getattr(data, "hid", None):
-                input_names.append(f"data {data.hid}")
-
-        return on_text_for_names(input_names)
+                input_names.append(f"{data.hid}")
+        return on_text_for_dataset_and_collections(dataset_names=input_names, collection_names=collection_names)
 
     def _new_job_for_session(self, trans, tool, history) -> Tuple[model.Job, Optional[model.GalaxySession]]:
         job = trans.app.model.Job()

diff --git a/lib/galaxy/tools/actions/model_operations.py b/lib/galaxy/tools/actions/model_operations.py
@@ -83,7 +83,7 @@ def execute(
         ) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)
 
         # Build name for output datasets based on tool name and input names
-        on_text = self._get_on_text(inp_data)
+        on_text = self._get_on_text(inp_data, inp_dataset_collections)
 
         # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
         wrapped_params = self._wrapped_params(trans, tool, incoming)

diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
@@ -33,7 +33,7 @@
 from galaxy.tool_util.parser import ToolOutputCollectionPart
 from galaxy.tools.execution_helpers import (
     filter_output,
-    on_text_for_names,
+    on_text_for_dataset_and_collections,
     ToolExecutionCache,
 )
 from galaxy.tools.parameters.workflow_utils import is_runtime_value
@@ -325,8 +325,8 @@ def record_error(self, error):
     def on_text(self) -> Optional[str]:
         collection_info = self.collection_info
         if self._on_text is None and collection_info is not None:
-            collection_names = ["collection %d" % c.hid for c in collection_info.collections.values()]
-            self._on_text = on_text_for_names(collection_names)
+            collection_names = ["%d" % c.hid for c in collection_info.collections.values()]
+            self._on_text = on_text_for_dataset_and_collections(collection_names=collection_names)
 
         return self._on_text
 

diff --git a/lib/galaxy/tools/execution_helpers.py b/lib/galaxy/tools/execution_helpers.py
@@ -5,7 +5,10 @@
 """
 
 import logging
-from typing import Collection
+from typing import (
+    Collection,
+    Optional,
+)
 
 log = logging.getLogger(__name__)
 
@@ -48,7 +51,9 @@ def filter_output(tool, output, incoming):
     return False
 
 
-def on_text_for_names(input_names: Collection[str]) -> str:
+def on_text_for_names(input_names: Optional[Collection[str]], prefix) -> str:
+    if input_names is None:
+        return ""
     # input_names may contain duplicates... this is because the first value in
     # multiple input dataset parameters will appear twice once as param_name
     # and once as param_name1.
@@ -62,11 +67,17 @@ def on_text_for_names(input_names: Collection[str]) -> str:
     if len(input_names) == 0:
         on_text = ""
     elif len(input_names) == 1:
-        on_text = input_names[0]
+        on_text = prefix + " " + input_names[0]
     elif len(input_names) == 2:
-        on_text = "{} and {}".format(*input_names)
+        on_text = prefix + "s {} and {}".format(*input_names)
     elif len(input_names) == 3:
-        on_text = "{}, {}, and {}".format(*input_names)
+        on_text = prefix + "s {}, {}, and {}".format(*input_names)
     else:
-        on_text = "{}, {}, and others".format(*input_names[:2])
+        on_text = prefix + "s {}, {}, and others".format(*input_names[:2])
     return on_text
+
+
+def on_text_for_dataset_and_collections(
+    dataset_names: Optional[Collection[str]] = None, collection_names: Optional[Collection[str]] = None
+) -> str:
+    return on_text_for_names(collection_names, "collection") + on_text_for_names(dataset_names, "dataset")
diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py
@@ -5871,7 +5871,7 @@ def test_run_build_list_hide_collection_output(self):
                 history_id, hid=3, wait=True, assert_ok=True
             )
             assert details1["elements"][0]["object"]["visible"] is False
-            assert details1["name"] == "data 1 (as list)", details1
+            assert details1["name"] == "dataset 1 (as list)", details1
             assert details1["visible"] is False
 
     @skip_without_tool("__BUILD_LIST__")
@@ -5906,7 +5906,7 @@ def test_run_build_list_delete_intermediate_collection_output(self):
                 history_id, hid=3, wait=True, assert_ok=True
             )
             assert details1["elements"][0]["object"]["visible"] is False
-            assert details1["name"] == "data 1 (as list)", details1
+            assert details1["name"] == "dataset 1 (as list)", details1
             # FIXME: this doesn't work because the workflow is still being scheduled
             # TODO: Implement a way to run PJAs that couldn't be run during/after the job
             # after the workflow has run to completion
@@ -5951,7 +5951,7 @@ def test_run_build_list_change_datatype_collection_output(self):
             details1 = self.dataset_populator.get_history_collection_details(
                 history_id, hid=3, wait=True, assert_ok=True
             )
-            assert details1["name"] == "data 1 (as list)", details1
+            assert details1["name"] == "dataset 1 (as list)", details1
             assert details1["elements"][0]["object"]["visible"] is False
             assert details1["elements"][0]["object"]["file_ext"] == "txt"
             details2 = self.dataset_populator.get_history_collection_details(

diff --git a/lib/galaxy_test/selenium/test_tool_form.py b/lib/galaxy_test/selenium/test_tool_form.py
@@ -269,7 +269,7 @@ def test_run_data(self):
 
         latest_hda = self.latest_history_item()
         assert latest_hda["hid"] == 3
-        assert latest_hda["name"] == "Select first on data 1"
+        assert latest_hda["name"] == "Select first on dataset 1"
 
     @selenium_test
     def test_bibtex_rendering(self):

diff --git a/test/unit/app/tools/test_actions.py b/test/unit/app/tools/test_actions.py
@@ -52,15 +52,15 @@
 
 def test_on_text_for_names():
     def assert_on_text_is(expected, *names):
-        on_text = on_text_for_names(names)
+        on_text = on_text_for_names(names, "dataset")
         assert on_text == expected, f"Wrong on text value {on_text}, expected {expected}"
 
-    assert_on_text_is("data 1", "data 1")
-    assert_on_text_is("data 1 and data 2", "data 1", "data 2")
-    assert_on_text_is("data 1, data 2, and data 3", "data 1", "data 2", "data 3")
-    assert_on_text_is("data 1, data 2, and others", "data 1", "data 2", "data 3", "data 4")
+    assert_on_text_is("dataset 1", "1")
+    assert_on_text_is("datasets 1 and 2", "1", "2")
+    assert_on_text_is("datasets 1, 2, and 3", "1", "2", "3")
+    assert_on_text_is("datasets 1, 2, and others", "1", "2", "3", "4")
 
-    assert_on_text_is("data 1 and data 2", "data 1", "data 1", "data 2")
+    assert_on_text_is("datasets 1 and 2", "1", "1", "2")
 
 
 class TestDefaultToolAction(TestCase, tools_support.UsesTools):
@@ -99,7 +99,7 @@ def test_output_label_data(self):
             tools_support.SIMPLE_CAT_TOOL_CONTENTS,
             incoming,
         )
-        assert output["out1"].name == "Test Tool on data 2 and data 1"
+        assert output["out1"].name == "Test Tool on datasets 2 and 1"
 
     def test_object_store_ids(self):
         _, output = self._simple_execute(contents=TWO_OUTPUTS)