Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use collection hids for collection inputs in dataset names #19229

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
)
from galaxy.tools.execution_helpers import (
filter_output,
on_text_for_names,
on_text_for_dataset_and_collections,
ToolExecutionCache,
)
from galaxy.tools.parameters import update_dataset_ids
Expand Down Expand Up @@ -453,7 +453,7 @@ def execute(
) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)
assert history # tell type system we've set history and it is no longer optional
# Build name for output datasets based on tool name and input names
on_text = self._get_on_text(inp_data)
on_text = self._get_on_text(inp_data, inp_dataset_collections)

# format='input" previously would give you a random extension from
# the input extensions, now it should just give "input" as the output
Expand Down Expand Up @@ -883,13 +883,31 @@ def _wrapped_params(self, trans, tool, incoming, input_datasets=None):
wrapped_params = WrappedParameters(trans, tool, incoming, input_datasets=input_datasets)
return wrapped_params

def _get_on_text(self, inp_data):
def _get_on_text(self, inp_data, inp_dataset_collections):
input_names = []
collection_names = []
collection_hda_ids = set()
# output collection id and store included hda ids (to avoid extra inclusion in the list of datasets)
# two for loops because:
# - inp_dataset_collections stores the collections per input parameter in a dict
# - the values contain a list of pairs of DatasetCollectionsAssociations and a bool
for collections in inp_dataset_collections.values():
for dataset_collection, _ in collections:
if getattr(dataset_collection, "hid", None):
collection_names.append(f"{dataset_collection.hid}")
if (
getattr(dataset_collection, "collection", None) is None
or getattr(dataset_collection.collection, "elements", None) is None
):
continue
for element in dataset_collection.collection.elements:
collection_hda_ids.add(element.hda_id)
for data in reversed(list(inp_data.values())):
if getattr(data, "id", None) is None or data.id in collection_hda_ids:
continue
if getattr(data, "hid", None):
input_names.append(f"data {data.hid}")

return on_text_for_names(input_names)
input_names.append(f"{data.hid}")
return on_text_for_dataset_and_collections(dataset_names=input_names, collection_names=collection_names)

def _new_job_for_session(self, trans, tool, history) -> Tuple[model.Job, Optional[model.GalaxySession]]:
job = trans.app.model.Job()
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/actions/model_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def execute(
) = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)

# Build name for output datasets based on tool name and input names
on_text = self._get_on_text(inp_data)
on_text = self._get_on_text(inp_data, inp_dataset_collections)

# wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
wrapped_params = self._wrapped_params(trans, tool, incoming)
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/tools/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from galaxy.tool_util.parser import ToolOutputCollectionPart
from galaxy.tools.execution_helpers import (
filter_output,
on_text_for_names,
on_text_for_dataset_and_collections,
ToolExecutionCache,
)
from galaxy.tools.parameters.workflow_utils import is_runtime_value
Expand Down Expand Up @@ -325,8 +325,8 @@ def record_error(self, error):
def on_text(self) -> Optional[str]:
collection_info = self.collection_info
if self._on_text is None and collection_info is not None:
collection_names = ["collection %d" % c.hid for c in collection_info.collections.values()]
self._on_text = on_text_for_names(collection_names)
collection_names = ["%d" % c.hid for c in collection_info.collections.values()]
self._on_text = on_text_for_dataset_and_collections(collection_names=collection_names)

return self._on_text

Expand Down
23 changes: 17 additions & 6 deletions lib/galaxy/tools/execution_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"""

import logging
from typing import Collection
from typing import (
Collection,
Optional,
)

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -48,7 +51,9 @@ def filter_output(tool, output, incoming):
return False


def on_text_for_names(input_names: Collection[str]) -> str:
def on_text_for_names(input_names: Optional[Collection[str]], prefix) -> str:
if input_names is None:
return ""
# input_names may contain duplicates... this is because the first value in
# multiple input dataset parameters will appear twice once as param_name
# and once as param_name1.
Expand All @@ -62,11 +67,17 @@ def on_text_for_names(input_names: Collection[str]) -> str:
if len(input_names) == 0:
on_text = ""
elif len(input_names) == 1:
on_text = input_names[0]
on_text = prefix + " " + input_names[0]
elif len(input_names) == 2:
on_text = "{} and {}".format(*input_names)
on_text = prefix + "s {} and {}".format(*input_names)
elif len(input_names) == 3:
on_text = "{}, {}, and {}".format(*input_names)
on_text = prefix + "s {}, {}, and {}".format(*input_names)
else:
on_text = "{}, {}, and others".format(*input_names[:2])
on_text = prefix + "s {}, {}, and others".format(*input_names[:2])
return on_text


def on_text_for_dataset_and_collections(
dataset_names: Optional[Collection[str]] = None, collection_names: Optional[Collection[str]] = None
) -> str:
return on_text_for_names(collection_names, "collection") + on_text_for_names(dataset_names, "dataset")
6 changes: 3 additions & 3 deletions lib/galaxy_test/api/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -5871,7 +5871,7 @@ def test_run_build_list_hide_collection_output(self):
history_id, hid=3, wait=True, assert_ok=True
)
assert details1["elements"][0]["object"]["visible"] is False
assert details1["name"] == "data 1 (as list)", details1
assert details1["name"] == "dataset 1 (as list)", details1
assert details1["visible"] is False

@skip_without_tool("__BUILD_LIST__")
Expand Down Expand Up @@ -5906,7 +5906,7 @@ def test_run_build_list_delete_intermediate_collection_output(self):
history_id, hid=3, wait=True, assert_ok=True
)
assert details1["elements"][0]["object"]["visible"] is False
assert details1["name"] == "data 1 (as list)", details1
assert details1["name"] == "dataset 1 (as list)", details1
# FIXME: this doesn't work because the workflow is still being scheduled
# TODO: Implement a way to run PJAs that couldn't be run during/after the job
# after the workflow has run to completion
Expand Down Expand Up @@ -5951,7 +5951,7 @@ def test_run_build_list_change_datatype_collection_output(self):
details1 = self.dataset_populator.get_history_collection_details(
history_id, hid=3, wait=True, assert_ok=True
)
assert details1["name"] == "data 1 (as list)", details1
assert details1["name"] == "dataset 1 (as list)", details1
assert details1["elements"][0]["object"]["visible"] is False
assert details1["elements"][0]["object"]["file_ext"] == "txt"
details2 = self.dataset_populator.get_history_collection_details(
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy_test/selenium/test_tool_form.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def test_run_data(self):

latest_hda = self.latest_history_item()
assert latest_hda["hid"] == 3
assert latest_hda["name"] == "Select first on data 1"
assert latest_hda["name"] == "Select first on dataset 1"

@selenium_test
def test_bibtex_rendering(self):
Expand Down
14 changes: 7 additions & 7 deletions test/unit/app/tools/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@

def test_on_text_for_names():
def assert_on_text_is(expected, *names):
on_text = on_text_for_names(names)
on_text = on_text_for_names(names, "dataset")
assert on_text == expected, f"Wrong on text value {on_text}, expected {expected}"

assert_on_text_is("data 1", "data 1")
assert_on_text_is("data 1 and data 2", "data 1", "data 2")
assert_on_text_is("data 1, data 2, and data 3", "data 1", "data 2", "data 3")
assert_on_text_is("data 1, data 2, and others", "data 1", "data 2", "data 3", "data 4")
assert_on_text_is("dataset 1", "1")
assert_on_text_is("datasets 1 and 2", "1", "2")
assert_on_text_is("datasets 1, 2, and 3", "1", "2", "3")
assert_on_text_is("datasets 1, 2, and others", "1", "2", "3", "4")

assert_on_text_is("data 1 and data 2", "data 1", "data 1", "data 2")
assert_on_text_is("datasets 1 and 2", "1", "1", "2")


class TestDefaultToolAction(TestCase, tools_support.UsesTools):
Expand Down Expand Up @@ -99,7 +99,7 @@ def test_output_label_data(self):
tools_support.SIMPLE_CAT_TOOL_CONTENTS,
incoming,
)
assert output["out1"].name == "Test Tool on data 2 and data 1"
assert output["out1"].name == "Test Tool on datasets 2 and 1"

def test_object_store_ids(self):
_, output = self._simple_execute(contents=TWO_OUTPUTS)
Expand Down
Loading