diff --git a/README.md b/README.md index 0fde6023c..bba71faf5 100644 --- a/README.md +++ b/README.md @@ -105,9 +105,9 @@ For more details, see the [extended documentation](https://deeprank2.rtfd.io/). ### Data generation -For each protein-protein complex (or protein structure containing a missense variant), a `DeepRankQuery` can be created and added to the `QueryCollection` object, to be processed later on. Two subtypes of `DeepRankQuery` exist: `ProteinProteinInterfaceQuery` and `SingleResidueVariantQuery`. +For each protein-protein complex (or protein structure containing a missense variant), a `Query` can be created and added to the `QueryCollection` object, to be processed later on. Two subtypes of `Query` exist: `ProteinProteinInterfaceQuery` and `SingleResidueVariantQuery`. -A `DeepRankQuery` takes as inputs: +A `Query` takes as inputs: - a `.pdb` file, representing the protein-protein structure, - the resolution (`"residue"` or `"atom"`), i.e. whether each node should represent an amino acid residue or an atom, - the ids of the chains composing the structure, and diff --git a/deeprank2/dataset.py b/deeprank2/dataset.py index 42aa9ec5c..13c9ea804 100644 --- a/deeprank2/dataset.py +++ b/deeprank2/dataset.py @@ -441,7 +441,7 @@ def __init__( # pylint: disable=too-many-arguments Value will be ignored and inherited from `dataset_train` if `train` is set as False and `dataset_train` is assigned. Defaults to "all". target (Optional[str], optional): Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. It can also be - a custom-defined target given to the DeepRankQuery class as input (see: `deeprank2.query`); in this case, + a custom-defined target given to the Query class as input (see: `deeprank2.query`); in this case, the task parameter needs to be explicitly specified as well. Only numerical target variables are supported, not categorical. If the latter is your case, please convert the categorical classes into @@ -680,7 +680,7 @@ def __init__( # noqa: MC0001, pylint: disable=too-many-arguments, too-many-local :class:`Datasets` belong to the "cluster" Group. They are saved in the .HDF5 file to make them available to networks that make use of clustering methods. Defaults to None. target (Optional[str], optional): Default options are irmsd, lrmsd, fnat, binary, capri_class, and dockq. - It can also be a custom-defined target given to the DeepRankQuery class as input (see: `deeprank2.query`); + It can also be a custom-defined target given to the Query class as input (see: `deeprank2.query`); in this case, the task parameter needs to be explicitly specified as well. Only numerical target variables are supported, not categorical. If the latter is your case, please convert the categorical classes into diff --git a/deeprank2/query.py b/deeprank2/query.py index 68f3a7b9e..2c55a7712 100644 --- a/deeprank2/query.py +++ b/deeprank2/query.py @@ -36,10 +36,10 @@ @dataclass(repr=False, kw_only=True) -class DeepRankQuery: +class Query: """Represents one entity of interest: a single residue variant (SRV) or a protein-protein interface (PPI). - :class:`DeepRankQuery` objects are used to generate graphs from structures, and they should be created before any model is loaded. + :class:`Query` objects are used to generate graphs from structures, and they should be created before any model is loaded. They can have target values associated with them, which will be stored with the resulting graph. Args: @@ -215,10 +215,10 @@ def get_query_id(self) -> str: @dataclass(kw_only=True) -class SingleResidueVariantQuery(DeepRankQuery): +class SingleResidueVariantQuery(Query): """A query that builds a single residue variant graph. - Args (common for `DeepRankQuery`): + Args (common for `Query`): pdb_path (str): the path to the PDB file to query. resolution (Literal['residue', 'atom']): sets whether each node is a residue or atom. chain_ids (list[str] | str): the chain identifier of the variant residue (generally a single capital letter). @@ -313,7 +313,7 @@ def _build_helper(self) -> Graph: @dataclass(kw_only=True) -class ProteinProteinInterfaceQuery(DeepRankQuery): +class ProteinProteinInterfaceQuery(Query): """A query that builds a protein-protein interface graph. Args: @@ -403,14 +403,14 @@ def __init__(self): def add( self, - query: DeepRankQuery, + query: Query, verbose: bool = False, warn_duplicate: bool = True, ): """Add a new query to the collection. Args: - query(:class:`DeepRankQuery`): The `DeepRankQuery` to add to the collection. + query(:class:`Query`): The `Query` to add to the collection. verbose(bool): For logging query IDs added. Defaults to `False`. warn_duplicate (bool): Log a warning before renaming if a duplicate query is identified. Defaults to `True`. """ @@ -426,7 +426,7 @@ def add( new_id = query.model_id + "_" + str(self._ids_count[query_id]) query.model_id = new_id if warn_duplicate: - _log.warning(f'DeepRankQuery with ID {query_id} has already been added to the collection. Renaming it as {query.get_query_id()}') + _log.warning(f'Query with ID {query_id} has already been added to the collection. Renaming it as {query.get_query_id()}') self._queries.append(query) @@ -441,20 +441,20 @@ def export_dict(self, dataset_path: str): pickle.dump(self, pkl_file) @property - def queries(self) -> list[DeepRankQuery]: + def queries(self) -> list[Query]: """The list of queries added to the collection.""" return self._queries - def __contains__(self, query: DeepRankQuery) -> bool: + def __contains__(self, query: Query) -> bool: return query in self._queries - def __iter__(self) -> Iterator[DeepRankQuery]: + def __iter__(self) -> Iterator[Query]: return iter(self._queries) def __len__(self) -> int: return len(self._queries) - def _process_one_query(self, query: DeepRankQuery): + def _process_one_query(self, query: Query): """Only one process may access an hdf5 file at a time""" try: @@ -471,7 +471,7 @@ def _process_one_query(self, query: DeepRankQuery): graph.write_as_grid_to_hdf5(output_path, self._grid_settings, self._grid_map_method, augmentation) except (ValueError, AttributeError, KeyError, TimeoutError) as e: - _log.warning(f'\nGraph/DeepRankQuery with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),' + _log.warning(f'\nGraph/Query with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),' ' and it has not been written to the hdf5 file. More details below:') _log.exception(e) diff --git a/docs/getstarted.md b/docs/getstarted.md index c5db7ed41..e028d8558 100644 --- a/docs/getstarted.md +++ b/docs/getstarted.md @@ -6,9 +6,9 @@ For more details, see the [extended documentation](https://deeprank2.rtfd.io/). ## Data generation -For each protein-protein complex (or protein structure containing a missense variant), a `DeepRankQuery` can be created and added to the `QueryCollection` object, to be processed later on. Two subtypes of `DeepRankQuery` exist: `ProteinProteinInterfaceQuery` and `SingleResidueVariantQuery`. +For each protein-protein complex (or protein structure containing a missense variant), a `Query` can be created and added to the `QueryCollection` object, to be processed later on. Two subtypes of `Query` exist: `ProteinProteinInterfaceQuery` and `SingleResidueVariantQuery`. -A `DeepRankQuery` takes as inputs: +A `Query` takes as inputs: - a `.pdb` file, representing the protein-protein structure, - the resolution (`"residue"` or `"atom"`), i.e. whether each node should represent an amino acid residue or an atom, - the ids of the chains composing the structure, and diff --git a/tests/test_querycollection.py b/tests/test_querycollection.py index 6b001b6b3..258357cdc 100644 --- a/tests/test_querycollection.py +++ b/tests/test_querycollection.py @@ -12,7 +12,7 @@ from deeprank2.domain import nodestorage as Nfeat from deeprank2.domain.aminoacidlist import alanine, phenylalanine from deeprank2.features import components, contact, surfacearea -from deeprank2.query import (DeepRankQuery, ProteinProteinInterfaceQuery, +from deeprank2.query import (ProteinProteinInterfaceQuery, Query, QueryCollection, SingleResidueVariantQuery) from deeprank2.tools.target import compute_ppi_scores @@ -131,7 +131,7 @@ def test_querycollection_process(): assert isinstance(collection.queries, list) assert len(collection.queries) == n_queries for query in collection.queries: - assert issubclass(type(query), DeepRankQuery) + assert issubclass(type(query), Query) rmtree(output_directory) @@ -224,9 +224,8 @@ def test_querycollection_process_combine_output_false(): def test_querycollection_duplicates_add(): - """ - Tests add method of QueryCollection class. - """ + """Tests add method of QueryCollection class.""" + ref_path = "tests/data/ref/1ATN/1ATN.pdb" pssm_path1 = "tests/data/pssm/1ATN/1ATN.A.pdb.pssm" pssm_path2 = "tests/data/pssm/1ATN/1ATN.B.pdb.pssm" diff --git a/tutorials/data_generation_ppi.ipynb b/tutorials/data_generation_ppi.ipynb index 857fc21c3..40a765eb9 100644 --- a/tutorials/data_generation_ppi.ipynb +++ b/tutorials/data_generation_ppi.ipynb @@ -147,7 +147,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## `QueryCollection` and `DeepRankQuery` objects" + "## `QueryCollection` and `Query` objects" ] }, { diff --git a/tutorials/data_generation_srv.ipynb b/tutorials/data_generation_srv.ipynb index 136984a4c..1fa693011 100644 --- a/tutorials/data_generation_srv.ipynb +++ b/tutorials/data_generation_srv.ipynb @@ -166,7 +166,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## `QueryCollection` and `DeepRankQuery` objects" + "## `QueryCollection` and `Query` objects" ] }, {