From 6565c1e9b4c09384b9333d26ef84c6ebb50f0468 Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Tue, 19 Sep 2023 10:37:18 +0200
Subject: [PATCH 1/7] fix pyproject.toml to find tests folder and deeprank2
 submodules

---
 pyproject.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f6dd8d8f1..5b6d2f1b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,5 +80,6 @@ Changelog = "https://github.com/DeepRank/deeprank2/blob/main/CHANGELOG.md"
 branch = true
 source = ["deeprank2"]
 
-[tool.setuptools]
-packages = ["deeprank2"]
+[tool.setuptools.packages.find]
+include = ["deeprank2*"]
+exclude = ["tests*"]

From 2d436b5fe179e367c21678ce6ef3a38a9aab655e Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Tue, 19 Sep 2023 10:38:04 +0200
Subject: [PATCH 2/7] add docs and change compute_targets to compute_ppi_scores

---
 deeprank2/tools/target.py | 42 ++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/deeprank2/tools/target.py b/deeprank2/tools/target.py
index e8ace344c..ff7940ace 100644
--- a/deeprank2/tools/target.py
+++ b/deeprank2/tools/target.py
@@ -83,22 +83,40 @@ def add_target(graph_path: Union[str, List[str]], target_name: str, target_list:
             print(f"no graph for {hdf5}")
 
 
-def compute_targets(pdb_path: str, reference_pdb_path: str) -> Dict[str, Union[float, int]]:
+def compute_ppi_scores(pdb_path: str, reference_pdb_path: str) -> Dict[str, Union[float, int]]:
 
     """
-    Compute targets and outputs them as a dictionary.
-    For classification:
-       - binary (scalar value is expected to be either 0 or 1)
-       - capri_classes (scalar integer values are expected)
+    Compute structure similarity scores and return them as a dictionary.
+    Such measures have been developed for evaluating the quality of the PPI models produced by
+    computational methods (docking models), and all of them compare the structural similarity
+    between the decoys (computationally generated structures) and the experimentally solved native
+    structures. To calculate these measures, the interface between the two interacting protein molecules
+    is defined as any pair of heavy atoms from the two molecules within 5Å of each other.
     For regression:
-       - irmsd
-       - lrmsd
-       - fnat
-       - dockq
+       - ligand root mean square deviation (lrmsd), float. It is calculated for the backbone of
+       the shorter chain (ligand) of the model after superposition of the longer chain (receptor).
+       The lower the better.
+       - interface rmsd (irmsd), float. The backbone atoms of the interface residues (atomic contact cutoff
+       of 10Å) is superposed on their equivalents in the predicted complex (model) to compute it.
+       The lower the better.
+       - fraction of native contacts (fnat), float. The fraction of native interfacial contacts preserved in
+       the interface of the predicted complex. The score is in the range [0, 1], corresponding to low and
+       high quality, respectively.
+       - dockq, float. It is a continuous quality measure for docking models that instead of classifying into different
+       quality groups, combines Fnat, LRMS, and iRMS to yield a score in the range [0, 1], corresponding to low and
+       high quality, respectively.
+    For classification:
+       - binary (bool). True if the irmsd is lower than 4.0, meaning that the decoy is considered high quality
+       docking model, otherwise False.
+       - capri_classes (int). The possible values are: 4 (incorrect), 3 (acceptable), 2 (medium), 1 (high quality).
+    See https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393
+    for more details about capri_classes, lrmsd, irmsd, and fnat.
+    See https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879
+    for more details about dockq.
 
     Args:
-        pdb_path (str): Path to the scored pdb structure.
-        reference_pdb_path (str): Path to the reference structure required to compute the different target.
+        pdb_path (str): Path to the decoy.
+        reference_pdb_path (str): Path to the reference (native) structure.
 
     Returns: a dictionary containing values for lrmsd, irmsd, fnat, dockq, binary, capri_class
     """
@@ -128,7 +146,7 @@ def compute_targets(pdb_path: str, reference_pdb_path: str) -> Dict[str, Union[f
     )
     scores[targets.BINARY] = scores[targets.IRMSD] < 4.0
 
-    scores[targets.CAPRI] = 5
+    scores[targets.CAPRI] = 4
     for thr, val in zip([6.0, 4.0, 2.0, 1.0], [4, 3, 2, 1]):
         if scores[targets.IRMSD] < thr:
             scores[targets.CAPRI] = val

From 9d74cf500671ec5178949593184d6eac3cd412d9 Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Tue, 19 Sep 2023 10:38:35 +0200
Subject: [PATCH 3/7] add tests for ppi scores

---
 tests/tools/test_target.py | 40 +++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/tools/test_target.py b/tests/tools/test_target.py
index 051cdfc33..8c51937d4 100644
--- a/tests/tools/test_target.py
+++ b/tests/tools/test_target.py
@@ -3,7 +3,9 @@
 import tempfile
 import unittest
 
-from deeprank2.tools.target import add_target, compute_targets
+from pdb2sql import StructureSimilarity
+
+from deeprank2.tools.target import add_target, compute_ppi_scores
 
 
 class TestTools(unittest.TestCase):
@@ -33,10 +35,42 @@ def test_add_target(self):
             os.remove(graph_path)
 
 
-    def test_compute_targets(self):
-        compute_targets("tests/data/pdb/1ATN/1ATN_1w.pdb", "tests/data/ref/1ATN/1ATN.pdb")
+    def test_compute_ppi_scores(self):
+        scores = compute_ppi_scores(
+            os.path.join(self.pdb_path, "1ATN_1w.pdb"),
+            os.path.join(self.ref, "1ATN.pdb"))
+
+        sim = StructureSimilarity(
+            os.path.join(self.pdb_path, "1ATN_1w.pdb"),
+            os.path.join(self.ref, "1ATN.pdb"), enforce_residue_matching=False)
+        lrmsd = sim.compute_lrmsd_fast(method="svd")
+        irmsd = sim.compute_irmsd_fast(method="svd")
+        fnat = sim.compute_fnat_fast()
+        dockq = sim.compute_DockQScore(fnat, lrmsd, irmsd)
+        binary = irmsd < 4.0
+        capri = 4
+        for thr, val in zip([6.0, 4.0, 2.0, 1.0], [4, 3, 2, 1]):
+            if irmsd < thr:
+                capri = val
+
+        assert scores['irmsd'] == irmsd
+        assert scores['lrmsd'] == lrmsd
+        assert scores['fnat'] == fnat
+        assert scores['dockq'] == dockq
+        assert scores['binary'] == binary
+        assert scores['capri_class'] == capri
 
+    def test_compute_ppi_scores_same_struct(self):
+        scores = compute_ppi_scores(
+            os.path.join(self.pdb_path, "1ATN_1w.pdb"),
+            os.path.join(self.pdb_path, "1ATN_1w.pdb"))
 
+        assert scores['irmsd'] == 0.0
+        assert scores['lrmsd'] == 0.0
+        assert scores['fnat'] == 1.0
+        assert scores['dockq'] == 1.0
+        assert scores['binary'] # True
+        assert scores['capri_class'] == 1
 
 
 if __name__ == "__main__":

From 150b4ebb7cf36b9f76cb1e8ccb0fcf52e59dc159 Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Tue, 19 Sep 2023 10:39:12 +0200
Subject: [PATCH 4/7] update compute_ppi_scores nomenclature

---
 tests/data/hdf5/_generate_testdata.ipynb |  8 ++++----
 tests/test_integration.py                | 16 ++++++++--------
 tests/test_querycollection.py            | 10 +++++-----
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/data/hdf5/_generate_testdata.ipynb b/tests/data/hdf5/_generate_testdata.ipynb
index 41acbe392..9ad003684 100644
--- a/tests/data/hdf5/_generate_testdata.ipynb
+++ b/tests/data/hdf5/_generate_testdata.ipynb
@@ -16,7 +16,7 @@
     "    ProteinProteinInterfaceResidueQuery,\n",
     "    SingleResidueVariantResidueQuery,\n",
     "    ProteinProteinInterfaceAtomicQuery)\n",
-    "from deeprank2.tools.target import compute_targets\n",
+    "from deeprank2.tools.target import compute_ppi_scores\n",
     "from deeprank2.dataset import save_hdf5_keys\n",
     "from deeprank2.domain.aminoacidlist import alanine, phenylalanine\n",
     "import glob\n",
@@ -64,7 +64,7 @@
     "\n",
     "    for pdb_path in pdb_paths:\n",
     "        # Append data points\n",
-    "        targets = compute_targets(pdb_path, ref_path)\n",
+    "        targets = compute_ppi_scores(pdb_path, ref_path)\n",
     "        queries.add(ProteinProteinInterfaceResidueQuery(\n",
     "            pdb_path = pdb_path,\n",
     "            chain_id1 = chain_id1,\n",
@@ -219,7 +219,7 @@
     "count_queries = 5\n",
     "pdb_path = str(PATH_TEST / \"data/pdb/3C8P/3C8P.pdb\")\n",
     "ref_path = str(PATH_TEST / \"data/ref/3C8P/3C8P.pdb\")\n",
-    "targets = compute_targets(pdb_path, ref_path)\n",
+    "targets = compute_ppi_scores(pdb_path, ref_path)\n",
     "queries = QueryCollection()\n",
     "\n",
     "for number in range(1, count_queries + 1):\n",
@@ -269,7 +269,7 @@
     "\n",
     "for pdb_path in pdb_paths:\n",
     "    # Append data points\n",
-    "    targets = compute_targets(pdb_path, ref_path)\n",
+    "    targets = compute_ppi_scores(pdb_path, ref_path)\n",
     "    queries.add(ProteinProteinInterfaceAtomicQuery(\n",
     "        pdb_path = pdb_path,\n",
     "        chain_id1 = chain_id1,\n",
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 8a977bc6b..d8a37a58c 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -4,20 +4,20 @@
 from tempfile import mkdtemp
 
 import h5py
+
 from deeprank2.dataset import GraphDataset, GridDataset
+from deeprank2.domain import edgestorage as Efeat
+from deeprank2.domain import nodestorage as Nfeat
+from deeprank2.domain import targetstorage as targets
 from deeprank2.neuralnets.cnn.model3d import CnnClassification
 from deeprank2.neuralnets.gnn.ginet import GINet
 from deeprank2.query import (ProteinProteinInterfaceResidueQuery,
                              QueryCollection)
-from deeprank2.tools.target import compute_targets
+from deeprank2.tools.target import compute_ppi_scores
 from deeprank2.trainer import Trainer
 from deeprank2.utils.exporters import HDF5OutputExporter
 from deeprank2.utils.grid import GridSettings, MapMethod
 
-from deeprank2.domain import edgestorage as Efeat
-from deeprank2.domain import nodestorage as Nfeat
-from deeprank2.domain import targetstorage as targets
-
 pdb_path = str("tests/data/pdb/3C8P/3C8P.pdb")
 ref_path = str("tests/data/ref/3C8P/3C8P.pdb")
 pssm_path1 = str("tests/data/pssm/3C8P/3C8P.A.pdb.pssm")
@@ -41,10 +41,10 @@ def test_cnn(): # pylint: disable=too-many-locals
 
     prefix = os.path.join(hdf5_directory, "test-queries-process")
 
-    all_targets = compute_targets(pdb_path, ref_path)
+    all_targets = compute_ppi_scores(pdb_path, ref_path)
 
     try:
-        all_targets = compute_targets(pdb_path, ref_path)
+        all_targets = compute_ppi_scores(pdb_path, ref_path)
 
         queries = QueryCollection()
         for _ in range(count_queries):
@@ -125,7 +125,7 @@ def test_gnn(): # pylint: disable=too-many-locals
     prefix = os.path.join(hdf5_directory, "test-queries-process")
 
     try:
-        all_targets = compute_targets(pdb_path, ref_path)
+        all_targets = compute_ppi_scores(pdb_path, ref_path)
 
         queries = QueryCollection()
         for _ in range(count_queries):
diff --git a/tests/test_querycollection.py b/tests/test_querycollection.py
index 90f4a2083..ed4611137 100644
--- a/tests/test_querycollection.py
+++ b/tests/test_querycollection.py
@@ -6,14 +6,14 @@
 
 import h5py
 import pytest
-from deeprank2.domain.aminoacidlist import alanine, phenylalanine
-from deeprank2.query import (ProteinProteinInterfaceResidueQuery, Query,
-                             QueryCollection, SingleResidueVariantResidueQuery)
-from deeprank2.tools.target import compute_targets
 
 from deeprank2.domain import edgestorage as Efeat
 from deeprank2.domain import nodestorage as Nfeat
+from deeprank2.domain.aminoacidlist import alanine, phenylalanine
 from deeprank2.features import components, contact, surfacearea
+from deeprank2.query import (ProteinProteinInterfaceResidueQuery, Query,
+                             QueryCollection, SingleResidueVariantResidueQuery)
+from deeprank2.tools.target import compute_ppi_scores
 
 
 def _querycollection_tester( # pylint: disable = too-many-locals, dangerous-default-value
@@ -242,7 +242,7 @@ def test_querycollection_duplicates_add():
 
     for pdb_path in pdb_paths:
         # Append data points
-        targets = compute_targets(pdb_path, ref_path)
+        targets = compute_ppi_scores(pdb_path, ref_path)
         queries.add(ProteinProteinInterfaceResidueQuery(
             pdb_path = pdb_path,
             chain_id1 = chain_id1,

From a529a365662bd4ab9e5fda48ee6e47877bef8b43 Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Tue, 19 Sep 2023 17:54:36 +0200
Subject: [PATCH 5/7] add dani's suggestions

---
 deeprank2/tools/target.py | 50 +++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/deeprank2/tools/target.py b/deeprank2/tools/target.py
index ff7940ace..4ab6d95ce 100644
--- a/deeprank2/tools/target.py
+++ b/deeprank2/tools/target.py
@@ -85,40 +85,38 @@ def add_target(graph_path: Union[str, List[str]], target_name: str, target_list:
 
 def compute_ppi_scores(pdb_path: str, reference_pdb_path: str) -> Dict[str, Union[float, int]]:
 
-    """
-    Compute structure similarity scores and return them as a dictionary.
-    Such measures have been developed for evaluating the quality of the PPI models produced by
+    """Compute structure similarity scores and return them as a dictionary.
+
+    These measures have been developed for evaluating the quality of the PPI models produced by
     computational methods (docking models), and all of them compare the structural similarity
     between the decoys (computationally generated structures) and the experimentally solved native
     structures. To calculate these measures, the interface between the two interacting protein molecules
     is defined as any pair of heavy atoms from the two molecules within 5Å of each other.
-    For regression:
-       - ligand root mean square deviation (lrmsd), float. It is calculated for the backbone of
-       the shorter chain (ligand) of the model after superposition of the longer chain (receptor).
-       The lower the better.
-       - interface rmsd (irmsd), float. The backbone atoms of the interface residues (atomic contact cutoff
-       of 10Å) is superposed on their equivalents in the predicted complex (model) to compute it.
-       The lower the better.
-       - fraction of native contacts (fnat), float. The fraction of native interfacial contacts preserved in
-       the interface of the predicted complex. The score is in the range [0, 1], corresponding to low and
-       high quality, respectively.
-       - dockq, float. It is a continuous quality measure for docking models that instead of classifying into different
-       quality groups, combines Fnat, LRMS, and iRMS to yield a score in the range [0, 1], corresponding to low and
-       high quality, respectively.
-    For classification:
-       - binary (bool). True if the irmsd is lower than 4.0, meaning that the decoy is considered high quality
-       docking model, otherwise False.
-       - capri_classes (int). The possible values are: 4 (incorrect), 3 (acceptable), 2 (medium), 1 (high quality).
-    See https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393
-    for more details about capri_classes, lrmsd, irmsd, and fnat.
-    See https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879
-    for more details about dockq.
+
+    - lmrsd (ligand root mean square deviation) is a float value calculated for the backbone of
+        the shorter chain (ligand) of the model after superposition of the longer chain (receptor).
+        Lower scores represent better matching than higher scores.
+    - imrsd (interface rmsd) is a float value calculated for the backbone atoms of the interface residues
+        (atomic contact cutoff of 10Å) after superposition of their equivalents in the predicted complex (model)
+        Lower scores represent better matching than higher scores.
+    - fnat (fraction of native contacts) is the fraction of native interfacial contacts preserved in
+        the interface of the predicted complex. The score is a float in the range [0, 1], where higher values
+        respresent higher quality.
+    - dockq (docking model quality) is a continuous quality measure for docking models that instead of classifying into different
+        quality groups. It combines fnat, lmrs, and irms and yields a float score in the range [0, 1], where
+        higher values respresent higher quality.
+    - binary (bool): True if the irmsd is lower than 4.0, meaning that the decoy is considered high quality
+        docking model, otherwise False.
+    - capri classification (int). The possible values are: 1 (high quality, irmsd < 1.0), 2 (medium, irmsd < 2.0),
+        3 (acceptable, irms < 4.0), 4 (incorrect, irmsd >= 4.0)
+    See https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393 for more details about capri_classes, lrmsd, irmsd, and fnat.
+    See https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879 for more details about dockq.
 
     Args:
         pdb_path (str): Path to the decoy.
         reference_pdb_path (str): Path to the reference (native) structure.
 
-    Returns: a dictionary containing values for lrmsd, irmsd, fnat, dockq, binary, capri_class
+    Returns: a dictionary containing values for lrmsd, irmsd, fnat, dockq, binary, capri_class.
     """
 
     ref_name = os.path.splitext(os.path.basename(reference_pdb_path))[0]
@@ -147,7 +145,7 @@ def compute_ppi_scores(pdb_path: str, reference_pdb_path: str) -> Dict[str, Unio
     scores[targets.BINARY] = scores[targets.IRMSD] < 4.0
 
     scores[targets.CAPRI] = 4
-    for thr, val in zip([6.0, 4.0, 2.0, 1.0], [4, 3, 2, 1]):
+    for thr, val in zip([4.0, 2.0, 1.0], [3, 2, 1]):
         if scores[targets.IRMSD] < thr:
             scores[targets.CAPRI] = val
 

From 540fe291313ef732b58ee76e0df78d0cb62972fb Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Wed, 20 Sep 2023 13:41:35 +0200
Subject: [PATCH 6/7] add doc page for docking and improve doc string in
 target.py

---
 deeprank2/tools/target.py  | 32 +++++----------------------
 docs/docking.md            | 45 ++++++++++++++++++++++++++++++++++++++
 docs/index.rst             |  4 ++++
 tests/tools/test_target.py |  1 -
 4 files changed, 55 insertions(+), 27 deletions(-)
 create mode 100644 docs/docking.md

diff --git a/deeprank2/tools/target.py b/deeprank2/tools/target.py
index 4ab6d95ce..80f57be9f 100644
--- a/deeprank2/tools/target.py
+++ b/deeprank2/tools/target.py
@@ -85,32 +85,12 @@ def add_target(graph_path: Union[str, List[str]], target_name: str, target_list:
 
 def compute_ppi_scores(pdb_path: str, reference_pdb_path: str) -> Dict[str, Union[float, int]]:
 
-    """Compute structure similarity scores and return them as a dictionary.
-
-    These measures have been developed for evaluating the quality of the PPI models produced by
-    computational methods (docking models), and all of them compare the structural similarity
-    between the decoys (computationally generated structures) and the experimentally solved native
-    structures. To calculate these measures, the interface between the two interacting protein molecules
-    is defined as any pair of heavy atoms from the two molecules within 5Å of each other.
-
-    - lmrsd (ligand root mean square deviation) is a float value calculated for the backbone of
-        the shorter chain (ligand) of the model after superposition of the longer chain (receptor).
-        Lower scores represent better matching than higher scores.
-    - imrsd (interface rmsd) is a float value calculated for the backbone atoms of the interface residues
-        (atomic contact cutoff of 10Å) after superposition of their equivalents in the predicted complex (model)
-        Lower scores represent better matching than higher scores.
-    - fnat (fraction of native contacts) is the fraction of native interfacial contacts preserved in
-        the interface of the predicted complex. The score is a float in the range [0, 1], where higher values
-        respresent higher quality.
-    - dockq (docking model quality) is a continuous quality measure for docking models that instead of classifying into different
-        quality groups. It combines fnat, lmrs, and irms and yields a float score in the range [0, 1], where
-        higher values respresent higher quality.
-    - binary (bool): True if the irmsd is lower than 4.0, meaning that the decoy is considered high quality
-        docking model, otherwise False.
-    - capri classification (int). The possible values are: 1 (high quality, irmsd < 1.0), 2 (medium, irmsd < 2.0),
-        3 (acceptable, irms < 4.0), 4 (incorrect, irmsd >= 4.0)
-    See https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393 for more details about capri_classes, lrmsd, irmsd, and fnat.
-    See https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879 for more details about dockq.
+    """Compute structure similarity scores for the input docking model and return them as a dictionary.
+
+    The computed scores are: `lrmsd` (ligand root mean square deviation), `irmsd` (interface rmsd),
+    `fnat` (fraction of native contacts), `dockq` (docking model quality), `binary` (True - high quality,
+    False - low quality), `capri_class` (capri classification, 1 - high quality, 2 - medium, 3 - acceptable,
+    4 - incorrect). See https://deeprank2.readthedocs.io/en/latest/docking.html for more details about the scores.
 
     Args:
         pdb_path (str): Path to the decoy.
diff --git a/docs/docking.md b/docs/docking.md
new file mode 100644
index 000000000..16406dcfe
--- /dev/null
+++ b/docs/docking.md
@@ -0,0 +1,45 @@
+# Docking scores
+
+The following scores have been developed for evaluating the quality of the protein-protein models produced by computational methods (docking models), and all of them compare the structural similarity between the decoys (computationally generated structures) and the experimentally solved native structures. To calculate these measures, the interface between the two interacting protein molecules is defined as any pair of heavy atoms from the two molecules within 5Å of each other.
+
+- `lrmsd` (ligand root mean square deviation) is a float value calculated for the backbone of the shorter chain (ligand) of the model after superposition of the longer chain (receptor). Lower scores represent better matching than higher scores.
+- `irmsd` (interface rmsd) is a float value calculated for the backbone atoms of the interface residues (atomic contact cutoff of 10Å) after superposition of their equivalents in the predicted complex (model). Lower scores represent better matching than higher scores.
+- `fnat` (fraction of native contacts) is the fraction of native interfacial contacts preserved in the interface of the predicted complex. The score is a float in the range [0, 1], where higher values represent higher quality.
+- `dockq` (docking model quality) is a continuous quality measure for docking models that, instead of classifying them into different quality groups, combines `fnat`, `lrmsd`, and `irmsd` to yield a float score in the range [0, 1], where higher values represent higher quality.
+- `binary` (bool): True if the irmsd is lower than 4.0, meaning that the decoy is considered a high-quality docking model, otherwise False.
+- `capri_class` (int). It refers to the Critical Assessment of PRedicted Interactions (CAPRI) classification, in which the possible values are: 1 (high quality, irmsd < 1.0), 2 (medium, irmsd < 2.0), 3 (acceptable, irmsd < 4.0), 4 (incorrect, irmsd >= 4.0).
+
+See https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393 for more details about `capri_class`, `lrmsd`, `irmsd`, and `fnat`. See https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879 for more details about `dockq`.
+
+## Compute and add docking scores
+
+The following code snippet shows an example of how to use deeprank2 to compute the docking scores for a given docking model, and how to add one of the scores (e.g., `dockq`) as a target to the already processed data. 
+
+```python
+from deeprank2.tools.target import add_target, compute_ppi_scores
+
+docking_models = [
+    "<path_to_docking_model1.pdb>",
+    "<path_to_docking_model2.pdb>"
+    ]
+ref_models = [
+    "<path_to_ref_model1.pdb>",
+    "<path_to_ref_model2.pdb>"
+]
+
+target_list = ""
+for idx, _ in enumerate(docking_models):
+    scores = compute_ppi_scores(
+        docking_models[idx],
+        ref_models[idx])
+    dockq = scores['dockq']
+    target_list += f"query_id_model{idx} {dockq}\n"
+
+with open("<path_to_target_list.lst>", "w", encoding="utf-8") as f:
+    f.write(target_list)
+
+add_target("<path_to_hdf5_file.hdf5>", "dockq", "<path_to_target_list.lst>")
+
+```
+
+After having run the above code snippet, each processed data point within the indicated HDF5 file will contain a new Dataset called "dockq", containing the value computed through `compute_ppi_scores`.
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 1b8fe1d44..f86456d82 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -47,10 +47,14 @@ Notes
    :hidden:
 
    features
+   docking
 
 :doc:`features`
     Get a detailed overview about nodes' and edges' features implemented in the package.
 
+:doc:`docking`
+    Get a detailed overview about PPIs' docking metrics implemented in the package.
+
 Package reference
 ===========
    
diff --git a/tests/tools/test_target.py b/tests/tools/test_target.py
index 8c51937d4..bf5af2420 100644
--- a/tests/tools/test_target.py
+++ b/tests/tools/test_target.py
@@ -13,7 +13,6 @@ def setUp(self):
         self.pdb_path = "./tests/data/pdb/1ATN/"
         self.pssm_path = "./tests/data/pssm/1ATN/1ATN.A.pdb.pssm"
         self.ref = "./tests/data/ref/1ATN/"
-        self.h5_train_ref = "tests/data/train_ref/train_data.hdf5"
         self.h5_graphs = "tests/data/hdf5/1ATN_ppi.hdf5"
 
     def test_add_target(self):

From 0e3022dfef77b2b90505188ba01f94054185a17e Mon Sep 17 00:00:00 2001
From: gcroci2 <crocioni.giulia@gmail.com>
Date: Wed, 20 Sep 2023 15:48:24 +0200
Subject: [PATCH 7/7] add ref to docking docs page

---
 README.md      | 1 +
 docs/index.rst | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 4627ee9ca..482a2fe42 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Main features:
   - All features' documentation is available [here](https://deeprank2.readthedocs.io/en/latest/features.html)
 - Predefined target types
   - binary class, CAPRI categories, DockQ, RMSD, and FNAT
+  - Detailed docking scores documentation is available [here](https://deeprank2.readthedocs.io/en/latest/docking.html)
 - Flexible definition of both new features and targets
 - Features generation for both graphs and grids
 - Efficient data storage in HDF5 format
diff --git a/docs/index.rst b/docs/index.rst
index f86456d82..87992e7c5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,8 +7,8 @@ DeepRank2 allows for transformation of (pdb formatted) molecular data into 3D re
 
 Main features:
 
-* Predefined atom-level and residue-level feature types (e.g. atom/residue type, charge, size, potential energy, all features' documentation is available `here`_)
-* Predefined target types (binary class, CAPRI categories, DockQ, RMSD, and FNAT)
+* Predefined atom-level and residue-level feature types (e.g. atom/residue type, charge, size, potential energy, all features' documentation is available under `Features`_ notes)
+* Predefined target types (binary class, CAPRI categories, DockQ, RMSD, and FNAT, detailed docking scores documentation is available under `Docking scores`_ notes)
 * Flexible definition of both new features and targets
 * Features generation for both graphs and grids
 * Efficient data storage in HDF5 format
@@ -18,7 +18,8 @@ Main features:
 .. _DeepRank-GNN: https://github.com/DeepRank/Deeprank-GNN
 .. _convolutional neural networks: https://en.wikipedia.org/wiki/Convolutional_neural_network
 .. _graph neural networks: https://en.wikipedia.org/wiki/Graph_neural_network
-.. _here: https://deeprank2.readthedocs.io/en/latest/features.html
+.. _Features: https://deeprank2.readthedocs.io/en/latest/features.html
+.. _Docking scores: https://deeprank2.readthedocs.io/en/latest/docking.html
 .. _PyTorch: https://pytorch.org/docs/stable/index.html
 .. _PyTorch Geometric: https://pytorch-geometric.readthedocs.io/en/latest/