Skip to content

Commit

Permalink
fix linting according to ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniBodor committed Jan 15, 2024
1 parent 68ee7a6 commit 3343756
Show file tree
Hide file tree
Showing 60 changed files with 529 additions and 1,583 deletions.
324 changes: 74 additions & 250 deletions deeprank2/dataset.py

Large diffs are not rendered by default.

28 changes: 6 additions & 22 deletions deeprank2/domain/aminoacidlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,33 +372,19 @@
# pyrrolysine,
]

amino_acids_by_code = {
amino_acid.three_letter_code: amino_acid for amino_acid in amino_acids
}
amino_acids_by_letter = {
amino_acid.one_letter_code: amino_acid for amino_acid in amino_acids
}
amino_acids_by_code = {amino_acid.three_letter_code: amino_acid for amino_acid in amino_acids}
amino_acids_by_letter = {amino_acid.one_letter_code: amino_acid for amino_acid in amino_acids}
amino_acids_by_name = {amino_acid.name: amino_acid for amino_acid in amino_acids}


def convert_aa_nomenclature(aa: str, output_type: int | None = None):
try:
if len(aa) == 1:
aa: AminoAcid = [
entry
for entry in amino_acids
if entry.one_letter_code.lower() == aa.lower()
][0]
aa: AminoAcid = [entry for entry in amino_acids if entry.one_letter_code.lower() == aa.lower()][0]
elif len(aa) == 3:
aa: AminoAcid = [
entry
for entry in amino_acids
if entry.three_letter_code.lower() == aa.lower()
][0]
aa: AminoAcid = [entry for entry in amino_acids if entry.three_letter_code.lower() == aa.lower()][0]
else:
aa: AminoAcid = [
entry for entry in amino_acids if entry.name.lower() == aa.lower()
][0]
aa: AminoAcid = [entry for entry in amino_acids if entry.name.lower() == aa.lower()][0]
except IndexError as e:
raise ValueError(f"{aa} is not a valid amino acid.") from e

Expand All @@ -408,6 +394,4 @@ def convert_aa_nomenclature(aa: str, output_type: int | None = None):
return aa.three_letter_code
if output_type == 1:
return aa.one_letter_code
raise ValueError(
f"output_type {output_type} not recognized. Must be set to None (amino acid name), 1 (one letter code), or 3 (three letter code)."
)
raise ValueError(f"output_type {output_type} not recognized. Must be set to None (amino acid name), 1 (one letter code), or 3 (three letter code).")
4 changes: 1 addition & 3 deletions deeprank2/domain/nodestorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
DIFFPI = "diff_pI"
DIFFPOLARITY = "diff_polarity" # [type?]; former FEATURENAME_POLARITYDIFFERENCE
DIFFHBDONORS = "diff_hb_donors" # int; former FEATURENAME_HYDROGENBONDDONORSDIFFERENCE
DIFFHBACCEPTORS = (
"diff_hb_acceptors" # int; former FEATURENAME_HYDROGENBONDACCEPTORSDIFFERENCE
)
DIFFHBACCEPTORS = "diff_hb_acceptors" # int; former FEATURENAME_HYDROGENBONDACCEPTORSDIFFERENCE

## conservation features
PSSM = "pssm" # list[20xint]; former FEATURENAME_PSSM
Expand Down
16 changes: 4 additions & 12 deletions deeprank2/features/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,16 @@ def add_features( # pylint: disable=unused-argument
if residue == single_amino_acid_variant.residue:
node.features[Nfeat.VARIANTRES] = variant.onehot
node.features[Nfeat.DIFFCHARGE] = variant.charge - wildtype.charge
node.features[Nfeat.DIFFPOLARITY] = (
variant.polarity.onehot - wildtype.polarity.onehot
)
node.features[Nfeat.DIFFPOLARITY] = variant.polarity.onehot - wildtype.polarity.onehot
node.features[Nfeat.DIFFSIZE] = variant.size - wildtype.size
node.features[Nfeat.DIFFMASS] = variant.mass - wildtype.mass
node.features[Nfeat.DIFFPI] = variant.pI - wildtype.pI
node.features[Nfeat.DIFFHBDONORS] = (
variant.hydrogen_bond_donors - wildtype.hydrogen_bond_donors
)
node.features[Nfeat.DIFFHBACCEPTORS] = (
variant.hydrogen_bond_acceptors - wildtype.hydrogen_bond_acceptors
)
node.features[Nfeat.DIFFHBDONORS] = variant.hydrogen_bond_donors - wildtype.hydrogen_bond_donors
node.features[Nfeat.DIFFHBACCEPTORS] = variant.hydrogen_bond_acceptors - wildtype.hydrogen_bond_acceptors
else:
node.features[Nfeat.VARIANTRES] = residue.amino_acid.onehot
node.features[Nfeat.DIFFCHARGE] = 0
node.features[Nfeat.DIFFPOLARITY] = np.zeros(
residue.amino_acid.polarity.onehot.shape
)
node.features[Nfeat.DIFFPOLARITY] = np.zeros(residue.amino_acid.polarity.onehot.shape)
node.features[Nfeat.DIFFSIZE] = 0
node.features[Nfeat.DIFFMASS] = 0
node.features[Nfeat.DIFFPI] = 0
Expand Down
19 changes: 4 additions & 15 deletions deeprank2/features/conservation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,17 @@ def add_features( # pylint: disable=unused-argument
raise TypeError(f"Unexpected node type: {type(node.id)}")

pssm_row = residue.get_pssm()
profile = np.array(
[
pssm_row.get_conservation(amino_acid)
for amino_acid in profile_amino_acid_order
]
)
profile = np.array([pssm_row.get_conservation(amino_acid) for amino_acid in profile_amino_acid_order])
node.features[Nfeat.PSSM] = profile
node.features[Nfeat.INFOCONTENT] = pssm_row.information_content

if single_amino_acid_variant is not None:
if residue == single_amino_acid_variant.residue:
# only the variant residue can have a variant and wildtype amino acid
conservation_wildtype = pssm_row.get_conservation(
single_amino_acid_variant.wildtype_amino_acid
)
conservation_variant = pssm_row.get_conservation(
single_amino_acid_variant.variant_amino_acid
)
conservation_wildtype = pssm_row.get_conservation(single_amino_acid_variant.wildtype_amino_acid)
conservation_variant = pssm_row.get_conservation(single_amino_acid_variant.variant_amino_acid)
node.features[Nfeat.CONSERVATION] = conservation_wildtype
node.features[Nfeat.DIFFCONSERVATION] = (
conservation_variant - conservation_wildtype
)
node.features[Nfeat.DIFFCONSERVATION] = conservation_variant - conservation_wildtype
else:
# all nodes must have the same features, so set them to zero here
node.features[Nfeat.CONSERVATION] = 0.0
Expand Down
87 changes: 17 additions & 70 deletions deeprank2/features/contact.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,49 +38,26 @@ def _get_nonbonded_energy( # pylint: disable=too-many-locals
Tuple [NDArray[np.float64], NDArray[np.float64]]: matrices in same format as `distances` containing
all pairwise electrostatic potential energies and all pairwise Van der Waals potential energies
"""

# ELECTROSTATIC POTENTIAL
EPSILON0 = 1.0
COULOMB_CONSTANT = 332.0636
charges = [atomic_forcefield.get_charge(atom) for atom in atoms]
E_elec = (
np.expand_dims(charges, axis=1)
* np.expand_dims(charges, axis=0)
* COULOMB_CONSTANT
/ (EPSILON0 * distances)
)
E_elec = np.expand_dims(charges, axis=1) * np.expand_dims(charges, axis=0) * COULOMB_CONSTANT / (EPSILON0 * distances)

# VAN DER WAALS POTENTIAL
# calculate main vdw energies
sigmas = [
atomic_forcefield.get_vanderwaals_parameters(atom).sigma_main for atom in atoms
]
epsilons = [
atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_main
for atom in atoms
]
sigmas = [atomic_forcefield.get_vanderwaals_parameters(atom).sigma_main for atom in atoms]
epsilons = [atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_main for atom in atoms]
mean_sigmas = 0.5 * np.add.outer(sigmas, sigmas)
geomean_eps = np.sqrt(np.multiply.outer(epsilons, epsilons)) # sqrt(eps1*eps2)
E_vdw = (
4.0
* geomean_eps
* ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)
)
E_vdw = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)

# calculate vdw energies for 1-4 pairs
sigmas = [
atomic_forcefield.get_vanderwaals_parameters(atom).sigma_14 for atom in atoms
]
epsilons = [
atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_14 for atom in atoms
]
sigmas = [atomic_forcefield.get_vanderwaals_parameters(atom).sigma_14 for atom in atoms]
epsilons = [atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_14 for atom in atoms]
mean_sigmas = 0.5 * np.add.outer(sigmas, sigmas)
geomean_eps = np.sqrt(np.multiply.outer(epsilons, epsilons)) # sqrt(eps1*eps2)
E_vdw_14pairs = (
4.0
* geomean_eps
* ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)
)
E_vdw_14pairs = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)

# Fix energies for close contacts on same chain
chains = [atom.residue.chain.id for atom in atoms]
Expand Down Expand Up @@ -137,51 +114,21 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
atom1_index = atom_dict[contact.atom1]
atom2_index = atom_dict[contact.atom2]
## set features
edge.features[Efeat.SAMERES] = float(
contact.atom1.residue == contact.atom2.residue
)
edge.features[Efeat.SAMECHAIN] = float(
contact.atom1.residue.chain == contact.atom1.residue.chain
)
edge.features[Efeat.DISTANCE] = interatomic_distances[
atom1_index, atom2_index
]
edge.features[Efeat.ELEC] = interatomic_electrostatic_energy[
atom1_index, atom2_index
]
edge.features[Efeat.VDW] = interatomic_vanderwaals_energy[
atom1_index, atom2_index
]
edge.features[Efeat.SAMERES] = float(contact.atom1.residue == contact.atom2.residue)
edge.features[Efeat.SAMECHAIN] = float(contact.atom1.residue.chain == contact.atom1.residue.chain)
edge.features[Efeat.DISTANCE] = interatomic_distances[atom1_index, atom2_index]
edge.features[Efeat.ELEC] = interatomic_electrostatic_energy[atom1_index, atom2_index]
edge.features[Efeat.VDW] = interatomic_vanderwaals_energy[atom1_index, atom2_index]

elif isinstance(contact, ResidueContact):
## find the indices
atom1_indices = [atom_dict[atom] for atom in contact.residue1.atoms]
atom2_indices = [atom_dict[atom] for atom in contact.residue2.atoms]
## set features
edge.features[Efeat.SAMECHAIN] = float(
contact.residue1.chain == contact.residue2.chain
)
edge.features[Efeat.DISTANCE] = np.min(
[
[interatomic_distances[a1, a2] for a1 in atom1_indices]
for a2 in atom2_indices
]
)
edge.features[Efeat.ELEC] = np.sum(
[
[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices]
for a2 in atom2_indices
]
)
edge.features[Efeat.VDW] = np.sum(
[
[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices]
for a2 in atom2_indices
]
)
edge.features[Efeat.SAMECHAIN] = float(contact.residue1.chain == contact.residue2.chain)
edge.features[Efeat.DISTANCE] = np.min([[interatomic_distances[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.ELEC] = np.sum([[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.VDW] = np.sum([[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])

# Calculate irrespective of node type
edge.features[Efeat.COVALENT] = float(
edge.features[Efeat.DISTANCE] < covalent_cutoff
and edge.features[Efeat.SAMECHAIN]
)
edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff and edge.features[Efeat.SAMECHAIN])
32 changes: 8 additions & 24 deletions deeprank2/features/irc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def _id_from_residue(residue: tuple[str, int, str]) -> str:
"""Create and id from pdb2sql rendered residues that is similar to the id of residue nodes
"""Create an id from pdb2sql rendered residues that is similar to the id of residue nodes.
Args:
residue (tuple): Input residue as rendered by pdb2sql: ( str(<chain>), int(<residue_number>), str(<three_letter_code>) )
Expand All @@ -23,7 +23,6 @@ def _id_from_residue(residue: tuple[str, int, str]) -> str:
Returns:
str: Output id in form of '<chain><residue_number>'. For example: 'A27'.
"""

return residue[0] + str(residue[1])


Expand All @@ -40,9 +39,7 @@ def __init__(self, residue: tuple[str, int, str], polarity: Polarity):
self.connections["all"] = []


def get_IRCs(
pdb_path: str, chains: list[str], cutoff: float = 5.5
) -> dict[str, _ContactDensity]:
def get_IRCs(pdb_path: str, chains: list[str], cutoff: float = 5.5) -> dict[str, _ContactDensity]:
"""Get all close contact residues from the opposite chain.
Args:
Expand All @@ -55,7 +52,6 @@ def get_IRCs(
keys: ids of residues in form returned by id_from_residue.
items: _ContactDensity objects, containing all contact density information for the residue.
"""

residue_contacts: dict[str, _ContactDensity] = {}

sql = pdb2sql.interface(pdb_path)
Expand Down Expand Up @@ -93,9 +89,7 @@ def get_IRCs(
# add chain2_res to residue_contact dict if it doesn't exist yet
contact2_id = _id_from_residue(chain2_res)
if contact2_id not in residue_contacts:
residue_contacts[contact2_id] = _ContactDensity(
chain2_res, aa2.polarity
)
residue_contacts[contact2_id] = _ContactDensity(chain2_res, aa2.polarity)

# populate densities and connections for chain2_res
residue_contacts[contact2_id].densities["total"] += 1
Expand All @@ -113,9 +107,7 @@ def add_features(
):
if not single_amino_acid_variant: # VariantQueries do not use this feature
polarity_pairs = list(combinations(Polarity, 2))
polarity_pair_string = [
f"irc_{x[0].name.lower()}_{x[1].name.lower()}" for x in polarity_pairs
]
polarity_pair_string = [f"irc_{x[0].name.lower()}_{x[1].name.lower()}" for x in polarity_pairs]

total_contacts = 0
residue_contacts = get_IRCs(pdb_path, graph.get_all_chains())
Expand All @@ -129,28 +121,20 @@ def add_features(
else:
raise TypeError(f"Unexpected node type: {type(node.id)}")

contact_id = (
residue.chain.id + residue.number_string
) # reformat id to be in line with residue_contacts keys
contact_id = residue.chain.id + residue.number_string # reformat id to be in line with residue_contacts keys

# initialize all IRC features to 0
for IRC_type in Nfeat.IRC_FEATURES:
node.features[IRC_type] = 0

# load correct values to IRC features
try:
node.features[Nfeat.IRCTOTAL] = residue_contacts[contact_id].densities[
"total"
]
node.features[Nfeat.IRCTOTAL] = residue_contacts[contact_id].densities["total"]
for i, pair in enumerate(polarity_pairs):
if residue_contacts[contact_id].polarity == pair[0]:
node.features[polarity_pair_string[i]] = residue_contacts[
contact_id
].densities[pair[1]]
node.features[polarity_pair_string[i]] = residue_contacts[contact_id].densities[pair[1]]
elif residue_contacts[contact_id].polarity == pair[1]:
node.features[polarity_pair_string[i]] = residue_contacts[
contact_id
].densities[pair[0]]
node.features[polarity_pair_string[i]] = residue_contacts[contact_id].densities[pair[0]]
total_contacts += 1
except KeyError: # node has no contact residues and all counts remain 0
pass
Expand Down
22 changes: 8 additions & 14 deletions deeprank2/features/secondary_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@


class DSSPError(Exception):
"Raised if DSSP fails to produce an output"
"""Raised if DSSP fails to produce an output."""


class SecondarySctructure(Enum):
"a value to express a secondary a residue's secondary structure type"
"""Value to express a residue's secondary structure type."""

HELIX = 0 # 'GHI'
STRAND = 1 # 'BE'
Expand Down Expand Up @@ -54,9 +54,7 @@ def _check_pdb(pdb_path: str):
existing_records = _get_records(lines)
if "CRYST1" not in existing_records:
fix_pdb = True
dummy_CRYST1 = (
"CRYST1 00.000 00.000 00.000 00.00 00.00 00.00 X 00 00 0 00\n"
)
dummy_CRYST1 = "CRYST1 00.000 00.000 00.000 00.00 00.00 00.00 X 00 00 0 00\n"
lines = [lines[0]] + [dummy_CRYST1] + lines[1:]

# check for unnumbered REMARK lines
Expand Down Expand Up @@ -92,7 +90,6 @@ def _get_secstructure(pdb_path: str) -> dict:
Returns:
dict: A dictionary containing secondary structure information for each chain and residue.
"""

# Execute DSSP and read the output
_check_pdb(pdb_path)
p = PDBParser(QUIET=True)
Expand All @@ -108,7 +105,7 @@ def _get_secstructure(pdb_path: str) -> dict:
\nThis is likely due to an improrperly formatted pdb file: {pdb_path}.\
\nSee {pdb_format_link} for guidance on how to format your pdb files.\
\nAlternatively, turn off secondary_structure feature module during QueryCollection.process()."
)
) from e

chain_ids = [dssp_key[0] for dssp_key in dssp.property_keys]
res_numbers = [dssp_key[1][1] for dssp_key in dssp.property_keys]
Expand Down Expand Up @@ -145,11 +142,8 @@ def add_features( # pylint: disable=unused-argument

# pylint: disable=raise-missing-from
try:
node.features[Nfeat.SECSTRUCT] = _classify_secstructure(
sec_structure_features[chain_id][res_num]
).onehot
except AttributeError:
node.features[Nfeat.SECSTRUCT] = _classify_secstructure(sec_structure_features[chain_id][res_num]).onehot
except AttributeError as e:
raise ValueError(
f"Unknown secondary structure type ({sec_structure_features[chain_id][res_num]}) "
+ f"detected on chain {chain_id} residues {res_num}."
)
f"Unknown secondary structure type ({sec_structure_features[chain_id][res_num]}) " + f"detected on chain {chain_id} residues {res_num}."
) from e
Loading

0 comments on commit 3343756

Please sign in to comment.