-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathligand_occupancy.py
executable file
·86 lines (76 loc) · 4.33 KB
/
ligand_occupancy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Computes how often any specified ligand was near the C-alpha atom of each residue.
# Spits out a histogram data file.
# This is useful for making sense of flooding simulations.
from __future__ import print_function
import argparse
import sys
from tqdm import tqdm
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.distances import *
if __name__ == '__main__':
ap = argparse.ArgumentParser(description='Where do ligands go? Useful for flooding simulations')
ap.add_argument('-c', '--cutoff', default=5, type=float, help='Distance cutoff in Angstroms')
ap.add_argument('-t', '--percent-frames-threshold', default=0, type=float,
help='Only print residues with this percent occupancy or greater (out of 100)')
ap.add_argument('-n', '--num-frames', type=int,
help='Limit the number of frames processed (beginning at the first frame)')
ap.add_argument('-s', '--atomselect', action='store_true', default=False,
help='ligand_resname argument is actually an MDAnalysis atomselect string')
ap.add_argument('-g', '--group-across-chains', action='store_true', default=False,
help='Group counts across chains (useful when the protein is multiple identical monomers)')
ap.add_argument('--colorize-pdb-filename', help='Filename for output suitable for colorize_pdb.py (percent occupancy cutoff ignored)')
ap.add_argument('ligand_resname', help='Residue name of ligand of interest (e.g. APM)')
ap.add_argument('psf', help='PSF topology file')
ap.add_argument('dcd', nargs='+', help='Trajectory file(s)')
args = ap.parse_args()
u = mda.Universe(args.psf, args.dcd)
u.atoms.wrap(compound='fragments', inplace=True)
protein_ca = u.select_atoms('protein and name CA')
ligand = u.select_atoms(f'resname {args.ligand_resname}' if args.atomselect is False else args.ligand_resname)
num_residues = len(protein_ca)
first_resid = protein_ca.residues[0].resid
num_frames = args.num_frames or len(u.trajectory)
print(f'Atoms in ligand: {len(ligand)}', file=sys.stderr)
print('Trajectory files: %s' % ', '.join(args.dcd), file=sys.stderr)
print('Residues: %d' % num_residues, file=sys.stderr)
print('First resid: %d' % first_resid, file=sys.stderr)
print('Frames: To process %d of %d' % (num_frames, len(u.trajectory)), file=sys.stderr)
print('Distance cutoff: %.2f Å for resname %s' % (args.cutoff, args.ligand_resname), file=sys.stderr)
print('Percent occupancy threshold: %.2f%%' % args.percent_frames_threshold, file=sys.stderr)
counts = np.zeros(num_residues)
for ts in tqdm(u.trajectory, unit='frames'):
if ts.frame >= num_frames:
print(f"Stopping here since you only asked for the first {num_frames} frames.", file=sys.stderr)
break
d = distance_array(protein_ca.positions, ligand.positions, box=ts.dimensions)
resid = 0
for ca in d:
min_dist = np.min(ca)
if min_dist < args.cutoff:
counts[resid] += 1
resid += 1
# We don't know beforehand how long the "coalesced" counts array needs to be,
# and we need to index by resname+resid, so we use a dict.
if args.group_across_chains is True:
counts_coalesced = dict()
for i in range(len(counts)):
key = '%s%d' % (protein_ca[i].resname, protein_ca[i].resid)
if key not in counts_coalesced:
counts_coalesced[key] = 0
counts_coalesced[key] += counts[i]
for key in sorted(list(counts_coalesced.keys()), key=lambda res: int(res[3:])):
percent_frames = 100*float(counts_coalesced[key])/num_frames
if counts_coalesced[key] > 0 and percent_frames > args.percent_frames_threshold:
print('%d\t%.2f%%\t%s' % (counts_coalesced[key], percent_frames, key))
else:
for i in range(len(counts)):
percent_frames = 100*float(counts[i])/num_frames
if counts[i] > 0 and percent_frames > args.percent_frames_threshold:
print('%d\t%.2f%%\t%s%d' % (counts[i], percent_frames, protein_ca[i].resname, protein_ca[i].resid))
if args.colorize_pdb_filename is not None:
with open(args.colorize_pdb_filename, 'w') as f:
print(' '.join([str(x) for x in counts/np.max(counts)]), file=f)