-
Notifications
You must be signed in to change notification settings - Fork 0
/
11_Molecular_counter.py
118 lines (84 loc) · 3.57 KB
/
11_Molecular_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 28 17:25:41 2020
@author: Ani Chattaraj
"""
import numpy as np
import os, re
from progress_bar import updateCalc
from time import time
from csv import writer, reader
from AllBondPlot import PlotAllBond
import matplotlib.pyplot as plt
import pandas as pd
class MoleculeCounter:
def __init__ (self, inpath, numRuns):
self.inpath = inpath
if type(numRuns) == list:
self.runs = numRuns
if type(numRuns) == int:
self.runs = [_ for _ in range(numRuns)]
def getOutpath(self):
outpath = self.inpath + "/pyStat/Bond_Stat"
if not os.path.isdir(outpath):
os.makedirs(outpath)
return outpath
def getBoxSize(self):
txtfile = self.inpath.split('/')[-1].replace('_FOLDER','.txt')
with open(self.inpath + "/" + txtfile, 'r') as tf:
for line in tf.readlines():
if re.search('L_x', line):
Lx = float(line.split(':')[-1].strip())
if re.search('L_y', line):
Ly = float(line.split(':')[-1].strip())
if re.search('L_z_in', line):
Lz = float(line.split(':')[-1].strip())
return (Lx*1e3,Ly*1e3,Lz*1e3) # in nm
@staticmethod
def getMoleculeCount(file, mols):
mol_stat = []
df = pd.read_csv(file, header=0)
for mol in mols:
mol_stat.append(list(df[mol]))
return np.asarray(mol_stat)
def getTimePoints(self):
file = None
for run in self.runs:
testfile = self.inpath + f"/data/Run{run}/FullBondData.csv"
if os.path.isfile(testfile):
file = testfile
break
else:
pass
else:
print('No run is complete yet')
df = pd.read_csv(file)
return df['Time']
def getMoleculeStat(self, molecules):
molCount = []
tp = self.getTimePoints()
lx,ly,lz = self.getBoxSize()
sysVol = lx*ly*lz
for run in self.runs:
try:
file = self.inpath + "/data/Run{}/FullCountData.csv".format(run)
mols = self.getMoleculeCount(file, molecules)
molCount.append(mols)
except:
print(f'Run{run} is still not complete!' )
pass
molCount_arr = np.asarray(molCount)
_, numMols, _ = molCount_arr.shape # shape : numRuns, numMolecules, numTimepoints
m_mols = np.mean(molCount_arr, axis=0) # axis = 0 gives average molecular counts over multiple trajectories
factor = 6.023 * 1e-7 # converts uM to molecules/nm3
df = pd.DataFrame({"Time":[_ for _ in tp]})
for i in range(numMols):
df[molecules[i] + ' (uM)'] = [_ for _ in m_mols[i]/(factor*sysVol)]
df.to_csv(self.getOutpath() + "/Average_Molecular_Concentration.csv", sep=',')
if __name__ == '__main__':
path = 'Z:/01_PROJECTS/00_PS_principles/01_ssalad_manuscript_models/00_Reference_system/4v_4v_flex_9nm_linker_SIMULATIONS/4v_flex_config03_CB_25uM_SIM_FOLDER'
#mols = ['FREE poly_SH3', 'FREE poly_PRM']
mols = [' TOTAL poly_A', 'TOTAL poly_B']
Runs = 100
mc = MoleculeCounter(path, Runs)
mc.getMoleculeStat(mols)