-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpyMBE.py
3329 lines (2904 loc) · 174 KB
/
pyMBE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#
# Copyright (C) 2023-2024 pyMBE-dev team
#
# This file is part of pyMBE.
#
# pyMBE is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# pyMBE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import sys
import ast
import json
import pint
import numpy as np
import pandas as pd
import scipy.constants
import scipy.optimize
class pymbe_library():
"""
The library for the Molecular Builder for ESPResSo (pyMBE)
Attributes:
N_A(`pint.Quantity`): Avogadro number.
Kb(`pint.Quantity`): Boltzmann constant.
e(`pint.Quantity`): Elementary charge.
df(`Pandas.Dataframe`): Dataframe used to bookkeep all the information stored in pyMBE. Typically refered as `pmb.df`.
kT(`pint.Quantity`): Thermal energy.
Kw(`pint.Quantity`): Ionic product of water. Used in the setup of the G-RxMC method.
"""
class NumpyEncoder(json.JSONEncoder):
"""
Custom JSON encoder that converts NumPy arrays to Python lists
and NumPy scalars to Python scalars.
"""
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist()
if isinstance(obj, np.generic):
return obj.item()
return super().default(obj)
def __init__(self, seed, temperature=None, unit_length=None, unit_charge=None, Kw=None):
"""
Initializes the pymbe_library by setting up the reduced unit system with `temperature` and `reduced_length`
and sets up the `pmb.df` for bookkeeping.
Args:
temperature(`pint.Quantity`,optional): Value of the temperature in the pyMBE UnitRegistry. Defaults to None.
unit_length(`pint.Quantity`, optional): Value of the unit of length in the pyMBE UnitRegistry. Defaults to None.
unit_charge (`pint.Quantity`,optional): Reduced unit of charge defined using the `pmb.units` UnitRegistry. Defaults to None.
Kw (`pint.Quantity`,optional): Ionic product of water in mol^2/l^2. Defaults to None.
Note:
- If no `temperature` is given, a value of 298.15 K is assumed by default.
- If no `unit_length` is given, a value of 0.355 nm is assumed by default.
- If no `unit_charge` is given, a value of 1 elementary charge is assumed by default.
- If no `Kw` is given, a value of 10^(-14) * mol^2 / l^2 is assumed by default.
"""
# Seed and RNG
self.seed=seed
self.rng = np.random.default_rng(seed)
self.units=pint.UnitRegistry()
self.N_A=scipy.constants.N_A / self.units.mol
self.kB=scipy.constants.k * self.units.J / self.units.K
self.e=scipy.constants.e * self.units.C
self.set_reduced_units(unit_length=unit_length,
unit_charge=unit_charge,
temperature=temperature,
Kw=Kw,
verbose=False)
self.setup_df()
return
def add_bond_in_df(self, particle_id1, particle_id2, use_default_bond=False):
"""
Adds a bond entry on the `pymbe.df` storing the particle_ids of the two bonded particles.
Args:
particle_id1(`int`): particle_id of the type of the first particle type of the bonded particles
particle_id2(`int`): particle_id of the type of the second particle type of the bonded particles
use_default_bond(`bool`, optional): Controls if a bond of type `default` is used to bond particle whose bond types are not defined in `pmb.df`. Defaults to False.
Returns:
index(`int`): Row index where the bond information has been added in pmb.df.
"""
particle_name1 = self.df.loc[(self.df['particle_id']==particle_id1) & (self.df['pmb_type']=="particle")].name.values[0]
particle_name2 = self.df.loc[(self.df['particle_id']==particle_id2) & (self.df['pmb_type']=="particle")].name.values[0]
bond_key = self.find_bond_key(particle_name1=particle_name1,
particle_name2=particle_name2,
use_default_bond=use_default_bond)
if not bond_key:
return
self.copy_df_entry(name=bond_key,column_name='particle_id2',number_of_copies=1)
indexs = np.where(self.df['name']==bond_key)
index_list = list (indexs[0])
used_bond_df = self.df.loc[self.df['particle_id2'].notnull()]
#without this drop the program crashes when dropping duplicates because the 'bond' column is a dict
used_bond_df = used_bond_df.drop([('bond_object','')],axis =1 )
used_bond_index = used_bond_df.index.to_list()
for index in index_list:
if index not in used_bond_index:
self.clean_df_row(index=int(index))
self.df.at[index,'particle_id'] = particle_id1
self.df.at[index,'particle_id2'] = particle_id2
break
return index
def add_bonds_to_espresso(self, espresso_system) :
"""
Adds all bonds defined in `pmb.df` to `espresso_system`.
Args:
espresso_system(`espressomd.system.System`): system object of espressomd library
"""
if 'bond' in self.df.values:
bond_df = self.df.loc[self.df ['pmb_type'] == 'bond']
bond_list = bond_df.bond_object.values.tolist()
for bond in bond_list:
espresso_system.bonded_inter.add(bond)
else:
print ('WARNING: There are no bonds defined in pymbe.df')
return
def add_value_to_df(self,index,key,new_value, verbose=True, non_standard_value=False, overwrite=False):
"""
Adds a value to a cell in the `pmb.df` DataFrame.
Args:
index(`int`): index of the row to add the value to.
key(`str`): the column label to add the value to.
verbose(`bool`, optional): Switch to activate/deactivate verbose. Defaults to True.
non_standard_value(`bool`, optional): Switch to enable insertion of non-standard values, such as `dict` objects. Defaults to False.
overwrite(`bool`, optional): Switch to enable overwriting of already existing values in pmb.df. Defaults to False.
"""
token = "#protected:"
def protect(obj):
if non_standard_value:
return token + json.dumps(obj, cls=self.NumpyEncoder)
return obj
def deprotect(obj):
if non_standard_value and isinstance(obj, str) and obj.startswith(token):
return json.loads(obj.removeprefix(token))
return obj
# Make sure index is a scalar integer value
index = int (index)
assert isinstance(index, int), '`index` should be a scalar integer value.'
idx = pd.IndexSlice
if self.check_if_df_cell_has_a_value(index=index,key=key):
old_value= self.df.loc[index,idx[key]]
if protect(old_value) != protect(new_value):
name=self.df.loc[index,('name','')]
pmb_type=self.df.loc[index,('pmb_type','')]
if verbose:
print(f"WARNING: you are attempting to redefine the properties of {name} of pmb_type {pmb_type}")
if overwrite and verbose:
print(f'WARNING: overwritting the value of the entry `{key}`: old_value = {old_value} new_value = {new_value}')
if not overwrite:
if verbose:
print(f"WARNING: pyMBE has preserved of the entry `{key}`: old_value = {old_value}. If you want to overwrite it with new_value = {new_value}, activate the switch overwrite = True ")
return
self.df.loc[index,idx[key]] = protect(new_value)
if non_standard_value:
self.df[key] = self.df[key].apply(deprotect)
return
def assign_molecule_id(self, name, molecule_index, pmb_type, used_molecules_id):
"""
Assigns the `molecule_id` of the pmb object given by `pmb_type`
Args:
name(`str`): Label of the molecule type to be created. `name` must be defined in `pmb.df`
pmb_type(`str`): pmb_object_type to assign the `molecule_id`
molecule_index(`int`): index of the current `pmb_object_type` to assign the `molecule_id`
used_molecules_id(`lst`): list with the `molecule_id` values already used.
Returns:
molecule_id(`int`): Id of the molecule
"""
self.clean_df_row(index=int(molecule_index))
if self.df['molecule_id'].isnull().values.all():
molecule_id = 0
else:
# check if a residue is part of another molecule
check_residue_name = self.df[self.df['residue_list'].astype(str).str.contains(name)]
mol_pmb_type = self.df.loc[self.df['name']==name].pmb_type.values[0]
if not check_residue_name.empty and mol_pmb_type == pmb_type:
for value in check_residue_name.index.to_list():
if value not in used_molecules_id:
molecule_id = self.df.loc[value].molecule_id.values[0]
break
else:
molecule_id = self.df['molecule_id'].max() +1
self.add_value_to_df (key=('molecule_id',''),
index=int(molecule_index),
new_value=molecule_id,
verbose=False)
return molecule_id
def calculate_center_of_mass_of_molecule(self, molecule_id, espresso_system):
"""
Calculates the center of the molecule with a given molecule_id.
Args:
molecule_id(`int`): Id of the molecule whose center of mass is to be calculated.
espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library.
Returns:
center_of_mass(`lst`): Coordinates of the center of mass.
"""
center_of_mass = np.zeros(3)
axis_list = [0,1,2]
molecule_name = self.df.loc[(self.df['molecule_id']==molecule_id) & (self.df['pmb_type'].isin(["molecule","protein"]))].name.values[0]
particle_id_list = self.get_particle_id_map(object_name=molecule_name)["all"]
for pid in particle_id_list:
for axis in axis_list:
center_of_mass [axis] += espresso_system.part.by_id(pid).pos[axis]
center_of_mass = center_of_mass / len(particle_id_list)
return center_of_mass
def calculate_HH(self, molecule_name, pH_list=None, pka_set=None):
"""
Calculates the charge per molecule according to the ideal Henderson-Hasselbalch titration curve
for molecules with the name `molecule_name`.
Args:
molecule_name(`str`): name of the molecule to calculate the ideal charge for
pH_list(`lst`): pH-values to calculate.
pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}}
Returns:
Z_HH(`lst`): Henderson-Hasselbalch prediction of the charge of `sequence` in `pH_list`
Note:
- This function supports objects with pmb types: "molecule", "peptide" and "protein".
- If no `pH_list` is given, 50 equispaced pH-values ranging from 2 to 12 are calculated
- If no `pka_set` is given, the pKa values are taken from `pmb.df`
- This function should only be used for single-phase systems. For two-phase systems `pmb.calculate_HH_Donnan` should be used.
"""
if pH_list is None:
pH_list=np.linspace(2,12,50)
if pka_set is None:
pka_set=self.get_pka_set()
self.check_pka_set(pka_set=pka_set)
charge_number_map = self.get_charge_number_map()
Z_HH=[]
for pH_value in pH_list:
Z=0
index = self.df.loc[self.df['name'] == molecule_name].index[0].item()
residue_list = self.df.at [index,('residue_list','')]
sequence = self.df.at [index,('sequence','')]
if np.any(pd.isnull(sequence)):
# Molecule has no sequence
for residue in residue_list:
list_of_particles_in_residue = self.search_particles_in_residue(residue)
for particle in list_of_particles_in_residue:
if particle in pka_set.keys():
if pka_set[particle]['acidity'] == 'acidic':
psi=-1
elif pka_set[particle]['acidity']== 'basic':
psi=+1
else:
psi=0
Z+=psi/(1+10**(psi*(pH_value-pka_set[particle]['pka_value'])))
Z_HH.append(Z)
else:
# Molecule has a sequence
if not isinstance(sequence, list):
# If the df has been read by file, the sequence needs to be parsed.
sequence = self.parse_sequence_from_file(sequence=sequence)
for name in sequence:
if name in pka_set.keys():
if pka_set[name]['acidity'] == 'acidic':
psi=-1
elif pka_set[name]['acidity']== 'basic':
psi=+1
else:
psi=0
Z+=psi/(1+10**(psi*(pH_value-pka_set[name]['pka_value'])))
else:
state_one_type = self.df.loc[self.df['name']==name].state_one.es_type.values[0]
Z+=charge_number_map[state_one_type]
Z_HH.append(Z)
return Z_HH
def calculate_HH_Donnan(self, c_macro, c_salt, pH_list=None, pka_set=None):
"""
Calculates the charge on the different molecules according to the Henderson-Hasselbalch equation coupled to the Donnan partitioning.
Args:
c_macro('dict'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system.
c_salt('float'): Salt concentration in the reservoir.
pH_list('lst'): List of pH-values in the reservoir.
pka_set('dict'): {"name": {"pka_value": pka, "acidity": acidity}}.
Returns:
{"charges_dict": {"name": charges}, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list}
pH_system_list ('lst'): List of pH_values in the system.
partition_coefficients_list ('lst'): List of partition coefficients of cations.
Note:
- If no `pH_list` is given, 50 equispaced pH-values ranging from 2 to 12 are calculated
- If no `pka_set` is given, the pKa values are taken from `pmb.df`
- If `c_macro` does not contain all charged molecules in the system, this function is likely to provide the wrong result.
"""
if pH_list is None:
pH_list=np.linspace(2,12,50)
if pka_set is None:
pka_set=self.get_pka_set()
self.check_pka_set(pka_set=pka_set)
partition_coefficients_list = []
pH_system_list = []
Z_HH_Donnan={}
for key in c_macro:
Z_HH_Donnan[key] = []
def calc_charges(c_macro, pH):
"""
Calculates the charges of the different kinds of molecules according to the Henderson-Hasselbalch equation.
Args:
c_macro('dic'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system.
pH('float'): pH-value that is used in the HH equation.
Returns:
charge('dict'): {"molecule_name": charge}
"""
charge = {}
for name in c_macro:
charge[name] = self.calculate_HH(name, [pH], pka_set)[0]
return charge
def calc_partition_coefficient(charge, c_macro):
"""
Calculates the partition coefficients of positive ions according to the ideal Donnan theory.
Args:
charge('dict'): {"molecule_name": charge}
c_macro('dict'): {"name": concentration} - A dict containing the concentrations of all charged macromolecular species in the system.
"""
nonlocal ionic_strength_res
charge_density = 0.0
for key in charge:
charge_density += charge[key] * c_macro[key]
return (-charge_density / (2 * ionic_strength_res) + np.sqrt((charge_density / (2 * ionic_strength_res))**2 + 1)).magnitude
for pH_value in pH_list:
# calculate the ionic strength of the reservoir
if pH_value <= 7.0:
ionic_strength_res = 10 ** (-pH_value) * self.units.mol/self.units.l + c_salt
elif pH_value > 7.0:
ionic_strength_res = 10 ** (-(14-pH_value)) * self.units.mol/self.units.l + c_salt
#Determine the partition coefficient of positive ions by solving the system of nonlinear, coupled equations
#consisting of the partition coefficient given by the ideal Donnan theory and the Henderson-Hasselbalch equation.
#The nonlinear equation is formulated for log(xi) since log-operations are not supported for RootResult objects.
equation = lambda logxi: logxi - np.log10(calc_partition_coefficient(calc_charges(c_macro, pH_value - logxi), c_macro))
logxi = scipy.optimize.root_scalar(equation, bracket=[-1e2, 1e2], method="brentq")
partition_coefficient = 10**logxi.root
charges_temp = calc_charges(c_macro, pH_value-np.log10(partition_coefficient))
for key in c_macro:
Z_HH_Donnan[key].append(charges_temp[key])
pH_system_list.append(pH_value - np.log10(partition_coefficient))
partition_coefficients_list.append(partition_coefficient)
return {"charges_dict": Z_HH_Donnan, "pH_system_list": pH_system_list, "partition_coefficients": partition_coefficients_list}
def calculate_initial_bond_length(self, bond_object, bond_type, epsilon, sigma, cutoff, offset):
"""
Calculates the initial bond length that is used when setting up molecules,
based on the minimum of the sum of bonded and short-range (LJ) interactions.
Args:
bond_object(`espressomd.interactions.BondedInteractions`): instance of a bond object from espressomd library
bond_type(`str`): label identifying the used bonded potential
epsilon(`pint.Quantity`): LJ epsilon of the interaction between the particles
sigma(`pint.Quantity`): LJ sigma of the interaction between the particles
cutoff(`pint.Quantity`): cutoff-radius of the LJ interaction
offset(`pint.Quantity`): offset of the LJ interaction
"""
def truncated_lj_potential(x, epsilon, sigma, cutoff,offset):
if x>cutoff:
return 0.0
else:
return 4*epsilon*((sigma/(x-offset))**12-(sigma/(x-offset))**6) - 4*epsilon*((sigma/cutoff)**12-(sigma/cutoff)**6)
epsilon_red=epsilon.to('reduced_energy').magnitude
sigma_red=sigma.to('reduced_length').magnitude
cutoff_red=cutoff.to('reduced_length').magnitude
offset_red=offset.to('reduced_length').magnitude
if bond_type == "harmonic":
r_0 = bond_object.params.get('r_0')
k = bond_object.params.get('k')
l0 = scipy.optimize.minimize(lambda x: 0.5*k*(x-r_0)**2 + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red, offset_red), x0=r_0).x
elif bond_type == "FENE":
r_0 = bond_object.params.get('r_0')
k = bond_object.params.get('k')
d_r_max = bond_object.params.get('d_r_max')
l0 = scipy.optimize.minimize(lambda x: -0.5*k*(d_r_max**2)*np.log(1-((x-r_0)/d_r_max)**2) + truncated_lj_potential(x, epsilon_red, sigma_red, cutoff_red,offset_red), x0=1.0).x
return l0
def calculate_net_charge(self, espresso_system, molecule_name, dimensionless=False):
'''
Calculates the net charge per molecule of molecules with `name` = molecule_name.
Returns the net charge per molecule and a maps with the net charge per residue and molecule.
Args:
espresso_system(`espressomd.system.System`): system information
molecule_name(`str`): name of the molecule to calculate the net charge
dimensionless(`bool'): sets if the charge is returned with a dimension or not
Returns:
(`dict`): {"mean": mean_net_charge, "molecules": {mol_id: net_charge_of_mol_id, }, "residues": {res_id: net_charge_of_res_id, }}
Note:
- The net charge of the molecule is averaged over all molecules of type `name`
- The net charge of each particle type is averaged over all particle of the same type in all molecules of type `name`
'''
valid_pmb_types = ["molecule", "protein"]
pmb_type=self.df.loc[self.df['name']==molecule_name].pmb_type.values[0]
if pmb_type not in valid_pmb_types:
raise ValueError("The pyMBE object with name {molecule_name} has a pmb_type {pmb_type}. This function only supports pyMBE types {valid_pmb_types}")
id_map = self.get_particle_id_map(object_name=molecule_name)
def create_charge_map(espresso_system,id_map,label):
charge_number_map={}
for super_id in id_map[label].keys():
if dimensionless:
net_charge=0
else:
net_charge=0 * self.units.Quantity(1,'reduced_charge')
for pid in id_map[label][super_id]:
if dimensionless:
net_charge+=espresso_system.part.by_id(pid).q
else:
net_charge+=espresso_system.part.by_id(pid).q * self.units.Quantity(1,'reduced_charge')
charge_number_map[super_id]=net_charge
return charge_number_map
net_charge_molecules=create_charge_map(label="molecule_map",
espresso_system=espresso_system,
id_map=id_map)
net_charge_residues=create_charge_map(label="residue_map",
espresso_system=espresso_system,
id_map=id_map)
if dimensionless:
mean_charge=np.mean(np.array(list(net_charge_molecules.values())))
else:
mean_charge=np.mean(np.array([value.magnitude for value in net_charge_molecules.values()]))*self.units.Quantity(1,'reduced_charge')
return {"mean": mean_charge, "molecules": net_charge_molecules, "residues": net_charge_residues}
def center_molecule_in_simulation_box(self, molecule_id, espresso_system):
"""
Centers the pmb object matching `molecule_id` in the center of the simulation box in `espresso_md`.
Args:
molecule_id(`int`): Id of the molecule to be centered.
espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library.
"""
if len(self.df.loc[self.df['molecule_id']==molecule_id].pmb_type) == 0:
raise ValueError("The provided molecule_id is not present in the pyMBE dataframe.")
center_of_mass = self.calculate_center_of_mass_of_molecule(molecule_id=molecule_id,espresso_system=espresso_system)
box_center = [espresso_system.box_l[0]/2.0,
espresso_system.box_l[1]/2.0,
espresso_system.box_l[2]/2.0]
molecule_name = self.df.loc[(self.df['molecule_id']==molecule_id) & (self.df['pmb_type'].isin(["molecule","protein"]))].name.values[0]
particle_id_list = self.get_particle_id_map(object_name=molecule_name)["all"]
for pid in particle_id_list:
es_pos = espresso_system.part.by_id(pid).pos
espresso_system.part.by_id(pid).pos = es_pos - center_of_mass + box_center
return
def check_aminoacid_key(self, key):
"""
Checks if `key` corresponds to a valid aminoacid letter code.
Args:
key(`str`): key to be checked.
Returns:
`bool`: True if `key` is a valid aminoacid letter code, False otherwise.
"""
valid_AA_keys=['V', #'VAL'
'I', #'ILE'
'L', #'LEU'
'E', #'GLU'
'Q', #'GLN'
'D', #'ASP'
'N', #'ASN'
'H', #'HIS'
'W', #'TRP'
'F', #'PHE'
'Y', #'TYR'
'R', #'ARG'
'K', #'LYS'
'S', #'SER'
'T', #'THR'
'M', #'MET'
'A', #'ALA'
'G', #'GLY'
'P', #'PRO'
'C'] #'CYS'
if key in valid_AA_keys:
return True
else:
return False
def check_dimensionality(self, variable, expected_dimensionality):
"""
Checks if the dimensionality of `variable` matches `expected_dimensionality`.
Args:
variable(`pint.Quantity`): Quantity to be checked.
expected_dimensionality(`str`): Expected dimension of the variable.
Returns:
(`bool`): `True` if the variable if of the expected dimensionality, `False` otherwise.
Note:
- `expected_dimensionality` takes dimensionality following the Pint standards [docs](https://pint.readthedocs.io/en/0.10.1/wrapping.html?highlight=dimensionality#checking-dimensionality).
- For example, to check for a variable corresponding to a velocity `expected_dimensionality = "[length]/[time]"`
"""
correct_dimensionality=variable.check(f"{expected_dimensionality}")
if not correct_dimensionality:
raise ValueError(f"The variable {variable} should have a dimensionality of {expected_dimensionality}, instead the variable has a dimensionality of {variable.dimensionality}")
return correct_dimensionality
def check_if_df_cell_has_a_value(self, index,key):
"""
Checks if a cell in the `pmb.df` at the specified index and column has a value.
Args:
index(`int`): Index of the row to check.
key(`str`): Column label to check.
Returns:
`bool`: `True` if the cell has a value, `False` otherwise.
"""
idx = pd.IndexSlice
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
return not pd.isna(self.df.loc[index, idx[key]])
def check_if_name_is_defined_in_df(self, name, pmb_type_to_be_defined):
"""
Checks if `name` is defined in `pmb.df`.
Args:
name(`str`): label to check if defined in `pmb.df`.
pmb_type_to_be_defined(`str`): pmb object type corresponding to `name`.
Returns:
`bool`: `True` for success, `False` otherwise.
"""
if name in self.df['name'].unique():
current_object_type = self.df[self.df['name']==name].pmb_type.values[0]
if current_object_type != pmb_type_to_be_defined:
raise ValueError (f"The name {name} is already defined in the df with a pmb_type = {current_object_type}, pymMBE does not support objects with the same name but different pmb_types")
return True
else:
return False
def check_if_metal_ion(self,key):
"""
Checks if `key` corresponds to a label of a supported metal ion.
Args:
key(`str`): key to be checked
Returns:
(`bool`): True if `key` is a supported metal ion, False otherwise.
"""
if key in self.get_metal_ions_charge_number_map().keys():
return True
else:
return False
def check_pka_set(self, pka_set):
"""
Checks that `pka_set` has the formatting expected by the pyMBE library.
Args:
pka_set(`dict`): {"name" : {"pka_value": pka, "acidity": acidity}}
"""
required_keys=['pka_value','acidity']
for required_key in required_keys:
for pka_entry in pka_set.values():
if required_key not in pka_entry.keys():
raise ValueError(f'missing a required key "{required_key}" in the following entry of pka_set "{pka_entry}"')
return
def clean_df_row(self, index, columns_keys_to_clean=("particle_id", "particle_id2", "residue_id", "molecule_id")):
"""
Cleans the columns of `pmb.df` in `columns_keys_to_clean` of the row with index `index` by assigning them a np.nan value.
Args:
index(`int`): Index of the row to clean.
columns_keys_to_clean(`list` of `str`, optional): List with the column keys to be cleaned. Defaults to [`particle_id`, `particle_id2`, `residue_id`, `molecule_id`].
"""
for column_key in columns_keys_to_clean:
self.add_value_to_df(key=(column_key,''),index=index,new_value=np.nan, verbose=False)
return
def convert_columns_to_original_format (self, df):
"""
Converts the columns of the Dataframe to the original format in pyMBE.
Args:
df(`DataFrame`): dataframe with pyMBE information as a string
"""
columns_dtype_int = ['particle_id','particle_id2', 'residue_id','molecule_id', 'model',('state_one','es_type'),('state_two','es_type'),('state_one','z'),('state_two','z') ]
columns_with_units = ['sigma', 'epsilon', 'cutoff', 'offset']
columns_with_list_or_dict = ['residue_list','side_chains', 'parameters_of_the_potential','sequence']
for column_name in columns_dtype_int:
df[column_name] = df[column_name].astype(object)
for column_name in columns_with_list_or_dict:
if df[column_name].isnull().all():
df[column_name] = df[column_name].astype(object)
else:
df[column_name] = df[column_name].apply(lambda x: ast.literal_eval(str(x)) if pd.notnull(x) else x)
for column_name in columns_with_units:
df[column_name] = df[column_name].apply(lambda x: self.create_variable_with_units(x) if pd.notnull(x) else x)
df['bond_object'] = df['bond_object'].apply(lambda x: self.convert_str_to_bond_object(x) if pd.notnull(x) else x)
return df
def convert_str_to_bond_object (self, bond_str):
"""
Convert a row read as a `str` to the corresponding bond object. There are two supported bonds: HarmonicBond and FeneBond
Args:
bond_str(`str`): string with the information of a bond object
Returns:
bond_object(`obj`): EsPRESSo bond object
"""
from espressomd.interactions import HarmonicBond
from espressomd.interactions import FeneBond
supported_bonds = ['HarmonicBond', 'FeneBond']
for bond in supported_bonds:
variable = re.subn(f'{bond}', '', bond_str)
if variable[1] == 1:
params = ast.literal_eval(variable[0])
if bond == 'HarmonicBond':
bond_object = HarmonicBond(r_cut =params['r_cut'], k = params['k'], r_0=params['r_0'])
elif bond == 'FeneBond':
bond_object = FeneBond(k = params['k'], d_r_max =params['d_r_max'], r_0=params['r_0'])
return bond_object
def copy_df_entry(self, name, column_name, number_of_copies):
'''
Creates 'number_of_copies' of a given 'name' in `pymbe.df`.
Args:
name(`str`): Label of the particle/residue/molecule type to be created. `name` must be defined in `pmb.df`
column_name(`str`): Column name to use as a filter.
number_of_copies(`int`): number of copies of `name` to be created.
Note:
- Currently, column_name only supports "particle_id", "particle_id2", "residue_id" and "molecule_id"
'''
valid_column_names=["particle_id", "residue_id", "molecule_id", "particle_id2" ]
if column_name not in valid_column_names:
raise ValueError(f"{column_name} is not a valid column_name, currently only the following are supported: {valid_column_names}")
df_by_name = self.df.loc[self.df.name == name]
if number_of_copies != 1:
if df_by_name[column_name].isnull().values.any():
df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies-1), ignore_index=True)
else:
df_by_name = df_by_name[df_by_name.index == df_by_name.index.min()]
df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies), ignore_index=True)
df_by_name_repeated[column_name] =np.nan
# Concatenate the new particle rows to `df`
self.df = pd.concat ([self.df,df_by_name_repeated], ignore_index=True)
else:
if not df_by_name[column_name].isnull().values.any():
df_by_name = df_by_name[df_by_name.index == df_by_name.index.min()]
df_by_name_repeated = pd.concat ([df_by_name]*(number_of_copies), ignore_index=True)
df_by_name_repeated[column_name] =np.nan
self.df = pd.concat ([self.df,df_by_name_repeated], ignore_index=True)
return
def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt, verbose=True):
"""
Creates a `c_salt` concentration of `cation_name` and `anion_name` ions into the `espresso_system`.
Args:
espresso_system(`espressomd.system.System`): instance of an espresso system object.
cation_name(`str`): `name` of a particle with a positive charge.
anion_name(`str`): `name` of a particle with a negative charge.
c_salt(`float`): Salt concentration.
verbose(`bool`): switch to activate/deactivate verbose. Defaults to True.
Returns:
c_salt_calculated(`float`): Calculated salt concentration added to `espresso_system`.
"""
cation_name_charge = self.df.loc[self.df['name']==cation_name].state_one.z.values[0]
anion_name_charge = self.df.loc[self.df['name']==anion_name].state_one.z.values[0]
if cation_name_charge <= 0:
raise ValueError('ERROR cation charge must be positive, charge ',cation_name_charge)
if anion_name_charge >= 0:
raise ValueError('ERROR anion charge must be negative, charge ', anion_name_charge)
# Calculate the number of ions in the simulation box
volume=self.units.Quantity(espresso_system.volume(), 'reduced_length**3')
if c_salt.check('[substance] [length]**-3'):
N_ions= int((volume*c_salt.to('mol/reduced_length**3')*self.N_A).magnitude)
c_salt_calculated=N_ions/(volume*self.N_A)
elif c_salt.check('[length]**-3'):
N_ions= int((volume*c_salt.to('reduced_length**-3')).magnitude)
c_salt_calculated=N_ions/volume
else:
raise ValueError('Unknown units for c_salt, please provided it in [mol / volume] or [particle / volume]', c_salt)
N_cation = N_ions*abs(anion_name_charge)
N_anion = N_ions*abs(cation_name_charge)
self.create_particle(espresso_system=espresso_system, name=cation_name, number_of_particles=N_cation)
self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=N_anion)
if verbose:
if c_salt_calculated.check('[substance] [length]**-3'):
print(f"\n Added salt concentration of {c_salt_calculated.to('mol/L')} given by {N_cation} cations and {N_anion} anions")
elif c_salt_calculated.check('[length]**-3'):
print(f"\n Added salt concentration of {c_salt_calculated.to('reduced_length**-3')} given by {N_cation} cations and {N_anion} anions")
return c_salt_calculated
def create_bond_in_espresso(self, bond_type, bond_parameters):
'''
Creates either a harmonic or a FENE bond in ESPREesSo
Args:
bond_type(`str`): label to identify the potential to model the bond.
bond_parameters(`dict`): parameters of the potential of the bond.
Note:
Currently, only HARMONIC and FENE bonds are supported.
For a HARMONIC bond the dictionary must contain:
- k (`obj`) : Magnitude of the bond. It should have units of energy/length**2
using the `pmb.units` UnitRegistry.
- r_0 (`obj`) : Equilibrium bond length. It should have units of length using
the `pmb.units` UnitRegistry.
For a FENE bond the dictionay must additionally contain:
- d_r_max (`obj`): Maximal stretching length for FENE. It should have
units of length using the `pmb.units` UnitRegistry. Default 'None'.
Returns:
bond_object (`obj`): a harmonic or a FENE bond object in ESPREesSo
'''
from espressomd import interactions
valid_bond_types = ["harmonic", "FENE"]
if 'k' in bond_parameters:
bond_magnitude = bond_parameters['k'].to('reduced_energy / reduced_length**2')
else:
raise ValueError("Magnitud of the potential (k) is missing")
if bond_type == 'harmonic':
if 'r_0' in bond_parameters:
bond_length = bond_parameters['r_0'].to('reduced_length')
else:
raise ValueError("Equilibrium bond length (r_0) is missing")
bond_object = interactions.HarmonicBond(k = bond_magnitude.magnitude,
r_0 = bond_length.magnitude)
elif bond_type == 'FENE':
if 'r_0' in bond_parameters:
bond_length = bond_parameters['r_0'].to('reduced_length').magnitude
else:
print("WARNING: No value provided for r_0. Defaulting to r_0 = 0")
bond_length=0
if 'd_r_max' in bond_parameters:
max_bond_stret = bond_parameters['d_r_max'].to('reduced_length')
else:
raise ValueError("Maximal stretching length (d_r_max) is missing")
bond_object = interactions.FeneBond(r_0 = bond_length,
k = bond_magnitude.magnitude,
d_r_max = max_bond_stret.magnitude)
else:
raise ValueError(f"Bond type {bond_type} currently not implemented in pyMBE, valid types are {valid_bond_types}")
return bond_object
def create_counterions(self, object_name, cation_name, anion_name, espresso_system,verbose=True):
"""
Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net charge of `pmb_object`.
Args:
object_name(`str`): `name` of a pymbe object.
espresso_system(`espressomd.system.System`): Instance of a system object from the espressomd library.
cation_name(`str`): `name` of a particle with a positive charge.
anion_name(`str`): `name` of a particle with a negative charge.
verbose(`bool`): switch to activate/deactivate verbose. Defaults to True.
Returns:
counterion_number(`dict`): {"name": number}
"""
cation_charge = self.df.loc[self.df['name']==cation_name].state_one.z.iloc[0]
anion_charge = self.df.loc[self.df['name']==anion_name].state_one.z.iloc[0]
object_ids = self.get_particle_id_map(object_name=object_name)["all"]
counterion_number={}
object_charge={}
for name in ['positive', 'negative']:
object_charge[name]=0
for id in object_ids:
if espresso_system.part.by_id(id).q > 0:
object_charge['positive']+=1*(np.abs(espresso_system.part.by_id(id).q ))
elif espresso_system.part.by_id(id).q < 0:
object_charge['negative']+=1*(np.abs(espresso_system.part.by_id(id).q ))
if object_charge['positive'] % abs(anion_charge) == 0:
counterion_number[anion_name]=int(object_charge['positive']/abs(anion_charge))
else:
raise ValueError('The number of positive charges in the pmb_object must be divisible by the charge of the anion')
if object_charge['negative'] % abs(cation_charge) == 0:
counterion_number[cation_name]=int(object_charge['negative']/cation_charge)
else:
raise ValueError('The number of negative charges in the pmb_object must be divisible by the charge of the cation')
if counterion_number[cation_name] > 0:
self.create_particle(espresso_system=espresso_system, name=cation_name, number_of_particles=counterion_number[cation_name])
else:
counterion_number[cation_name]=0
if counterion_number[anion_name] > 0:
self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=counterion_number[anion_name])
else:
counterion_number[anion_name] = 0
if verbose:
print('The following counter-ions have been created: ')
for name in counterion_number.keys():
print(f'Ion type: {name} created number: {counterion_number[name]}')
return counterion_number
def create_molecule(self, name, number_of_molecules, espresso_system, list_of_first_residue_positions=None, backbone_vector=None, use_default_bond=False):
"""
Creates `number_of_molecules` molecule of type `name` into `espresso_system` and bookkeeps them into `pmb.df`.
Args:
name(`str`): Label of the molecule type to be created. `name` must be defined in `pmb.df`
espresso_system(`espressomd.system.System`): Instance of a system object from espressomd library.
number_of_molecules(`int`): Number of molecules of type `name` to be created.
list_of_first_residue_positions(`list`, optional): List of coordinates where the central bead of the first_residue_position will be created, random by default.
backbone_vector(`list` of `float`): Backbone vector of the molecule, random by default. Central beads of the residues in the `residue_list` are placed along this vector.
use_default_bond(`bool`, optional): Controls if a bond of type `default` is used to bond particle with undefined bonds in `pymbe.df`
Returns:
molecules_info(`dict`): {molecule_id: {residue_id:{"central_bead_id":central_bead_id, "side_chain_ids": [particle_id1, ...]}}}
"""
if list_of_first_residue_positions is not None:
for item in list_of_first_residue_positions:
if not isinstance(item, list):
raise ValueError("The provided input position is not a nested list. Should be a nested list with elements of 3D lists, corresponding to xyz coord.")
elif len(item) != 3:
raise ValueError("The provided input position is formatted wrong. The elements in the provided list does not have 3 coordinates, corresponding to xyz coord.")
if len(list_of_first_residue_positions) != number_of_molecules:
raise ValueError(f"Number of positions provided in {list_of_first_residue_positions} does not match number of molecules desired, {number_of_molecules}")
if number_of_molecules <= 0:
return 0
self.check_if_name_is_defined_in_df(name=name,
pmb_type_to_be_defined='molecule')
first_residue = True
molecules_info = {}
residue_list = self.df[self.df['name']==name].residue_list.values [0]
self.copy_df_entry(name=name,
column_name='molecule_id',
number_of_copies=number_of_molecules)
molecules_index = np.where(self.df['name']==name)
molecule_index_list =list(molecules_index[0])[-number_of_molecules:]
used_molecules_id = self.df.molecule_id.dropna().drop_duplicates().tolist()
pos_index = 0
for molecule_index in molecule_index_list:
molecule_id = self.assign_molecule_id(name=name,
pmb_type='molecule',
used_molecules_id=used_molecules_id,
molecule_index=molecule_index)
molecules_info[molecule_id] = {}
for residue in residue_list:
if first_residue:
if list_of_first_residue_positions is None:
residue_position = None
else:
for item in list_of_first_residue_positions:
residue_position = [np.array(list_of_first_residue_positions[pos_index])]
# Generate an arbitrary random unit vector
if backbone_vector is None:
backbone_vector = self.generate_random_points_in_a_sphere(center=[0,0,0],
radius=1,
n_samples=1,
on_surface=True)[0]
residues_info = self.create_residue(name=residue,
espresso_system=espresso_system,
central_bead_position=residue_position,
use_default_bond= use_default_bond,
backbone_vector=backbone_vector)
residue_id = next(iter(residues_info))
# Add the correct molecule_id to all particles in the residue
for index in self.df[self.df['residue_id']==residue_id].index:
self.add_value_to_df(key=('molecule_id',''),
index=int (index),
new_value=molecule_id,
overwrite=True,
verbose=False)
central_bead_id = residues_info[residue_id]['central_bead_id']
previous_residue = residue
residue_position = espresso_system.part.by_id(central_bead_id).pos
previous_residue_id = central_bead_id
first_residue = False
else:
previous_central_bead_name=self.df[self.df['name']==previous_residue].central_bead.values[0]
new_central_bead_name=self.df[self.df['name']==residue].central_bead.values[0]
bond = self.search_bond(particle_name1=previous_central_bead_name,
particle_name2=new_central_bead_name,
hard_check=True,
use_default_bond=use_default_bond)
l0 = self.get_bond_length(particle_name1=previous_central_bead_name,
particle_name2=new_central_bead_name,
hard_check=True,
use_default_bond=use_default_bond)
residue_position = residue_position+backbone_vector*l0
residues_info = self.create_residue(name=residue,
espresso_system=espresso_system,
central_bead_position=[residue_position],
use_default_bond= use_default_bond,
backbone_vector=backbone_vector)
residue_id = next(iter(residues_info))
for index in self.df[self.df['residue_id']==residue_id].index:
self.add_value_to_df(key=('molecule_id',''),
index=int (index),
new_value=molecule_id,
verbose=False,
overwrite=True)
central_bead_id = residues_info[residue_id]['central_bead_id']
espresso_system.part.by_id(central_bead_id).add_bond((bond, previous_residue_id))
bond_index = self.add_bond_in_df(particle_id1=central_bead_id,
particle_id2=previous_residue_id,
use_default_bond=use_default_bond)
self.add_value_to_df(key=('molecule_id',''),
index=int (bond_index),
new_value=molecule_id,
verbose=False,
overwrite=True)
previous_residue_id = central_bead_id
previous_residue = residue
molecules_info[molecule_id][residue_id] = residues_info[residue_id]
first_residue = True
pos_index+=1
return molecules_info
def create_particle(self, name, espresso_system, number_of_particles, position=None, fix=False):
"""
Creates `number_of_particles` particles of type `name` into `espresso_system` and bookkeeps them into `pymbe.df`.
Args:
name(`str`): Label of the particle type to be created. `name` must be a `particle` defined in `pmb_df`.