From 5f9f9ac7896c3a026e1685f82bc4d42e9a3ea53d Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 15 Nov 2021 12:03:38 +0100 Subject: [PATCH] Added sub-directories --- pyDIFRATE/.DS_Store | Bin 8196 -> 8196 bytes pyDIFRATE/Struct/.DS_Store | Bin 0 -> 6148 bytes pyDIFRATE/Struct/.test.xtc_offsets.npz | Bin 0 -> 5260 bytes pyDIFRATE/Struct/FramesPostProc.py | 82 + pyDIFRATE/Struct/__init__.py | 0 "pyDIFRATE/Struct/__pycache__/Icon\r" | 0 pyDIFRATE/Struct/eval_fr.py | 1403 +++++++++++++++++ pyDIFRATE/Struct/frames.py | 716 +++++++++ pyDIFRATE/Struct/select_tools.py | 524 +++++++ pyDIFRATE/Struct/special_frames.py | 352 +++++ pyDIFRATE/Struct/structure.py | 594 ++++++++ pyDIFRATE/Struct/user_frames.py | 82 + pyDIFRATE/Struct/vec_funs.py | 132 ++ pyDIFRATE/Struct/vf_tools.py | 1143 ++++++++++++++ pyDIFRATE/__init__.py | 15 + "pyDIFRATE/__pycache__/Icon\r" | 0 pyDIFRATE/chimera/.DS_Store | Bin 0 -> 6148 bytes "pyDIFRATE/chimera/Icon\r" | 0 pyDIFRATE/chimera/__init__.py | 0 "pyDIFRATE/chimera/__pycache__/Icon\r" | 0 pyDIFRATE/chimera/chimeraX_funs.py | 1222 +++++++++++++++ pyDIFRATE/data/.DS_Store | Bin 0 -> 6148 bytes "pyDIFRATE/data/Icon\r" | 0 pyDIFRATE/data/__init__.py | 0 "pyDIFRATE/data/__pycache__/Icon\r" | 0 pyDIFRATE/data/bin_in_out.py | 123 ++ pyDIFRATE/data/data_class.py | 791 ++++++++++ pyDIFRATE/data/explicit_fits.py | 106 ++ pyDIFRATE/data/fitting.py | 451 ++++++ pyDIFRATE/data/in_out.py | 35 + pyDIFRATE/data/load_nmr.py | 385 +++++ pyDIFRATE/iRED/.DS_Store | Bin 0 -> 6148 bytes pyDIFRATE/iRED/Ct_ana.py | 162 ++ pyDIFRATE/iRED/Ct_fast.py | 343 +++++ "pyDIFRATE/iRED/Icon\r" | 0 pyDIFRATE/iRED/__init__.py | 0 "pyDIFRATE/iRED/__pycache__/Icon\r" | 0 pyDIFRATE/iRED/fast_funs.py | 590 +++++++ pyDIFRATE/iRED/fast_index.py | 109 ++ pyDIFRATE/iRED/iRED_ana.py | 575 +++++++ pyDIFRATE/iRED/iRED_fast.py | 545 +++++++ pyDIFRATE/iRED/parCt.py | 183 +++ pyDIFRATE/iRED/par_iRED.py | 150 ++ pyDIFRATE/iRED/parallel_Ct.py | 180 +++ pyDIFRATE/plots/.DS_Store | Bin 0 -> 6148 bytes 
pyDIFRATE/plots/__init__.py | 0 pyDIFRATE/plots/plotting_funs.py | 638 ++++++++ pyDIFRATE/r_class/.DS_Store | Bin 0 -> 6148 bytes pyDIFRATE/r_class/Ctsens.py | 238 +++ pyDIFRATE/r_class/DIFRATE_funs.py | 264 ++++ pyDIFRATE/r_class/DynamicModels.py | 318 ++++ "pyDIFRATE/r_class/Icon\r" | 0 pyDIFRATE/r_class/__init__.py | 0 pyDIFRATE/r_class/detectors.py | 1941 ++++++++++++++++++++++++ pyDIFRATE/r_class/mdl_sens.py | 540 +++++++ pyDIFRATE/r_class/parallel_funs.py | 45 + pyDIFRATE/r_class/sens.py | 555 +++++++ pyDIFRATE/tools/.DS_Store | Bin 0 -> 6148 bytes pyDIFRATE/tools/DRtools.py | 744 +++++++++ pyDIFRATE/tools/GyroRatio | 351 +++++ pyDIFRATE/tools/__init__.py | 0 61 files changed, 16627 insertions(+) create mode 100644 pyDIFRATE/Struct/.DS_Store create mode 100644 pyDIFRATE/Struct/.test.xtc_offsets.npz create mode 100644 pyDIFRATE/Struct/FramesPostProc.py create mode 100644 pyDIFRATE/Struct/__init__.py create mode 100644 "pyDIFRATE/Struct/__pycache__/Icon\r" create mode 100644 pyDIFRATE/Struct/eval_fr.py create mode 100644 pyDIFRATE/Struct/frames.py create mode 100644 pyDIFRATE/Struct/select_tools.py create mode 100644 pyDIFRATE/Struct/special_frames.py create mode 100755 pyDIFRATE/Struct/structure.py create mode 100644 pyDIFRATE/Struct/user_frames.py create mode 100644 pyDIFRATE/Struct/vec_funs.py create mode 100644 pyDIFRATE/Struct/vf_tools.py create mode 100644 pyDIFRATE/__init__.py create mode 100644 "pyDIFRATE/__pycache__/Icon\r" create mode 100644 pyDIFRATE/chimera/.DS_Store create mode 100644 "pyDIFRATE/chimera/Icon\r" create mode 100644 pyDIFRATE/chimera/__init__.py create mode 100644 "pyDIFRATE/chimera/__pycache__/Icon\r" create mode 100644 pyDIFRATE/chimera/chimeraX_funs.py create mode 100644 pyDIFRATE/data/.DS_Store create mode 100644 "pyDIFRATE/data/Icon\r" create mode 100644 pyDIFRATE/data/__init__.py create mode 100644 "pyDIFRATE/data/__pycache__/Icon\r" create mode 100644 pyDIFRATE/data/bin_in_out.py create mode 100755 pyDIFRATE/data/data_class.py 
create mode 100755 pyDIFRATE/data/explicit_fits.py create mode 100755 pyDIFRATE/data/fitting.py create mode 100644 pyDIFRATE/data/in_out.py create mode 100644 pyDIFRATE/data/load_nmr.py create mode 100644 pyDIFRATE/iRED/.DS_Store create mode 100755 pyDIFRATE/iRED/Ct_ana.py create mode 100644 pyDIFRATE/iRED/Ct_fast.py create mode 100644 "pyDIFRATE/iRED/Icon\r" create mode 100644 pyDIFRATE/iRED/__init__.py create mode 100644 "pyDIFRATE/iRED/__pycache__/Icon\r" create mode 100644 pyDIFRATE/iRED/fast_funs.py create mode 100644 pyDIFRATE/iRED/fast_index.py create mode 100755 pyDIFRATE/iRED/iRED_ana.py create mode 100644 pyDIFRATE/iRED/iRED_fast.py create mode 100644 pyDIFRATE/iRED/parCt.py create mode 100644 pyDIFRATE/iRED/par_iRED.py create mode 100644 pyDIFRATE/iRED/parallel_Ct.py create mode 100644 pyDIFRATE/plots/.DS_Store create mode 100644 pyDIFRATE/plots/__init__.py create mode 100644 pyDIFRATE/plots/plotting_funs.py create mode 100644 pyDIFRATE/r_class/.DS_Store create mode 100755 pyDIFRATE/r_class/Ctsens.py create mode 100755 pyDIFRATE/r_class/DIFRATE_funs.py create mode 100755 pyDIFRATE/r_class/DynamicModels.py create mode 100644 "pyDIFRATE/r_class/Icon\r" create mode 100644 pyDIFRATE/r_class/__init__.py create mode 100755 pyDIFRATE/r_class/detectors.py create mode 100755 pyDIFRATE/r_class/mdl_sens.py create mode 100755 pyDIFRATE/r_class/parallel_funs.py create mode 100755 pyDIFRATE/r_class/sens.py create mode 100644 pyDIFRATE/tools/.DS_Store create mode 100644 pyDIFRATE/tools/DRtools.py create mode 100755 pyDIFRATE/tools/GyroRatio create mode 100644 pyDIFRATE/tools/__init__.py diff --git a/pyDIFRATE/.DS_Store b/pyDIFRATE/.DS_Store index 5408947d5feb8110fc3ede75c6344aea5f8f798c..cff14fb189ed49be7f72fafce56ed572097b9418 100644 GIT binary patch delta 76 zcmZp1XmQxEMu5rT>g06-Y8(^%pMJ1hc-Rrhkr7I9Ny^DjVqjoE7M2ny$S@2}&d)6X TN-!|6PGA74mD=nlxQ!10WAYl= delta 76 zcmZp1XmQxEMu5p;=j3$)Y8-0P>)uy49&`k9WQ0;&l5+Bs7#J9kg{1@vG7N*0^K%P; T5)2Hi2@F8BQk(q*xA6f05}p|u diff --git 
a/pyDIFRATE/Struct/.DS_Store b/pyDIFRATE/Struct/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6ac5452305830a47ba4f1283ef641c724d07039c GIT binary patch literal 6148 zcmeHK%}N6?5T4N<7DVV#@RC>P8^p3cK_5V^tM$-zi>2T(Pv*h1ui`uS&5yE6mx>ob znSseS$;@Pv4>rjV5zlwcjA%+k6;zPL7!esBbsc%|1jsta9c}4~cJv7S8zO5zrF(j! z8)~V){w;dVdNo_zcI~FOnFG;nSJ%sRw?uUH^7eebdVjq*Jb&9YKg`%X?5(J`xG;l( zU?3O>27-Za8Ni(_Qk@t^8w>;k!N3;-ay}%eV0J8qdUT-C5&$UAXcgE}OGr*~%#Otn z9tc}1&{Ek;47PNPCy&dH#n94;z4&1N@@MhF`gP2oG@LjaMjH$S149Omtvi+b{}R8< zXpuimiC!=e4E!?&cv3IwIX2~Y>)!U{u1(NusEEWBF(9x9j{tP!963{``IFd)%Z|lR UW|47C2gXA{35g~c_yq<&03E3~4gdfE literal 0 HcmV?d00001 diff --git a/pyDIFRATE/Struct/.test.xtc_offsets.npz b/pyDIFRATE/Struct/.test.xtc_offsets.npz new file mode 100644 index 0000000000000000000000000000000000000000..281d4e4adaf009574cb523bea8374d1357bf0ad6 GIT binary patch literal 5260 zcmc)OaZr=z9l-Gy4H`ApDAC4(Hr7;75u=SRYMiL(qC}fkN>QVL4G1-A3Ym=+uQ`{x z)ZwXHR$0edrg)1jt(=!xbacipJGh$7Oyh|yt4?w1T&3N}^9`MP{bO5qw};Dp zl3%`W$eXiAiSY|7V%@-Eh%ZqyEd#}d zypNdM4X7wV|6a^&!pI#g8P`|zQZClKhQc4*Rk>_ zYOZ60I!SanChU9}D}I5hb6DSxe+{N-*lI(U4>chS-NwAJ!^P`%Jc~8kFzax9{wJ(d zju4$86I(Z;_ia?4!kRBpm+`3RqpQ%d6J;Nwza32}YSFc2qjw8x-^I`;STJy;=oB-t zt`yBx*wqxD-^IMt$3)+ri@xn>I)ZIi(5p%oogoWDU{j22zXY;>>3w4Y(?NvynwriqV>UbX`LucP^Be107R zYK`dZCe*x)(F5o>iz$g?q}U4U(Pl%14}%}Y=YK=x6Hkb~@fozcQCW|{3s^E_tmtHO zuyzCHyoFuAL*EyupQIIi*owAFlr^IN2AY$f6y3GO=zayY?_lISdIpRWoo)t3*P>%D zDx0w34%)}1ie8nA4IZ=wP|+5jC#8ulZ$5UFVeLLNpT>@a@uI7mf`&qD-Ho0grgdQW z@d=_Yc^Vb3qW^u&`3$=TPZXVhHd@wWM>TrGsO`q^#B|ZSSEA+z7(Ry9>!=zzNpv9- zI$WssW5YSj>z^UI_UTwvjJh|lIfRZ*lxru8zIquNzlSY{(Rm3K!*rsnor`)0hTg`U zQ+V-9bY@Huy=pbqRbo~nw%x#r$EJ!-vlxS$(S8us=P@dON_15QG?$=Mg@Gn4xQnt> zz33Zq(Y77cM=*K?9mA)IPMw93a`f&;;~8x4GhKB4sc2q<9ec3q1R8H)o90`h_gT>B z#+F0qy@2UM45Ew9M!Oy5)fhM#pWj3E#2KQGtiXyLs6B?w*U>q0rs%Ybu+4=&KN?%G zrT;9^xu&DO7~6l0zTaZz-?2+;6n#SuT0N)^VCXWsl4gr8eLh|+!>WB~I*pwP-xgiX z6totiat{WNqw}w*89hhz;U(zViu(63+KMhkrs%XrjIP7VpQ8RGUi=()PnavkmT3iE 
z^J4V}Xt|1tN9T#Ib|D%!VcP-poW;z<`J(IAW1S6cK2(J;d>cJuvqW!LhArFBa~SoP z;`5;kMCX}{x|i@`4OU0cEG^{sPsX}@EZBwe;~2V$uF>BSy>2O9+lsaCVcs86HrOP( zMk6}ysISIrC((BgEfW`sUa=AzcVNjeO!*5&M=lm!!)wvK z7qw0C`JMQDoLTg`T#USiu3w`13Px2+L{~8%Q_8XN=V<=}suGrpE~G^?#Jo3A_Av&#(4O{;=rws5{XSNHfX1uXG2&U# zRWC%#ChR_d^=HwVxJ-22daSWwjt@J3gZ|rCGB!u_Da$aj4PA#(dnrC2`kd%0=c4H) z?5@Gu2wJ3E(aCif%18SyOgWC>o9G_(yy(-GU`qwM>*DiPY#x*+I{PeCti$>@G4ErP zKF8Ye%SE5H9NWC;Z9v1-_x;s2^h$ zeVYX<-Kej}@C9@YStYuZIoP}b?Qfy__ZavB9g|jzUcCyNE79GE%61GS=ZnsojfyQ8 zcn9sD#ODJGL>HWa1*NF|2}YW+q6@XDg;H!qa?!pW(fSxmW!CAWg zKRkuve{VDBe{c#%)*apP-51Zzeeg^0<{q;2omw|6c#!-@_IK z*M%iJp^vJsiU-oe8*}k-5AKB6{rk4*9zO8Fi}3yfmkgD@x+EVy>cKDm{-cHrlm2xX jJbc)LANKu+o$fj8zK`CLo!Bq-w5;dnLXUfsVvqg_x1=C4 literal 0 HcmV?d00001 diff --git a/pyDIFRATE/Struct/FramesPostProc.py b/pyDIFRATE/Struct/FramesPostProc.py new file mode 100644 index 0000000..fdae7d6 --- /dev/null +++ b/pyDIFRATE/Struct/FramesPostProc.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Mon Sep 20 13:56:00 2021 + +@author: albertsmith +""" + +from pyDIFRATE.Struct import vf_tools as vft +import numpy as np + + +def moving_avg(t,v,sigma): + """ + Moving average of a vector direction. 
Note that the output is NOT normalized, + but the direction is correct + """ + nsteps=np.ceil((sigma/np.diff(t).min())*2).astype(int) #Cut off the average after 2*sigma + return np.moveaxis([(np.exp(-(t0-t[np.max([0,k-nsteps]):k+nsteps+1])**2/(2*sigma**2))*\ + v[:,:,np.max([0,k-nsteps]):k+nsteps+1]).sum(-1) for k,t0 in enumerate(t)],0,-1) + +def AvgGauss(vecs,fr_ind,sigma=50): + """ + Takes a moving average of the frame direction, in order to remove librational + motion (which tends to be correlated). Moving average is defined by a weighted + Gaussian, defined in the units of the trajectory (usually ps, default here + is 50). + """ + if sigma==0:return #Do nothing if sigma is 0 + t=vecs['t'] + + if np.ndim(vecs['v'][fr_ind])==4: + vecs['v'][fr_ind]=np.array([moving_avg(t,v,sigma) for v in vecs['v'][fr_ind]]) + else: + vecs['v'][fr_ind]=moving_avg(t,vecs['v'][fr_ind],sigma) + + + +def AvgHop(vecs,fr_ind,vr,sigma=50): + """ + Removes short traverses from hopping motion of a trajectory. sigma determines + where to cut off short traverses (averaging performed with a Gaussian + distribution, default is 50 ps, note that if trajectory uses a different unit, + then this number will need to be adjusted). + + Note- needs to be run before any averaging is applied to the reference frame! 
+ """ + if sigma==0:return #Do nothing if sigma is 0 + t=vecs['t'] + + v12s,v23s,v34s=[moving_avg(t,v,sigma) for v in vecs['v'][fr_ind]] + + sc=vft.getFrame(v23s,v34s) + v12s=np.moveaxis(vft.R(v12s,*vft.pass2act(*sc)),-1,0) + + i=np.argmax([(vr0*v12s).sum(1) for vr0 in vr],axis=0) + + v12s=vr[i,:,np.arange(i.shape[1])].T + v12s=vft.R(v12s,*sc) + vecs['v'][fr_ind]=np.array([v12s,v23s]) \ No newline at end of file diff --git a/pyDIFRATE/Struct/__init__.py b/pyDIFRATE/Struct/__init__.py new file mode 100644 index 0000000..e69de29 diff --git "a/pyDIFRATE/Struct/__pycache__/Icon\r" "b/pyDIFRATE/Struct/__pycache__/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/Struct/eval_fr.py b/pyDIFRATE/Struct/eval_fr.py new file mode 100644 index 0000000..e493a2e --- /dev/null +++ b/pyDIFRATE/Struct/eval_fr.py @@ -0,0 +1,1403 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Tue Oct 6 10:46:10 2020 + +@author: albertsmith +""" + + +import numpy as np +from copy import deepcopy +import pyDIFRATE.Struct.vf_tools as vft +from pyDIFRATE.iRED.fast_index import trunc_t_axis +from pyDIFRATE.iRED.fast_funs import get_count,printProgressBar +from pyDIFRATE.data.data_class import data +from pyDIFRATE.Struct.vec_funs import new_fun,print_frame_info +from pyDIFRATE.chimera.chimeraX_funs import draw_tensors +from pyDIFRATE.Struct import FramesPostProc as FPP + + +flags={'ct_finF':True,'ct_m0_finF':False,'ct_0m_finF':False,'ct_0m_PASinF':False,\ + 'A_m0_finF':False,'A_0m_finF':False,'A_0m_PASinF':False,\ + 'ct_prod':True,'ct':True,'S2':True} +class ReturnIndex(): + for k in flags.keys(): + locals()[k]=property(lambda self,k=k:self.flags[k]) #Dynamically set these properties + + def __init__(self,ret_in=None,**kwargs): + self.flags=flags + + + if ret_in is not None: + if hasattr(ret_in,'return_index') and len(ret_in.return_index)==10: + self=ret_in + else: + assert isinstance(ret_in,list) and len(ret_in)==10,'ret_in must be a list of 10 elements' + for k,v in zip(flags.keys(),ret_in): + flags[k]=bool(v) + + for k,v in kwargs.items(): + if k in flags.keys(): + flags[k]=v + + self.flags=flags.copy() #This makes the class and instance values independent + + def __getitem__(self,k): + if isinstance(k,int): + return [v for v in self.flags.values()][k] + else: + return self.flags[k] + + def __repr__(self): + out='' + for k,v in self.flags.items(): + out+=k+': {0}'.format(v)+'\n' + return out + def __str__(self): + return self.__repr__() + + def copy(self): + return ReturnIndex(**self.flags) + + @property + def return_index(self): + """ + Returns an array of logicals determining which terms to calculate + """ + return np.array([v for v in self.flags.values()],dtype=bool) + + @property + def calc_ct_m0_finF(self): + "Determines if we should calculate ct_m0_finF" + if 
self.ct_finF or self.ct_m0_finF or self.ct_0mPASinF or self.ct_prod:return True + return False + @property + def calc_A_m0_finF(self): + "Determines if we should calculate A_m0_finF" + if self.A_m0_finF or self.A_0m_finF:return True + return False + @property + def calc_A_0m_PASinF(self): + "Determines if we should calculate A_m0_PASinF" + if self.ct_finF or self.ct_prod or self.A_0m_PASinF:return True + return False + @property + def calc_ct_finF(self): + "Determines if we should calculate ct_finF" + if self.ct_finF or self.ct_prod:return True + return False + @property + def calc_any_ct(self): + "Determines if any correlation functions should be calculated" + if self.ct_finF or self.ct_m0_finF or self.ct_0m_finF or self.ct_0m_PASinF or \ + self.ct_prod or self.ct:return True + return False + + def set2sym(self): + "De-activates the storage of terms that cannot by calculated in symmetric mode" + self.set2auto() + self.flags['ct_m0_finF']=False + + def set2auto(self): + "De-activates the storage of terms that cannot by calculated in auto mode" + if self.ct_0m_finF or self.ct_0m_PASinF or self.A_m0_finF or self.A_m0_finF: + print('Warning: Individual components of the correlation functions or tensors will not be returned in auto or sym mode') + self.flags.update({'ct_0m_finF':False,'ct_0m_PASinF':False,\ + 'A_m0_finF':False,'A_0m_finF':False}) + + + +class FrameObj(): + def __init__(self,molecule): + self.molecule=molecule + self.vft=None + self.vf=list() + self.frame_info={'frame_index':list(),'label':None,'info':list()} + self.defaults={'t0':0,'tf':-1,'dt':None,'n':10,'nr':10,'mode':'auto',\ + 'squeeze':True} + self.terms={'ct_finF':True,'ct_m0_finF':False,'ct_0mPASinF':False,\ + 'A_m0_finF':True,'A_0m_finF':True,'A_0m_PASinF':True,\ + 'ct_prod':True,'ct':True,'S2':True} + self.__frames_loaded=False #Flag to check if frames currently loaded + self.include=None #Record of which frames were included in calculation + self.mode=self.defaults['mode'] + + 
self.return_index=ReturnIndex(**self.terms) + self.t=None + self.Ct={} + self.A={} + self.S2=None + + "I'd like to get rid of these in a later iteration. These were hidden as part of the molecule object..." + @property + def _vft(self):return self.vft + @property + def _vf(self):return self.vf + @property + def _frame_info(self):return self.frame_info + + @property + def description_of_terms(self): + out="""n=number of frames, nr=number of residues, nt=number of time points + ct_finF: + n x nr x nt array of the real correlation functions for each + motion (after scaling by residual tensor of previous motion) + ct_m0_finF: + 5 x n x nr x nt array, with the individual components of each + motion (f in F) + ct_0m_finF: + 5 x n x nr x nt array, with the individual components of each + motion (f in F) + ct_0m_PASinF: + 5 x n x nr x nt array, with the individual components of each + motion (PAS in F) + A_m0_finF: + Value at infinite time of ct_m0_finF + A_0m_finF: + Value at infinite time of ct_0m_finF + A_0m_PASinF: + Value at infinite time of ct_0m_PASinF + ct_prod: + nr x nt array, product of the elements ct_finF + ct: + Directly calculated correlation function of the total motion + S2: + Final value of ct + """ + print(out) + + + def new_frame(self,Type=None,frame_index=None,**kwargs): + """ + Create a new frame, where possible frame types are found in vec_funs. + Note that if the frame function produces a different number of reference + frames than there are bonds (that is, vectors produced by the tensor + frame), then a frame_index is required, to map the frame to the appropriate + bond. The length of the frame_index should be equal to the number of + vectors produced by the tensor frame, and those elements should have + values ranging from 0 to one minus the number of frames defined by this + frame. + + To get a list of all implemented frames and their arguments, call this + function without any arguments. 
To get arguments for a particular frame, + call this function with only Type defined. + """ + mol=self.molecule + if Type is None: + print_frame_info() + elif len(kwargs)==0: + print_frame_info(Type) + else: + assert self.vft is not None,'Define the tensor frame first (run mol.tensor_frame)' + vft=self.vft() + nb=vft[0].shape[1] if len(vft)==2 else vft.shape[1] #Number of bonds in the tensor frame + fun,fi,info=new_fun(Type,mol,**kwargs) + if frame_index is None:frame_index=fi #Assign fi to frame_index if frame_index not provided + f=fun() #Output of the vector function (test its behavior) + nf=f[0].shape[1] if len(f)>1 else f.shape[1] + if fun is not None: + "Run some checks on the validity of the frame before storing it" + if frame_index is not None: + assert frame_index.size==nb,'frame_index size does not match the size of the tensor_fun output' + assert frame_index[np.logical_not(np.isnan(frame_index))].max()=0]=np.arange(np.sum(fiout[k]>=0)) + elif k==len(fiout0): + fiout.append(np.array(fiout0[-1],dtype=int)) + m=k-2 + while np.any(fiout[-1]<0): + fiout[-1][fiout[-1]<0]=fiout0[m][fiout[-1]<0]+np.max(fiout0) + m+=-1 + else: + fiout.append(np.array(fiout0[k-1],dtype=int)) + m=k-2 + while np.any(fiout[-1]<0): + fiout[-1][fiout[-1]<0]=fiout0[m][fiout[-1]<0]+np.max(fiout0) + m+=-1 + fiout[-1][fiout0[k]<0]=-1 + + + + "Make sure vectors are normalized" + vZ=vft.norm(vZ) + nuZ=[vft.norm(nuz) for nuz in nuZ] + + return vZ,vXZ,nuZ,nuXZ,fiout + + +"This function handles the organization of the output, determines which terms to calculate" +def frames2ct(mol=None,v=None,return_index=None,mode='full',n=100,nr=10,t0=0,tf=None,dt=None): + """ + Calculates correlation functions for frames (f in F), for a list of frames. + One may provide the molecule object, containing the frame functions, or + the output of mol2vec (or ini_vec_load). 
frames2data returns np arrays with + the following data + + If we have n frames (including the tensor frame), nr residues (tensors), and + nt time points in the resulting correlation function, we can calculate any + of the following functions: + + ct_finF : n x nr x nt array of the real correlation functions for each + motion (after scaling by residual tensor of previous motion) + ct_m0_finF : 5 x n x nr x nt array, with the individual components of + each motion (f in F) + ct_0m_finF : 5 x n x nr x nt array, with the individual components of + each motion (f in F) + ct_0m_PASinF: 5 x n x nr x nt array, with the individual components of + each motion (PAS in F) + A_m0_finF : Value at infinite time of ct_m0_finF + A_0m_finF : Value at infinite time of ct_0m_finF + A_0m_PASinF : Value at infinite time of ct_0m_PASinF + ct_prod : nr x nt array, product of the elements ct_finF + ct : Directly calculated correlation function of the total motion + S2 : Final value of ct + + Include a logical index to select which functions to return, called return_index + + Default is + return_index=[True,False,False,False,False,False,False,True,True,False] + that is, ct_finF, ct_prod, and ct are included in the default. + + That is, we calculate the individual correlation functions, the product of + those terms, and the directly calculated correlation function by default. + + frames2ct(mol=None,v=None,return_index=None,n=100,nr=10,nf=None,dt=None) + + We may also take advantage of symmetry in the various motions. This option + is obtained by changing mode from 'full' to either 'sym' (assume all motions + result in symmetric residual tensors, A_0m_PASinF has eta=0), or we set mode + to 'auto', where a threshold on eta determines whether or not we treat the + residual tensor as symmetric. By default, the threshold is 0.2, but the user + may set the mode to autoXX, where XX means eta should be less than 0.XX (one + may use arbitrary precision, autoX, autoXX, autoXXX, etc.). 
+ """ + + + if v is None and mol is None: + print('mol or v must be given') + return + elif v is None: + v=mol2vec(mol,n=n,nr=nr,t0=t0,tf=tf,dt=dt) + +# if return_index is None: +# return_index=[True,False,False,False,False,False,False,True,True,False] +# ri=np.array(return_index,dtype=bool) + ri=ReturnIndex(return_index) + + index=v['index'] + + vZ,vXZ,nuZ,nuXZ,_=apply_fr_index(v) + + + nf=len(nuZ) + nr,nt=vZ.shape[1:] + + "Initial calculations/settings required if using symmetry for calculations" + if mode.lower()=='sym' or 'auto' in mode.lower(): +# if np.any(ri[[2,3,4,5]]): +# ri[[2,3,4,5]]=False +# if mode.lower()=='sym':ri[1]=False +# print('Warning: Individual components of the correlation functions or tensors will not be returned in auto or sym mode') +# + if mode.lower()=='sym':ri.set2sym() + if mode.lower()=='auto':ri.set2auto() + + A_0m_PASinf=list() + for k in range(nf): + vZ_inf=vft.applyFrame(vft.norm(vZ),nuZ_F=nuZ[k],nuXZ_F=nuXZ[k]) + A_0m_PASinf.append(vft.D2vec(vZ_inf).mean(axis=-1)) + else: + A_0m_PASinf=[None for _ in range(nf)] + + + if mode=='sym': + threshold=1 + elif 'auto' in mode.lower(): + threshold=float(mode[4:])/10**(len(mode)-4) if len(mode)>4 else 0.2 #Set threshold for eta (default 0.2) + else: + threshold=0 + +# if ri[0] or ri[1] or ri[2] or ri[7]: + if ri.calc_ct_m0_finF: + "Calculate ct_m0_finF if requested, if ct_prod requested, if ct_finF requested, or if ct_0m_finF requested" + ct_m0_finF=list() + A_m0_finF=list() + for k in range(nf+1): + if k==0: + a,b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='m0',mode='both',index=index) + elif k==nf: +# a,b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],cmpt='m0',mode='both',index=index) + a,b=sym_full_swap(vZ=vZ,threshold=threshold,A_0m_PASinf=A_0m_PASinf[k-1],vXZ=vXZ,\ + nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],cmpt='m0',mode='both',index=index) + else: +# 
a,b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='m0',mode='both',index=index) + a,b=sym_full_swap(vZ=vZ,threshold=threshold,A_0m_PASinf=A_0m_PASinf[k-1],vXZ=vXZ,\ + nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],\ + cmpt='m0',mode='both',index=index) + ct_m0_finF.append(a) + A_m0_finF.append(b) + ct_m0_finF=np.array(ct_m0_finF) + A_m0_finF=np.array(A_m0_finF) +# elif ri[4] or ri[5]: + elif ri.calc_A_m0_finF: + "Calculate A_m0_finF if requested, or A_0m_finF requested" + A_m0_finF=list() + for k in range(nf+1): + if k==0: + b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='m0',mode='d2',index=index) + elif k==nf: + b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],cmpt='m0',mode='d2',index=index) + else: + b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_f=nuZ[k-1],nuXZ_f=nuXZ[k-1],nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='m0',mode='d2',index=index) + A_m0_finF.append(b) + A_m0_finF=np.array(A_m0_finF) +# if ri[2]: + if ri.ct_0m_finF: + "ct_0m_finF are just the conjugates of ct_m0_finF" + ct_0m_finF=np.array([ct0.conj() for ct0 in ct_m0_finF]) + +# if ri[5]: + if ri.A_0m_finF: + "A_0m_finF are just the conjugates of A_m0_finF" + A_0m_finF=np.array([a0.conj() for a0 in A_m0_finF]) + + +# if ri[3]: #This option is deactivated for sym and auto modes + if ri.ct_0m_PASinF: + "Calculate ct_0m_PASinF if requested" + ct_0m_PASinF=list() + A_0m_PASinF=list() + for k in range(nf+1): + if k==nf: + a,b=Ct_D2inf(vZ=vZ,vXZ=vXZ,cmpt='0m',mode='both',index=index) + else: + a,b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='0m',mode='both',index=index) + ct_0m_PASinF.append(a) + A_0m_PASinF.append(b) + ct_0m_PASinF=np.array(ct_0m_PASinF) + A_0m_PASinF=np.array(A_0m_PASinF) +# elif ri[0] or ri[6] or ri[7]: + elif ri.calc_A_0m_PASinF: + "Calculate A_0m_PASinF if requested, if ct_prod requested, or if ct_finF requested" + A_0m_PASinF=list() + for k in range(nf+1): + if k==nf: + 
b=Ct_D2inf(vZ=vZ,vXZ=vXZ,cmpt='0m',mode='D2',index=index) + else: + b=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ[k],nuXZ_F=nuXZ[k],cmpt='0m',mode='D2',index=index) + A_0m_PASinF.append(b) + A_0m_PASinF=np.array(A_0m_PASinF) + +# if ri[0] or ri[7]: + if ri.calc_ct_finF: + "Calculate ct_finF if requested, or if ct_prod requested" + ct_finF=list() + for k in range(nf+1): + if k==0: + ct_finF.append(ct_m0_finF[0][2].real) + else: + ct_finF.append((np.moveaxis(ct_m0_finF[k],-1,0)*A_0m_PASinF[k-1]/A_0m_PASinF[k-1][2].real).sum(1).real.T) + ct_finF=np.array(ct_finF) +# if ri[7]: + if ri.ct_prod: + "Calculate ct_prod" + ct_prod=ct_finF.prod(0) + +# if ri[8]: + if ri.ct: + "Calculate ct if requested" + ct,S2=Ct_D2inf(vZ,cmpt='00',mode='both',index=index) + ct=ct.real + S2=S2.real +# elif ri[9]: + elif ri.S2: + "Calculate S2 if requested" + S2=Ct_D2inf(vZ,cmpt='00',mode='d2',index=index) + S2=S2.real + + out=dict() + for k in ri.flags.keys(): + if getattr(ri,k):out[k]=locals()[k] +# if ri[0]:out['ct_finF']=ct_finF +# if ri[1]:out['ct_m0_finF']=ct_m0_finF +# if ri[2]:out['ct_0m_finF']=ct_0m_finF +# if ri[3]:out['ct_0m_PASinF']=ct_0m_PASinF +# if ri[4]:out['A_m0_finF']=A_m0_finF +# if ri[5]:out['A_0m_finF']=A_0m_finF +# if ri[6]:out['A_0m_PASinF']=A_0m_PASinF +# if ri[7]:out['ct_prod']=ct_prod +# if ri[8]:out['ct']=ct +# if ri[9]:out['S2']=S2 + +# if ri[0] or ri[1] or ri[2] or ri[3] or ri[7] or ri[8] or ri[9]: + if ri.calc_any_ct: + if index is None: + index=np.arange(v['vT'].shape[-1]) + out['index']=index + N=get_count(index) + i=N!=0 + N=N[i] + dt=(v['t'][1]-v['t'][0])/(index[1]-index[0]) + t=(np.cumsum(i)-1)*dt/1e3 + out['N']=N + out['t']=t[i] + + return out + +"This function extracts various frame vectors from trajectory" +def mol2vec(mol,n=100,nr=10,t0=0,tf=-1,dt=None,index=None): + """ + Extracts vectors describing from the frame functions found in the molecule + object. 
Arguments are mol, the molecule object, n and nr, which are parameters + specifying sparse sampling, and dt, which overrides dt found in the trajectory + """ + + traj=(mol if hasattr(mol,'mda_object') else mol.molecule).mda_object.trajectory + if tf is None or tf==-1:tf=traj.n_frames + if index is None: + index=trunc_t_axis(tf-t0,n,nr)+t0 + + return ini_vec_load(traj,mol._vf,mol._vft,mol._frame_info['frame_index'],index=index,dt=dt,info=mol._frame_info['info']) + +"This function takes care of the bulk of the actual calculations" +def Ct_D2inf(vZ,vXZ=None,nuZ_F=None,nuXZ_F=None,nuZ_f=None,nuXZ_f=None,cmpt='0p',mode='both',index=None): + """ + Calculates the correlation functions and their values at infinite time + simultaneously (reducing the total number of calculations) + + To perform the calculation in reference frame F, provide nuZ_F and + optionally nuXZ_F + + To calculate the effect of the motion of frame f on the correlation function + for the bond, provide nuZ_f and optionally nuXZ_f + + To only return the correlation function, or only return the values at infinite + time, set mode to 'Ct' or 'D2inf', respectively. + + To determine what terms to calculate, set cmpt: + '0p' yields the 5 terms, C_0p (default) + 'p0' yields the 5 terms, C_p0 + 'pp' yields the 5 terms, C_pp + '01','20','00','-20', etc. all will return the requested component + + Setting m OR mp will automatically set the other term to 0. Default is for + mp=0 (starting component), and m is swept from -2 to 2. + + Currently, m or mp must be zero + + index should be provided if the trajectory has been sparsely sampled. + + ct,d2=Ct_D2inf(vZ,vXZ=None,nuZ_F=None,nuXZ_F=None,nuZ_f=None,nuXZ_f=None,cmpt='0p',mode='both',index=index) + + if mode is 'd2', only d2 is returned (even if index is provided). 
F + if mode is 'ct', d2 is not returned + if mode is 'both', then ct and d2 are returned + """ + + + """Rather than having a bunch of if/then statements, we're just going to make + a logical array to determine which terms get calculated in this run. Note + symmetry relations: we will use mmpswap to get p0 terms + + calc=[0-2,0-1,00,01,02,-2-2,-1-1,11,22] + + Note that we'll skip terms that can be obtained based on their relationship + to other terms, and fill these in at the end (ex. if C_01 and C_10 are required, + we'll only calculate one, and get the other from the negative conjugate) + """ + calc=np.zeros(9,dtype=bool) + mmpswap=False + if cmpt in ['0m','m0','0p','p0']: + calc[:3]=True + if cmpt in ['m0','p0']:mmpswap=True + elif cmpt in ['mm','pp']: + calc[-2:]=True + calc[2]=True + elif cmpt in ['0-2','0-1','00','01','02','-2-2','-1-1','11','22']: + calc=np.array(['0-2','0-1','00','01','02','-2-2','-1-1','11','22'])==cmpt + elif cmpt in ['-20','-10','10','20']: + calc=np.array(['-20','-10','00','10','20','-2-2','-1-1','11','22'])==cmpt + mmpswap=True + + #Flags for calculating correlation function or not, and how to calculate + calc_ct=True if (mode[0].lower()=='b' or mode[0].lower()=='c') else False + if calc_ct: + if index is None or index.size/index[-1]>0.25: #No idea where the cutoff is.... 
+ ctFT=True + ctDIR=False + else: + ctDIR=True + ctFT=False + else: + ctFT=False + ctDIR=False + + #Size of the output + n=vZ.shape[-1] + N=get_count(index) if index is not None else np.arange(n,0,-1) + n=2*(np.argwhere(N!=0).squeeze()[-1]+1) if ctFT else np.sum(N!=0) + SZ=[1,n] if vZ.ndim==2 else [vZ.shape[1],n] + + #Pre-allocation for the running sums + if calc_ct:ct0=[np.zeros(SZ,dtype=complex) if cc else None for cc in calc] + d20=[np.zeros(SZ[0],dtype=complex) if cc else None for cc in calc] + + "Here we create a generator that contains each term in the correlation function" + l=loops(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ_F,nuXZ_F=nuXZ_F,nuZ_f=nuZ_f,nuXZ_f=nuXZ_f,calc=calc) + for l0 in l: + "These terms appear in all correlation functions" + if 'eag' in l0.keys(): + zzp=l0['eag']*l0['ebd'] + else: + zzp=l0['az']*l0['bz'] + zz=zzp.mean(-1) + + "Get the FT of zzp if required" + if ctFT:ftzz=FT(zzp,index) + """ + AN IMPORTANT NOTE HERE: + For awhile, I have had taken the complex conjugate of ftzz and not of + the other terms. In principle, should be a teeny bit faster that way. + However, it returns, apparently, the complex conjugate of the correct + correlation. One solution was to simply take the complex conjugate + of the result (previously after taking the inverse transform about 40 + lines below here). However, I think this only works because the correlation + functions are approximately symmetric about 0. Therefore, now I instead + take the complex conjugate of the other term (about 15 lines below), + and remove the conjugate here and on the final correlatin functions. + + Let's assume this works, but watch out for new errors! 
+ """ + "These are additional terms required for C_pp" + if np.any(calc[5:]): + z=l0['eag'] + zm=z.mean() + if ctFT:ftz=FT(z,index).conj() + + "Loop over all terms C_0p" + for k in range(5): + if calc[k]: #Loop over all terms + p=ct_prods(l0,k) + d20[k]+=p.mean(-1)*zz + if ctFT:ct0[k]+=FT(p,index).conj()*ftzz #Calc ct + if ctDIR:ct0[k]+=fastCT(p,zzp,index,N) + + "Loop over all terms C_pp" + for k in range(5,9): + if calc[k]: + p,p1=ct_prods(l0,k) + d20[k]+=zm*p1.mean(-1)+zz*p.mean(-1) + if ctFT:ct0[k]+=FT(p,index).conj()*ftzz+FT(p1,index)*ftz + if ctDIR:ct0[k]+=fastCT(p,zzp,index,N)+fastCT(p1,z,index,N) + + + "Now calculate inverse transforms if calc_ct" + if ctFT: + print('Use Fourier Transform') + #Here the number of time point pairs for each element of the correlation function +# N=get_count(index) if index is not None else np.arange(n,0,-1) + i=N!=0 + N=N[i] + #We only take values for N!=0, and then normalize by N + if mmpswap: + ct=[None if ct1 is None else (np.fft.ifft(ct1.conj(),axis=-1)[:,:int(n/2)])[:,i]/N for ct1 in ct0] + else: + ct=[None if ct1 is None else (np.fft.ifft(ct1,axis=-1)[:,:int(n/2)])[:,i]/N for ct1 in ct0] +# ct=[None if ct1 is None else ct1.conj() for ct1 in ct] #We have the complex conjugate of the correct correlation function + elif ctDIR: + ct=[None if ct1 is None else ct1/N[N!=0] for ct1 in ct0] + "Add offsets to terms C_pp" + offsets=[0,0,-1/2,0,0,-1/2,1/4,1/4,-1/2] + d2=[None if d21 is None else d21+o for d21,o in zip(d20,offsets)] + if calc_ct:ct=[None if ct1 is None else ct1+o for ct1,o in zip(ct,offsets)] + + "If a particular value selected with m=0,mp!=0 (C_p0), apply the m/mp swap" + if mmpswap: + d2=[m_mp_swap(d2,0,k-2,k-2,0) for k,d2 in enumerate(d2[:5])] + if calc_ct:ct=[m_mp_swap(ct0,0,k-2,k-2,0) for k,ct0 in enumerate(ct[:5])] + + "Remove extra dimension if input was one dimensional" + if vZ.ndim==2: + d2=[None if d21 is None else d21.squeeze() for d21 in d2] + if calc_ct:ct=[None if ct1 is None else ct1.squeeze() for ct1 
in ct] + + """Extract only desired terms of ct,d2 OR" + fill in terms of ct, d2 that are calculated with sign swaps/conjugates""" + if cmpt in ['0m','m0','0p','p0']: + d2[3],d2[4]=-d2[1].conj(),d2[0].conj() + d2=np.array(d2[:5]) + if calc_ct: + ct[3],ct[4]=-ct[1].conj(),ct[0].conj() + ct=np.array(ct[:5]) + elif cmpt in ['mm','pp']: + d2[-3],d2[-4]=-d2[-2].conj(),d2[-1].conj() + d2=np.array(d2[-4:]) + if calc_ct: + ct[-3],ct[-4]=-ct[-2].conj(),ct[-1].conj() + ct=np.concatenate((ct[-4:-2],ct[2:3],ct[-2:]),axis=0) + elif cmpt in ['0-2','0-1','00','01','02','-2-2','-1-1','11','22','-20','-10','10','20']: + i=np.argwhere(calc).squeeze() + d2=d2[i] + if calc_ct:ct=ct[i] + + "Return the requested terms" + if mode[0].lower()=='b': #both (just check match on first letter) + return ct,d2 + elif mode[0].lower()=='c': #ct only + return ct + else: #D2inf only + return d2 + +"Used in conjunction with loops to calculate the required terms for the correlation functions" +def ct_prods(l,n): + """ + Calculates the appropriate product (x,y,z components, etc.) for a given + correlation function's component. Provide l, the output of a generator from + loops. Mean may be taken, or FT, depending on if final value or correlation + function is required. + + n determines which term to calculate. 
# Used in conjunction with loops() to produce the required terms of the
# correlation functions.
def ct_prods(l,n):
    """
    Return the component product needed for one term of the correlation function.

    Parameters
    ----------
    l : dict
        One item yielded by the ``loops`` generator.  Always carries the
        z-components 'az' and 'bz'; carries 'ax','ay','bx','by' when the
        transverse terms are requested, and 'eag','ebd','gz','dz' for
        frame-in-frame calculations.
    n : int
        Term index 0-8 into [0-2, 0-1, 00, 01, 02, -2-2, -1-1, 11, 22].

    Returns
    -------
    p : ndarray                  for n <= 4
    (p, p1) : tuple of ndarray   for n > 4 (the C_pp terms need two products)
    """
    p1=None
    if n==0:                #C_{0,-2}
        p=np.sqrt(3/8)*(l['ax']*l['bx']-l['ay']*l['by']+2j*l['ax']*l['by'])
    elif n==1:              #C_{0,-1}
        p=-np.sqrt(3/2)*(l['ax']+1j*l['ay'])*l['bz']
    elif n==2:              #C_{0,0}
        p=1.5*l['az']*l['bz']
    elif n==3:              #C_{0,1}
        p=np.sqrt(3/2)*(l['ax']-1j*l['ay'])*l['bz']
    elif n==4:              #C_{0,2}
        p=np.sqrt(3/8)*(l['ax']*l['bx']-l['ay']*l['by']-2j*l['ax']*l['by'])
    elif n==5 or n==8:      #C_{-2,-2} / C_{2,2}
        p=l['az']*l['bz']
        p1=0.5*l['az']*l['gz']
    elif n==6 or n==7:      #C_{-1,-1} / C_{1,1}
        p=0.25*l['az']*l['bz']
        p1=0.5*l['az']*l['gz']

    #Frame-in-frame run: the first product picks up the gamma/delta z-components
    if 'eag' in l:
        p=p*(l['gz']*l['dz'])

    return p if p1 is None else (p,p1)
def loops(vZ,vXZ=None,nuZ_F=None,nuXZ_F=None,nuZ_f=None,nuXZ_f=None,calc=None):
    """
    Generator that yields, for each axis combination, the vector components
    required by ct_prods for each term of the correlation function.

    All arguments must be provided in the same frame (typically the lab
    frame, although other frames may be used).

    Parameters
    ----------
    vZ : array
        Direction of the bond (Z-axis of the bond frame).
    vXZ : array, optional
        Vector in the XZ-plane of the bond frame (usually another bond).
    nuZ_F : array, optional
        Z-axis of frame F (motion of F is removed).
    nuXZ_F : array, optional
        Vector in the XZ-plane of frame F (if F is defined by two vectors).
    nuZ_f : array, optional
        Z-axis of frame f; provide to compute motion of f within F.
    nuXZ_f : array, optional
        Vector in the XZ-plane of frame f (if f is defined by two vectors).
    calc : bool array (9,), optional
        Which of the nine terms are required; None selects all nine.

    Yields
    ------
    dict
        Keys 'az','bz' always; 'ax','ay' (and 'bx','by' when all transverse
        terms are needed) when calc requests them; additionally
        'eag','ebd','gz','dz' for frame-in-frame (nuZ_f given) calculations.
        9 items for bond-in-frame runs, 81 for frame-in-frame runs.
    """

    if calc is None:calc=np.ones(9,dtype=bool)  #Default: every term is required

    vZ,nuZ_F,nuZ_f=vft.norm(vZ),vft.norm(nuZ_F),vft.norm(nuZ_f) #Make sure terms are normalized

    "Apply frame F (remove motion of frame F)"
    vZF,vXZF,nuZ_fF,nuXZ_fF=vft.applyFrame(vZ,vXZ,nuZ_f,nuXZ_f,nuZ_F=nuZ_F,nuXZ_F=nuXZ_F)

    if np.any(calc[[0,1,3,4]]): #Do we need X and Y axes for the bond frame?
        #Euler angles of the bond frame, then X/Y axes rotated into it
        sc=vft.getFrame(vZF,vXZF)
        vXF,vYF=vft.R([1,0,0],*sc),vft.R([0,1,0],*sc)
    else:
        vXF=[None,None,None] #Just set to None if not required
        vYF=[None,None,None]

    if nuZ_f is None: #This is a bond in frame calculation (9 loop elements)
        #Double loop over the three bond-frame axes (a and b)
        for ax,ay,az in zip(vXF,vYF,vZF):
            for bx,by,bz in zip(vXF,vYF,vZF):
                out={'az':az,'bz':bz}

                if calc[0] or calc[4]: #All terms required
                    out.update({'ax':ax,'bx':bx,'ay':ay,'by':by})
                elif calc[1] or calc[3]: #Some terms required
                    out.update({'ax':ax,'ay':ay})
                yield out #Only z terms required

    else: #This is a frame (f) in frame (F) calculation (81 loop elements)

        #Angles of frame f within F; pass2act converts the passive rotation
        #into the corresponding active rotation before applying it
        scfF=vft.getFrame(nuZ_fF,nuXZ_fF)
        vZf=vft.R(vZF,*vft.pass2act(*scfF))
#        vZf=vft.applyFrame(vZ,nuZ_F=nuZ_f,nuXZ_F=nuXZ_f)
#        vZf=vft.R(vZF,*scfF)
        #Axes of frame F expressed in frame f
        eFf=[vft.R([1,0,0],*vft.pass2act(*scfF)),\
             vft.R([0,1,0],*vft.pass2act(*scfF)),\
             vft.R([0,0,1],*vft.pass2act(*scfF))]
#        eFf=[vft.R([1,0,0],*scfF),\
#             vft.R([0,1,0],*scfF),\
#             vft.R([0,0,1],*scfF)]

        #Quadruple loop: axes a,b of the bond frame and components
        #gamma,delta of the f-in-F transformation
        for ea,ax,ay,az in zip(eFf,vXF,vYF,vZF):
            for eb,bx,by,bz in zip(eFf,vXF,vYF,vZF):
                for eag,gz in zip(ea,vZf):
                    for ebd,dz in zip(eb,vZf):
                        if calc[0] or calc[4]: #All terms required
                            out={'eag':eag,'ebd':ebd,'ax':ax,'ay':ay,'az':az,\
                                 'bx':bx,'by':by,'bz':bz,'gz':gz,'dz':dz}
                        elif calc[1] or calc[3]: #Some terms required
                            out={'eag':eag,'ebd':ebd,'ax':ax,'ay':ay,'az':az,\
                                 'bz':bz,'gz':gz,'dz':dz}
                        else: #Only z-terms required
                            out={'eag':eag,'ebd':ebd,'az':az,'bz':bz,'gz':gz,'dz':dz}

                        yield out
# Swap component indices, using the appropriate symmetry relationships.
def m_mp_swap(X,mpi=0,mi=0,mpf=0,mf=0):
    """
    Convert a correlation function (or its value at infinite time) from the
    component pair (mi, mpi) to the pair (mf, mpf) using symmetry relations.

    Parameters
    ----------
    X : ndarray, complex, or None
        Quantity to transform (None is passed straight through).
    mpi, mi : int
        Initial component indices.
    mpf, mf : int
        Final component indices.

    Returns
    -------
    Transformed value, X itself when no transform is needed, or None for an
    invalid index combination (a message is printed in that case).

    Notes
    -----
    Currently one of the components must be 0, or mi == mpi / mf == mpf.
    Valid swaps keep the magnitude pattern, e.g. (0,2) -> (0,-2), (2,0),
    (-2,0).
    """

    if X is None:
        return None

    ami,ampi,amf,ampf=abs(mi),abs(mpi),abs(mf),abs(mpf)

    #The magnitude pattern must survive the swap (possibly with m/mp crossed)
    matched=ami==amf and ampi==ampf
    crossed=ami==ampf and ampi==amf
    if not (matched or crossed):
        print('Invalid m values')
        print('Example: initial components (0,2) can have final components (0,-2),(2,0),(-2,0)')
        return

    if mi and mpi and mi!=mpi:
        print('m or mp must be 0, or m=mp')
        return

    if mi==mpi and mf==mpf:
        return X    #Identical pattern: nothing to do

    out=X
    if ami!=amf:            #Position swap (m <-> mp): conjugate
        out=np.conj(out)
    if mi+mpi!=mf+mpf:      #Sign swap
        if 1 in (ami,amf):
            out=-np.conj(out)   #Sign change and conjugate for |m|=1
        elif 2 in (ami,amf):
            out=np.conj(out)    #Conjugate for |m|=2
    return out
+ """ + A=None + if A_0m_PASinf is None or threshold==0: + ct,A=Ct_D2inf(vZ=vZ,vXZ=vXZ,nuZ_F=nuZ_F,nuXZ_F=nuXZ_F,nuZ_f=nuZ_f,nuXZ_f=nuXZ_f,cmpt=cmpt,mode=mode,index=index) + elif threshold==1: + ct0=Ctsym(A_0m_PASinf,nuZ_f=nuZ_f,nuXZ_f=nuXZ_f,nuZ_F=nuZ_F,nuXZ_F=nuXZ_F,index=index) + ct=np.zeros([5,ct0.shape[0],ct0.shape[1]],dtype=complex) + ct[2]=ct0 + else: + sym=vft.Spher2pars(A_0m_PASinf)[1]. + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Feb 6 10:43:33 2020 + +@author: albertsmith +""" + +import numpy as np +import pyDIFRATE.Struct.vf_tools as vft +import pyDIFRATE.Struct.select_tools as selt + +#%% Frames +""" +Here, we define various functions that define the frames of different motions +in an MD trajectory. Each function should return another function that will +produce one or two vectors defining the frame (without arguments). Those vectors +should have X,Y,Z as the first dimension (for example, such that we can apply +X,Y,Z=v). Note this is the transpose of the outputs of MDanalysis positions +""" + +def peptide_plane(molecule,resids=None,segids=None,filter_str=None,full=True,sigma=0): + """ + Aligns the peptide plane motion. Two options exist, full=True performs an + RMS alignment of the N,H,CA of the given residue and C',O,CA of the previous + residue. + full=False uses only the positions of the N of the given residue and C',O + of the previous. 
+ + The former is notably slower, but also performs better when separating + librational motion + """ + "Peptide plane motion, defined by C,N,O positions" + if full: + "Get selections" + selCA,selH,selN,selCm1,selOm1,selCAm1=selt.peptide_plane(molecule,resids,segids,filter_str) + + "Get universe, reset time" + uni=molecule.mda_object + uni.trajectory.rewind() + + "Define function to calculate the vectors defining the plane" + def vfun(): + v=list() + for CA,H,N,Cm1,Om1,CAm1 in zip(selCA,selH,selN,selCm1,selOm1,selCAm1): + v0=np.array([CA.position-N.position, + H.position-N.position, + N.position-Cm1.position, + Cm1.position-Om1.position, + Cm1.position-CAm1.position]) + box=uni.dimensions[:3] + v.append(vft.pbc_corr(v0.T,box)) + return v + + "Get the reference vectors (at t=0)" + vref=vfun() + + def sub(): + R=list() + vecs=vfun() + R=[vft.RMSalign(vr,v) for v,vr in zip(vecs,vref)] + return vft.R2vec(R) + return sub,None,{'PPfun':'AvgGauss','sigma':sigma} + else: + "Peptide plane motion, defined by C,N,O positions" + selN,selC,selO=selt.peptide_plane(molecule,resids,segids,filter_str,full) + uni=molecule.mda_object + def sub(): + box=uni.dimensions[0:3] + v1=selO.positions-selC.positions + v2=selN.positions-selC.positions + v1=vft.pbc_corr(v1.T,box) + v2=vft.pbc_corr(v2.T,box) + + return v1,v2 + return sub,None,{'PPfun':'AvgGauss','sigma':sigma} + +def bond(molecule,sel1=None,sel2=None,sel3=None,Nuc=None,resids=None,segids=None,filter_str=None): + """Bond defines the frame. + sel1/sel2 : Defines the z-axis of the frame (the bond itself). Follows + the argument rules of sel_simple (sel2 should usually be + the heteroatom) + Nuc : Automatically sets sel1 and sel2 for a given nucleus definition + sel3 : sel2 and sel3 will define the xz-plane of the bond frame. + This is optional: however, if this frame is the PAS of the + bond responsible for relaxation, then frames may not + function correctly if this is not provided. By default, sel3 + is set to None and is omitted. 
However, if called from within + molecule.tensor_frame, then default is changed to sel3='auto' + resids, segids, filter_str apply additional filters to sel1, sel2, and sel3 + if defined. + """ + if Nuc is not None: + sel2,sel1=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel2,sel1=[selt.sel_simple(molecule,s,resids,segids,filter_str) for s in [sel1,sel2]] + + if isinstance(sel3,str) and sel3=='auto': + uni=sel1.universe + resids=np.unique(sel2.resids) + sel0=uni.residues[np.isin(uni.residues.resids,resids)].atoms + sel3=selt.find_bonded(sel2,sel0,exclude=sel1,n=1,sort='cchain')[0] + elif sel3 is not None: + sel3=selt.sel_simple(molecule,sel3,resids,segids,filter_str) + + uni=molecule.mda_object + + if sel3 is None: + def sub(): + box=uni.dimensions[0:3] + v=sel1.positions-sel2.positions + v=vft.pbc_corr(v.T,box) + return v + else: + def sub(): + box=uni.dimensions[0:3] + vZ=sel1.positions-sel2.positions + vXZ=sel3.positions-sel2.positions + vZ=vft.pbc_corr(vZ.T,box) + vXZ=vft.pbc_corr(vXZ.T,box) + return vZ,vXZ + return sub + +def LabXY(molecule,sel1=None,sel2=None,Nuc=None,resids=None,segids=None,filter_str=None): + """Motion projected to the XY-plane of the Lab frame. Use only for systems + that remain aligned along z + """ + if Nuc is not None: + sel1,sel2=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel1=selt.sel_simple(molecule,sel1,resids,segids,filter_str) + sel2=selt.sel_simple(molecule,sel2,resids,segids,filter_str) + uni=molecule.mda_object + def sub(): + box=uni.dimensions[0:3] + v=sel1.positions-sel2.positions + v=vft.pbc_corr(v.T,box) + v[2]=0 + return v + return sub + +def LabZ(molecule,sel1=None,sel2=None,Nuc=None,resids=None,segids=None,filter_str=None): + """Motion projected to the Z-axis of the Lab frame. 
Use only for systems + that remain aligned along z + """ + if Nuc is not None: + sel1,sel2=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel1=selt.sel_simple(molecule,sel1,resids,segids,filter_str) + sel2=selt.sel_simple(molecule,sel2,resids,segids,filter_str) + uni=molecule.mda_object + def sub(): + box=uni.dimensions[0:3] + v=sel1.positions-sel2.positions + v=vft.pbc_corr(v.T,box) + v[:2]=0 + return v + return sub + +def bond_rotate(molecule,sel1=None,sel2=None,sel3=None,Nuc=None,resids=None,segids=None,filter_str=None): + """ + Rotation around a given bond, defined by sel1 and sel2. Has a very similar + effect to simply using bond with the same sel1 and sel2. However, an addition + selection is created to a third atom. Then, the vector between sel1 and + sel2 defines the rotation axis. However, rotation around this axis caused + by more distant motions is removed, because a third selection (sel3) is + used with sel2 to create a second vector, which then remains in the xz plane + + (if only sel1 and sel2 are specified for rotation, then some rotation further + up a carbon chain, for example, may not move the vector between sel1 and sel2, + but does cause rotation of the inner bonds- in most cases it is not clear if + this is happening, but becomes particularly apparent when rotation appears + on double bonds, where rotation should be highly restricted) + + sel3 may be defined, but is not required. If it is not provided, a third + atom will be found that is bound to sel2 (this frame won't work if sel2 is + not bound to any other atom). 
+ """ + + if Nuc is not None: + sel1,sel2=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel1=selt.sel_simple(molecule,sel1,resids,segids,filter_str) + sel2=selt.sel_simple(molecule,sel2,resids,segids,filter_str) + + if sel3 is not None: + sel3=selt.sel_simple(molecule,sel3,resids,segids,filter_str) + else: + resids=np.unique(sel1.resids) + i=np.isin(sel1.universe.residues.resids,resids) #Filter for atoms in the same residues + sel0=sel1.universe.residues[i].atoms + sel3=selt.find_bonded(sel2,sel0,sel1,n=1,sort='cchain')[0] + + uni=molecule.mda_object + + def sub(): + box=uni.dimensions[0:3] + v1=sel1.positions-sel2.positions + v2=sel2.positions-sel3.positions + v1=vft.pbc_corr(v1.T,box) + v2=vft.pbc_corr(v2.T,box) + return v1,v2 + return sub + +def superimpose(molecule,sel=None,resids=None,segids=None,filter_str=None,sigma=0): + """ + Superimposes a selection of atoms to a reference frame (the first frame) + + Note that we may have multiple selections. In this case, then at least some + of the arguments will be lists or higher dimensional. 
For this purpose, the + sel_lists function is used (in select_tools.py) + + f=superimpose(molecule,sel=None,resids,None,segids=None,filter_str=None) + + f() returns vectors representing the rotation matrix + """ + + sel=selt.sel_lists(molecule,sel,resids,segids,filter_str) + uni=molecule.mda_object + "Calculate the reference vectors" + uni.trajectory.rewind() + vref=list() + i0=list() + for s in sel: + vr=s.positions + i0.append(vft.sort_by_dist(vr)) + vref.append(np.diff(vr[i0[-1]],axis=0).T) + + def sub(): + R=list() + box=uni.dimensions[:3] + for s,vr,i in zip(sel,vref,i0): + v=vft.pbc_corr(np.diff(s.positions[i],axis=0).T,box) #Calculate vectors, periodic boundary correction + R.append(vft.RMSalign(vr,v)) #Get alignment to reference vector + + return vft.R2vec(R) #This converts R back into two vectors + return sub,None,{'PPfun':'AvgGauss','sigma':sigma} + + +def chain_rotate(molecule,sel=None,Nuc=None,resids=None,segids=None,filter_str=None): + """ + Creates a frame for which a chain of atoms (usually carbons) is aligned + such that the vector formed by the previous and next heteroatom (not 1H) + are aligned along z. + + Note that the frame is selected with a single initial selection, and the + function automatically searches for the surrounding atoms. 
In case a methyl + carbon is included, the rotation is defined by the carbon itself and its + nearest neighbor, instead of the surrounding two atoms (which would then + have to include a methyl proton) + """ + + uni=molecule.mda_object + + "Get the initial selection" + if Nuc is not None: + sel,_=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel=selt.sel_simple(molecule,sel,resids,segids,filter_str) + + "Get all atoms in the residues included in the initial selection" + resids=np.unique(sel.resids) + sel0=uni.residues[np.isin(uni.residues.resids,resids)].atoms + + "Get bonded" + sel1,sel2=selt.find_bonded(sel,sel0=sel0,n=2,sort='cchain') + + "Replace 1H with the original selection" + i=sel2.types=='H' + + sel20=sel2 + sel2=uni.atoms[:0] + for s2,s,i0 in zip(sel20,sel,i): + if i0: + sel2+=s + else: + sel2+=s2 + + + def sub(): + box=uni.dimensions[0:3] + v=sel2.positions-sel1.positions + v=vft.pbc_corr(v.T,box) + return v + return sub + + +def methylCC(molecule,Nuc=None,resids=None,segids=None,filter_str=None,sigma=0): + """ + Superimposes the C-X bond attached to a methyl carbon, and can separate + methyl rotation from re-orientation of the overall methyl group + + Note- we only return one copy of the C–C bond, so a frame index is necessary + """ + + if Nuc is None: + Nuc='ch3' + selC1,_=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + selC1=selC1[::3] #Above line returns 3 copies of each carbon. 
Just take 1 copy + + resids=molecule.mda_object.residues.resids + sel0=molecule.mda_object.residues[np.isin(resids,selC1.resids)].atoms + selC2=selt.find_bonded(selC1,sel0,n=1,sort='cchain')[0] + selC3=selt.find_bonded(selC2,sel0,exclude=selC1,n=1,sort='cchain')[0] +# +# selC2=sum([sel0.select_atoms('not name H* and around 1.6 atom {0} {1} {2}'\ +# .format(s.segid,s.resid,s.name)) for s in selC1]) + + def sub(): + box=molecule.mda_object.dimensions[:3] + v1,v2=selC1.positions-selC2.positions,selC2.positions-selC3.positions + v1,v2=[vft.pbc_corr(v.T,box) for v in [v1,v2]] + return v1,v2 + frame_index=np.arange(len(selC1)).repeat(3) + return sub,frame_index,{'PPfun':'AvgGauss','sigma':sigma} + +def side_chain_chi(molecule,n_bonds=1,Nuc=None,resids=None,segids=None,filter_str=None,sigma=0): + """ + Returns a frame that accounts for motion arounda given bond in the side chain, + where we are interested in the total methyl dynamics.Ideally, the product of + all side chain rotations plus the backbone motion and methyl rotation yields + the total motion. One should provide the same selection arguments as used for + the methylCC frame, plus one additional argument, n_bonds, which determines + how many bonds away from the methyl group we define the frame. + + Note that, due to different side chain lengths, some frames defined this way + will not be defined, because n_bonds is too large. For example, side_chain_chi + will never return a frame for an alanine group, and valine will only yield a + frame for n_bonds=1. This should not cause an error, but rather will result + in np.nan found in the returned frame index. + """ + + if Nuc is None: + Nuc='ch3' + selC,_=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + selC=selC[::3] #Above line returns 3 copies of each carbon. 
Just take 1 copy + + frame_index=list() + sel1,sel2,sel3=None,None,None + k=0 + for s in selC: + chain=selt.get_chain(s,s.residue.atoms)[3+n_bonds:6+n_bonds] + if len(chain)==3: + frame_index.extend([k,k,k]) + k+=1 + if sel1 is None: + sel1,sel2,sel3=chain[0:1],chain[1:2],chain[2:3] + else: + sel1=sel1+chain[0] + sel2=sel2+chain[1] + sel3=sel3+chain[2] + else: + frame_index.extend([np.nan,np.nan,np.nan]) + frame_index=np.array(frame_index) + uni=molecule.mda_object + + def sub(): + box=uni.dimensions[0:3] + vZ=sel1.positions-sel2.positions + vXZ=sel3.positions-sel2.positions + vZ=vft.pbc_corr(vZ.T,box) + vXZ=vft.pbc_corr(vXZ.T,box) + return vZ,vXZ + + return sub,frame_index,{'PPfun':'AvgGauss','sigma':sigma} + +def librations(molecule,sel1=None,sel2=None,Nuc=None,resids=None,segids=None,filter_str=None,full=True): + """ + Defines a frame for which librations are visible. That is, for a given bond, + defined by sel1 and sel2, we search for other atoms bound to the + heteroatom (by distance). The reference frame is then defined by the + heteroatom and the bonded atoms, leaving primarily librational + motion of the bond. 
We preferentially select the other two atoms for larger + masses, but they may also be protons (for example, a methyl H–C bond will + be referenced to the next carbon but also another one of the protons of + the methyl group) + + In case the heteroatom only has two bound partners, the second atom in the + bond will also be used for alignment, reducing the effect motion + (not very common in biomolecules) + + librations(sel1,sel2,Nuc,resids,segids,filter_str) + """ + if Nuc is not None: + sel1,sel2=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + else: + sel1=selt.sel_simple(molecule,sel1,resids,segids,filter_str) + sel2=selt.sel_simple(molecule,sel2,resids,segids,filter_str) + + if sel1.masses.sum()1]=1.0 + sb=np.sqrt(1-cb**2) + v0=np.concatenate(([sb],[np.zeros(sb.shape)],[cb]),axis=0) + "Here, we keep the vector fixed in the xz plane of the MOI frame" + vZ[:,k==index]=vft.R(v0,*sc0) +# vZ[:,k==index]=v0 +# vXZ[:,k==index]=np.atleast_2d(vn).T.repeat(v0.shape[1],1) + + return vZ + + return sub \ No newline at end of file diff --git a/pyDIFRATE/Struct/select_tools.py b/pyDIFRATE/Struct/select_tools.py new file mode 100644 index 0000000..7664138 --- /dev/null +++ b/pyDIFRATE/Struct/select_tools.py @@ -0,0 +1,524 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Fri Nov 29 13:06:44 2019 + +@author: albertsmith +""" + +""" +Library of selection tools, to help define the selections for correlation +function calculation, frame definition, etc. +""" +import MDAnalysis as mda +import numpy as np +import numbers + + +def sel0_filter(mol,resids=None,segids=None,filter_str=None): + """ + Performs initial filtering of all atoms in an MDA Universe. Filtering may + be by resid, segid, and/or a selection string. Each selector will default + to None, in which case it is not applied + + sel0=sel0_filter(mol,resids,segids,filter_str) + """ + if hasattr(mol,'mda_object'): + sel0=mol.mda_object.atoms + elif hasattr(mol,'atoms'): + sel0=mol.atoms + else: + print('mol needs to be a molecule object or an atom group') + return + + if segids is not None: + segids=np.atleast_1d(segids) + i=np.isin(sel0.segments.segids,segids) + sel_si=sel0.segments[np.argwhere(i).squeeze()].atoms + sel0=sel0.intersection(sel_si) + if resids is not None: + resids=np.atleast_1d(resids) + i=np.isin(sel0.residues.resids,resids) + sel_ri=sel0.residues[np.argwhere(i).squeeze()].atoms + sel0=sel0.intersection(sel_ri) + + if filter_str is not None: + sel_fs=sel0.select_atoms(filter_str) + sel0=sel0.intersection(sel_fs) + + return sel0 + +#%% Simple selection + +def sel_simple(mol,sel=None,resids=None,segids=None,filter_str=None): + """ + Takes a selection out of the molecule object, where that selection may + be an atom group produced by the user, may be a selection string, or + may be extracted from mol.sel1/mol.sel2. In each case, the output is a MDAnalysis + atom group. 
def sel_simple(mol,sel=None,resids=None,segids=None,filter_str=None):
    """
    Resolve a user-provided selection into an MDAnalysis atom group.

    sel may be:
      None               : all atoms of mol (after the filters below)
      a selection string : applied with MDAnalysis select_atoms
      1 or 2             : use the selections stored in mol.sel1 / mol.sel2
      an atom group      : used directly (residue groups or universes are
                           first reduced to their atom group)

    resids, segids and filter_str further restrict the result (applied via
    sel0_filter). Returns an MDAnalysis AtomGroup, or None (with a printed
    message) if sel is of an unsupported type.

    sel = sel_simple(mol,sel=None,resids=None,segids=None,filter_str=None)
    """

    """If sel has atoms as an attribute, we make sure it's an atom group
    (could be a residue group or universe, for example)
    """
    #BUGFIX: this previously tested hasattr(mol,'atoms') and assigned
    #sel=mol.atoms, which silently discarded any user-supplied sel whenever
    #mol exposed an atoms attribute. Per the comment above, the test belongs
    #on sel.
    if hasattr(sel,'atoms'):sel=sel.atoms

    if sel is None:
        #No explicit selection: fall back to everything in mol, filtered.
        #(The old code tested isinstance(sel,mda.AtomGroup) here while sel
        #was known to be None, so it always printed an error and this
        #fallback was unreachable dead code.)
        return sel0_filter(mol,resids,segids,filter_str)

    if isinstance(sel,str):
        #Selection string: filter first, then apply the string
        sel0=sel0_filter(mol,resids,segids,filter_str)
        sel=sel0.select_atoms(sel)
    elif isinstance(sel,numbers.Real) and sel==1:
        sel=sel0_filter(mol.sel1,resids,segids,filter_str)
    elif isinstance(sel,numbers.Real) and sel==2:
        sel=sel0_filter(mol.sel2,resids,segids,filter_str)
    elif isinstance(sel,mda.AtomGroup):
        sel=sel0_filter(sel,resids,segids,filter_str)
    else:
        print('sel is not an accepted data type')
        return

    return sel
The number of selections returns is + either one (no lists used), or the length of the lists (return will always + be a list) + + sel_list=sel_lists(mol,sel=None,resids=None,segids=None,filter_str=None) + """ + + "First apply sel, as a single selection or list of selections" + if hasattr(sel,'atoms') or isinstance(sel,str) or sel==1 or sel==2: + sel=sel_simple(mol,sel) + n=1 + elif isinstance(sel,list): + sel=[sel_simple(mol,s) for s in sel] + n=len(sel) + elif sel is None: + sel=mol.mda_object.atoms + n=1 + else: + print('sel data type was not recognized') + return + + "Apply the resids filter" + if resids is not None: + if hasattr(resids,'__iter__') and hasattr(resids[0],'__iter__'): + if n==1: + n=len(resids) + sel=[sel_simple(sel,resids=r) for r in resids] + elif len(resids)==n: + sel=[sel_simple(s,resids=r) for s,r in zip(sel,resids)] + else: + print('Inconsistent sizes for selections (resids)') + else: + if n==1: + sel=sel_simple(sel,resids=resids) + else: + sel=[sel_simple(s,resids=resids) for s in sel] + + "Apply the segids filter" + if segids is not None: + if not(isinstance(segids,str)) and hasattr(segids,'__iter__') and hasattr(segids[0],'__iter__'): + if n==1: + n=len(segids) + sel=[sel_simple(sel,segids=si) for si in segids] + elif len(segids)==n: + sel=[sel_simple(s,segids=si) for s,si in zip(sel,segids)] + else: + print('Inconsistent sizes for selections (segids)') + else: + if n==1: + sel=sel_simple(sel,segids=segids) + else: + sel=[sel_simple(s,segids=segids) for s in sel] + + "Apply the filter_str" + if filter_str is not None: + if np.ndim(filter_str)>0: + if n==1: + n=len(filter_str) + sel=[sel_simple(sel,filter_str=f) for f in filter_str] + elif len(filter_str)==n: + sel=[sel_simple(s,filter_str=f) for s,f in zip(sel,filter_str)] + else: + print('Inconsistent sizes for selections (filter_str)') + else: + if n==1: + sel=sel_simple(sel,filter_str=filter_str) + else: + sel=[sel_simple(s,filter_str=filter_str) for s in sel] + + if n==1: + sel=[sel] 
+ + return sel + +#%% Specific selections for proteins +def protein_defaults(Nuc,mol,resids=None,segids=None,filter_str=None): + """ + Selects pre-defined pairs of atoms in a protein, usually based on nuclei that + are observed for relaxation. One may also select specific residues, specific + segments, and apply a filter string + + sel1,sel2=protein_defaults(Nuc,mol,resids,segids,filter_str) + + Nuc is a string and can be: + N (15N,n,n15,N15), CA (13CA,ca,ca13,CA13), C (CO, 13CO, etc.) + """ + + sel0=sel0_filter(mol,resids,segids,filter_str) + + if Nuc.lower()=='15n' or Nuc.lower()=='n' or Nuc.lower()=='n15': + sel1=sel0.select_atoms('name N and around 1.1 (name H or name HN)') + sel2=sel0.select_atoms('(name H or name HN) and around 1.1 name N') + elif Nuc.lower()=='co' or Nuc.lower()=='13co' or Nuc.lower()=='co13' or Nuc.lower()=='c': + sel1=sel0.select_atoms('name C and around 1.4 name O') + sel2=sel0.select_atoms('name O and around 1.4 name C') + elif Nuc.lower()=='ca' or Nuc.lower()=='13ca' or Nuc.lower()=='ca13': + sel1=sel0.select_atoms('name CA and around 1.5 (name HA or name HA2)') + sel2=sel0.select_atoms('(name HA or name HA2) and around 1.5 name CA') + print('Warning: selecting HA2 for glycines. 
Use manual selection to get HA1 or both bonds') + elif Nuc[:3].lower()=='ivl' or Nuc[:3].lower()=='ch3': + if Nuc[:4].lower()=='ivla': + fs0='resname ILE Ile ile VAL val Val LEU Leu leu ALA Ala ala' + Nuc0=Nuc[4:] + elif Nuc[:3].lower()=='ivl': + fs0='resname ILE Ile ile VAL val Val LEU Leu leu' + Nuc0=Nuc[3:] + else: + fs0=None + Nuc0=Nuc[3:] + filter_str=filter_str if fs0 is None else (fs0 if filter_str is None else \ + '('+filter_str+') and ('+fs0+')') + select=None + if 't' in Nuc0.lower() or 'l' in Nuc0.lower():select='l' + if 'r' in Nuc0.lower():select='r' + + sel1,sel2=find_methyl(mol,resids,segids,filter_str,select=select) + +# +# if Nuc[-1].lower()=='t' or Nuc[-2].lower()=='t': #Truncated list- only one C per residue +# sel0=sel0-sel0.select_atoms('(resname VAL val Val and name CG2) or \ +# (resname ILE ile Ile and name CG2) or \ +# (resname LEU leu Leu and name CD1)') +# if Nuc[-1].lower()=='r' or Nuc[-1].lower()=='l': +# sel0=sel0-sel0.select_atoms('(resname VAL val Val and name CG{0}) or \ +# (resname ILE ile Ile and name CG2) or \ +# (resname LEU leu Leu and name CD{0})'\ +# .format('2' if Nuc[-1].lower()=='l' else '1')) +# +# if Nuc[:4].lower()=='ivla': +# sel0C=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu ALA Ala ala and name C*') +# sel0H=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu ALA Ala ala and name H*') +# elif Nuc[:3].lower()=='ivl': +# sel0C=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu and name C*') +# sel0H=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu and name H*') +# else: +# sel0C=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu ALA Ala ala MET Met met THR Thr thr and name C*') +# sel0H=sel0.select_atoms('resname ILE Ile ile VAL val Val LEU Leu leu ALA Ala ala MET Met met THR Thr thr and name H*') +# ids=list() +# for s in sel0C: +# if (sel0H+sel0C).select_atoms('name H* and around 1.15 atom {0} {1} {2}'.format(s.segid,s.resid,s.name)).n_atoms==3: +# 
ids.append(s.id) +# sel1=sel0[np.isin(sel0.ids,ids)] +# sel1=sel1[np.repeat([np.arange(sel1.n_atoms)],3,axis=1).reshape(sel1.n_atoms*3)] +# sel2=(sel1+sel0H).select_atoms('name H* and around 1.15 name C*') + + if '1' in Nuc: + sel1=sel1[::3] + sel2=sel2[::3] + + return sel1,sel2 + +def find_methyl(mol,resids=None,segids=None,filter_str=None,select=None): + """ + Finds methyl groups in a protein for a list of residues. Standard selection + options are used. + + select may be set to 'l' or 'r' (left or right), which will select one of the + two methyl groups on valine or leucine, depending on their stereochemistry. In + this mode, only the terminal isoleucine methyl group will be returned. + + To just get rid of the gamma methyl on isoleucine, set select to 'ile_d' + """ + mol.mda_object.trajectory[0] + sel0=sel0_filter(mol,resids,segids,filter_str) + selC0,selH0=sel0.select_atoms('name C*'),sel0.select_atoms('name H*') + index=np.array([all(b0 in selH0 for b0 in b)\ + for b in np.array(find_bonded(selC0,selH0,n=3,d=1.5,sort='massi')).T]) +# index=np.all([b.names[0]=='H' for b in find_bonded(selC0,selH0,n=3,d=1.5)],axis=0) + selH=find_bonded(selC0[index],sel0=selH0,n=3,d=1.5) + + selH=np.array(selH).T + selC=selC0[index] + + "First, we delete the gamma of isoleucine if present" + if select is not None: + ile=[s.resname.lower()=='ile' for s in selC] #Find the isoleucines + if any(ile): + exclude=[s.sum() for s in selH[ile]] + nxt=find_bonded(selC[ile],sel0=sel0,exclude=exclude,n=1,sort='cchain')[0] + keep=np.array([np.sum([b0.name[0]=='H' for b0 in b])==2 \ + for b in np.array(find_bonded(nxt,sel0=sel0,exclude=selC[ile],n=2,sort='massi')).T]) + # keep=np.sum([b.types=='H' for b in find_bonded(nxt,sel0=sel0,exclude=selC[ile],n=2)],axis=0)==2 + index=np.ones(len(selC),dtype=bool) + index[ile]=keep + selC,selH=selC[index],selH[index] + + if select is not None and (select[0].lower() in ['l','r']): + val_leu=[s.resname.lower()=='val' or s.resname.lower()=='leu' for s in 
selC] + if any(val_leu): + exclude=[s.sum() for s in selH[val_leu][::2]] + nxt0=find_bonded(selC[val_leu][::2],sel0=sel0,exclude=exclude,n=1,sort='cchain')[0] + exclude=np.array([selC[val_leu][::2],selC[val_leu][1::2]]).T + exclude=[e.sum() for e in exclude] + nxt1=find_bonded(nxt0,sel0=sel0,exclude=exclude,n=1,sort='cchain')[0] + nxtH=find_bonded(nxt0,sel0=sel0,exclude=exclude,n=1,sort='massi')[0] + + cross=np.cross(nxtH.positions-nxt0.positions,nxt1.positions-nxt0.positions) + dot0=(cross*selC[val_leu][::2].positions).sum(1) + dot1=(cross*selC[val_leu][1::2].positions).sum(1) + keep=np.zeros(np.sum(val_leu),dtype=bool) + keep[::2]=dot0>=dot1 + keep[1::2]=dot0k: + out[k]+=sel01[k] + else: + out[k]+=s + + return out + + +#%% This allows us to use a specific keyword to make an automatic selection +""" +Mainly for convenience, cleanliness in code construction +To add a new keyword, simply define a function of the same name, that returns +the desired selections. +Note that mol must always be an argument (the molecule object) +resids,segids,and filter_str must also be arguments, or **kwargs must be included +""" +def keyword_selections(keyword,mol,resids=None,segids=None,filter_str=None,**kwargs): + if keyword in globals() and globals()[keyword].__code__.co_varnames[0]=='mol': #Determine if this is a valid vec_fun + fun0=globals()[keyword] + else: + raise Exception('Keyword selection "{0}" was not recognized'.format(keyword)) + + fun=fun0(mol=mol,resids=resids,segids=segids,filter_str=filter_str,**kwargs) + + return fun + +def peptide_plane(mol,resids=None,segids=None,filter_str=None,full=True): + """ + Selects the peptide plane. One may also provide resids, segids, + and a filter string. Note that we define the residue as the residue containing + the N atom (whereas the C, O, and one Ca of the same peptide plane are actually in + the previous residue). 
+ + returns 6 selections: + selCA,selH,selN,selCm1,selOm1,selCAm1 + (selCA, selH, and selN are from residues in resids, and + selCm1, selOm1, selCAm1 are from residues in resids-1) + + or if full = False, returns 3 selections + selN,selCm1,selOm1 + + Note that peptide planes for which one of the defining atoms is missing will + be excluded + """ + sel0=sel0_filter(mol,resids,segids,filter_str) + if resids is None: + resids=sel0.resids + selm1=sel0_filter(mol,np.array(resids)-1,segids,filter_str) + + if full: +# selN=(sel0.union(selm1)).select_atoms('protein and (name N and around 1.5 name HN H CD) and (around 1.4 (name C and around 1.4 name O))') + selN=(sel0.union(selm1)).select_atoms('protein and (name N and around 1.7 name HN H CD) and (around 1.7 (name C and around 1.7 name O))') + else: #We don't need the HN to be present in this case +# selN=(sel0.union(selm1)).select_atoms('protein and (name N and around 1.4 (name C and around 1.4 name O))') + selN=(sel0.union(selm1)).select_atoms('protein and (name N and around 1.7 (name C and around 1.7 name O))') + + i=np.isin(selN.resids,resids) + selN=selN[i] #Maybe we accidently pick up the N in the previous plane? 
Exclude it here + resids=selN.resids + "Re-filter the original selection for reduced resid list" + sel0=sel0_filter(sel0,resids) + selm1=sel0_filter(selm1,np.array(resids)-1) + if full: +# selH=sel0.residues.atoms.select_atoms('protein and (name H HN CD and around 1.5 name N)') + selH=sel0.residues.atoms.select_atoms('protein and (name H HN CD and around 1.7 name N)') + selCA=sel0.residues.atoms.select_atoms('protein and (name CA and around 1.7 name N)') + +# i=np.argwhere(np.isin(sel0.residues.resids,sel1.residues.resids-1)).squeeze() +# selCm1=selm1.residues.atoms.select_atoms('protein and (name C and around 1.4 name O)') + selCm1=selm1.residues.atoms.select_atoms('protein and (name C and around 1.7 name O)') +# selOm1=selm1.residues.atoms.select_atoms('protein and (name O and around 1.4 name C)') + selOm1=selm1.residues.atoms.select_atoms('protein and (name O and around 1.7 name C)') + if full: +# selCAm1=selm1.residues.atoms.select_atoms('protein and (name CA and around 1.6 name C)') + selCAm1=selm1.residues.atoms.select_atoms('protein and (name CA and around 1.7 name C)') + + if full: + return selCA,selH,selN,selCm1,selOm1,selCAm1 + else: + return selN,selCm1,selOm1 + + +def get_chain(atom,sel0,exclude=None): + if exclude is None:exclude=[] + '''searching a path from a methyl group of a residue down to the C-alpha of the residue + returns a list of atoms (MDA.Atom) beginning with the Hydrogens of the methyl group and continuing + with the carbons of the side chain + returns empty list if atom is not a methyl carbon''' + final=False + def get_bonded(): + '''it happens, that pdb files do not contain bond information, in that case, we switch to selection + by string parsing''' + return np.sum(find_bonded([atom],sel0,n=4,d=1.7)) + + a_name=atom.name.lower() + a_type=atom.name[0].lower() + if 'c'==a_name and len(exclude): + return [atom] + elif a_name == "n": + return [] + connected_atoms = [] + bonded = get_bonded() + if len(exclude)==0: + if 
np.sum(np.fromiter(["h"==a.type.lower() for a in bonded],dtype=bool)) == 3: + final=True + for a in bonded: + if "h"==a.name[0].lower(): + connected_atoms.append(a) + if not "c"==a_type: + return [] + else: + return [] + connected_atoms.append(atom) + exclude.append(atom) + for a in bonded: + if not a in exclude: + nxt = get_chain(a,sel0,exclude) + for b in nxt: + connected_atoms.append(b) + if len(connected_atoms)>1: + if final: + return np.sum(connected_atoms) + else: + return connected_atoms + else: + return [] + +def search_methyl_groups(residue): + methyl_groups = [] + for atom in residue.atoms: + chain = get_chain(atom,residue,[]) + if len(chain): + methyl_groups.append(chain) + return methyl_groups \ No newline at end of file diff --git a/pyDIFRATE/Struct/special_frames.py b/pyDIFRATE/Struct/special_frames.py new file mode 100644 index 0000000..54ad2ec --- /dev/null +++ b/pyDIFRATE/Struct/special_frames.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Feb 6 10:45:12 2020 + +@author: albertsmith +""" + +""" +This module is meant for containing special-purpose frames. 
Usually, these will +only work on a specific type of system, and may be more complex types of functions +(for example, peptide_plane is still a standard frame, although it only works +for proteins, because it is relatively simple) + + 1) The first argument must be "molecule", where this refers to the molecule + object of pyDIFRATE + 2) The output of this function must be another function. + 3) The returned function should not require any input arguments. It should + only depend on the current time point in the MD trajectory (therefore, + calling this function will return different results as one advances through + the trajectory). + 4) The output of the sub-function should be one or two vectors (if the + frame is defined by just a bond direction, for example, then one vector. If + it is defined by some 3D object, say the peptide plane, then two vectors + should be returned) + 5) Each vector returned should be a numpy array, with dimensions 3xN. The + rows corresponds to directions x,y,z. The vectors do not need to be normalized + + 6) Be careful of factors like periodic boundary conditions, etc. In case of + user frames and in the built-in definitions (frames.py) having the same name, + user frames will be given priority. + 7) The outer function must have at least one required argument aside from + molecule. By default, calling molecule.new_frame(Type) just returns a list + of input arguments. + + + Ex. + def user_frame(molecule,arguments...): + some_setup + sel1,sel2,...=molecule_selections (use select_tools for convenience) + ... + uni=molecule.mda_object + + def sub() + ... + v1,v2=some_calculations + ... 
+ box=uni.dimensions[:3] (periodic boundary conditions) + v1=vft.pbc_corr(v1,box) + v2=vft.pbc_corr(v2,box) + + return v1,v2 + return sub + +""" + + + +import numpy as np +import pyDIFRATE.Struct.vf_tools as vft +import pyDIFRATE.Struct.select_tools as selt + +def hop_setup(uni,sel1,sel2,sel3,sel4,ntest=1000): + """ + Function that determines where the energy minima for a set of bonds can be + found. Use for chi_hop and hop_3site. + """ + v12,v23,v34=list(),list(),list() + box=uni.dimensions + traj=uni.trajectory + step=np.floor(traj.n_frames/ntest).astype(int) + + for _ in traj[::step]: + v12.append(vft.pbc_corr((sel1.positions-sel2.positions).T,box[:3])) + v23.append(vft.pbc_corr((sel2.positions-sel3.positions).T,box[:3])) + v34.append(vft.pbc_corr((sel3.positions-sel4.positions).T,box[:3])) + + traj[0] #Sometimes, leaving the trajectory at the end can create other errors... + + v12,v23,v34=[np.moveaxis(np.array(v),0,-1) for v in [v12,v23,v34]] + + v12a=vft.applyFrame(v12,nuZ_F=v23,nuXZ_F=v34) #Rotate so that 23 is on z-axis, 34 in XY-plane + + v0z=vft.norm(np.array([np.sqrt(v12a[0]**2+v12a[1]**2).mean(axis=-1),\ + np.zeros(v12a.shape[1]),v12a[2].mean(axis=-1)])) #Mean projection onto xz + + v12a[2]=0 #Project v12 onto xy-plane + v12a=vft.norm(v12a) + i=np.logical_and(v12a[0]<.5,v12a[1]>0) #For bonds not between -60 and 60 degrees + v12a[:,i]=vft.Rz(v12a[:,i],-.5,-np.sqrt(3)/2) #we rotate +/- 120 degrees to align them all + i=np.logical_and(v12a[0]<.5,v12a[1]<=0) + v12a[:,i]=vft.Rz(v12a[:,i],-.5,np.sqrt(3)/2) + + v0xy=vft.norm(v12a.mean(-1)) #This is the average direction of v12a (in xy-plane) + theta=np.arctan2(v0xy[1],v0xy[0]) + """The direction of the frame follows sel2-sel3, sel3-sel4, but sel1-sel2 + is forced to align with a vector in vr""" + vr=np.array([vft.Rz(v0z,k+theta) for k in [0,2*np.pi/3,4*np.pi/3]]) #Reference vectors (separated by 120 degrees) + "axis=1 of vr is x,y,z" + + return vr + +def 
chi_hop(molecule,n_bonds=1,Nuc=None,resids=None,segids=None,filter_str=None,ntest=1000,sigma=0): + """ + Determines contributions to motion due to 120 degree hops across three sites + for some bond within a side chain. Motion of the frame will be the three site + hoping plus any outer motion (could be removed with additional frames), and + motion within the frame will be all rotation around the bond excluding + hopping. + + One provides the same arguments as side_chain_chi, where we specify the + nucleus of interest (ch3,ivl,ivla,ivlr,ivll, etc.), plus any other desired + filters. We also provide n_bonds, which will determine how many bonds away + from the methyl group (only methyl currently implemented) we want to observe + the motion (usually 1 or 2). + """ + + "First we get the selections, and simultaneously determine the frame_index" + if Nuc is None: + Nuc='ch3' + selC,_=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + selC=selC[::3] #Above line returns 3 copies of each carbon. 
Just take 1 copy + frame_index=list() + sel1,sel2,sel3,sel4=None,None,None,None + k=0 + for s in selC: + chain=selt.get_chain(s,s.residue.atoms)[2+n_bonds:6+n_bonds] + if len(chain)==4: + frame_index.extend([k,k,k]) + k+=1 + if sel1 is None: + sel1,sel2,sel3,sel4=chain[0:1],chain[1:2],chain[2:3],chain[3:4] + else: + sel1=sel1+chain[0] + sel2=sel2+chain[1] + sel3=sel3+chain[2] + sel4=sel4+chain[3] + else: + frame_index.extend([np.nan,np.nan,np.nan]) + frame_index=np.array(frame_index) + + "Next, we sample the trajectory to get an estimate of the energy minima of the hopping" + #Note that we are assuming that minima are always separated by 120 degrees + + vr=hop_setup(molecule.mda_object,sel1,sel2,sel3,sel4,ntest) + + box=molecule.mda_object.dimensions + if sigma!=0: + def sub(): + return [vft.pbc_corr((s1.positions-s2.positions).T,box[:3]) \ + for s1,s2 in zip([sel1,sel2,sel3],[sel2,sel3,sel4])] + return sub,frame_index,{'PPfun':'AvgHop','vr':vr,'sigma':sigma} + else: + + def sub(): + v12s,v23s,v34s=[vft.pbc_corr((s1.positions-s2.positions).T,box[:3]) \ + for s1,s2 in zip([sel1,sel2,sel3],[sel2,sel3,sel4])] + v12s=vft.norm(v12s) + sc=vft.getFrame(v23s,v34s) + v12s=vft.R(v12s,*vft.pass2act(*sc)) #Into frame defined by v23,v34 + i=np.argmax((v12s*vr).sum(axis=1),axis=0) #Index of best fit to reference vectors (product is cosine, which has max at nearest value) + v12s=vr[i,:,np.arange(v12s.shape[1])] #Replace v12 with one of the three reference vectors + return vft.R(v12s.T,*sc),v23s #Rotate back into original frame + + return sub,frame_index + +def hops_3site(molecule,sel1=None,sel2=None,sel3=None,sel4=None,\ + Nuc=None,resids=None,segids=None,filter_str=None,ntest=1000,sigma=0): + """ + Determines contributions to motion due to 120 degree hops across three sites. + Motion within this frame will be all motion not involving a hop itself. 
Motion + of the frame will be three site hoping plus any outer motion (ideally removed + with a methylCC frame) + + sel1 and sel2 determine the bond of the interaction (sel2 should be the + carbon). sel2 and sel3 determine the rotation axis, and sel3/sel4 keep the + axis aligned. + + sel1-sel4 may all be automatically determined if instead providing some of + the usual selection options (Nuc, resids, segids, filter_str) + + First step is to use sel2/sel3 as a z-axis and project the bond onto the x/y + plane for a series of time points. We then rotate around z to find an + orientation that best explains the sel1/sel2 projection as a 3 site hop. + + Second step is to project sel1/sel2 vector onto the z-axis and determine the + angle of the bond relative to the z-axis. + + Then, this frame will only return vectors that match this angle to the z-axis + and are defined by the 3-site hop. + + Setup requires a sampling of the trajectory. We use 1000 points by default + (ntest). This frame will take more time than most to set up because of this + setup. 
+ + hops_3site(molecule,sel1=None,sel2=None,sel3=None,sel4=None,ntest=1000) + """ + + + if sel1:sel1=selt.sel_simple(molecule,sel1,resids,segids,filter_str) + if sel2:sel2=selt.sel_simple(molecule,sel2,resids,segids,filter_str) + if sel3:sel3=selt.sel_simple(molecule,sel3,resids,segids,filter_str) + if sel4:sel4=selt.sel_simple(molecule,sel4,resids,segids,filter_str) + + if not(sel1) and not(sel2):sel2,sel1=selt.protein_defaults(Nuc,molecule,resids,segids,filter_str) + + if 'H' in sel2[0].name:sel1,sel2=sel2,sel1 + + "Get all atoms in the residues included in the initial selection" + uni=molecule.mda_object + resids=np.unique(np.concatenate([sel1.resids,sel2.resids])) + sel0=uni.residues[np.isin(uni.residues.resids,resids)].atoms + + if not(sel3): + sel3=selt.find_bonded(sel2,sel0,exclude=sel1,n=1,sort='cchain',d=1.65)[0] + if not(sel4): + sel4=selt.find_bonded(sel3,sel0,exclude=sel2,n=1,sort='cchain',d=1.65)[0] + + vr=hop_setup(molecule.mda_object,sel1,sel2,sel3,sel4,ntest) + + box=uni.dimensions + if sigma!=0: + def sub(): + return [vft.pbc_corr((s1.positions-s2.positions).T,box[:3]) \ + for s1,s2 in zip([sel1,sel2,sel3],[sel2,sel3,sel4])] + return sub,None,{'PPfun':'AvgHop','vr':vr,'sigma':sigma} + else: + def sub(): + v12s,v23s,v34s=[vft.pbc_corr((s1.positions-s2.positions).T,box[:3]) \ + for s1,s2 in zip([sel1,sel2,sel3],[sel2,sel3,sel4])] + v12s=vft.norm(v12s) + sc=vft.getFrame(v23s,v34s) + v12s=vft.R(v12s,*vft.pass2act(*sc)) #Into frame defined by v23,v34 + i=np.argmax((v12s*vr).sum(axis=1),axis=0) #Index of best fit to reference vectors (product is cosine, which has max at nearest value) + v12s=vr[i,:,np.arange(v12s.shape[1])] #Replace v12 with one of the three reference vectors + return vft.R(v12s.T,*sc),v23s #Rotate back into original frame + return sub + + + +def membrane_grid(molecule,grid_pts,sigma=25,sel0=None,sel='type P',resids=None,segids=None,filter_str=None): + """ + Calculates motion of the membrane normal, defined by a grid of points spread 
about + the simulation. For each grid point, a normal vector is returned. The grid + is spread uniformly around some initial selection (sel0 is a single atom!) + in the xy dimensions (currently, if z is not approximately the membrane + normal, this function will fail). + + The membrane normal is defined by a set of atoms (determined with some + combination of the arguments sel, resids, segids, filter_str, with sel_simple) + + At each grid point, atoms in the selection will be fit to a plane. However, + the positions will be weighted depending on how far they are away from that + grid point in the xy dimensions. Weighting is performed with a normal + distribution. sigma, by default, has a width approximately equal to the + grid spacing (if x and y box lengths are different, we have to round off the + spacing) + + The number of points is given by grid_pts. These points will be distributed + automatically in the xy dimensions, to have approximately the same spacing + in both dimensions. grid_pts will be changed to be the product of the exact + number of points used (we will always distribute an odd number of points + in each dimension, so the reference point is in the center of the grid) + + if sel0, defining the reference atom, is omitted, then the center of the + box will be used. 
Otherwise, the grid will move around with the reference + atom + + membrane_grid(molecule,grid_pts,sigma,sel0,sel,resids,segids,filter_str) + + """ + + uni=molecule.mda_object + + X,Y,Z=uni.dimensions[:3] + nX,nY=1+2*np.round((np.sqrt(grid_pts)-1)/2*np.array([X/Y,Y/X])) + dX,dY=X/nX,Y/nY + + print('{0:.0f} pts in X, {1:.0f} pts in Y, for {2:.0f} total points'.format(nX,nY,nX*nY)) + print('Spacing is {0:.2f} A in X, {0:.2f} A in Y'.format(dX,dY)) + print('Center of grid is found at index {0:.0f}'.format(nX*(nY-1)/2+(nX-1)/2)) + print('sigma = {0:.2f} A'.format(sigma)) + + + if sel0 is not None: + sel0=selt.sel_simple(molecule,sel0) #Make sure this is an atom group + if hasattr(sel0,'n_atoms'): + if sel0.n_atoms!=1: + print('Only one atom should be selected as the membrane grid reference point') + print('Setup failed') + return + else: + sel0=sel0[0] #Make sure we have an atom, not an atom group + + tophalf=sel0.position[2]>Z/2 #Which side of the membrane is this? + else: + tophalf=True + + "Atoms defining the membrance surface" + sel=selt.sel_simple(molecule,sel,resids,segids,filter_str) + + "Filter for only atoms on the same side of the membrane" + sel=sel[sel.positions[:,2]>Z/2] if tophalf else sel[sel.positions[:,2]3*sigma + weight=np.exp(-d2[i]/(2*sigma**2)) + + v.append(vft.RMSplane(v0[:,i],np.sqrt(weight))) + v=np.transpose(v) + return v/np.sign(v[2]) + + return sub + + + \ No newline at end of file diff --git a/pyDIFRATE/Struct/structure.py b/pyDIFRATE/Struct/structure.py new file mode 100755 index 0000000..faffc78 --- /dev/null +++ b/pyDIFRATE/Struct/structure.py @@ -0,0 +1,594 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Apr 4 15:05:19 2019 + +@author: albertsmith +""" + +import MDAnalysis as mda +from MDAnalysis.lib.mdamath import make_whole +import MDAnalysis.analysis.align +import numpy as np +import os +from pyDIFRATE.Struct.vec_funs import new_fun,print_frame_info +import copy +#os.chdir('../chimera') +#from chimera.chimera_funs import open_chimera +from pyDIFRATE.chimera.chimeraX_funs import molecule_only +#os.chdir('../Struct') +import pyDIFRATE.Struct.select_tools as selt + +class molecule(object): + def __init__(self,*args): + self.mda_object=None + self.sel1=None + self.sel2=None + self.sel1in=None + self.sel2in=None + self.label_in=list() + self.label=list() + self.vXY=np.array([]) + self.vCSA=np.array([]) + self.Ralign=list() + self._vf=None + self._vft=None + self._frame_info={'frame_index':list(),'label':None} + + self.pdb=None #Container for a pdb extracted from the mda_object + self.pdb_id=None + "We might want to delete this pdb upon object deletion" + + self.__MDA_info=None + + if np.size(args)>0: + self.load_struct(*args) + + def load_struct(self,*args,**kwargs): + self.mda_object=mda.Universe(*args,**kwargs) + +# def vec_special(self,Type,**kwargs): +# """ +# Allows user defined vectors to be created, from a function defined in +# vec_vuns.py (a function handle should be returned, where that function +# returns a value dependent on the current position in the md analysis +# trajectory. 
The function should return x,y,z components at the given time) +# """ +## if self._vf is None: +## self._vf=list() +## +## self._vf.append(new_fun(Type,self,**kwargs)) +# +# """I'm joining the vec_special and frames functionality. +# vec_special as its own attribute will eventually be removed +# """ +# self.new_frame(Type,**kwargs) + + def clear_vec_special(self): + self._vf=None + + def vec_fun(self): + """ + Evaluates all vectors generated with vec_special at the current time point + of the MD trajectory. Returns a 3xN vector, where N is the number of + vectors (ex. moment_of_inertia or rot_axis vectors) + """ + vec=list() + if self._vf is not None: + for f in self._vf: + vec.append(f()) #Run all of the functions in self._vf + return np.concatenate(vec,axis=1) + else: + print('No vector functions defined, run vec_special first') + +# def select_atoms(self,sel1=None,sel2=None,sel1in=None,sel2in=None,index1=None,index2=None,Nuc=None,resi=None,select=None,**kwargs): + + def new_frame(self,Type=None,frame_index=None,**kwargs): + """ + Create a new frame, where possible frame types are found in vec_funs. + Note that if the frame function produces a different number of reference + frames than there are bonds (that is, vectors produced by the tensor + frame), then a frame_index is required, to map the frame to the appropriate + bond. The length of the frame_index should be equal to the number of + vectors produced by the tensor frame, and those elements should have + values ranging from 0 to one minus the number of frames defined by this + frame. + + To get a list of all implemented frames and their arguments, call this + function without any arguments. To get arguments for a particular frame, + call this function with only Type defined. 
+ """ + if Type is None: + print_frame_info() + elif len(kwargs)==0: + print_frame_info(Type) + else: + assert self._vft is not None,'Define the tensor frame first (run mol.tensor_frame)' + vft=self._vft() + nb=vft[0].shape[1] if len(vft)==2 else vft.shape[1] #Number of bonds in the tensor frame + if self._vf is None: self._vf=list() + fun,fi,*_=new_fun(Type,self,**kwargs) + if frame_index is None:frame_index=fi #Assign fi to frame_index if frame_index not provided + f=fun() #Output of the vector function (test its behavior) + nf=f[0].shape[1] if len(f)==2 else f.shape[1] + if fun is not None: + "Run some checks on the validity of the frame before storing it" + if frame_index is not None: + assert frame_index.size==nb,'frame_index size does not match the size of the tensor_fun output' + assert frame_index[np.logical_not(np.isnan(frame_index))].max()=7 and base[0:7]=='rmsfit_' and overwrite: + print('Warning: re-aligned a trajectory that has already been aligned before') + + filename='rmsfit_'+base + newfile=os.path.join(directory,filename) + + + "Load the file if it already exists" + if not(overwrite) and os.path.exists(newfile): + print('Aligned file:\n {0}\n already exists. 
Loading existing file'.format(newfile)) + print('To re-calculate aligned trajectory, set overwrite=True') + self.load_struct(uni.filename,newfile) + else: + "Create a reference pdb (the first frame of the trajectory)" + if self.pdb is not None: + "We won't delete an existing pdb" + pdb=self.pdb + pdb_id=self.pdb_id + self.pdb=None + else: + pdb=None + + "Get the reference pdb from the first trajectory" + self.MDA2pdb(tstep=0,select=None) + + ref=mda.Universe(uni.filename,self.pdb) + + alignment=mda.analysis.align.AlignTraj(uni,ref,select=select,verbose=True,pbc=True) + alignment.run() + + if newfile!=alignment.filename: + print('Warning: Unexpected filename used by MDanalysis') + + self.load_struct(uni.filename,alignment.filename) + + "Reload existing pdb if given" + if pdb is not None: + os.remove(self.pdb) + self.pdb=pdb + self.pdb_id=pdb_id + + + "Reset the selections" + if self.sel1 is not None: + self.sel1=self.mda_object.atoms[self.sel1.indices] + if self.sel2 is not None: + self.sel2=self.mda_object.atoms[self.sel2.indices] + try: + self.set_selection() + except: + pass + + + return + + def del_MDA_object(self): + """ + In some cases, it is necessary to delete the MD analysis objects + (for example, when saving, we can't pickle the MD object). 
This function + deletes the object after first saving information required to reload + it and the atom selections + """ + if self.mda_object is None: + "Do nothing if no universe is stored" + return + else: + uni=self.mda_object + info=dict() + self.__MDA_info=info + "Save the filenames used for the universe" + info.update({'filename':uni.filename}) + if hasattr(uni.trajectory,'filenames'): + info.update({'filenames':uni.trajectory.filenames}) + elif hasattr(uni.trajectory,'filename'): + info.update({'filenames':np.atleast_1d(uni.trajectory.filename)}) + + "Save the id numbers of the selections" + if self.sel1 is not None: + info.update({'sel1':self.sel1.ids}) + if self.sel2 is not None: + info.update({'sel2':self.sel2.ids}) + + "Set the MD analysis objects to None" + self.mda_object=None + self.sel1=None + self.sel2=None + + def reload_MDA(self): + if self.__MDA_info is None: + "Do nothing if MD analysis object hasn't been deleted" + return + info=self.__MDA_info + if 'filenames' in info: + uni=mda.Universe(info['filename'],info['filenames'].tolist()) + else: + uni=mda.Universe(info['filename']) + self.mda_object=uni + + sel0=uni.atoms + if 'sel1' in info: + self.sel1=sel0[info['sel1']-1] + if 'sel2' in info: + self.sel2=sel0[info['sel2']-1] + + self.__MDA_info=None + + def copy(self,type='deep'): + """ + | + |Returns a copy of the object. 
Default is deep copy (all objects except the + |MDanalysis object, mda_object) + | obj = obj0.copy(type='deep') + |To also create a copy of the molecule object, set type='ddeep' + |To do a shallow copy, set type='shallow' + """ + if type=='ddeep': + out=copy.deepcopy(self) + elif type!='deep': + out=copy.copy(self) + else: + uni=self.mda_object + self.mda_object=None + out=copy.deepcopy(self) + self.mda_object=uni + out.mda_object=uni + return out + + def __del__(self): + if self.pdb is not None and os.path.exists(self.pdb): + os.remove(self.pdb) + \ No newline at end of file diff --git a/pyDIFRATE/Struct/user_frames.py b/pyDIFRATE/Struct/user_frames.py new file mode 100644 index 0000000..908d742 --- /dev/null +++ b/pyDIFRATE/Struct/user_frames.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Feb 6 10:45:12 2020 + +@author: albertsmith +""" + +""" +Use this file to write your own frame definitions. Follow the formating found in +frames.py. A few critical points: + + 1) The first argument must be "molecule", where this refers to the molecule + object of pyDIFRATE + 2) The output of this function must be another function. + 3) The returned function should not require any input arguments. 
It should + only depend on the current time point in the MD trajectory (therefore, + calling this function will return different results as one advances through + the trajectory). + 4) The output of the sub-function should be one or two vectors (if the + frame is defined by just a bond direction, for example, then one vector. If + it is defined by some 3D object, say the peptide plane, then two vectors + should be returned) + 5) Each vector returned should be a numpy array, with dimensions 3xN. The + rows corresponds to directions x,y,z. The vectors do not need to be normalized + + 6) Be careful of factors like periodic boundary conditions, etc. In case of + user frames and in the built-in definitions (frames.py) having the same name, + user frames will be given priority. + 7) The outer function must have at least one required argument aside from + molecule. By default, calling molecule.new_frame(Type) just returns a list + of input arguments. + + + Ex. + def user_frame(molecule,arguments...): + some_setup + sel1,sel2,...=molecule_selections (use select_tools for convenience) + ... + uni=molecule.mda_object + + def sub() + ... + v1,v2=some_calculations + ... + box=uni.dimensions[:3] (periodic boundary conditions) + v1=vft.pbc_corr(v1,box) + v2=vft.pbc_corr(v2,box) + + return v1,v2 + return sub + +""" + + + +import numpy as np +import pyDIFRATE.Struct.vf_tools as vft +import pyDIFRATE.Struct.select_tools as selt + diff --git a/pyDIFRATE/Struct/vec_funs.py b/pyDIFRATE/Struct/vec_funs.py new file mode 100644 index 0000000..f9106c9 --- /dev/null +++ b/pyDIFRATE/Struct/vec_funs.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). 
+ +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Wed Aug 21 13:21:49 2019 + +@author: albertsmith +""" + + +import pyDIFRATE.Struct.frames as frames +import pyDIFRATE.Struct.user_frames as user_frames +import pyDIFRATE.Struct.special_frames as special_frames +import sys + +fr=[user_frames,special_frames,frames] + + +def new_fun(Type,molecule,**kwargs): + """ + Creates a function to calculate a particular vector(s) from the MD trajectory. 
+ Mainly responsible for searching the vec_funs files for available functions and + returning the appropriate function if found (Type determines which function to use) + + Required arguments are Type (string specifying the function to be used) and + a molecule object (contains the MDAnalysis object) + + fun=new_fun(Type,molecule,**kwargs) + + """ + + fun0=None + for f in fr: + if is_valid(f,Type): + fun0=f.__dict__[Type] + break + if fun0 is None: + raise Exception('Frame "{0}" was not recognized'.format(Type)) + + if len(kwargs)==0: + print_frame_info(Type) + return + + try: + fun=fun0(molecule,**kwargs) + except: + print_frame_info(Type) + assert 0,'Frame definition failed (frame function could not be created),\n'+\ + 'Error:{0}, {1}'.format(*sys.exc_info()[:2]) + + frame_index=None + info={} + if hasattr(fun,'__len__') and len(fun)==2:fun,frame_index=fun + if hasattr(fun,'__len__') and len(fun)==3:fun,frame_index,info=fun + + try: + fun() + except: + assert 0,'Frame function failed to run, ,\n'+\ + 'Error:{0}, {1}'.format(*sys.exc_info()[:2]) + + return fun,frame_index,info + +def return_frame_info(Type=None): + """ + Provides information as to what frames are available, and what arguments they + take. 
+ + frames=return_frame_info() Returns list of the frames + + args=return_frame_info(Type) Returns argument list and help info for Type + """ + + if Type is None: + fun_names=list() + for f in fr: + for n in dir(f): + if is_valid(f,n): + fun_names.append(n) + return fun_names + else: + for f in fr: + if is_valid(f,Type): + code=f.__dict__[Type].__code__ + return code.co_varnames[1:code.co_argcount] + + print('Frame "{0}" is not implemented'.format(Type)) + return + +def print_frame_info(Type=None): + """ + Prints out some information about the possible frames + """ + + if Type is None: + fun_names=return_frame_info() + print('Implemented frames are:') + for f in fun_names: + args=return_frame_info(f) + print('"{0}" with arguments {1}'.format(f,args)) + else: + args=return_frame_info(Type) + if args is not None: + print('"{0}" has arguments {1}'.format(Type,args)) + +def is_valid(mod,Type): + """ + Checks if a frame is included in a given module + """ + return Type in dir(mod) and hasattr(mod.__dict__[Type],'__code__') and\ + mod.__dict__[Type].__code__.co_varnames[0]=='molecule' \ No newline at end of file diff --git a/pyDIFRATE/Struct/vf_tools.py b/pyDIFRATE/Struct/vf_tools.py new file mode 100644 index 0000000..c05a78b --- /dev/null +++ b/pyDIFRATE/Struct/vf_tools.py @@ -0,0 +1,1143 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Wed Nov 27 13:21:51 2019 + +@author: albertsmith +""" + +""" +Library of functions to deal with vectors and tensors, used for aligning tensors +and vectors into different frames. We assume all vectors provided are 2D numpy +arrays, with the first dimension being X,Y,Z (we do not deal with time- +dependence in these functions. This is obtained by sweeping over the trajectory. +Frames are processed at each time point separately) +""" + + +""" +Rotations are +""" + + +import numpy as np +from scipy.linalg import svd + +#%% Periodic boundary condition check +def pbc_corr(v0,box): + """ + Corrects for bonds that may be extended across the box. Our assumption is + that no vector should be longer than half the box. If we find such vectors, + we will add/subtract the box length in the appropriate dimension(s) + + Input should be 3xN vector and 3 element box dimensions + + v = pbc_corr(v0,box) + """ + + "Copy input, take tranpose for easier calculation" + v=v0.copy() + if v.shape[0]==3: + v=v.T + tp=True + else: + tp=False + + + i=v>box/2 + ib=np.argwhere(i).T[1] + v[i]=v[i]-box[ib] + + i=v<-box/2 + ib=np.argwhere(i).T[1] + v[i]=v[i]+box[ib] + + if tp: + return v.T + else: + return v + +#%% Periodic boundary condition for positions +def pbc_pos(v0,box): + """ + Sometimes, we are required to work with an array of positions instead of + a pair of positions (allowing easy calculation of a vector and determining + if the vector wraps around the box). In this case, we take differences + between positions, and make sure the differences don't yield a step around + the box edges. The whole molecule, however, may jump around the box after + this correction. This shouldn't matter, since all calculations are orientational, + so the center position is irrelevant. 
+ + Input is a 3xN vector and a 3 element box + """ + + v=np.concatenate((np.zeros([3,1]),np.diff(v0,axis=1)),axis=1) + v=pbc_corr(v,box) + + return np.cumsum(v,axis=1)+np.atleast_2d(v0[:,0]).T.repeat(v.shape[1],axis=1) + + +#%% Vector normalization +def norm(v0): + """ + Normalizes a vector to a length of one. Input should be a 3xN vector. + """ + if v0 is None: + return None + +# X,Y,Z=v0 +# length=np.sqrt(X**2+Y**2+Z**2) +# +# return v0/length + + return v0/np.sqrt((v0**2).sum(0)) + +#%% Reverse rotation direction (passive/active) +def pass2act(cA,sA,cB,sB=None,cG=None,sG=None): + """ + After determining a set of euler angles, we often want to apply them to go + into the reference frame corresponding to those angles. This requires + reversing the rotation, performed by this function + + -gamma,-beta,-alpha=pass2act(alpha,beta,gamma) + + or + + cG,-sG,cB,-sB,cA,-sA=pass2act(cA,sA,cB,sB,cG,sG) + """ + + if sB is None: + return -cB,-sA,-cA + else: + return cG,-sG,cB,-sB,cA,-sA + +#%% Change sines and cosines to angles +def sc2angles(cA,sA=None,cB=None,sB=None,cG=None,sG=None): + """ + Converts cosines and sines of angles to the angles themselves. Takes one or + three cosine/sine pairs. Note, if an odd number of arguments is given (1 or 3), + we assume that this function has been called using angles instead of cosines + and sines, and simply return the input. + """ + if sA is None: + return cA + elif cB is None: + return np.mod(np.arctan2(sA,cA),2*np.pi) + elif sB is None: + return np.array([cA,sA,cB]) + else: + return np.mod(np.array([np.arctan2(sA,cA),np.arctan2(sB,cB),np.arctan2(sG,cG)]),2*np.pi) + +#%% Frame calculations +def getFrame(v1,v2=None,return_angles=False): + """ + Calculates the sines and cosines of the euler angles for the principle axis + system of a frame defined by one or two vectors. The new frame has v1 along + its z-axis and if a second vector is provided, then the second vector lies + in the xz plane of the frame. 
+ + We use zyz convention (alpha,beta,gamma), where rotation into a frame is + achieved by first applying gamma: + X,Y=cos(gamma)*X+sin(gamma)*Y,-sin(gamma)*X+cos(gamma)*Y + Then applying beta: + X,Z=cos(beta)*X-sin(beta)*Z,sin(beta)*X+cos(beta)*Z + Finally alpha: + X,Y=cos(alpha)*X+sin(alpha)*Y,-sin(alpha)*X+cos(alpha)*Y + + gamma=arctan2(Y,X) + beta=arccos(Z) + alpha=arctan(Y1,X1) (Y1 and X1 after applying gamma and beta!) + + Note that we do not return alpha,beta,gamma! Instead, we return + cos(alpha),sin(alpha),cos(beta),sin(beta),cos(gamma),sin(gamma)! + + If only one vector is provided, then we simply require that this + vector lies along z, achieved by rotating the shortest distance to the + z-axis. Then, the euler angles are (-gamma,beta,gamma) + + + cA,sA,cB,sB,cG,sG = getFrame(v1,v2) + + or + + cG,-sG,cB,sB,cG,sG = getFrame(v1) + + Finally, if you need the angles themselves: + + alpha,beta,gamma = getFrame(v1,v2,return_angles=True) + """ + + if np.ndim(v1)==1: + v1=np.atleast_2d(v1).T + oneD=True + if v2 is not None: + v2=np.atleast_2d(v2).T + else: + oneD=False + + "Normalize" + X,Y,Z=norm(v1) + + "Gamma" + lenXY=np.sqrt(X**2+Y**2) + i=lenXY==0 + lenXY[i]=1 #cG and sG will be 0 since X and Y are zero + cG,sG=X/lenXY,Y/lenXY + cG[i]=1. #Set cG to 1 where cG/sG is undefined (gamma=0) + + "Beta" + cB,sB=Z,np.sqrt(1-Z**2) + + "Alpha" + if v2 is None: +# cA,sA=np.ones(cG.shape),np.zeros(sG.shape) + cA,sA=cG,-sG + else: + v2=Rz(v2,cG,-sG) + X,Y,_=Ry(v2,cB,-sB) + + lenXY=np.sqrt(X**2+Y**2) + i=lenXY==0 + lenXY[i]=1 #cA and sA will be 0 since X and Y are zero + cA,sA=X/lenXY,Y/lenXY + cA[i]=1. #Now set cG to 1 where cG/sG undefined (alpha=0) + i=np.isnan(lenXY) + cA[i],sA[i]=cG[i],-sG[i] #nan also gets set to -gamma + + if oneD: + cA,sA,cB,sB,cG,sG=cA[0],sA[0],cB[0],sB[0],cG[0],sG[0] + #Recently added. 
May need removed if errors occur 11.09.2021 + + if return_angles: + return sc2angles(cA,sA,cB,sB,cG,sG) + else: + return cA,sA,cB,sB,cG,sG + + +def applyFrame(*vecs,nuZ_F=None,nuXZ_F=None): + """ + Applies a frame, F, to a set of vectors, *vecs, by rotating such that the + vector nuZ_F lies along the z-axis, and nuXZ_F lies in the xz-plane. Input + is the vectors (as *vecs, so list separately, don't collect in a list), and + the frame, defined by nuZ_F (a vector on the z-axis of the frame), and + optionally nuXZ_F (a vector on xy-axis of the frame). These must be given + as keyword arguments. + + vecs_F = applyFrame(*vecs,nuZ_F=nuZ_F,nuXZ_F=None,frame_index=None) + + Note, one may also omit the frame application and just apply a frame index + """ + + if nuZ_F is None: + out=vecs + else: + sc=pass2act(*getFrame(nuZ_F,nuXZ_F)) + out=[None if v is None else R(v,*sc) for v in vecs] + + if len(vecs)==1: + return out[0] + else: + return out + +#%% Apply/invert rotations +def Rz(v0,c,s=None): + """ + Rotates a vector around the z-axis. One must provide the vector(s) and either + the angle itself, or the cosine(s) and sine(s) of the angle(s). The number + of vectors must match the number of angles, or only one angle is provided + + v=Rz(v0,c,s) + + or + + v=Rz(v0,theta) + """ + + if s is None: + c,s=np.cos(c),np.sin(c) + + X,Y,Z=v0.copy() + + X,Y=c*X-s*Y,s*X+c*Y + Z=np.ones(X.shape)*Z + + return np.array([X,Y,Z]) + +def Ry(v0,c,s=None): + """ + Rotates a vector around the y-axis. One must provide the vector(s) and either + the angle itself, or the cosine(s) and sine(s) of the angle(s). The number + of vectors must match the number of angles, or only one angle is provided + + v=Ry(v0,c,s) + + or + + v=Ry(v0,theta) + """ + + if s is None: + c,s=np.cos(c),np.sin(c) + + X,Y,Z=v0.copy() + + X,Z=c*X+s*Z,-s*X+c*Z + Y=np.ones(c.shape)*Y + + return np.array([X,Y,Z]) + +def R(v0,cA,sA,cB,sB=None,cG=None,sG=None): + """ + Rotates a vector using ZYZ convention. 
One must provide the vector(s) and + either the euler angles, or the cosine(s) and sine(s) of the angle(s). The + number of vectors must match the number of angles, or only one angle is + provided for alpha,beta,gamma (or the sines/cosines of alpha,beta,gamma) + + v=R(v0,cA,sA,cB,sB,cG,sG) + + or + + v=R(v0,alpha,beta,gamma) + """ + if v0 is None: + return None + + if sB is None: + cA,sA,cB,sB,cG,sG=np.cos(cA),np.sin(cA),np.cos(sA),np.sin(sA),np.cos(cB),np.sin(cB) + + return Rz(Ry(Rz(v0,cA,sA),cB,sB),cG,sG) + +def Rfull(cA,sA,cB,sB=None,cG=None,sG=None): + """ + Returns a ZYZ rotation matrix for one set of Euler angles + """ + + if sB is None: + a=cA + b=sA + g=cB + cA,sA,cB,sB,cG,sG=np.cos(a),np.sin(a),np.cos(b),np.sin(b),np.cos(g),np.sin(g) + + return np.array([[cA*cB*cG-sA*sG,-cG*sA-cA*cB*sG,cA*sB],\ + [cA*sG+cB*cG*sA,cA*cG-cB*sA*sG,sA*sB],\ + [-cG*sB,sB*sG,cB]]) + +def euler_prod(*euler,return_angles=False): + """ + Calculates the product of a series of euler angles. Input is a list, where + each list element is a set of euler angles. Each set of euler angles may be + given as a list of 3 elements (alpha,beta,gamma) or six elements + (ca,sa,cb,sb,cg,sg). + + The individual elements (alpha,beta,gamma, ca, sa, etc.) may have any size, + although all sizes used should be the same or consistent for broadcasting + + ca,sa,cb,sb,cg,sg=euler_prod(euler1,euler2,...,return_angles=False) + + or + + alpha,beta,gamma=euler_prod(euler1,euler2,...,return_angles=True) + """ + + if len(euler)==1: #I think this is here in case a list is provided instead of multiple inputs + euler=euler[0] + + vZ=[0,0,1] #Reference vectors + vX=[1,0,0] + + for sc in euler: + vZ=R(vZ,*sc) + vX=R(vX,*sc) + + return getFrame(vZ,vX,return_angles) + + +def Rspher(rho,cA,sA,cB,sB=None,cG=None,sG=None): + """ + Rotates a spherical tensor, using angles alpha, beta, and + gamma. The cosines and sines may be provided, or the angles directly. + + One may provide multiple rho and/or multiple angles. 
If a single rho vector + is given (5,), then any shape of angles may be used, and similarly, if a single + set of euler angles is used, then any shape of rho may be used (the first + dimension must always be 5). Otherwise, standard broadcasting rules apply + (the last dimensions must match in size) + + rho_out = Rspher(rho,alpha,beta,gamma) + + or + + rho_out = Rspher(rho,cA,sA,cB,sB,cG,sG)- cosines and sines of the angles + """ + + + for k,r in enumerate(rho): + M=D2(cA,sA,cB,sB,cG,sG,mp=k-2,m=None) #Rotate from mp=k-2 to all new components + if k==0: + rho_out=M*r + else: + rho_out+=M*r + return rho_out + + + +def R2euler(R,return_angles=False): + """ + Input a rotation matrix in cartesian coordinates, and return either the + euler angles themselves or their cosines and sines(default) + + cA,sA,cB,sB,cG,sG = R2euler(R) + + or + + alpha,beta,gamma = R2euler(R,return_angles=True) + + R can be a list of matrices + """ + +# R = np.array([R]) if np.ndim(R)==2 else np.array(R) + + + """ + Note that R may be the result of an eigenvector decomposition, and does + not guarantee that R is a proper rotation matrix. We can check the sign + on the determinant: if it is 1, it's a proper rotation, if it's -1, it's not + Then, we just multiply each matrix by the result to have only proper + rotations. + + """ + sgn=np.sign(np.linalg.det(R)) + + if np.ndim(R)>2: #Bring the dimensions of the R matrix to the first two dimensions + for m in range(0,R.ndim-2): + for k in range(0,R.ndim-1):R=R.swapaxes(k,k+1) + R=R*sgn + + if R.ndim>2: + cB=R[2,2] + cB[cB>1]=1. #Some clean-up to make sure we don't get imaginary terms later (cB cannot exceed 1- only numerical error causes this) + cB[cB<-1]=-1. 
+ sB=np.sqrt(1.-cB**2) + i,ni=sB!=0,sB==0 + cA,sA,cG,sG=np.ones(i.shape),np.zeros(i.shape),np.ones(i.shape),np.zeros(i.shape) + cA[i]=R[2,0,i]/sB[i] #Sign swap, 30.09.21 + sA[i]=R[2,1,i]/sB[i] + cG[i]=-R[0,2,i]/sB[i] #Sign swap, 30.09.21 + sG[i]=R[1,2,i]/sB[i] + + cG[ni]=R[0,0,ni] + sG[ni]=-R[1,0,ni] #Sign swap, 30.09.21 + else: + cB=R[2,2] + if cB>1:cB=1 + if cB<-1:cB=-1 + sB=np.sqrt(1-cB**2) + if sB>0: + cA=R[2,0]/sB #Sign swap, 30.09.21 + sA=R[2,1]/sB + cG=-R[0,2]/sB #Sign swap, 30.09.21 + sG=R[1,2]/sB + else: + cA,sA=1,0 + cG=R[0,0] + sG=-R[1,0] #Sign swap, 30.09.21 + + + if return_angles: + return sc2angles(cA,sA,cB,sB,cG,sG) + else: + return np.array((cA,sA,cB,sB,cG,sG)) + +def R2vec(R): + """ + Given a rotation matrix, R, this function returns two vectors, v1, and v2 + that have been rotated from v10=[0,0,1] and v20=[1,0,0] + + v1=np.dot(R,v10) + v2=np.dot(R,v20) + + If a frame is defined by a rotation matrix, instead of directly by a set of + vectors, then v1 and v2 have the same Euler angles to rotate back to their + PAS as the rotation matrix + + R may be a list of rotation matrices + + Note: v1, v2 are trivially given by R[:,:,2] and R[:,:,0] + """ + R = np.array([R]) if np.ndim(R)==2 else np.array(R) + + v1=R[:,:,2] + v2=R[:,:,0] + + return v1.T,v2.T + + +#%% Tensor operations +def d2(c=0,s=None,m=None,mp=0): + """ + Calculates components of the d2 matrix. By default only calculates the components + starting at m=0 and returns five components, from -2,-1,0,1,2. 
One may also + edit the starting component and select a specific final component + (mp=None returns all components, whereas mp may be specified between -2 and 2) + + d2_m_mp=d2(m,mp,c,s) #c and s are the cosine and sine of the desired beta angle + + or + + d2_m_mp=d2(m,mp,beta) #Give the angle directly + + Setting mp to None will return all values for mp in a 2D array + + (Note that m is the final index) + """ + + if s is None: + c,s=np.cos(c),np.sin(c) + + """ + Here we define each of the components as functions. We'll collect these into + an array, and then call them out with the m and mp indices + """ + "First, for m=-2" + + if m is None or mp is None: + if m is None and mp is None: + print('m or mp must be specified') + return + elif m is None: + if mp==-2: + index=range(0,5) + elif mp==-1: + index=range(5,10) + elif mp==0: + index=range(10,15) + elif mp==1: + index=range(15,20) + elif mp==2: + index=range(20,25) + elif mp is None: + if m==-2: + index=range(0,25,5) + elif m==-1: + index=range(1,25,5) + elif m==0: + index=range(2,25,5) + elif m==1: + index=range(3,25,5) + elif m==2: + index=range(4,25,5) + else: + index=[(mp+2)*5+(m+2)] + + out=list() + for i in index: + #mp=-2 + if i==0:x=0.25*(1+c)**2 + if i==1:x=0.5*(1+c)*s + if i==2:x=np.sqrt(3/8)*s**2 + if i==3:x=0.5*(1-c)*s + if i==4:x=0.25*(1-c)**2 + #mp=-1 + if i==5:x=-0.5*(1+c)*s + if i==6:x=c**2-0.5*(1-c) + if i==7:x=np.sqrt(3/8)*2*c*s + if i==8:x=0.5*(1+c)-c**2 + if i==9:x=0.5*(1-c)*s + #mp=0 + if i==10:x=np.sqrt(3/8)*s**2 + if i==11:x=-np.sqrt(3/8)*2*s*c + if i==12:x=0.5*(3*c**2-1) + if i==13:x=np.sqrt(3/8)*2*s*c + if i==14:x=np.sqrt(3/8)*s**2 + #mp=1 + if i==15:x=-0.5*(1-c)*s + if i==16:x=0.5*(1+c)-c**2 + if i==17:x=-np.sqrt(3/8)*2*s*c + if i==18:x=c**2-0.5*(1-c) + if i==19:x=0.5*(1+c)*s + #mp=2 + if i==20:x=0.25*(1-c)**2 + if i==21:x=-0.5*(1-c)*s + if i==22:x=np.sqrt(3/8)*s**2 + if i==23:x=-0.5*(1+c)*s + if i==24:x=0.25*(1+c)**2 + out.append(x) + + if m is None or mp is None: + return np.array(out) 
+ else: + return out[0] + +def D2(cA=0,sA=0,cB=0,sB=None,cG=None,sG=None,m=None,mp=0): + """ + Calculates components of the Wigner rotation matrix from Euler angles or + from the list of sines and cosines of those euler angles. All vectors must + be the same size (or have only a single element) + + mp and m should be specified. m may be set to None, so that all components + are returned in a 2D array + + D2_m_mp=D2(m,mp,cA,sA,cB,sB,cG,sG) #Provide sines and cosines + + or + + D2_m_mp=D2(m,mp,alpha,beta,gamma) #Give the angles directly + + (Note that m is the final index) + """ + if sB is None: + cA,sA,cB,sB,cG,sG=np.cos(cA),np.sin(cA),np.cos(sA),np.sin(sA),np.cos(cB),np.sin(cB) + + + d2c=d2(cB,sB,m,mp) + + "Rotation around z with alpha (mp)" + if mp is None: + ea1=cA-1j*sA + eam1=cA+1j*sA + ea2=ea1**2 + eam2=eam1**2 + ea0=np.ones(ea1.shape) + ea=np.array([eam2,eam1,ea0,ea1,ea2]) + else: + if mp!=0: + ea=cA-1j*np.sign(mp)*sA + if np.abs(mp)==2: + ea=ea**2 + else: + ea=1 + + "Rotation around z with gamma (m)" + if m is None: + eg1=cG-1j*sG + egm1=cG+1j*sG + eg2=eg1**2 + egm2=egm1**2 + eg0=np.ones(eg1.shape) + eg=np.array([egm2,egm1,eg0,eg1,eg2]) + else: + if m!=0: + eg=cG-1j*np.sign(m)*sG + if np.abs(m)==2: + eg=eg**2 + else: + eg=1 + + return ea*d2c*eg + + +def D2vec(v1,v2=None,m=None,mp=0): + """ + Calculates the Wigner rotation elements that bring a vector or vectors from + their own principle axis system into a reference frame (whichever frame + v1 and v2 are defined in) + """ + + cA,sA,cB,sB,cG,sG=getFrame(v1,v2) + "I think these are already the passive angles above" + + return D2(cA,sA,cB,sB,cG,sG,m,mp) + +def getD2inf(v,n=2500): + """ + Calculates the expectation value of the Spherical components of the D2 rotation + elements, that is + lim t->oo _tau + + These are estimated given a vector v. Note, we are always performing averaging + from the PAS of a vector into a given frame. 
Then, there should never be + a contribution from asymmetry (arguably, we could correct for eta in case + of CSA or quadrupolar relaxation, but we won't do that here) + + n specifies the maximum number of time points to take from a vector. Default + is 500, although setting to None will set N=v.shape[-1] + """ + + if n is None or v.shape[-1]oo _tau + + Provide normalized x,y,z and the desired component + """ + + if m==0: + D2avg=-1/2 + for alpha in [x,y,z]: + for beta in [x,y,z]: + D2avg+=3/2*((alpha*beta).mean())**2 + return D2avg + + "Beta" + cb,sb=z,np.sqrt(1-z**2) + "Gamma" + lenXY=np.sqrt(x**2+y**2) + i=lenXY==0 + lenXY[i]=1 #cG and sG will be 0 since x and y are both zero + cg,sg=x/lenXY,y/lenXY + cg[i]=1. #Set cG to 1 where cG/sG is undefined (set gamma=0) + + n=x.shape[0] + + D2avg=np.zeros(5,dtype=complex) + if m is None: #Get all components + m1=[-2,-1,0,1,2] + else: + m1=[m] + + + for cb0,sb0,cg0,sg0 in zip(cb,sb,cg,sg): + x1,y1,z1=x*cg0+y*sg0,-x*sg0+y*cg0,z + x2,y2,z2=x1*cb0-z1*sb0,y1,x1*sb0+z1*cb0 #vectors in frame of current element + + for m0 in m1: + if m0==-2: + D2avg[0]+=np.sqrt(3/8)*((x2+1j*y2)**2).mean() + elif m0==-1: + D2avg[1]+=-np.sqrt(3/2)*((x2+1j*y2)*z2).mean() + elif m0==1: + if m is not None: #Don't repeat this calculation if None + D2avg[3]+=np.sqrt(3/2)*((x2-1j*y2)*z2).mean() + elif m0==2: + if m is not None: #Same as above + D2avg[4]+=np.sqrt(3/8)*((x2-1j*y2)**2).mean() + + + if m is not None: + D2avg=D2avg[m+2]/n + else: + for k in range(2): + D2avg[k]=D2avg[k]/n + D2avg[3]=-np.conjugate(D2avg[1]) + D2avg[4]=np.conjugate(D2avg[0]) + + d2=-1/2 + for alpha in [x,y,z]: + for beta in [x,y,z]: + d2+=3/2*((alpha*beta).mean())**2 + D2avg[2]=d2 + + return D2avg + +# x1,y1,z1=np.dot(np.array([x]).T,np.array([cg]))+np.dot(np.array([y]).T,np.array([sg])),\ +# -np.dot(np.array([x]).T,np.array([sg]))+np.dot(np.array([y]).T,np.array([cg])),np.repeat(np.array([z]).T,z.size,axis=1) +# x2,y2,z2=x1*cb-z1*sb,y1,+x1*sb+z1*cb +# + + +# if m==-2: +# 
return np.sqrt(3/8)*((x2+1j*y2)**2).mean() +# elif m==-1: +# return -np.sqrt(3/2)*((x2+1j*y2)*z2).mean() +# elif m==1: +# return np.sqrt(3/2)*((x2-1j*y2)*z2).mean() +# elif m==2: +# return np.sqrt(3/8)*((x2-1j*y2)**2).mean() + +def D2inf_v2(vZ,m=None): + if m is None: + m1=[-2,-1,0] + else: + m1=[m] + + if m!=0: + sc=getFrame(vZ) + vX=R([1,0,0],*sc) + vY=R([0,1,0],*sc) + + + if vZ.ndim==2: + N=0 + else: + N=vZ.shape[1] + + D2inf=list() + + for m0 in m1: + if N==0: + d2=np.array(0,dtype=complex) + else: + d2=np.zeros(N,dtype=complex) + + if m0==-2: + for ax,ay,az in zip(vX,vY,vZ): + for bx,by,bz in zip(vX,vY,vZ): + d2+=np.sqrt(3/8)*((ax*bx).mean(axis=-1)-(ay*by).mean(axis=-1))*(az*bz).mean(axis=-1)\ + +1j*np.sqrt(3/2)*(ax*by).mean(axis=-1)*(az*bz).mean(axis=-1) + elif m0==-1: + for ax,ay,az in zip(vX,vY,vZ): + for bz in vZ: + d2+=-np.sqrt(3/2)*(ax*bz).mean(axis=-1)*(az*bz).mean(axis=-1)\ + +1j*np.sqrt(3/2)*(ay*bz).mean(axis=-1)*(az*bz).mean(axis=-1) + elif m0==0: + d2+=-1/2 + for az in vZ: + for bz in vZ: + d2+=3/2*(az*bz).mean(axis=-1)**2 + elif m0==1: + for ax,ay,az in zip(vX,vY,vZ): + for bz in vZ: + d2+=np.sqrt(3/2)*(ax*bz).mean(axis=-1)*(az*bz).mean(axis=-1)\ + +1j*np.sqrt(3/2)*(ay*bz).mean(axis=-1)*(az*bz).mean(axis=-1) + elif m0==2: + for ax,ay,az in zip(vX,vY,vZ): + for bx,by,bz in zip(vX,vY,vZ): + d2+=np.sqrt(3/8)*((ax*bx).mean(axis=-1)-(ay*by).mean(axis=-1))*(az*bz).mean(axis=-1)\ + -1j*np.sqrt(3/2)*(ax*by).mean(axis=-1)*(az*bz).mean(axis=-1) + D2inf.append(d2) + + if m is None: + D2inf.append(-np.conjugate(D2inf[1])) + D2inf.append(np.conjugate(D2inf[0])) + else: + D2inf=D2inf[0] + + return np.array(D2inf) + +def D2avgLF(vZ,m=None): + """ + """ + if m is None: + m1=[-2,-1,0] + else: + m1=[m] + + ca,sa,cb,sb,cg,sg=getFrame(vZ) + + +# if vZ.ndim==2: +# N=0 +# else: +# N=vZ.shape[1] + + D2avg=list() + + for m0 in m1: +# if N==0: +# d2=np.array(0,dtype=complex) +# else: +# d2=np.zeros(N,dtype=complex) + + if m0==-2: + 
d2=np.sqrt(3/8)*((cg*sb)**2-(sg*sb)**2+2*1j*sg*sb*cg*sb).mean(-1) + elif m0==-1: + d2=-np.sqrt(3/2)*(cg*sb*cb+1j*sg*sb*cb).mean(-1) + elif m0==0: + d2=1/2*(3*cb**2-1).mean(-1) + elif m0==1: + d2=np.sqrt(3/2)*(cg*sb*cb-1j*sg*sb*cb).mean(-1) + elif m0==2: + d2=np.sqrt(3/8)*((cg*sb)**2-(sg*sb)**2-2*1j*sg*sb*cg*sb).mean(-1) + D2avg.append(d2) + + if m is None: + D2avg.append(-np.conjugate(D2avg[1])) + D2avg.append(np.conjugate(D2avg[0])) + else: + D2avg=D2avg[0] + + return np.array(D2avg) + +def Spher2Cart(rho): + """ + Takes a set of components of a spherical tensor and calculates its cartesian + representation (as a vector, with components in order of Axx,Axy,Axz,Ayy,Ayz) + + Input may be a list (or 2D array), with each new column a new tensor + """ + + rho=np.array(rho,dtype=complex) + + M=np.array([[0.5,0,-np.sqrt(1/6),0,0.5], + [0.5*1j,0,0,0,-0.5*1j], + [0,0.5,0,-0.5,0], + [-0.5,0,-np.sqrt(1/6),0,-.5], + [0,.5*1j,0,.5*1j,0]]) + SZ0=rho.shape + SZ=[5,np.prod(SZ0[1:]).astype(int)] + out=np.dot(M,rho.reshape(SZ)).real + return out.reshape(SZ0) + + +def Spher2pars(rho,return_angles=False): + """ + Takes a set of components of a spherical tensor and calculates the parameters + describing that tensor (delta,eta,alpha,beta,gamma) + + + delta,eta,cA,sA,cB,sB,cG,sG=Spher2pars(rho) + + or + + delta,eta,alpha,beta,gamma=Spher2pars(rho,return_angles=True) + + + Input may be a list (or 2D array), with each new column a new tensor (5xN) + """ + + A0=Spher2Cart(rho) #Get the Cartesian tensor + if A0.ndim==1: + A0=np.atleast_2d(A0).T + + R=list() + delta=list() + eta=list() + + + for k,x in enumerate(A0.T): + Axx,Axy,Axz,Ayy,Ayz=x + A=np.array([[Axx,Axy,Axz],[Axy,Ayy,Ayz],[Axz,Ayz,-Axx-Ayy]]) #Full matrix + D,V=np.linalg.eigh(A) #Get eigenvalues, eigenvectors + i=np.argsort(np.abs(D)) + D,V=D[i[[1,0,2]]],V[:,i[[1,0,2]]] #Ordering is |azz|>=|axx|>=|ayy| + "V should have a determinant of +1 (proper vs. 
improper rotation)" + V=V*np.sign(np.linalg.det(V)) + delta.append(D[2]) + eta.append((D[1]-D[0])/D[2]) + R.append(V) + + delta=np.array(delta) + eta=np.array(eta) + euler=R2euler(np.array(R)) + + if return_angles: + euler=sc2angles(*euler) + + return np.concatenate(([delta],[eta],euler),axis=0) + + +def pars2Spher(delta,eta=None,cA=None,sA=None,cB=None,sB=None,cG=None,sG=None): + """ + Converts parameters describing a spherical tensor (delta, eta, alpha, beta, + gamma) into the tensor itself. All arguments except delta are optional. Angles + may be provided, or their cosines and sines may be provided. The size of the + elements should follow the rules required for Rspher. + """ + + if cA is None: + cA,sA,cB,sB,cG,sG=np.array([1,0,1,0,1,0]) + + if eta is None: + eta=np.zeros(np.shape(delta)) + + rho0=np.array([-0.5*eta*delta,0,np.sqrt(3/2)*delta,0,-0.5*eta*delta]) + + return Rspher(rho0,cA,sA,cB,sB,cG,sG) + + + +#%% RMS alignment +def RMSalign(v0,vref): + """ + Returns the optimal rotation matrix to rotate a set of vectors v0 to a set + of reference vectors, vref + + R=alignR(v0,vref) + + Uses the Kabsch algorithm. Assumes *vectors*, with origins at zero, not + points, so that no translation will be performed + (reference https://en.wikipedia.org/wiki/Kabsch_algorithm) + + We minimize + np.sum((np.dot(R,v0.T).T-vref)**2) + """ + + H=np.dot(v0,vref.T) + + U,S,Vt=svd(H) + V=Vt.T + Ut=U.T + + d=np.linalg.det(np.dot(V,Ut)) + m=np.eye(3) + m[2,2]=d #This is supposed to ensure a right-handed coordinate system + #But I think it could equivalently be thought of as making this a proper rotation(??) + + R=np.dot(V,np.dot(m,Ut)) + return R + + +#%% Fit points to a plane +def RMSplane(v,weight=None): + """ + For a set of points (v: 3xN array), calculates the normal vector for a plane + fitted to that set of points. 
May include a weighting (weight: N elements) + """ + v=np.array(norm(v)) + + "Default, uniform weighting" + if weight is None: + weight=np.ones(v.shape[1]) + + "Subtract away the centroid" + v=(v.T-v.mean(axis=1)).T + + """Applying weighting, taking singular value decomposition, return + row of U corresponding to the smallest(last) singular value""" + return svd(v*weight)[0].T[2] + +#%% Get principle axes of moment of inertia +def principle_axis_MOI(v): + """ + Calculates the principle axis system of the moment of inertia, without + considering weights of individual particles. A 3xN numpy array should be + provided. The smallest component of the moment of inertia is returned in the + 0 element, and largest in the 2 element + + Note- the directions of the principle axes can switch directions (180 deg) + between frames, due to the symmetry of the MOI tensor. This can be corrected + for with a reference vector. The dot product of the reference vector and the + vector for a given frame should remain positive. If it doesn't, then switch + the direction of the vector (v=v*np.sign(np.dot(v.T,v))) + """ + + """ + Ixx=sum_i m_i*(y_i^2+z_i^2) + Iyy=sum_i m_i*(x_i^2+z_i^2) + Izz=sum_i m_i*(x_i^2+y_i^2) + Ixy=Iyx=-sum_i m_i*x_i*y_i + Ixz=Izx=-sum_i m_i*x_i*z_i + Iyz=Izy=-sum_i m_i*y_i*z_i + """ + + + v=v-np.atleast_2d(v.mean(axis=1)).T.repeat(v.shape[1],axis=1) #v after subtracting center of mass + + H=np.dot(v,v.T) + + I=-1*H + I[0,0]=H[1,1]+H[2,2] + I[1,1]=H[0,0]+H[2,2] + I[2,2]=H[1,1]+H[0,0] + _,V=np.linalg.eigh(I) + + return V + +#%% Project onto axis +def projZ(v0,vr=[0,0,1]): + """ + Takes the projection of a vector, v0, onto another vector, vr. + + Input should be 3xN vectors (vnorm can also be a 1D, 3 element vector, or + both inputs can be 3 element vectors). 
+ + Input does not need to be normalized, but also note that output is not + normalized + + Default project is along z + """ +# v0=np.atleast_2d(v0) +# if np.ndim(vr)==1 or np.shape(vr)[1]==1: #Make matrices the same size +# vr=np.atleast_2d(vr).T.repeat(np.shape(v0)[1],axis=1) + + vr=norm(vr) + a=v0[0]*vr[0]+v0[1]*vr[1]+v0[2]*vr[2] + return np.array([a*vr[0],a*vr[1],a*vr[2]]) + +# return np.atleast_2d((norm(v0)*norm(vr)).sum(axis=0))*vr + +#%% Project onto plane +def projXY(v0,vnorm=[0,0,1]): + """ + Takes the projection of a vector, v0, onto a plane defined by its normal + vector, vnorm. + + Input should be 3xN vectors (vnorm can also be a 1D, 3 element vector, or + both inputs can be 3 element vectors). + + Input does not need to be normalized, but also note that output is not + normalized + """ + + return v0-projZ(v0,vnorm) + +#%% Sort by distance +def sort_by_dist(v,maxd=1e4): + """ + Returns an index that sorts a set of points such that each point is next + to its nearest neighbors in space in the vector itself (although points will + not repeat, so this has exceptions) + + Searchest for the point closest to (-Inf,-Inf,-Inf), then looks for its nearest + neighbor, and then searchest for the nearest neighhbor of the next point + (etc...) + + The purpose is that we can take a set of points, and take the difference in + position of each one to generate a set of vectors (which may be subsequently + corrected for periodic boundary conditions) + + Returns the sorting index, as oppposed to the vector itself + + i=sort_by_dist(v) + + such that v_sorted=v[i] + + Note 1: not a highly efficient algorithm- recommended only for setup of a + vector calculation, but should avoided inside loop over a trajectory + + Note 2: We presume here that the dimensions can't be larger than 1e4, rather + than assuming the dimension is arbitrarily large (creating some numeric + problems). 
If this is not true, set maxd to an appropriate value + """ + + v=v.copy() #Avoid editing the vector itself...never quite sure when this is necessary + X,Y,Z=v.T + + ref=maxd*2 + + i=list() + "Find the most negative element" + i.append(np.argmin((X+ref)**2+(Y+ref)**2+(Z+ref)**2)) + + for _ in range(X.size-1): + x,y,z=X[i[-1]].copy(),Y[i[-1]].copy(),Z[i[-1]].copy() + "Set the currect vector to be far away" + X[i[-1]],Y[i[-1]],Z[i[-1]]=ref*np.array([-2,-2,-2]) + "Find the nearest vector" + i.append(np.argmin((X-x)**2+(Y-y)**2+(Z-z)**2)) + + return i + + + + + \ No newline at end of file diff --git a/pyDIFRATE/__init__.py b/pyDIFRATE/__init__.py new file mode 100644 index 0000000..dbad790 --- /dev/null +++ b/pyDIFRATE/__init__.py @@ -0,0 +1,15 @@ +# __init__.py + + +from .Struct.structure import molecule +from .data.data_class import data +from .data import fitting +from .data import in_out as io +from .plots import plotting_funs as plotting +from .tools import DRtools as tools +from .iRED import Ct_fast +from .Struct import eval_fr as frames +from .chimera import chimeraX_funs as chimeraX + + + diff --git "a/pyDIFRATE/__pycache__/Icon\r" "b/pyDIFRATE/__pycache__/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/chimera/.DS_Store b/pyDIFRATE/chimera/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..cade251988684b85cd220ff5f969125f26dd463e GIT binary patch literal 6148 zcmeHKOG*Pl5PhvdgKk{vZcbou>24Co9}yQu!ClNGqC&WEJ}8Z6>RW~xj@EqX2=;I>+FZfai)Hzeg_4!R%x$M42pDbJSv8)ZsG_d ztsJ4o>ONBS3Pv;bDpfTk?5&Hjj&aGDO z3ojI>qqF}=)8S%|W{m-3;4K5Q-d&RWfAiz_|Jx$-i~(cdPch&sNh@h^N#SmtTAbXq s5%q*BB6+1pEujh5v3|%^yi8SLTO=REIAH0K5sLi?C=F(efgfey5ipx|ssI20 literal 0 HcmV?d00001 diff --git "a/pyDIFRATE/chimera/Icon\r" "b/pyDIFRATE/chimera/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/chimera/__init__.py b/pyDIFRATE/chimera/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
"a/pyDIFRATE/chimera/__pycache__/Icon\r" "b/pyDIFRATE/chimera/__pycache__/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/chimera/chimeraX_funs.py b/pyDIFRATE/chimera/chimeraX_funs.py new file mode 100644 index 0000000..b72017b --- /dev/null +++ b/pyDIFRATE/chimera/chimeraX_funs.py @@ -0,0 +1,1222 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Tue Jul 13 13:42:37 2021 + +@author: albertsmith + +Created on Wed Nov 11 15:13:30 2020 + +@author: albertsmith +""" + +import os +import numpy as np +import MDAnalysis as md +import matplotlib.pyplot as plt +from shutil import copyfile +#os.chdir('../Struct') +import pyDIFRATE.Struct.select_tools as selt +#os.chdir('../Struct') +from pyDIFRATE.Struct.vf_tools import Spher2pars,norm,getFrame,Rspher,pbc_corr,pars2Spher,sc2angles,pass2act +#os.chdir('../chimera') + +def chimera_path(**kwargs): + "Returns the location of the ChimeraX program" + + assert is_chimera_setup(),\ + "ChimeraX path does not exist. Run chimeraX.set_chimera_path(path) first, with "+\ + "path set to the ChimeraX executable file location." 
+ + with open(os.path.join(get_path(),'ChimeraX_program_path.txt'),'r') as f: + path=f.readline() + + return path + +def is_chimera_setup(): + "Determines whether chimeraX executable path has been provided" + return os.path.exists(os.path.join(get_path(),'ChimeraX_program_path.txt')) + +def clean_up(): + """Deletes chimera scripts and tensor files that may have been created but + not deleted + + (Under ideal circumstances, this shouldn't happen, but may occur due to errors) + """ + + names=[fn for fn in os.listdir(get_path()) \ + if fn.startswith('chimera_script') and fn.endswith('.py') and len(fn)==23] + + tensors=[fn for fn in os.listdir(get_path()) \ + if fn.startswith('tensors') and fn.endswith('.txt') and len(fn)==18] + + for n in names: + os.remove(os.path.join(get_path(),n)) + for t in tensors: + os.remove(os.path.join(get_path(),t)) + + print('{0} files removed'.format(len(names)+len(tensors))) + +def set_chimera_path(path): + """ + Stores the location of ChimeraX in a file, entitled ChimeraX_program_path.txt + + This function needs to be run before execution of Chimera functions (only + once) + """ + assert os.path.exists(path),"No file found at '{0}'".format(path) + + with open(os.path.join(get_path(),'ChimeraX_program_path.txt'),'w') as f: + f.write(path) + + +def run_command(**kwargs): + "Code to import runCommand from chimeraX" + return 'from chimerax.core.commands import run as rc\n' + +def get_path(filename=None): + """ + Determines the location of THIS script, and returns a path to the + chimera_script given by filename. + + full_path=get_path(filename) + """ + dir_path=os.path.dirname(os.path.realpath(__file__)) + return dir_path if filename is None else os.path.join(dir_path,filename) + +def WrCC(f,command,nt=0): + """Function to print chimera commands correctly, using the runCommand function + within ChimeraX. nt specifies the number of tabs in python to use. 
+ + f: File handle + command: command to print + nt: number of tabs + + WrCC(f,command,nt) + """ + for _ in range(nt): + f.write('\t') + f.write('rc(session,"{0}")\n'.format(command)) + + +def py_line(f,text,nt=0): + """ + Prints a line to a file for reading as python code. Inserts the newline and + also leading tabs (if nt specified) + + python_line(f,text,nt=0) + """ + + for _ in range(nt): + f.write('\t') + f.write(text) + f.write('\n') + +def guess_disp_mode(mol): + """ + Attempts to guess how to plot a molecule, based on the contents of the pdb + and if specified, the selections in mol.sel1 and mol.sel2. + + Current options are: + 'bond' : Plots the full molecule (or molecules), and displays dynamics + on the given bond + 'backbone': Plots a protein backbone, with dynamics displayed on the + H,N,CA,C, and O atoms + 'equiv': If selections (mol.sel1,mol.sel2) include a heteroatom in + sel1(or sel2) and a proton in sel2(or sel1), dynamics will + be plotted on those atoms and any other equivalent atoms. The + full molecule will be displayed + 'methyl': If atoms in mol.sel1 and mol.sel2 are members of methyl groups, + and the full molecule is determined to be a protein, then + dynamics will be displayed on the fully methyl group. + Furthermore, the protein backbone will be displayed along + with sidechains containing the relevant sidechains. 
+ + disp_mode=guess_disp_mode(mol) + """ + + sel0=mol.mda_object.atoms[0:0] #An empty atom group + if mol.sel1 is not None: + sel0=sel0+mol.sel1 + if mol.sel2 is not None: + sel0=sel0+mol.sel1 + if len(sel0)==0: + sel0=mol.mda_object.atoms #No selections: sel0 is the full universe + + segids=np.unique(sel0.segids) + resids=np.unique(sel0.resids) + if len(segids)!=0: + i=np.isin(mol.mda_object.atoms.segids,segids) + sel1=mol.mda_object.atoms[i] + elif len(resids)!=0: + i=np.isin(mol.mda_object.atoms.resids,resids) + sel1=mol.mda_object.atoms[i] + else: + sel1=mol.mda_object.atoms + + + if 'N' in sel1.names and 'CA' in sel1.names and 'C' in sel1.names and 'O' in sel1.names: + "This is probably a protein" + if mol.sel1 is None or mol.sel2 is None: + return 'backbone' #Without further info, we assume this is a backbone plote + + is_met=True #Switch to false if we find a selection not consistent with methyl + is_bb=True #Switch to false if we find a selection not consistent with backbone + for s1,s2 in zip(mol.sel1,mol.sel2): + if not(s1.name in ['N','CA','C','O'] or s2.name in ['N','CA','C','O']): + is_bb=False + types1=[s.types[0] for s in selt.find_bonded(s1,sel0=sel1,sort='massi',n=3)] + types2=[s.types[0] for s in selt.find_bonded(s2,sel0=sel1,sort='massi',n=3)] + if not(np.all([t=='H' for t in types1]) or np.all([t=='H' for t in types2])): + is_met=False + + if not(is_met or is_bb): + return 'bond' #Neither methyl or backbone, so return 'bond' mode + + if is_met: + return 'methyl' + return 'backbone' + else: + "Probably not a protein" + if len(np.unique(mol.sel1))==len(mol.sel1) and len(np.unique(mol.sel2))==len(mol.sel2): + "Then, no repeated selections, so probably ok to highlight other bonded 1H" + return 'equiv' + else: + return 'bond' + +def sel_indices(mol,disp_mode,mode='all'): + """ + Generates list of indices plotting dynamics or for showing the correct + selection. 
Set mode to all to select all atoms to be displayed and to + 'value' to get an index for each selection to be plotted. + + str=sel_str(mol,disp_mode='protein',mode='all') + + str_list1,str_list2=sel_str(mol,disp_mode='bond',mode='value') + In bond mode, there may be overlaps in selections, so we return two lists, + otherwise: + str_list=sel_str(mol,disp_mode='methyl,mode='value') + """ + uni=mol.mda_object + if mode.lower()=='all': + "First get all atoms in mol.sel1 and mol.sel2, or just everything in the universe if no selections" + if mol.sel1 is not None and mol.sel2 is not None: + sel0=mol.sel1+mol.sel2 + elif mol.sel1 is not None: + sel0=mol.sel1 + elif mol.sel2 is not None: + sel0=mol.sel2 + else: + sel0=uni.atoms + + "Resids and Segids in the selection" + resind=np.unique(sel0.resindices) + segind=np.unique(sel0.segindices) + sel0=uni.atoms + + "If all selections in one segment or one residue, only display that segment/residue" + if len(segind)==1: + sel0=sel0.segments[np.isin(uni.segments.segindices,segind)] + if len(resind)==1: + sel0=sel0.residues[np.isin(sel0.residues.resindices,resind)] + sel0=sel0.atoms + + "Get selections according to mode" + if disp_mode.lower()=='backbone': + sel0=sel0.select_atoms('name N C CA') + for s1,s2 in zip(mol.sel1,mol.sel2): + sel0=sel0+uni.residues[s1.resindex].atoms.select_atoms('name H HN')+\ + uni.residues[s2.resindex].atoms.select_atoms('name H HN') + sel0=sel0+uni.residues[s1.resindex-1].atoms.select_atoms('name O')+\ + uni.residues[s2.resindex-1].atoms.select_atoms('name O') + elif disp_mode.lower()=='methyl': + "In methyl mode, get the backbone and residues that were in the mol.sel1/mol.sel2" + sel0=sel0.select_atoms('name N C CA')+uni.residues[resind].atoms.select_atoms('type C N O S') + "And now, add on the protons attached to the bonded carbon" + for s1,s2 in zip(mol.sel1,mol.sel2): + if s1.type=='C': + bonded=selt.find_bonded(s1,uni.residues[s1.resindex].atoms,sort='massi',n=3) + else: + 
bonded=selt.find_bonded(s1,uni.residues[s2.resindex].atoms,sort='massi',n=3) + + for b in bonded:sel0=sel0+b[0] + out=uni2pdb_index(np.unique(sel0.ids),mol.pdb_id) + + return out[out!=-1] + + else: + if disp_mode.lower()=='backbone': + ids=list() + sel0=uni.atoms + sel=mol.sel2 if mol.sel1 is None else mol.sel1 + for s in sel: + resindex=s.resindex + sel1=sel0.residues[resindex].atoms.select_atoms('name N H HN') + sel2=sel0.residues[resindex-1].atoms.select_atoms('name C O') + if len(sel2)==2 and np.sqrt(((sel1.select_atoms('name N').positions-\ + sel2.select_atoms('name C').positions)**2).sum())<1.6: + sel1=sel1+sel2 + ids.append(uni2pdb_index(sel1.ids,mol.pdb_id)) + elif disp_mode.lower()=='bond': + ids=list() + for s1,s2 in zip(mol.sel1,mol.sel2): + ids.append(uni2pdb_index((s1+s2).ids,mol.pdb_id)) + elif disp_mode.lower()=='methyl': + ids=list() + sel0=uni.atoms + for s1,s2 in zip(mol.sel1,mol.sel2): + s=s1 if s1.type=='C' else s2 + sel0=uni.residues[s.resindex].atoms + sel=selt.find_bonded(s,sel0=sel0,sort='massi',n=3) + ids.append([uni2pdb_index(s.id,mol.pdb_id)[0]]) + ids[-1].extend(uni2pdb_index([s1[0].id for s1 in sel],mol.pdb_id)) + elif disp_mode.lower()=='equiv': + ids=list() + sel0=uni.residues[np.unique((mol.sel1+mol.sel2).resindices)].atoms + for s1,s2 in zip(mol.sel1,mol.sel2): + s1,s2=(s1,s2) if s1.mass>s2.mass else (s2,s1) + bond=selt.find_bonded(s1,sel0,sort='massi',n=3) + id0=[s1.id] + for b in bond: + if b[0].type==s2.type:id0.append(b[0].id) + ids.append(uni2pdb(id0,mol.pdb_id)) + else: + print('Unrecognized display mode ({0}) in sel_indices'.format(disp_mode)) + print('Use backbone,bond,methyl, or equiv') + return + +# ids=[i*(i!=-1) for i in ids] + + return ids + +def py_print_npa(f,name,x,format_str='.6f',dtype=None,nt=0): + """ + Hard-codes an array into a python script for running within ChimeraX. 
Provide + the file handle, the name for the variable within ChimeraX, the values to + be stored in the array, and the number of tabs in for the variable (nt) + + A format string may be used to determine the precision written to file + (example .6f, .3e, etc.) + + The data type stored in ChimeraX may also be controlled, by specifying the + data type + + Only for single elements, 1D, and 2D arrays (will create a numpy array in + ChimeraX) + + py_print_npa(f,name,x,format_str='.6f',dtype=None,nt=0) + """ + + x=np.array(x) + + f.write('\n') + for _ in range(nt):f.write('\t') + + if x.size==0: + print('Warning: writing an empty matrix to ChimeraX') + f.write(name+'=np.zeros({0})'.format(x.shape)) + elif x.ndim==0: + f.write((name+'=np.array({0:'+format_str+'})').format(x)) + elif x.ndim==1: + f.write(name+'=np.array([') + for k,x0 in enumerate(x): + f.write(('{0:'+format_str+'},').format(x0)) + if np.mod(k,10)==9 and k!=len(x)-1: + f.write('\\\n') #Only 10 numbers per line + for _ in range(nt+1):f.write('\t') + f.seek(f.tell()-1) #Delete the last comma + f.write('])') + elif x.ndim==2: + f.write(name+'=np.array([') + for m,x0 in enumerate(x): + if m!=0: #Tab in lines following the first line + for _ in range(nt+1):f.write('\t') + f.write('[') #Start the current row + for k,x00 in enumerate(x0): + f.write(('{0:'+format_str+'},').format(x00)) + if np.mod(k,10)==9 and k!=len(x0)-1: + f.write('\\\n') #Only 10 numbers per line + for _ in range(nt+1):f.write('\t') + f.seek(f.tell()-1) #Delete the last comma + f.write('],\n') #Close the current row + + f.seek(f.tell()-2) #Delete the new line and comma + f.write('])') #Close the matrix + else: + print('Too many dimensions for py_print_npa') + return + + if dtype is not None: + f.write('.astype("{0}")'.format(dtype)) + f.write('\n') + +def py_print_lists(f,name,x,format_str='.6f',nt=0): + """ + Hard-codes a list or list of lists into a python script for running within + ChimeraX. 
Provide the file handle, the name for the variable within ChimeraX + the values to be stored in the lists, and the number of tabs in for the + variable (nt) + + A format string may be used to determine the precision written to file + (example .6f, .3e, etc.) + + Only for single elements, 1D, and 2D lists + + py_print_lists(f,name,x,nt=0) + """ + + f.write('\n') + for _ in range(nt):f.write('\t') + + if not(hasattr(x,'__len__')): + f.write((name+'{0:'+format_str+'}').format(x)) + elif not(hasattr(x[0],'__len__')): + f.write(name+'=[') + for k,x0 in enumerate(x): + f.write(('{0:'+format_str+'},').format(x0)) + if np.mod(k,10)==9 and k!=len(x)-1: + f.write('\\\n') #10 numbers per line + for _ in range(nt+1):f.write('\t') + f.seek(f.tell()-1) #Delete the last comma + f.write(']') + elif not(hasattr(x[0][0],'__len__')): + f.write(name+'=[') + for m,x0 in enumerate(x): + if m!=0: + for _ in range(nt+1):f.write('\t') + f.write('[') + for k,x00 in enumerate(x0): + f.write(('{0:'+format_str+'},').format(x00)) + if np.mod(k,10)==9 and k!=len(x0)-1: + f.write('\\\n') #Only 10 numbers per line + for _ in range(nt+1):f.write('\t') + f.seek(f.tell()-1) #Delete last comma + f.write('],\n') #Close the current row + f.seek(f.tell()-2) #Delete the new line and comma + f.write(']') #Close the matrix + else: + print('Too many dimensions for py_print_lists') + return + + f.write('\n') + + + + +def color_calc(x,x0=None,colors=[[0,0,255,255],[210,180,140,255],[255,0,0,255]]): + """ + Calculates color values for a list of values in x (x ranges from 0 to 1). + + These values are linear combinations of reference values provided in colors. + We provide a list of N colors, and a list of N x0 values (if x0 is not provided, + it is set to x0=np.linspace(0,1,N). If x is between the 0th and 1st values + of x0, then the color is somewhere in between the first and second color + provided. Default colors are blue at x=0, tan at x=0.5, and red at x=1. 
+ + color_calc(x,x0=None,colors=[[0,0,255,255],[210,180,140,255],[255,0,0,255]]) + """ + + colors=np.array(colors,dtype='uint8') + N=len(colors) + if x0 is None:x0=np.linspace(0,1,N) + x=np.array(x) + if x.min()x0.max(): + print('Warning: x values greater than max(x0) are set to max(x0)') + x[x>x0.max()]=x0.max() + + i=np.digitize(x,x0) + i[i==len(x0)]=len(x0)-1 + + clr=(((x-x0[i-1])*colors[i].T+(x0[i]-x)*colors[i-1].T)/(x0[i]-x0[i-1])).T + + return clr.astype('uint8') + +def get_default_colors(det_num): + """ + Returns in RGBA the default color for a given detector number. + """ + + clr0=plt.rcParams['axes.prop_cycle'].by_key()['color'] + clr=clr0[np.mod(det_num,len(clr0))] + + return np.concatenate((hex_to_rgb(clr),[255])) + + +def hex_to_rgb(value): + """Return (red, green, blue) for the color given as #rrggbb.""" + value = value.lstrip('#') + lv = len(value) + return [int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3)] + +def run_chimeraX(mol,disp_mode=None,x=None,chimera_cmds=None,fileout=None,save_opts=None,\ + scene=None,x0=None,marker=None,absval=True,colors=[[255,255,0,255],[255,0,0,255]]): + """ + Opens an instance of chimera, displaying the current pdb (if no pdb exists + in mol, it will also create the pdb). Atoms will be displayed in accordance + with the display mode: + 'bond' : Shows all atoms in the same segment/residues if mol.sel1 + and mol.sel2 contain only one segment/residue. Parameter + encoding only shown on particular bond + 'equiv' : Same display as bond, but parameter encoding will be shown + on all equivalent bonds + 'backbone': Shows only protein backbone, along with parameter encoding + on all atoms in the peptide plane + 'methyl': Shows protein backbone, plus side chains of residues found + in mol.sel1,mol.sel2. Parameter encoding is shown all the + methyl protons, carbon, and the next bonded carbon. 
+ If the display mode is not specified, it will be guessed based on the atoms + in mol.sel1, mol.sel2 + + One may also specify x, a vector with values between 0 and 1, which is + encoded into the radius and color of atoms. The displayed radius is + calculcated from x as 4*x+0.9 Angstroms. + + chimera_cmds: A list of commands to execute in chimera after setup + scene : A saved chimera session to open and execute run_chimera in. + Note: the pdb open in scene must have the same indices as the + pdb in mol.pdb + fileout : File to save chimera image to. + save_opts : Options for file saving + colors : List of colors used for encoding + x0 : Values of x corresponding to each color + marker : Index of selection to color black (for example, for cc plots) + + run_chimera(mol,disp_mode=None,x=None,chimera_cmds=None,fileout=None,save_opts=None,\ + scene=None,x0=None,colors=[[0,0,255,255],[210,180,140,255],[255,0,0,255]]) + """ + + if mol.pdb is None: + mol.MDA2pdb() + pdb=mol.pdb #Get pdb name + + rand_index=np.random.randint(1e6) #We'll tag a random number onto the filename + #This lets us run multiple instances without interference + full_path=get_path('chimera_script{0:06d}.py'.format(rand_index)) #Location to write out chimera script + + + "Here we try to guess the display mode if not given" + if disp_mode is None: + disp_mode=guess_disp_mode(mol) + + di=sel_indices(mol,disp_mode,mode='all') + + + with open(full_path,'w') as f: + + py_line(f,'try:') + py_line(f,run_command(),1) + py_line(f,'import os',1) + py_line(f,'import numpy as np',1) + py_print_npa(f,'di',di,format_str='d',dtype='uint32',nt=1) + + if x is not None: + "Print out matrices required for parameter encoding" + if len(mol.sel1)!=len(x) or len(mol.sel2)!=len(x): + print('The lengths of mol.sel1, mol.sel2, and x (the parameter to be encoded)') + print('must all match') + return + + id0=sel_indices(mol,disp_mode,mode='value') + x=np.concatenate([x0*np.ones(len(i)) for i,x0 in zip(id0,x)]) + 
id0=np.concatenate([i for i in id0]).astype(int) + ids,b=np.unique(id0,return_index=True) + x=np.array([x[id0[b0]==id0].mean() for b0 in b]) + clrs=color_calc(x=x,x0=x0,colors=colors) + + i=ids!=-1 + ids,x,clrs=ids[i],x[i],clrs[i] + + if marker: + id_mark=sel_indices(mol,disp_mode,mode='value')[marker] + py_print_npa(f,'id_mark',id_mark,format_str='d',dtype='uint32',nt=1) + py_print_npa(f,'ids',ids,format_str='d',dtype='uint32',nt=1) + py_print_npa(f,'r',4*np.abs(x)+0.9,format_str='.6f',dtype='float',nt=1) #Scale up radius ?? + py_print_npa(f,'clr',clrs,format_str='d',dtype='uint8',nt=1) + + if scene: +# WrCC(f,'open '+scene,1) + py_line(f,'mdl=session.open_command.open_data("{0}")[0]'.format(scene),1) + py_line(f,'session.models.add(mdl)',1) + else: +# WrCC(f,"open '"+pdb+"'",1) + py_line(f,'mdl=session.open_command.open_data("{0}")[0]'.format(pdb),1) + py_line(f,'session.models.add(mdl)',1) + WrCC(f,'~display',1) + WrCC(f,'~ribbon',1) + + + #Get the atoms to be displayed + py_line(f,'if len(session.models)>1:',1) + py_line(f,'atoms=session.models[1].atoms',2) + WrCC(f,'display #1.1',2) + py_line(f,'else:',1) + py_line(f,'atoms=session.models[0].atoms',2) + WrCC(f,'display #1',2) + + #Set to ball and stick + + + #Display the correct selection + +# py_line(f,'display=getattr(atoms.atoms,"visibles")',1) +# py_line(f,'display[:]=0',1) +# py_line(f,'display[di]=1',1) +# py_line(f,'setattr(atoms.atoms,"visibles",display)',1) + py_line(f,'hide=getattr(atoms,"hides")',1) + py_line(f,'hide[:]=1',1) + py_line(f,'hide[di]=0',1) + py_line(f,'setattr(atoms,"hides",hide)',1) + + #Parameter encoding + if x is not None: + WrCC(f,'style ball',1) + WrCC(f,'size stickRadius 0.2',1) + WrCC(f,'color all tan',1) + py_line(f,'r0=getattr(atoms,"radii").copy()',1) + py_line(f,'clr0=getattr(atoms,"colors").copy()',1) + py_line(f,'r0[:]=.8',1) + py_line(f,'r0[ids]=r',1) + py_line(f,'clr0[ids]=clr',1) + if marker: + py_line(f,'clr0[id_mark]=[70,70,70,255]',1) + 
py_line(f,'setattr(atoms,"radii",r0)',1) + py_line(f,'setattr(atoms,"colors",clr0)',1) + + if chimera_cmds is not None: + if isinstance(chimera_cmds,str):chimera_cmds=[chimera_cmds] + for cc in chimera_cmds: + WrCC(f,cc,1) + + if fileout is not None: + if len(fileout)>=4 and fileout[-4]!='.':fileout=fileout+'.png' + if save_opts is None:save_opts='' + WrCC(f,"save " +fileout+' '+save_opts,1) + + py_line(f,'except:') + py_line(f,'pass',1) + py_line(f,'finally:') + py_line(f,'os.remove("{0}")'.format(full_path),1) + if fileout is not None: #Exit if a file is saved + WrCC(f,'exit',1) + copyfile(full_path,full_path[:-9]+'.py') + + os.spawnl(os.P_NOWAIT,chimera_path(),chimera_path(),full_path) +# import subprocess +# subprocess.Popen([chimera_path(),'--start shell',full_path]) + +def molecule_only(mol,disp_mode=None): + """ + Displays the molecule in ChimeraX + """ + + if mol.pdb is None: + mol.MDA2pdb() + pdb=mol.pdb #Get pdb name + + rand_index=np.random.randint(1e6) #We'll tag a random number onto the filename + full_path=get_path('chimera_script{0:06d}.py'.format(rand_index)) #Location to write out chimera script + + "Here we try to guess the display mode if not given" + if disp_mode is None and (mol.sel1 is not None or mol.sel2 is not None): + disp_mode=guess_disp_mode(mol) + + di=sel_indices(mol,disp_mode,mode='all') + else: + di=None + + + with open(full_path,'w') as f: + + py_line(f,'try:') + py_line(f,run_command(),1) + py_line(f,'import os',1) + py_line(f,'import numpy as np',1) + if di is not None: + py_print_npa(f,'di',di,format_str='d',dtype='uint32',nt=1) + + + + WrCC(f,'open '+pdb,1) + WrCC(f,'~display',1) + WrCC(f,'~ribbon',1) + + #Get the atoms to be displayed + py_line(f,'if len(session.models)>1:',1) + py_line(f,'atoms=session.models[1].atoms',2) + WrCC(f,'display #1.1',2) + py_line(f,'else:',1) + py_line(f,'atoms=session.models[0].atoms',2) + WrCC(f,'display #1',2) + + py_line(f,'hide=getattr(atoms,"hides")',1) + py_line(f,'hide[:]=1',1) + 
py_line(f,'hide[di]=0',1) + py_line(f,'setattr(atoms,"hides",hide)',1) + + py_line(f,'except:') + py_line(f,'pass',1) + py_line(f,'finally:') + py_line(f,'os.remove("{0}")'.format(full_path),1) + + copyfile(full_path,full_path[:-9]+'.py') + + os.spawnl(os.P_NOWAIT,chimera_path(),chimera_path(),full_path) + +def draw_tensors(A,mol=None,sc=2.09,tstep=0,disp_mode=None,index=None,scene=None,\ + fileout=None,save_opts=None,chimera_cmds=None,\ + colors=[[255,100,100,255],[100,100,255,255]],marker=None,\ + marker_color=[[100,255,100,255],[255,255,100,255]],deabg=False,\ + pos=None,frame='inter',vft=None): + """ + Plots tensors onto bonds of a molecule. One must provide the tensors, A, which + are plotted onto a molecule (if molecule object provided), the molecule + object, where mol.sel1 and mol.sel2 define the bonds corresponding to the + elements in A. + + A: Tensors, where input should be 5xN. Tensors should be provided + in the frame of the bond. By default, these are the complex + components of the tensor itself. Alternatively, provide delta, + eta, alpha, beta, and gamma (radians) in a 5xN matrix. Set + deabg to True. + mol: molecule object, with N atoms selected in mol.sel1 and mol.sel2, + and where mol._vft is defined (that is, mol.tensor_frame was + run, and mol.clear_frames has not been executed. mol._vft() should + also return N vectors. We need both these pieces of information + to determine both the orientation of the tensors and their + positions. + If mol not provided, tensors will be simply plotted in space, + instead of on a pdb + + tstep: Time step to use for generating pdb, orienting tensors + index: Only show some tensors. Index can select which, out of A and + mol.sel1, mol.sel2, etc. to use + sel + + frame: Provides the frame that tensors are defined in. By default, this + frame is the frame of the interaction (so, A=[0,0,1,0,0] would + lie along the bond (set frame='inter'). 
However, one may also + choose the lab frame (set frame ='LF'), for which A=[0,0,1,0,0] + would lie along z. + """ + + "Filenames" + rand_index=np.random.randint(1e6) #We'll tag a random number onto the filenames + #This lets us run multiple instances without interference + full_path=get_path('chimera_script{0:06d}.py'.format(rand_index)) #Location to write out chimera script + tensor_file=get_path('tensors_{0:06d}.txt'.format(rand_index)) + + + A=np.atleast_2d(A) + if A.shape[0]!=5 and A.shape[1]==5: + A=A.T + elif A.shape[0]!=5: + print('A must be 5xN, where N is number of tensors') + + + if mol is not None and (len(mol.sel1)!=len(A[0]) or len(mol.sel2)!=len(A[0])): + print('The lengths of mol.sel1, mol.sel2, and A (the tensor)') + print('must all match') + print('Length of: [mol.sel1, mol.sel1, A]:[{0},{1},{2}]'.format(len(mol.sel1),len(mol.sel2),len(A[0]))) + return + + if index is None: + index=np.ones(len(A[0]),dtype=bool) + else: + index=np.array(index) + if index.max()>1 or (len(index)<3 and len(mol.sel1)>3): #Then probably not a logical index + in0=index + index=np.zeros(len(mol.sel1),dtype=bool) + index[in0]=True + + if marker is None: + marker=np.zeros(len(A[0]),dtype=bool) + else: + marker=np.array(marker) + if marker.max()>1 or (len(marker)<3 and len(marker.sel1)>3): #Then probably not a logical index + in0=marker + marker=np.zeros(len(mol.sel1),dtype=bool) + marker[in0]=True + marker=marker[index] + + "Here we try to guess the display mode if not given" + if mol is not None and disp_mode is None: + disp_mode=guess_disp_mode(mol) + + + if scene is None: + if mol is not None: + if mol.pdb is None: + if disp_mode.lower()=='methyl' or disp_mode.lower()=='backbone': + mol.MDA2pdb(tstep=tstep,select='protein') + else: + resids=np.unique((mol.sel1+mol.sel2).resids) + select='resid' + for r in resids:select+=' {0}'.format(r) + mol.MDA2pdb(tstep=tstep,select=select) + pdb=mol.pdb + else: + pdb=None + + if mol is not None: + 
di=sel_indices(mol,disp_mode,mode='all') #INdex for which atom to display + +# id0=np.concatenate([i for i in id0]).astype(int) +# ids,b=np.unique(id0,return_index=True) + + "Calculate parameters required for tensor file" + if mol is not None: + mol.mda_object.trajectory[tstep] #Go to the correct time step + + if deabg:A=pars2Spher(*A) #Move to tensor components + + A=[a[index] for a in A] #Index the tensors + if mol is None: + if pos is None: + pos=np.zeros([3,len(A[0])]) + pos[0]=np.arange(len(A[0]))*3 + else: + if frame[0].lower()!='l': + vZ,vXZ=mol._vft() if vft is None else vft() #Get bond directions + scF=getFrame(vZ[:,index],vXZ[:,index]) #Get frame of bond, apply index + A=Rspher(A,*scF) #Apply frame to tensors + + pos=(mol.sel1.positions[index]+mol.sel2.positions[index]).T/2 #Get the positions, along with index + delta,eta,*euler=Spher2pars(A,return_angles=True) + + write_tensor(tensor_file,delta=delta*sc,eta=eta,euler=euler,pos=pos,marker=marker) #Write out the tensor file + + with open(full_path,'w') as f: + py_line(f,'import os') + py_line(f,'import numpy as np') + py_line(f,run_command()) + + copy_funs(f) #Copy required functions into chimeraX script + + py_line(f,'\n') + py_line(f,'try:') + + if mol is not None: + py_print_npa(f,'di',di,format_str='d',dtype='uint32',nt=1) #Print out ids for visualizing + + if scene is not None: + WrCC(f,'open '+scene,1) + elif pdb is not None: + WrCC(f,'open '+pdb,1) + WrCC(f,'~display',1) + WrCC(f,'~ribbon',1) + + #Get the atoms to be displayed + if mol is not None: + py_line(f,'if len(session.models)>1:',1) + py_line(f,'atoms=session.models[1].atoms',2) + WrCC(f,'display #1.1',2) + py_line(f,'else:',1) + py_line(f,'atoms=session.models[0].atoms',2) + WrCC(f,'display #1',2) + + #Display the correct selection + py_line(f,'hide=getattr(atoms,"hides")',1) + py_line(f,'hide[:]=1',1) + py_line(f,'hide[di]=0',1) + py_line(f,'setattr(atoms,"hides",hide)',1) + + WrCC(f,'style stick',1) + + + + negative_color=[int(c) for c in 
colors[1]] + positive_color=[int(c) for c in colors[0]] + nc=[int(c) for c in marker_color[1]] + pc=[int(c) for c in marker_color[0]] + + py_line(f,('load_surface(session,"{0}",sc={1},theta_steps={2},phi_steps={3},positive_color={4},negative_color={5},'\ + +'marker_pos_color={6},marker_neg_color={7})')\ + .format(tensor_file,sc,50,25,positive_color,negative_color,pc,nc),1) + + WrCC(f,'display',1) + + if chimera_cmds is not None: + if isinstance(chimera_cmds,str):chimera_cmds=[chimera_cmds] + for cmd in chimera_cmds: + WrCC(f,cmd,1) + + + + if fileout is not None: + if len(fileout)<=4 or fileout[-4]!='.':fileout=fileout+'.png' + if save_opts is None:save_opts='' + WrCC(f,"save " +fileout+' '+save_opts,1) + py_line(f,'except:') + py_line(f,'pass',1) + py_line(f,'finally:') + py_line(f,'os.remove("{0}")'.format(full_path),1) + py_line(f,'os.remove("{0}")'.format(tensor_file),1) +# py_line(f,'pass',1) + if fileout is not None: + WrCC(f,'exit',1) + copyfile(full_path,full_path[:-9]+'.py') + copyfile(tensor_file,tensor_file[:-11]+'.txt') + + os.spawnl(os.P_NOWAIT,chimera_path(),chimera_path(),full_path) + + + +def uni2pdb_index(index,pdb_index,report_err=False): + "Converts the universe index to the index for a stored pdb" + "The stored pdb is in molecule.pdb, and the index is in molecule.pdb_in" + + index=np.atleast_1d(index) + + i=-np.ones(np.size(index),dtype=int) + for k,ind in enumerate(index): + if np.any(ind==pdb_index): + i[k]=np.argwhere(ind==pdb_index)[0,0] + elif report_err: + print('Index: {0} not found in pdb_index'.format(ind)) + return i.astype(int) + + +def write_tensor(filename,delta,eta=None,euler=None,pos=None,marker=None): + """ + Writes out a tab-separated file with delta, eta, alpha, beta, gamma, and + x,y,z for tensors. 
For reading within ChimeraX + + write_tensor(filename,delta,eta=None,euler=None,pos=None,marker=None) + """ + + delta=np.array(delta) + n=delta.size + + #Defaults, make sure all numpy arrays + eta=np.zeros(n) if eta is None else np.array(eta) + euler=np.zeros([3,n]) if euler is None else np.array(euler) + pos=np.zeros([3,n]) if pos is None else np.array(pos) + if marker is None: + marker=np.zeros(n) + else: + if not(hasattr(marker,'__len__')):marker=[marker] + if len(marker)1: + m1=marker + marker=np.zeros(n) + marker[np.array(m1,dtype=int)]=1 + + if len(euler)==3: + alpha,beta,gamma=euler + else: + alpha,beta,gamma=sc2angles(*euler) + X,Y,Z=pos + + + with open(filename,'w') as f: + for vals in zip(delta,eta,alpha,beta,gamma,X,Y,Z,marker): + for v in vals[:-1]:f.write('{0:16.8}\t'.format(v)) + f.write('{0:d}\t'.format(int(vals[-1]))) + f.write('\n') + +def copy_funs(f): + """ + Copys all functions in THIS file below the comment "Files used inside ChimeraX" + + Input is the file handle, f, to which the pythons functions should be copied + + copy_funs(f) + """ + + with open(get_path('chimeraX_funs.py'),'r') as funs: + start_copy=False + for line in funs: + if start_copy: + f.write(line) + else: + if len(line)>=30 and line[:30]=="#%% Files used inside ChimeraX": + start_copy=True + f.write('\n') + +#%% Files used inside ChimeraX (don't edit this comment!!..it will break the code) +""" +Everything after these lines is printed into the chimeraX script, so don't add +anything below that you don't need in chimeraX +""" +def sphere_triangles(theta_steps=100,phi_steps=50): + """ + Creates arrays of theta and phi angles for plotting spherical tensors in ChimeraX. 
+ Also returns the corresponding triangles for creating the surfaces + """ + + theta=np.linspace(0,2*np.pi,theta_steps,endpoint=False).repeat(phi_steps) + phi=np.repeat([np.linspace(0,np.pi,phi_steps,endpoint=True)],theta_steps,axis=0).reshape(theta_steps*phi_steps) + + triangles = [] + for t in range(theta_steps): + for p in range(phi_steps-1): + i = t*phi_steps + p + t1 = (t+1)%theta_steps + i1 = t1*phi_steps + p + triangles.append((i,i+1,i1+1)) + triangles.append((i,i1+1,i1)) + + return theta,phi,triangles + +def spherical_surface(delta,eta=None,euler=None,pos=None,sc=2.09, + theta_steps = 100, + phi_steps = 50, + positive_color = (255,100,100,255), # red, green, blue, alpha, 0-255 + negative_color = (100,100,255,255)): + """ + Function for generating a surface in ChimeraX. delta, eta, and euler angles + should be provided, as well positions for each tensor (length of all arrays + should be the same, that is (N,), (N,), (3,N), (3,N) respectively. + + Returns arrays with the vertices positions (Nx3), the triangles definitions + (list of index triples, Nx3), and a list of colors (Nx4) + + xyz,tri,colors=spherical_surface(delta,eta=None,euler=None,pos=None, + theta_steps=100,phi_steps=50, + positive_color=(255,100,100,255), + negative_color=(100,100,255,255)) + """ + # Compute vertices and vertex colors + a,b,triangles=sphere_triangles(theta_steps,phi_steps) + + if euler is None:euler=[0,0,0] + if pos is None:pos=[0,0,0] + if eta is None:eta=0 + + # Compute r for each set of angles + sc=np.sqrt(2/3)*sc + + A=[-1/2*delta*eta,0,np.sqrt(3/2)*delta,0,-1/2*delta*eta] #Components in PAS + + #0 component after rotation by a and b + A0=np.array([A[mp+2]*d2(b,m=0,mp=mp)*np.exp(1j*mp*a) for mp in range(-2,3)]).sum(axis=0).real + + #Coordinates before rotation by alpha, beta, gamma + x0=np.cos(a)*np.sin(b)*np.abs(A0)*sc/2 + y0=np.sin(a)*np.sin(b)*np.abs(A0)*sc/2 + z0=np.cos(b)*np.abs(A0)*sc/2 + + alpha,beta,gamma=euler + alpha,beta,gamma=-alpha,-beta,-gamma #Added 30.09.21 
along with edits to vf_tools>R2euler + #Rotate by alpha + x1,y1,z1=x0*np.cos(alpha)+y0*np.sin(alpha),-x0*np.sin(alpha)+y0*np.cos(alpha),z0 + #Rotate by beta + x2,y2,z2=x1*np.cos(beta)-z1*np.sin(beta),y1,np.sin(beta)*x1+np.cos(beta)*z1 + #Rotate by gamma + x,y,z=x2*np.cos(gamma)+y2*np.sin(gamma),-x2*np.sin(gamma)+y2*np.cos(gamma),z2 + + x=x+pos[0] + y=y+pos[1] + z=z+pos[2] + +# xyz=[[x0,y0,z0] for x0,y0,z0 in zip(x,y,z)] + #Determine colors + colors=np.zeros([A0.size,4],np.uint8) + colors[A0>=0]=positive_color + colors[A0<0]=negative_color + + + # Create numpy arrays +# xyz = np.array(xyz, np.float32) + xyz=np.ascontiguousarray(np.array([x,y,z]).T,np.float32) #ascontiguousarray forces a transpose in memory- not just editing the stride + colors = np.array(colors, np.uint8) + tri = np.array(triangles, np.int32) + + return xyz,tri,colors + + +def load_tensor(filename): + """ + Reads in a tab-separated file with delta, eta, alpha,beta, gamma, and x,y,z + for a set of tensors. + + delta,eta,euler,pos=load_tensor(filename) + """ + delta=list() + eta=list() + alpha=list() + beta=list() + gamma=list() + x=list() + y=list() + z=list() + marker=list() + with open(filename,'r') as f: + for line in f: + out=line.strip().split('\t') + out=[np.array(o,float) for o in out] + delta.append(out[0]) + eta.append(out[1]) + alpha.append(out[2]) + beta.append(out[3]) + gamma.append(out[4]) + x.append(out[5]) + y.append(out[6]) + z.append(out[7]) + marker.append(out[8]) + + delta=np.array(delta) + eta=np.array(eta) + euler=np.array([alpha,beta,gamma]).T + pos=np.array([x,y,z]).T + marker=np.array(marker) + + return delta,eta,euler,pos,marker + + + +def load_surface(session,tensor_file,sc=2.09,theta_steps=100,phi_steps=50, + positive_color=(255,100,100,255),negative_color=(100,100,255,255), + marker_pos_color=(100,255,100,255),marker_neg_color=(255,255,100,255)): + + Delta,Eta,Euler,Pos,Marker=load_tensor(tensor_file) + + from chimerax.core.models import Surface + from chimerax.surface 
import calculate_vertex_normals,combine_geometry_vntc + + geom=list() + + for k,(delta,eta,euler,pos,marker) in enumerate(zip(Delta,Eta,Euler,Pos,Marker)): + if marker==1: + pc=marker_pos_color + nc=marker_neg_color + else: + pc=positive_color + nc=negative_color + xyz,tri,colors=spherical_surface(\ + delta=delta,eta=eta,euler=euler,pos=pos,\ + sc=sc,theta_steps=theta_steps,\ + phi_steps=phi_steps,\ + positive_color=pc,\ + negative_color=nc) + + norm_vecs=calculate_vertex_normals(xyz,tri) + + geom.append((xyz,norm_vecs,tri,colors)) + + xyz,norm_vecs,tri,colors=combine_geometry_vntc(geom) + s = Surface('surface',session) + s.set_geometry(xyz,norm_vecs,tri) + s.vertex_colors = colors + session.models.add([s]) + + return s + + +def d2(c=0,s=None,m=None,mp=0): + """ + Calculates components of the d2 matrix. By default only calculates the components + starting at m=0 and returns five components, from -2,-1,0,1,2. One may also + edit the starting component and select a specific final component + (mp=None returns all components, whereas mp may be specified between -2 and 2) + + d2_m_mp=d2(m,mp,c,s) #c and s are the cosine and sine of the desired beta angle + + or + + d2_m_mp=d2(m,mp,beta) #Give the angle directly + + Setting mp to None will return all values for mp in a 2D array + + (Note that m is the final index) + """ + + if s is None: + c,s=np.cos(c),np.sin(c) + + """ + Here we define each of the components as functions. 
We'll collect these into + an array, and then call them out with the m and mp indices + """ + "First, for m=-2" + + if m is None or mp is None: + if m is None and mp is None: + print('m or mp must be specified') + return + elif m is None: + if mp==-2: + index=range(0,5) + elif mp==-1: + index=range(5,10) + elif mp==0: + index=range(10,15) + elif mp==1: + index=range(15,20) + elif mp==2: + index=range(20,25) + elif mp is None: + if m==-2: + index=range(0,25,5) + elif m==-1: + index=range(1,25,5) + elif m==0: + index=range(2,25,5) + elif m==1: + index=range(3,25,5) + elif m==2: + index=range(4,25,5) + else: + index=[(mp+2)*5+(m+2)] + + out=list() + for i in index: + #mp=-2 + if i==0:x=0.25*(1+c)**2 + if i==1:x=0.5*(1+c)*s + if i==2:x=np.sqrt(3/8)*s**2 + if i==3:x=0.5*(1-c)*s + if i==4:x=0.25*(1-c)**2 + #mp=-1 + if i==5:x=-0.5*(1+c)*s + if i==6:x=c**2-0.5*(1-c) + if i==7:x=np.sqrt(3/8)*2*c*s + if i==8:x=0.5*(1+c)-c**2 + if i==9:x=0.5*(1-c)*s + #mp=0 + if i==10:x=np.sqrt(3/8)*s**2 + if i==11:x=-np.sqrt(3/8)*2*s*c + if i==12:x=0.5*(3*c**2-1) + if i==13:x=np.sqrt(3/8)*2*s*c + if i==14:x=np.sqrt(3/8)*s**2 + #mp=1 + if i==15:x=-0.5*(1-c)*s + if i==16:x=0.5*(1+c)-c**2 + if i==17:x=-np.sqrt(3/8)*2*s*c + if i==18:x=c**2-0.5*(1-c) + if i==19:x=0.5*(1+c)*s + #mp=2 + if i==20:x=0.25*(1-c)**2 + if i==21:x=-0.5*(1-c)*s + if i==22:x=np.sqrt(3/8)*s**2 + if i==23:x=-0.5*(1+c)*s + if i==24:x=0.25*(1+c)**2 + out.append(x) + + if m is None or mp is None: + return np.array(out) + else: + return out[0] diff --git a/pyDIFRATE/data/.DS_Store b/pyDIFRATE/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0. 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Wed Jul 31 16:37:08 2019 + +@author: albertsmith +""" + +import pickle +#from data import data_class +#from data.data_class import data + +def save_bin(filename,obj): + """ + |save_bin saves a python object. + | + |save_bin(filename,obj) + | + |Fails if that object contains an MDanalysis object + """ + + with open(filename,'wb') as f: + pickle.dump(obj,f) + +def load_bin(filename): + """ + |Loads a python object + | + |obj = load_bin(filename) + | + |If object saved with save_DIFRATE, reload with load_DIFRATE + """ + with open(filename,'rb') as f: + obj=pickle.load(f) + + return obj + + +def save_DIFRATE(filename,obj): + """ + |save_DIFRATE saves a DIFRATE object. + | + |save_DIFRATE(filename,obj) + | + |Deletes the MDanalysis object before saving- this object otherwise creates + |a pickling error + """ + + """Note- I don't understand why this function is necessary. The MDAnalysis + universe exists in the atom selections, and can be recovered from these. + Nonetheless, pickling fails if we don't first remove the saved universe. 
+ """ + + if hasattr(obj,'copy'): + obj=obj.copy() + + if hasattr(obj,'sens') and hasattr(obj,'detect'): + if obj.sens is not None and obj.sens.molecule is not None: + obj.sens.molecule.del_MDA_object() + if obj.detect is not None and obj.detect.molecule is not None: + obj.detect.molecule.del_MDA_object() + elif hasattr(obj,'molecule'): + obj.molecule.del_MDA_object() + elif hasattr(obj,'mda_object'): + obj.del_MDA_object() + + save_bin(filename,obj) + + if hasattr(obj,'sens') and hasattr(obj,'detect'): + if obj.sens is not None and obj.sens.molecule is not None: + obj.sens.molecule.reload_MDA() + if obj.detect is not None and obj.detect.molecule is not None: + obj.detect.molecule.reload_MDA() + elif hasattr(obj,'molecule'): + obj.molecule.reload_MDA() + elif hasattr(obj,'mda_object'): + obj.reload_MDA() + + +def load_DIFRATE(filename): + """ + |load_DIFRATE loads a DIFRATE object from a file + | + |obj = load_DIFRATE(filename) + | + |Replaces the mda_object in the various DIFRATE objects + """ + + obj=load_bin(filename) + + + if hasattr(obj,'sens') and hasattr(obj,'detect'): + if obj.sens is not None and obj.sens.molecule is not None and obj.sens.molecule.sel1 is not None: + obj.sens.molecule.mda_object=obj.sens.molecule.sel1.universe + if obj.detect is not None and obj.detect.molecule is not None and obj.detect.molecule.sel1 is not None: + obj.detect.molecule.mda_object=obj.detect.molecule.sel1.universe + elif hasattr(obj,'molecule') and obj.molecule.sel1 is not None: + obj.molecule.mda_object=obj.molecule.sel1.universe + elif hasattr(obj,'mda_object') and obj.sel1 is not None: + obj.mda_object=obj.sel1.universe + + return obj \ No newline at end of file diff --git a/pyDIFRATE/data/data_class.py b/pyDIFRATE/data/data_class.py new file mode 100755 index 0000000..1722925 --- /dev/null +++ b/pyDIFRATE/data/data_class.py @@ -0,0 +1,791 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory 
Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Tue May 7 16:51:28 2019 + +@author: albertsmith +""" + +import numpy as np +import pandas as pd +from pyDIFRATE.r_class.Ctsens import Ct +from pyDIFRATE.r_class.detectors import detect +from pyDIFRATE.chimera.chimeraX_funs import run_chimeraX,get_default_colors +import pyDIFRATE.plots.plotting_funs as pf +from pyDIFRATE.data.fitting import fit_data +from pyDIFRATE.data.bin_in_out import save_DIFRATE +import copy + +class data(object): +#%% Basic container for experimental and MD data + def __init__(self,**kwargs): + + self.vars=dict() #Location for extra variables by name + + self.label=None + self.chi2=None + + self.R=None + self.R_std=None + self.R_u=None + self.R_l=None + self.conf=0.68 + + self.S2=None + self._S2=None #Hidden location for S2 calc in case we don't include it + self.S2_std=None + self.tot_cc=None + self.tot_cc_norm=None + + self.Rcc=list() + self.Rcc_norm=list() + + self.Rin=None + self.Rin_std=None + self.Rc=None + + self.S2in=None + self.S2in_std=None + self.S2c=None + + self.ired=None + self.type=None + + self.sens=None + self.detect=None + + + + self.load(**kwargs) +#%% Some options for loading in data + def load(self,subS2=False,**kwargs): + EstErr=False #Default don't estimate error. 
This is overridden for 'Ct' + "Load in correlation functions from an iRED calculation" + if 'iRED' in kwargs: + ired=kwargs['iRED'] + self.ired=ired + self.R=ired['DelCt'] + del ired['DelCt'] + nt=ired['t'].size + + if subS2: + self.sens=Ct(t=ired['t'],S2=None,**kwargs) + else: + self.sens=Ct(t=ired['t'],**kwargs) + + if 'N' in self.ired: + stdev=1/np.sqrt(self.ired['N']) + stdev[0]=1e-6 + self.sens.info.loc['stdev']=stdev + self.R_std=np.repeat([stdev],self.R.shape[0],axis=0) + else: + norm=1/(self.ired.get('Ct')[:,0]-self.ired.get('CtInf')) + norm=np.transpose([norm/norm[0:-(self.ired.get('rank')*2+1)].mean()]) + + self.R_std=np.dot(norm,[self.sens.info.loc['stdev']]) + + elif 'Ct' in kwargs: + EstErr=False #Default estimate error for correlation functions + ct=kwargs.get('Ct') + self.R=ct.get('Ct') + + + if not(subS2) and 'S2' in kwargs: + "Removes the S2 correction" + self._S2=kwargs.pop('S2') + + self.sens=Ct(t=ct.get('t'),**kwargs) + nt=ct['t'].size + if 'R_std' in ct: + self.R_std=ct['R_std'] + EstErr=False + elif 'N' in ct: + stdev=1/np.sqrt(ct['N']) + stdev[0]=1e-6 + self.sens.info.loc['stdev']=stdev + self.new_detect() + self.R_std=np.repeat([stdev],self.R.shape[0],axis=0) + else: + self.R_std=np.repeat([self.sens.info.loc['stdev']],self.R.shape[0],axis=0) + if 'S2' in kwargs: + self.S2=kwargs.get('S2') + + + if 'EstErr' in kwargs: + if kwargs.get('EstErr')[0].lower()=='n': + EstErr=False + elif kwargs.get('EstErr')[0].lower()=='y': + EstErr=True + + if self.sens is not None: + self.detect=detect(self.sens) + + if EstErr: + try: + self.detect.r_no_opt(np.min([15,nt])) + fit0=self.fit() +# plt.plot(self.sens.t(),self.R[0,:]) +# plt.plot(self.sens.t(),fit0.Rc[0,:]) + self.R_std=np.sqrt((1/nt)*(np.atleast_2d(fit0.chi)*self.R_std.T**2)).T + # self.R_std[:,0]=self.R_std.min()/1e3 + self.sens.info.loc['stdev']=np.median(self.R_std,axis=0) + self.detect=detect(self.sens) + except: + print('Warning: Error estimation failed') + + + if 'molecule' in kwargs: + 
mol=kwargs.get('molecule') + if self is not None: + self.sens.molecule=mol + self.detect.molecule=mol + if self.sens.molecule.sel1 is not None and self.sens.molecule.sel2 is not None: + self.sens.molecule.set_selection() + self.label=mol.label + + def new_detect(self,mdl_num=None,sens=None,exp_num=None): + """ + Creates a new detector object. Usually for updating the detectors after + a new model has been created (Typically, we create the data object + with the correct sensitivity object already in place, such that it + doesn't make sens to update the detectors unless some model of motion + is changed. However, if this is not the case, then the detector object + may also need to be updated) + + data.detect(mdl_num=None,sens=None,exp_num=None) + + mdl_num and exp_num should either be the same length or mdl_num should + just have one element + """ + + if sens is not None: + self.detect=detect(sens,exp_num=exp_num,mdl_num=mdl_num) + else: + self.detect=detect(self.sens,exp_num=exp_num,mdl_num=mdl_num) + +#%% Option for deleting experiments + def del_exp(self,exp_num): + """ + |Deletes an experiment or experiments + |obj.del_exp(exp_num) + | + |Note that this method will automatically delete the detector object, + |since it is no longer valid after deletion of an experiment. 
Add it back + |with obj.new_detect() + """ + + if hasattr(exp_num,'__len__'): + exp_num=np.atleast_1d(exp_num) + exp_num[::-1].sort() + for m in exp_num: + self.del_exp(m) + else: + if self.R is not None and self.R.shape[1]>exp_num: + self.R=np.delete(self.R,exp_num,axis=1) + if self.R_std is not None: + self.R_std=np.delete(self.R_std,exp_num,axis=1) + if self.R_u is not None: + self.R_u=np.delete(self.R_u,exp_num,axis=1) + if self.R_l is not None: + self.R_l=np.delete(self.R_l,exp_num,axis=1) + + if self.sens is not None: + self.sens.del_exp(exp_num) + self.new_detect() #Detectors are no longer valid, and so are reset here + else: + print('Warning: exp_num {0} was not found'.format(exp_num)) + + +#%% Option for deleting a data point or points + def del_data_pt(self,index): + """ + Deletes a particular residue number (or list of number), given their + index (or indices). Deletes values out of R, R_std, R_l, R_u, Rc, Rin, + and Rin_std. + + obj.del_data_pt(index) + + Warning: This will not edit the selections in the sensitivity's molecule + object, or delete bond-specific sensitivities + """ + + if np.size(index)>1: + index=np.atleast_1d(index) + index[::-1].sort() + for m in index: + self.del_data_pt(m) + else: + if index>=self.R.shape[0]: + print('Warning: index of {0} is greater than or equal to the number of data points ({1})'.format(index,self.R.shape[0])) + return + attr=['R','R_l','R_u','R_std','Rc','Rin','Rin_std','label',\ + 'S2','S2_std','S2c','S2in','S2in_std'] + for at in attr: + if hasattr(self,at): + x=getattr(self,at) + if x is not None: + setattr(self,at,np.delete(x,index,axis=0)) + +#%% Run fit_data from the object + def fit(self,detect=None,**kwargs): + if detect is None: + detect=self.detect + + return fit_data(self,detect,**kwargs) + +#%% Convert iRED data types into normal detector responses and cross-correlation matrices + def iRED2rho(self,mode_index=None): + """ + Convert the fitting of iRED data to the auto-correlated detectors and + cross 
correlation matrices. By default, omits the last 2*rank+1 modes + (3 or 5 modes). Alternatively, the user may provide a boolean array with + size equal to the number of modes, or a list of the modes to be used + + fit=fit0.iRED2rho() + + or + + fit=fit0.iRED2rho(mode_index) + """ + if self.ired is None or not isinstance(self.sens,detect): + print('Function only applicable to iRED-derived detector responses') + return + + out=data() + + nd=self.sens.rhoz(bond=0).shape[0] + nb=self.R.shape[0] + + rank=self.ired.get('rank') + ne=2*rank+1 + + if mode_index is None: + mode_index=np.ones(nb,dtype=bool) + mode_index[-ne:]=False + else: + if len(mode_index)==self.ired['M'].shape[0]: + mode_index=np.array(mode_index,dtype=bool) + else: + mo=mode_index.copy() + mode_index=np.zeros(nb,dtype=bool) + mode_index[mo]=True + + +# if self.sens.molecule.sel1in is not None: +# nb0=np.size(self.sens.molecule.sel1in) +# elif self.sens.molecule.sel2in is not None: +# nb0=np.size(self.sens.molecule.sel2in) +# else: +# nb0=self.sens.molecule.sel1.n_atoms + nb0=self.R.shape[0]-self.ired['n_added_vecs'] + + out.R=np.zeros([nb0,nd]) + out.R_std=np.zeros([nb0,nd]) + out.R_l=np.zeros([nb0,nd]) + out.R_u=np.zeros([nb0,nd]) + + for k in range(0,nd): + lambda_rho=np.repeat([self.ired.get('lambda')[mode_index]*self.R[mode_index,k]],nb0,axis=0) + out.R[:,k]=np.sum(lambda_rho*self.ired.get('m')[0:nb0,mode_index]**2,axis=1) + + lambda_rho=np.repeat([self.ired.get('lambda')[mode_index]*self.R_std[mode_index,k]],nb0,axis=0) + out.R_std[:,k]=np.sqrt(np.sum(lambda_rho*self.ired.get('m')[0:nb0,mode_index]**2,axis=1)) + + lambda_rho=np.repeat([self.ired.get('lambda')[mode_index]*self.R_l[mode_index,k]],nb0,axis=0) + out.R_l[:,k]=np.sqrt(np.sum(lambda_rho*self.ired.get('m')[0:nb0,mode_index]**2,axis=1)) + + lambda_rho=np.repeat([self.ired.get('lambda')[mode_index]*self.R_u[mode_index,k]],nb0,axis=0) + out.R_u[:,k]=np.sqrt(np.sum(lambda_rho*self.ired.get('m')[0:nb0,mode_index]**2,axis=1)) + + + 
out.sens=self.sens + + "Pre-allocate nd matrices for the cross-correlation calculations" + out.tot_cc=np.zeros([nb0,nb0]) + for k in range(0,nd): + out.Rcc.append(np.zeros([nb0,nb0])) + "Loop over all eigenmodes" + for k in np.argwhere(mode_index).squeeze(): #We only use the user-specified modes (or by default, leave at last 2*rank+1 modes) + m=self.ired.get('m')[0:nb0,k] + mat=self.ired.get('lambda')[k]*np.dot(np.transpose([m]),[m]) + "Calculate total correlation" + out.tot_cc+=mat + "Loop over all detectors" + for m in range(0,nd): + out.Rcc[m]+=mat*self.R[k,m] + + "Calculate the normalized correlation" + dg=np.sqrt([np.diag(out.tot_cc)]) + out.tot_cc_norm=out.tot_cc/np.dot(np.transpose(dg),dg) + for k in range(0,nd): + dg=np.sqrt([np.diag(out.Rcc[k])]) + out.Rcc_norm.append(out.Rcc[k]/np.dot(np.transpose(dg),dg)) + + if self.label is not None: + out.label=self.label + elif self.sens is not None and np.size(self.sens.molecule.label)!=0: + out.label=self.sens.molecule.label + + "Calculate the order parameters" + + lda=np.repeat([self.ired.get('lambda')[0:-ne]],nb0,axis=0) + out.S2=1-np.sum(lda*self.ired.get('m')[0:nb0,0:-ne]**2,axis=1) + + return out + + def plot_rho(self,fig=None,plot_sens=True,index=None,rho_index=None,errorbars=False,style='plot',**kwargs): + """ + Plots the full series of detector responses + Arguments: + fig: Specify which figure to plot into + plot_sens (True/False): Plot the sensitivity at the top of the figure + index: Specify which residues to plot + rho_index: Specify which detectors to plot + errobars (True/False): Display error bars + style ('p'/'s'/'b'): Plot style (line plot, scatter plot, bar plot) + **kwargs: Plotting arguments (passed to plotting functions) + """ + return pf.plot_rho_series(self,fig,plot_sens,index,rho_index,errorbars,style,**kwargs) + + def plot_cc(self,det_num,cutoff=None,ax=None,norm=True,index=None,**kwargs): + if np.size(self.Rcc)==0: + print('Data object does not contain cross-correlation data') + 
print('First, create a data object from iRED analysis (data=iRED2data(...))') + print('Then, analyze with detectors, data.r_auto(...);fit0=data.fit(...)') + print('Finally, convert fit into normal detector responses, fit=fit0.iRED2rho()') + return + + if det_num is None: + x=self.tot_cc + else: + x=self.Rcc[det_num] + if index is None: + index=np.arange(x.shape[0]) + + ax=pf.plot_cc(x,self.label,ax,norm,index,**kwargs) + if det_num is None: + ax.set_title('Total cross correlation') + else: + ax.set_title(r'Cross correlation for $\rho_{' + '{}'.format(det_num) + '}$') + + return ax + + def plot_fit(self,errorbars=True,index=None,exp_index=None,fig=None,style='log',**kwargs): + """ + Plots the fit quality of the input data. This produces bar plots with + errorbars for the input data and scatter points for the fit, in the case + of experimental data. If correlation functions are being fit, then + line plots (without errorbars) are used (specify style as log or linear, + log is default) + + One may specify the residue index and also the index of experiments to + be plotted + + plot_fit(errorbars=True,index=None,exp_index=None,fig=None,ax=None) + """ + + "Return if data missing" + if self.Rc is None: + print('data object is not a fit or calculated values are not stored') + return + + info=self.sens.info_in + + if info is None or 't' in info.index.values: + if info is None: + t=np.arange(1,self.Rin.shape[1]+1) + else: + t=info.loc['t'].to_numpy() + ax=pf.plot_all_Ct(t,Ct=self.Rin,Ct_fit=self.Rc,lbl=self.label,index=index,fig=fig,style=style,**kwargs) + else: + if self.S2c is not None: + Rin=np.concatenate((self.Rin,np.atleast_2d(self.S2in).T),1) + Rin_std=np.concatenate((self.Rin_std,np.atleast_2d(self.S2in_std).T),1) + Rc=np.concatenate((self.Rc,np.atleast_2d(self.S2c).T),1) + info0=info[0].copy() + for a,b in info0.items(): + info0[a]='' if isinstance(b,str) else 0 + info0['Type']='S2' + info=pd.concat((info,info0),1,ignore_index=True) + + else: + 
Rin,Rin_std,Rc=self.Rin,self.Rin_std,self.Rc + if not(errorbars):Rin_std=None + + ax=pf.plot_fit(self.label,Rin,Rc,Rin_std,info,index,exp_index,fig) + + return ax + + def draw_cc3D(self,bond,det_num=None,index=None,scaling=1,norm=True,absval=True,\ + disp_mode=None,chimera_cmds=None,fileout=None,save_opts=None,\ + scene=None,x0=None,colors=None): + + if self.label is None: + print('User has not defined any bond labels, bond will now refer to the absolute index') + assert bond2: + index=np.array(index,dtype=bool) + else: + index=np.array(index,dtype=int) + s1,s2=mol.sel1.copy(),mol.sel2.copy() + mol.sel1,mol.sel2=mol.sel1[index],mol.sel2[index] + x=x[index] + i=np.argwhere((self.label[index]==bond) if self.label is not None else (i==index))[0,0] + + x*=scaling + + run_chimeraX(mol=mol,disp_mode=disp_mode,x=x,chimera_cmds=chimera_cmds,\ + fileout=fileout,save_opts=save_opts,scene=scene,x0=x0, + colors=colors,marker=i) + if index is not None:mol.sel1,mol.sel2=s1,s2 + +# def draw_cc3D(self,bond,det_num=None,chain=None,fileout=None,scaling=None,norm='y',**kwargs): +# "bond is the user-defined label! Not the absolute index..." +# +# if self.label is None: +# print('User has not defined any bond labels, bond will now refer to the absolute index') +# index=bond +# elif any(np.atleast_1d(self.label)==bond): +# index=np.where(np.array(self.label)==bond)[0][0] +# else: +# print('Invalid bond selection') +# return +# +# if norm.lower()[0]=='y': +# if det_num is None: +# values=self.tot_cc_norm[index,:] +# else: +# values=self.Rcc_norm[det_num][index,:] +# else: +# if det_num is None: +# values=self.tot_cc[index,:] +# else: +# values=self.Rcc[det_num][index,:] +# +# "Take absolute value- I'm not convinced about this yet..." 
+# values=np.abs(values) +# +# if scaling is None: +## "Default is to scale to the maximum of all correlations" +## scale0=0 +## for k in range(0,np.shape(self.Rcc_norm)[0]): +## a=self.Rcc_norm[k]-np.eye(np.shape(self.Rcc_norm)[1]) +## scale0=np.max([scale0,np.max(np.abs(a))]) +# if norm.lower()[0]=='y': +## if det_num is None: +## scale0=np.max(np.abs(self.tot_cc_norm)-np.eye(np.shape(self.tot_cc_norm)[0])) +## else: +## scale0=np.max(np.abs(self.Rcc_norm[det_num]-np.eye(np.shape(self.Rcc_norm)[1]))) +# scale0=1 +# else: +# scale0=np.max(np.abs(values)) +# scaling=1/scale0 +# +# res1=self.sens.molecule.sel1.resids +# chain1=self.sens.molecule.sel1.segids +# res2=self.sens.molecule.sel2.resids +# chain2=self.sens.molecule.sel2.segids +# +# color_scheme=kwargs.pop('color_scheme') if 'color_scheme' in kwargs else 'blue' +# +# if np.all(self.sens.molecule.sel1.names=='N') or np.all(self.sens.molecule.sel2.names=='N') and\ +# np.all(res1==res2) and np.all(chain1==chain2): +# style='pp' +# else: +# style='bond' +# +# if style=='pp': +# "Color the whole peptide plane one color" +# resi=res1 +# chain=chain1 +# plt_cc3D(self.sens.molecule,resi,values,resi0=bond,chain=chain,chain0=chain[index],\ +# fileout=fileout,scaling=scaling,color_scheme=color_scheme,style=style,**kwargs) +# else: +# "Color the individual bonds specified in the molecule selections" +# "I'm not sure the indexing of resi0 is correct here!!!" +# plt_cc3D(self.sens.molecule,None,values,resi0=index,scaling=scaling,color_scheme=color_scheme,style=style,**kwargs) +# """I'm going in circles here for some reason. Just switched resi0=res[index] +# to resi0=index. plot_cc in 'bond' mode expects the index found in molecule.sel1 +# and molecule.sel2. 
# ---- Methods of the data class (fragment; the enclosing class and the module
# ---- imports np/copy/plot_rho/save_DIFRATE/run_chimeraX are defined above) ----

def draw_rho3D(self,det_num=None,index=None,scaling=1,disp_mode=None,\
               chimera_cmds=None,fileout=None,save_opts=None,\
               scene=None,x0=None,colors=None):
    """
    Render detector responses onto the molecule in ChimeraX.

    det_num : detector to display; None displays 1-S2 instead.
    index   : optional bond selection; interpreted as a boolean mask when it
              contains only 0/1 and has more than two entries, otherwise as
              integer indices.
    scaling : multiplicative scaling applied to the displayed values.
    colors  : two-color gradient; defaults depend on det_num.
    Remaining arguments are passed through to run_chimeraX.
    """
    if colors is None:
        if det_num is None:
            colors=[[255,255,0,255],[255,0,0,255]]        # yellow -> red for 1-S2
        else:
            # tan -> the detector's default plotting color
            colors=[np.array([210,180,140,255]),get_default_colors(det_num)]

    vals=(1-self.S2.copy()) if det_num is None else self.R[:,det_num].copy()

    mol=self.sens.molecule
    if index is not None:
        # heuristic: all-0/1 with length>2 looks like a boolean mask
        if np.max(index)==1 and len(index)>2:
            index=np.array(index,dtype=bool)
        else:
            index=np.array(index,dtype=int)
        sel1_0,sel2_0=mol.sel1,mol.sel2      # remember selections for restore
        mol.sel1=mol.sel1[index]
        mol.sel2=mol.sel2[index]
        vals=vals[index]

    vals*=scaling
    vals[vals<0]=0      # negative responses are not displayable

    run_chimeraX(mol=mol,disp_mode=disp_mode,x=vals,chimera_cmds=chimera_cmds,\
                 fileout=fileout,save_opts=save_opts,scene=scene,x0=x0,
                 colors=colors)
    if index is not None:
        mol.sel1,mol.sel2=sel1_0,sel2_0      # restore the full selections

def draw_mode(self,mode_num=None,resi=None,fileout=None,scaling=None,**kwargs):
    """
    Plot an iRED mode amplitude onto the molecule (red/blue color scheme).

    mode_num : which mode of self.ired['m'] to display.
    resi     : optional residue filter (only used when sel1/sel2 refer to the
               same residues and chains).
    scaling  : defaults to 1/max of the displayed values.
    """
    amp=self.ired['m'][mode_num]
    if self.ired['n_added_vecs']!=0:
        amp=amp[0:-self.ired['n_added_vecs']]    # strip reference vectors

    r1=self.sens.molecule.sel1.resids
    c1=self.sens.molecule.sel1.segids
    r2=self.sens.molecule.sel2.resids
    c2=self.sens.molecule.sel2.segids

    if np.size(r1)==np.size(r2) and (np.all(r1==r2) and np.all(c1==c2)):
        resi0=resi
        resi=r1
        chain=c1

        if resi0 is not None:
            keep=np.in1d(resi,resi0)
            resi=resi[keep]
            chain=chain[keep]
            amp=amp[keep]

        if scaling is None:
            scaling=1/np.max(amp)

        plot_rho(self.sens.molecule,resi,amp,chain=chain,\
                 fileout=fileout,scaling=scaling,color_scheme='rb',**kwargs)
    else:
        # selections span multiple residues/chains: plot without residue labels
        if scaling is None:
            scaling=1/np.max(amp)

        plot_rho(self.sens.molecule,None,amp,scaling=scaling,color_scheme='rb',**kwargs)

def save(self,filename):
    """Write this data object to disk (binary format) via save_DIFRATE."""
    save_DIFRATE(filename,self)

def copy(self,type='deep'):
    """
    Return a copy of the object.
    type='deep'  (default): deep copy of everything EXCEPT the molecule object,
                 which is shared between the copy and the original.
    type='ddeep': deep copy including the molecule object.
    any other value: shallow copy.
    """
    import copy as copy_mod   # stdlib module; local alias avoids shadowing by this method's name

    if type=='ddeep':
        return copy_mod.deepcopy(self)
    if type!='deep':
        return copy_mod.copy(self)

    # 'deep': temporarily detach the molecule references so deepcopy skips
    # them, then re-attach the shared molecule on both original and copy.
    if self.sens is not None and self.detect is not None:
        mol=self.sens.molecule
        self.sens.molecule=None
        self.detect.molecule=None
        new=copy_mod.deepcopy(self)
        self.sens.molecule=mol
        self.detect.molecule=mol
        new.sens.molecule=mol
        new.detect.molecule=mol
    elif self.sens is not None:
        mol=self.sens.molecule
        self.sens.molecule=None
        new=copy_mod.deepcopy(self)
        self.sens.molecule=mol
        new.sens.molecule=mol
    else:
        new=copy_mod.deepcopy(self)

    return new

def print2text(self,filename,variables=['label','R','R_std'],precision=4):
    """
    Print selected attributes of the data object to a text file.

    variables : attribute names to write (default: label, R, R_std). 1-D
                string attributes are written one entry per line; numeric
                matrices are written tab-separated, one row per line.
    precision : number of decimal places for numeric output.
    """
    form='{0:.{1}f}'

    with open(filename,'w') as f:
        f.write('data')
        for v in variables:
            f.write('\n'+v)
            X=np.array(getattr(self,v))
            if X.ndim==1:
                if isinstance(X[0],str):
                    for entry in X:
                        f.write('\n'+entry)
                else:
                    for entry in X:
                        f.write('\n'+form.format(entry,precision))
            elif X.ndim==2:
                nrow,ncol=np.shape(X)
                for k in range(nrow):
                    for m in range(ncol):
                        f.write('\n' if m==0 else '\t')
                        f.write(form.format(X[k,m],precision))
                f.write('\n')
# pyDIFRATE/data/explicit_fits.py -- explicit fits of correlation functions
# (GPLv3; Copyright 2021 Albert Smith-Penzel; contact
#  albert.smith-penzel@medizin.uni-leipzig.de)

import numpy as np


def fitCtdata(data):
    """
    Fit all correlation functions in a data object with a 3-parameter
    (fast amplitude / slow amplitude / correlation time) model.

    Returns a new data object whose R matrix holds, per bond, the amplitude
    distribution over the sensitivity's correlation-time axis.
    """
    import pyDIFRATE as DR   # local import: keeps this module importable standalone

    n=np.shape(data.R)[0]
    ne=np.shape(data.R)[1]

    tc=data.sens.tc()
    dist=np.zeros([n,np.size(tc)])
    Rz=data.sens._rho(np.arange(0,ne))

    for k in range(n):
        Af,As,_,index=Ctfit3p((tc,data.R[k,:],Rz))
        dist[k,0]=Af
        # BUG FIX: use += so the fast amplitude (stored at index 0) is not
        # overwritten when the slow component also maps onto the first
        # correlation time (index==0).
        dist[k,index]+=As

    out=DR.data()
    out.R=dist
    out.ired=data.ired

    return out


def Ctfit3p(X):
    """
    3-parameter fit of a single correlation function.

    X=(tc0,Ct,Rz) where tc0 is the correlation-time axis, Ct the correlation
    function, and Rz the sensitivities (rows: time points, columns: tc).

    Returns (Af,As,tc,a): fast amplitude, slow amplitude, best correlation
    time, and its index on tc0.
    """
    tc0,Ct,Rz=X

    Af=Ct[0]-Ct[1]      # fast (unresolved) amplitude from the initial drop
    As=1-Af
    Ct=Ct/As            # renormalize the slow part of the decay

    # Only fit the first half of the correlation function (better statistics),
    # and skip the t=0 point.
    n=int(np.size(Ct)/2)
    Rz=Rz[1:n,:]
    Ct=Ct[1:n]

    # Squared error of each candidate correlation time against the data
    err=np.sum((Rz-np.repeat(np.transpose([Ct]),np.shape(Rz)[1],axis=1))**2,axis=0)
    a=np.argmin(err)

    return Af,As,tc0[a],a


def ired2dist(data):
    """
    Convert iRED mode distributions back into per-bond distributions, weighting
    each mode's distribution by its eigenvalue and squared eigenvector element.
    (The last 2*rank+1 modes are the global-motion modes and are excluded.)
    """
    n=data.R.shape[0]
    n0=n-data.ired.get('n_added_vecs')
    ne=data.ired.get('rank')*2+1

    nd=data.R.shape[1]
    dist=np.zeros([n0,nd])

    for k in range(nd):
        lambda_dist=np.repeat([data.ired.get('lambda')[0:-ne]*data.R[0:-ne,k]],n0,axis=0)
        dist[:,k]=np.sum(lambda_dist*data.ired.get('m')[0:n0,0:-ne]**2,axis=1)

    return dist


def smooth(dist0,box_pts):
    """
    Boxcar-smooth each row of dist0 with a window of box_pts points
    ('same' mode, so edges are zero-padded). Returns a 2-D array.
    """
    dist0=np.atleast_2d(dist0)
    box=np.ones(box_pts)/box_pts

    dist=np.zeros(np.shape(dist0))
    for k in range(np.shape(dist0)[0]):
        # (a Savitzky-Golay filter was tried here previously; the plain
        # boxcar via np.convolve is what is used)
        dist[k,:]=np.convolve(dist0[k,:],box,mode='same')

    return dist
# pyDIFRATE/data/fitting.py -- fitting of data with detectors
# (GPLv3; Copyright 2021 Albert Smith-Penzel; contact
#  albert.smith-penzel@medizin.uni-leipzig.de)

import numpy as np
from scipy.optimize import lsq_linear as lsq
from scipy.stats import norm
import multiprocessing as mp


def fit_data(data,detect=None,bounds=True,ErrorAna=None,save_input=True,parallel=True,subS2=False,**kwargs):
    """
    Fit the experimental data in a data object with a set of detectors.

    detect    : detect object (default: data.detect)
    bounds    : constrain detector responses to the range of the detector
                sensitivities (bounded least squares); False uses the pinv.
    ErrorAna  : 'mc' or 'mcN' for Monte-Carlo error analysis with N
                repetitions (default 100); otherwise linear propagation.
    save_input: store the input data and the back-calculated data on output.
    parallel  : distribute the per-bond fits over processes.
    subS2     : subtract S2 from the data prior to fitting (only when S2 is
                not itself part of the detector analysis).
    kwargs    : 'Conf' (confidence level, default 0.68), 'n_cores'.

    Returns a new data object whose R are the detector responses.

    NOTE: subsequent fitting may fail because later fits attempt to re-use
    detectors resulting from the R2 exchange correction; an automatic
    mechanism to discard these in later fits is still missing.
    """
    from pyDIFRATE.r_class.detectors import detect as dt  # local: avoids import cycle at module load

    if detect is None:
        if data.detect is None:
            print('A detect object must be provided in the input or as part of the data object')
            return
        detect=data.detect

    if detect.r(bond=0) is None:
        print('First optimize a set of detectors for analysis')
        return

    nb=data.R.shape[0]      # number of bonds / data rows to fit

    "Output object: its sensitivities are the detectors used"
    out=data.__class__()
    out.sens=detect.copy()
    out.sens._disable()     # restrict further editing of the copied sensitivities

    "Drop the estimated R2-exchange column if present (work on shallow copy)"
    if hasattr(data.sens,'detect_par') and data.sens.detect_par['R2_ex_corr']:
        R=data.R.copy()[:,:-1]
        R_std=data.R_std.copy()[:,:-1]
        data=data.copy('shallow')   # replacing the matrices leaves the original untouched
        data.R=R
        data.R_std=R_std

    nd=detect.r(bond=0).shape[1]    # number of detectors
    out.R=np.zeros([nb,nd])

    "Error-analysis defaults"
    if ErrorAna is not None and ErrorAna.lower()[0:2]=='mc':
        nmc=int(ErrorAna[2:]) if len(ErrorAna)>2 else 100
    else:
        nmc=0

    conf=kwargs.get('Conf',0.68)
    out.conf=conf

    inclS2=detect.detect_par['inclS2']
    if data.S2 is not None and subS2 and not inclS2:
        print('Subtracting S2')
        subS2=True
    else:
        subS2=False

    if not bounds:
        "Unbounded fit: direct pseudo-inverse with linear error propagation"
        Y=list()
        for k in range(nb):
            r,R,_,_=fit_prep(k,data,detect,subS2)
            nstd=norm.ppf(1/2+conf/2)
            std=np.sqrt(np.sum(np.linalg.pinv(r)**2,axis=1))
            rho=np.dot(np.linalg.pinv(r),R)
            # (rho, std, lower, upper) -- consistent with para_fit's ordering;
            # lower and upper are symmetric here
            Y.append((rho,std,nstd*std,nstd*std))
    elif not parallel:
        "Series processing (only on specific user request)"
        Y=list()
        for k in range(nb):
            r,R,UB,LB=fit_prep(k,data,detect,subS2)
            Y.append(para_fit((r,R,LB,UB,conf,nmc)))
    else:
        "Parallel processing (default): build one argument tuple per bond"
        X0=list()
        for k in range(nb):
            r,R,UB,LB=fit_prep(k,data,detect,subS2)
            X0.append((r,R,LB,UB,conf,nmc))

        nc=mp.cpu_count()
        if 'n_cores' in kwargs:
            nc=np.min([kwargs.get('n_cores'),nc])

        with mp.Pool(processes=nc) as pool:
            Y=pool.map(para_fit,X0)

    Rc=np.zeros(data.R.shape)
    S2c=np.zeros(data.R.shape[0])

    out.R=np.zeros([nb,nd])
    out.R_std=np.zeros([nb,nd])
    out.R_l=np.zeros([nb,nd])
    out.R_u=np.zeros([nb,nd])
    for k in range(nb):
        out.R[k,:]=Y[k][0]
        out.R_std[k,:]=Y[k][1]
        out.R_l[k,:]=Y[k][2]
        out.R_u[k,:]=Y[k][3]
        if inclS2:
            # last element of the back-calculation is 1-S2
            R0in=np.concatenate((detect.R0in(k),[0]))
            Rc0=np.dot(detect.r(bond=k),out.R[k,:])+R0in
            Rc[k,:]=Rc0[:-1]
            S2c[k]=Rc0[-1]
        else:
            Rc[k,:]=np.dot(detect.r(bond=k),out.R[k,:])+detect.R0in(k)

    if save_input:
        out.Rc=Rc
        if inclS2:
            out.S2c=1-S2c

    out.sens.info.loc['stdev']=np.median(out.R_std,axis=0)

    if save_input:
        out.Rin=data.R
        out.Rin_std=data.R_std
        if inclS2:
            out.S2in=data.S2
            out.S2in_std=data.S2_std

    out.detect=dt(detect)

    out.ired=data.ired
    out.label=data.label

    out.chi2=np.sum((data.R-Rc)**2/(data.R_std**2),axis=1)

    return out


def fit_prep(k,data,detect,subS2):
    """
    Prepare bond k for fitting: error-normalize the data vector R and the
    detector matrix r (so the data has unit standard deviation), and derive
    upper/lower bounds from the detector sensitivities.

    Returns r,R,UB,LB.
    """
    rhoz=detect.rhoz(bond=k)
    UB=rhoz.max(axis=1)
    LB=rhoz.min(axis=1)
    r=detect.r(bond=k)

    if data.S2 is not None and detect.detect_par['inclS2']:
        # append 1-S2 as an extra data point
        R0=np.concatenate((data.R[k,:]-detect.R0in(k),[1-data.S2[k]]))
        Rstd=np.concatenate((data.R_std[k,:],[data.S2_std[k]]))
        R=R0/Rstd
    elif data.S2 is not None and subS2:
        Rstd=data.R_std[k,:]
        R=(data.R[k,:]-data.S2[k]-detect.R0in(k))/Rstd
    else:
        Rstd=data.R_std[k,:]
        R=(data.R[k,:]-detect.R0in(k))/Rstd

    r=r/np.repeat(np.transpose([Rstd]),r.shape[1],axis=1)

    return r,R,UB,LB


def para_fit(X):
    """
    Bounded least-squares fit of one bond (run in parallel across bonds).

    X=(r,R,LB,UB,conf,nmc): error-normalized detector matrix and data,
    lower/upper bounds, confidence level, and number of Monte-Carlo
    repetitions (0 -> linear propagation of error).

    Returns (rho,std,l,u): responses, standard deviation, and lower/upper
    confidence offsets.
    """
    Y=lsq(X[0],X[1],bounds=(X[2],X[3]))
    rho=Y['x']
    Rc=Y['fun']+X[1]    # back-calculated (fitted) data

    if X[5]==0:
        std=np.sqrt(np.sum(np.linalg.pinv(X[0])**2,axis=1))
        nstd=norm.ppf(1/2+X[4]/2)
        u=nstd*std
        l=nstd*std
    else:
        # BUG FIX: the number of Monte-Carlo repetitions must match the
        # confidence indices used below. Previously the loop ran X[5] times
        # while the indices were computed from nmc=max(X[5],ceil(2/conf)),
        # producing an IndexError whenever nmc exceeded X[5] (or when the
        # rounded upper index equaled the number of samples).
        nmc=int(max(X[5],np.ceil(2/X[4])))
        Y1=list()
        for _ in range(nmc):
            Y1.append(lsq(X[0],Rc+np.random.normal(size=X[1].shape))['x'])
        std=np.std(Y1,axis=0)
        Y1sort=np.sort(Y1,axis=0)
        in_l=int(np.round(nmc*(1/2-X[4]/2)))
        in_u=min(int(np.round(nmc*(1/2+X[4]/2))),nmc-1)  # clamp to a valid index
        l=rho-Y1sort[in_l]
        u=Y1sort[in_u]-rho

    return rho,std,l,u


def opt2dist(data,sens=None,parallel=True,return_dist=False,in_place=False,detect=None,**kwargs):
    """
    Force a data object to be consistent with a positive dynamics distribution.

    For each bond, optimizes a distribution that approximately yields the set
    of detector responses while being positive with integral 1 (or 1-S2 if S2
    is stored in data), then replaces the responses by the back-calculated
    values. The distribution itself is NOT a stable description of dynamics;
    its use only makes the detector responses more physically consistent.

    If the original detect object is provided, the data fit is recalculated.

    Returns 0, 1, or 2 values depending on return_dist and in_place.
    """
    nb=data.R.shape[0]

    S2=np.zeros(nb) if data.S2 is None else data.S2

    if sens is None:
        sens=data.sens

    "Per-bond argument tuples for the optimization"
    X=[(R,R_std,sens._rho(bond=k),S2r) for k,(R,R_std,S2r) in enumerate(zip(data.R,data.R_std,S2))]

    if parallel:
        nc=mp.cpu_count()
        if 'n_cores' in kwargs:
            nc=np.min([kwargs.get('n_cores'),nc])
        with mp.Pool(processes=nc) as pool:
            Y=pool.map(dist_opt,X)
    else:
        Y=[dist_opt(X0) for X0 in X]

    out=data if in_place else data.copy()   # may be the same object as data

    dist=list()
    for k,y in enumerate(Y):
        out.R[k]=y[0]
        dist.append(y[1])

    "Recalculate the data fit if a detect object is provided"
    if detect is not None:
        Rc=list()
        if detect.detect_par['inclS2']:
            for k in range(out.R.shape[0]):
                R0in=np.concatenate((detect.R0in(k),[0]))
                Rc.append((np.dot(detect.r(bond=k),out.R[k,:])+R0in)[:-1])
        else:
            for k in range(out.R.shape[0]):
                Rc.append(np.dot(detect.r(bond=k),out.R[k,:])+detect.R0in(k))
        out.Rc=np.array(Rc)

    if in_place and return_dist:
        return dist
    if in_place:
        return
    if return_dist:
        return (out,dist)
    return out


def dist_opt(X):
    """
    Optimize a positive distribution with integral 1-S2 that reproduces the
    detector responses.

    X=(R,R_std,rhoz,S2) (tuple input for use with Pool.map).
    Returns (Ropt,dist): back-calculated responses and the distribution.
    """
    R,R_std,rhoz,S2=X
    total=np.atleast_1d(1-S2)
    # The sum condition is enforced softly: an extra row of ones is appended
    # and its weight is increased tenfold until the integral matches.

    ntc=rhoz.shape[1]
    rhoz=np.concatenate((rhoz/np.repeat(np.atleast_2d(R_std).T,ntc,axis=1),
                         np.atleast_2d(np.ones(ntc))),axis=0)
    Rin=np.concatenate((R/R_std,total))

    dist=0
    # ROBUSTNESS FIX: iteration cap replaces a previously unbounded while
    # loop (which could spin forever if the sum condition never converged).
    for _ in range(100):
        if np.abs(np.sum(dist)-total)<=1e-3:    # sum condition satisfied
            break
        dist=lsq(rhoz,Rin,bounds=(0,1))['x']
        Rin[-1]=Rin[-1]*10
        rhoz[-1]=rhoz[-1]*10
    Ropt=np.dot(rhoz[:-1],dist)*R_std

    return Ropt,dist


def fit2tc(data,df=2,sens=None,z=None,Abounds=False):
    """
    Fit each set of detector responses to a single correlation time
    (mono-exponential fit).

    df=1: exp(-t/tc);  df=2: A*exp(-t/tc);  df=3: A*exp(-t/tc)+C
    (for df=3, C is not calculated: the last column of the sensitivities is
    subtracted out instead; its predicted value is still included in Rc).

    sens may be a sensitivity object or a numpy array; in the latter case the
    log-correlation-time axis z must also be given. Abounds=True forces A into
    [0,1] by penalizing fits with A>1.

    z,A,err,Rc=fit2tc(data,df=2,sens=None,z=None,Abounds=False)
    """
    if sens is None:
        sens=data.sens

    z0=sens.z() if z is None else z

    err=list()
    z=list()
    A=list()
    rho_c=list()
    for k,(rho,rho_std) in enumerate(zip(data.R,data.R_std)):
        rhoz=sens.rhoz(k) if hasattr(sens,'rhoz') else sens

        if df==3:
            # subtract out the long-time limit (last column) instead of fitting C
            y=(rho-rhoz[:,-1])/rho_std
            x=((rhoz.T-rhoz[:,-1])/rho_std).T
            x=x[:,:-1]
        else:
            y=rho/rho_std
            x=(rhoz.T/rho_std).T

        if df!=1:
            # closed-form least-squares amplitude for every candidate tc
            beta=(((1/(x**2).sum(axis=0))*x).T*y).sum(axis=1)
        else:
            beta=1

        err0=(((beta*x).T-y)**2).sum(axis=1)
        if Abounds:
            err0[beta>1]=1e10   # effectively exclude A>1

        i=np.argmin(err0)
        err.append(err0[i])
        z.append(z0[i])
        if df==2:
            rho_c.append(rhoz[:,i]*beta[i])
            A.append(beta[i])
        elif df==3:
            rho_c.append((rhoz[:,i]-rhoz[:,-1])*beta[i]+rhoz[:,-1])
            A.append(beta[i])
        else:
            rho_c.append(rhoz[:,i])
            A.append(1)

    return np.array(z),np.array(A),np.array(err),np.array(rho_c)
+ +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Wed Jul 31 11:56:56 2019 + +@author: albertsmith + +""" + +from pyDIFRATE.data.load_nmr import load_NMR +from pyDIFRATE.data.load_nmr import load_NMR_info + +from pyDIFRATE.data.bin_in_out import save_DIFRATE +from pyDIFRATE.data.bin_in_out import load_DIFRATE diff --git a/pyDIFRATE/data/load_nmr.py b/pyDIFRATE/data/load_nmr.py new file mode 100644 index 0000000..ccf4326 --- /dev/null +++ b/pyDIFRATE/data/load_nmr.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
# pyDIFRATE/data/load_nmr.py -- functions for loading NMR data from text files
# (GPLv3; Copyright 2021 Albert Smith-Penzel; contact
#  albert.smith-penzel@medizin.uni-leipzig.de)

import numpy as np
import multiprocessing as mp


def load_NMR(filename):
    """
    Load NMR data and experimental information from a text file.

        data = load_NMR(filename)

    The file may contain 'data', 'info', and 'model' sections; a section ends
    at the next keyword or at end of file.

    data  : sub-sections 'R', 'Rstd' (or 'R_std'), 'S2', 'S2_std', 'label'.
            'R'/'Rstd' are matrices (one column per experiment); multiple
            'R'/'Rstd' sub-sections are appended as new experiments. 'label'
            is a single column of residue/bond names (numbers or strings).
    info  : experiment parameters ('Type','v0','v1','vr','offset','stdev',
            'Nuc','Nuc1','dXY','CSA','QC','eta','theta'), each parameter name
            on one line and its value(s) on the next. Repeating a parameter
            starts a new set of experiments.
    model : first line names the model (e.g. 'IsoDif'); subsequent line pairs
            give parameter name and value. Only isotropic models can be
            entered this way (anisotropic models need a loaded molecule).

    Example:

        data
        R
        1.6337 1.6337 3.4796 2.1221
        ...
        Rstd
        0.0327 0.0327 0.0696 0.0424
        ...
        label
        gamma
        ...
        info
        Type
        R1
        v0
        500 800
        model
        IsoDif
        tM
        4.84e-9
    """
    # Local imports: avoid a hard dependency chain at module import time
    from pyDIFRATE.r_class.sens import rates
    from pyDIFRATE.r_class.detectors import detect
    from pyDIFRATE.data import data_class as dc

    keys0=np.array(['info','data','model'])

    data=dc.data()
    data.sens=rates()

    rate_args=list()
    mdl_args=list()

    with open(filename,'r') as f:
        while not eof(f):
            a=f.readline()
            if a.strip().lower()=='info':
                rate_args=read_info(f,keys0)
                for k in rate_args:
                    data.sens.new_exp(**k)
            elif a.strip().lower()=='data':
                R,Rstd,label,S2,S2_std=read_data(f,keys0)
                data.R=R
                data.R_std=Rstd
                data.label=label
                if S2 is not None:
                    data.S2=S2
                    data.S2_std=S2_std
            elif a.strip().lower()=='model':
                mdl_args.append(read_model(f,keys0))

    mdl=False
    for mdls in mdl_args:
        data.sens.new_mdl(**mdls)
        mdl=True

    data.detect=detect(data.sens,mdl_num=0) if mdl else detect(data.sens)

    if data.sens.info.shape[1]!=0:
        if data.sens.info.shape[1]!=data.R.shape[1]:
            print('Warning: number of data sets does not match number of experiments in info')
        else:
            # fill in missing per-experiment standard deviations from the data
            for k in range(data.sens.info.shape[1]):
                if data.sens.info.loc['stdev'][k] is None:
                    data.sens.info.loc['stdev'][k]=np.median(data.R_std[:,k]/data.R[:,k])

    return data


def load_NMR_info(filename):
    """
    Load only the description of NMR experiments from a file (same format as
    load_NMR). Simply calls load_NMR and extracts the 'sens' object.

        rates = load_NMR_info(filename)
    """
    return load_NMR(filename).sens


def read_data(f,keys0):
    """
    Read a 'data' section out of a file (called by load_NMR).

    Returns R,Rstd,label,S2,S2_std (any of which may be None).
    """
    cont=True
    R=list()
    Rstd=list()
    S2=list()
    S2_std=list()
    label=None

    keys1=['R','Rstd','label','R_std','S2','S2_std']

    while not(eof(f)) and cont:
        pos=f.tell()
        a=f.readline()

        if np.isin(a.strip(),keys1):
            if a.strip()=='R':
                R.append(read_lines(f,np.concatenate((keys0,keys1))))
            elif a.strip()=='Rstd' or a.strip()=='R_std':
                Rstd.append(read_lines(f,np.concatenate((keys0,keys1))))
            elif a.strip()=='label':
                label=read_label(f,np.concatenate((keys0,keys1)))
            elif a.strip()=='S2':
                S2.append(read_lines(f,np.concatenate((keys0,keys1))))
            elif a.strip()=='S2_std':
                S2_std.append(read_lines(f,np.concatenate((keys0,keys1))))
        elif np.isin(a.strip(),keys0):
            cont=False
            f.seek(pos)     # put the keyword back for the caller

    if np.size(R)!=0:
        R=np.concatenate(R,axis=1)
    else:
        print('Warning: no data found in data entry')
        # BUG FIX: the caller unpacks five values; previously only two were
        # returned here, raising a ValueError.
        return None,None,None,None,None

    Rstd=np.concatenate(Rstd,axis=1) if np.size(Rstd)!=0 else None

    S2=np.atleast_1d(np.concatenate(S2,axis=0).squeeze()) if len(S2)!=0 else None
    S2_std=np.atleast_1d(np.concatenate(S2_std,axis=0).squeeze()) if len(S2_std)!=0 else None

    if Rstd is None:
        print('Warning: Standard deviations are not provided')
        print('Standard deviations set equal to 1/10 of the median of the rate constants')
        ne=R.shape[0]
        # BUG FIX: the /10 promised above was missing in this branch
        Rstd=np.repeat([np.median(R,axis=0)/10],ne,axis=0)
    elif np.any(R.shape!=Rstd.shape):
        print('Warning: Shape of standard deviation does not match shape of rate constants')
        print('Standard deviations set equal to 1/10 of the median of the rate constants')
        ne=R.shape[0]
        Rstd=np.repeat([np.median(R,axis=0)]/10,ne,axis=0)

    if (S2 is not None and S2_std is None) or (S2 is not None and S2.size!=S2_std.size):
        print('Warning: Shape of S2 does not match the shape of S2_std')
        print('Standard deviations set to 0.01')
        S2_std=np.ones(S2.shape)*0.01

    return R,Rstd,label,S2,S2_std


def read_lines(f,keys0):
    """
    Read consecutive numeric rows from the file until a keyword in keys0 is
    found (the keyword line is pushed back). Returns a 2-D float array, or
    None on a conversion error or inconsistent row lengths.
    """
    R=list()
    ne=0
    cont=True

    while not(eof(f)) and cont:
        pos=f.tell()
        a=f.readline()
        if np.isin(a.strip(),keys0):
            cont=False
            f.seek(pos)
        else:
            try:
                R0=np.atleast_1d(a.strip().split()).astype('float')
            except:
                print('Warning: Could not convert data in file into float')
                return None
            if R0.size>0:
                if ne==0:
                    ne=R0.size
                elif ne!=R0.size:
                    print('Inconsistent row lengths, data input aborted')
                    return None
                R.append(R0)

    return np.atleast_2d(R)


def read_label(f,keys0):
    """
    Read labels (one per line) until a keyword in keys0. Tries to convert the
    labels to float; returns strings if that fails.
    """
    label=list()
    cont=True
    while not(eof(f)) and cont:
        pos=f.tell()
        a=f.readline()
        if np.isin(a.strip(),keys0):
            cont=False
            f.seek(pos)
        else:
            label.append(a.strip())

    label=np.atleast_1d(label)

    try:
        label=label.astype('float')
    except:
        pass

    return label


def read_model(f,keys0):
    """
    Read the description of a model: the first line names the model, then
    name/value line pairs follow until a keyword in keys0. Returns a dict.
    """
    mdl_pars=dict()
    cont=True

    a=f.readline()
    mdl_pars.update({'Model':a.strip()})

    while not(eof(f)) and cont:
        pos=f.tell()
        a=f.readline()
        if np.isin(a.strip(),keys0):
            cont=False
            f.seek(pos)
        elif a.strip():
            name=a.strip()
            a=f.readline()
            val=np.atleast_1d(a.strip().split())
            try:
                val=val.astype('float')
            except:
                pass
            if val.size==1:
                val=val[0]
            mdl_pars.update({name:val})

    return mdl_pars


def read_info(f,keys0):
    """
    Read experiment information from the file (called by load_NMR). A repeated
    parameter name (usually 'Type') starts a new set of experiments. Returns a
    list of keyword dicts, one per experiment set.
    """
    from pyDIFRATE.r_class.sens import rates   # local: only needed for the keyword list

    temp=rates()
    keywords=np.concatenate((temp.retExper(),temp.retSpinSys()))   # allowed parameter names

    rate_args=list()
    args=dict()
    used=list()
    cont=True
    while not eof(f) and cont:
        pos=f.tell()
        a=f.readline()

        if np.isin(a.strip(),keywords):
            name=a.strip()
            if name in used:    # repeated parameter: start a new experiment set
                rate_args.append(args)
                used=[name]
                args=dict()
            else:
                used.append(name)

            val=f.readline().strip().split()
            try:
                val=np.array(val).astype('float')
            except:
                pass

            args.update({name:val})

        elif np.isin(a.strip().lower(),keys0):
            cont=False
            f.seek(pos)

    if args:
        rate_args.append(args)

    return rate_args


def eof(f):
    """Return True if the file object f is at end of file (pointer preserved)."""
    pos=f.tell()
    f.readline()
    if pos==f.tell():       # position unchanged: nothing left to read
        return True
    f.seek(pos)             # otherwise reset the pointer
    return False


# ---- pyDIFRATE/iRED/Ct_ana.py (beginning): basic correlation-function ----
# ---- analysis without iRED, for comparison to iRED results            ----
# (get_vec/align_vec come from pyDIFRATE.iRED.iRED_ana; data from
#  pyDIFRATE.data.data_class; Ct_parfun/S2calc are defined further below)

def Ct2data(molecule,**kwargs):
    """Create a data object from directly calculated correlation functions."""
    in1=np.arange(molecule.sel1.atoms.n_atoms) if molecule.sel1in is None else molecule.sel1in
    in2=np.arange(molecule.sel2.atoms.n_atoms) if molecule.sel2in is None else molecule.sel2in

    vec=get_vec(molecule.sel1,molecule.sel2,in1=in1,in2=in2,**kwargs)

    if 'align' in kwargs and kwargs.get('align').lower()[0]=='y':
        vec=align_vec(vec)

    ct=Ct(vec,**kwargs)
    S2=S2calc(vec)

    return data(molecule=molecule,Ct=ct,S2=S2)


def Ct(vec,**kwargs):
    """
    Calculate correlation functions for all bonds in vec (dict with 'X','Y',
    'Z' of shape (n_bonds,n_time) and time axis 't'). 'dt' overrides the time
    step; 'parallel'='n' forces series processing; 'n_cores' limits processes.
    Returns {'t':t,'Ct':array}.
    """
    if 'dt' in kwargs:
        dt=kwargs.get('dt')
        nt=vec.get('t').size
        t=np.arange(0,dt*nt,dt)
    else:
        t=vec.get('t')

    nb=vec.get('X').shape[0]

    "One (3,nt) array per bond for the per-bond correlation function"
    v1=list()
    for k in range(nb):
        v1.append(np.array([vec.get('X')[k,:],vec.get('Y')[k,:],vec.get('Z')[k,:]]))

    if 'parallel' in kwargs and kwargs.get('parallel').lower()[0]=='n':
        ct0=list()
        for k in range(nb):
            ct0.append(Ct_parfun(v1[k]))
    else:
        nc=mp.cpu_count()
        if 'n_cores' in kwargs:
            nc=np.min([kwargs.get('n_cores'),nc])
        with mp.Pool(processes=nc) as pool:
            ct0=pool.map(Ct_parfun,v1)

    return {'t':t,'Ct':np.array(ct0)}
# ---- Ct_ana.py helpers: per-bond correlation functions and order parameters ----

def Ct_parfun(v):
    """
    Correlation function of a single bond. v is a (3,nt) array of unit-vector
    components; returns C(t) computed from the rank-2 Legendre polynomial
    P2(cos(theta)) averaged over all time-point pairs of each lag.
    """
    nt=np.shape(v)[1]
    ct=np.zeros(nt)
    for lag in range(nt):
        ref=np.repeat(np.transpose([v[:,lag]]),nt-lag,axis=1)
        p2=(3*np.sum(ref*v[:,lag:],axis=0)**2-1)/2
        if lag==0:
            ct+=p2
        else:
            ct[:-lag]+=p2
    # normalize by the number of pairs contributing to each lag
    return ct/np.arange(nt,0,-1)


def Ct_kj(vec,bond1,bond2,**kwargs):
    """
    Cross-correlation function between two bonds, evaluated on a two-sided
    time axis (negative and positive lags). 'dt' in kwargs overrides the
    time step. Returns {'t':t,'Ct':Ct}.
    """
    if 'dt' in kwargs:
        step=kwargs.get('dt')
        npts=vec.get('t').size
        t=np.arange(0,step*npts,step)
    else:
        t=vec.get('t')
    t=np.concatenate((-t[-1:0:-1],t))   # mirror to negative lags

    va=np.array([vec.get('X')[bond1],vec.get('Y')[bond1],vec.get('Z')[bond1]])
    vb=np.array([vec.get('X')[bond2],vec.get('Y')[bond2],vec.get('Z')[bond2]])

    npts=np.shape(va)[1]
    ct=np.zeros([2*npts-1])
    for k in range(npts):
        ref=np.repeat(np.transpose([va[:,k]]),npts,axis=1)
        ct[npts-k-1:2*npts-k-1]+=(3*np.sum(ref*vb,axis=0)**2-1)/2

    # pair counts per lag: 1..npts..1 (triangular)
    counts=np.arange(1,npts+1)
    counts=np.concatenate((counts,counts[-2::-1]))

    return {'t':t,'Ct':ct/counts}


def S2calc(vec):
    """
    Order parameters from the time-averaged products of the direction
    cosines: S2 = 3/2*sum_ab <v_a v_b>^2 - 1/2, per bond.
    """
    comps=[vec.get('X'),vec.get('Y'),vec.get('Z')]
    S2=np.zeros(np.shape(vec.get('X'))[0])
    for a in comps:
        for b in comps:
            S2+=np.mean(a*b,axis=1)**2

    return 3/2*S2-1/2
+ +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Wed Aug 7 17:02:56 2019 + +@author: albertsmith +""" + +""" +Functions for accelarated calculation of correlation functions + +Some notes on these functions: Let's take an example. Say we have a trajectory, +sampled every 1 ps, out to 500 ns (500,000 pts). The first 1000 points each are +calculated from about 5e5 pairs of time points. However, we learn about +approximately 3 orders of magnitude of dynamics from those first 1000 points. By +comparison, starting from 250 ns, the following 1000 pts of the correlation +function are calculated from 2.5e5 time point pairs (similar accuracy). However, +there is virtually no new information between the time point at 250 ns and +251 ns. There is almost no decay, because all the correlation times comparable to +1 ns have already decayed. It stands to reason that we don't really need so +many time points from later in the correlation function. In fact, it would make +sense to only calculate the correlation function on a log-spaced time axis. + +This is problematic, however, because we still need to load all time points to +get the log-spaced time points. On the other hand, we could load log-spaced time +points from the trajectory, and calculate the correlation function for all +possible time points available based on the spacing of the trajectory. 
Then, +long correlation times will still be common in the correlation function, but they +will not be accurately calculated. Hopefully, we can still successfully fit them +with detectors, and recover the information based on the number of time points +instead of the accuracy +""" + +import numpy as np +#import multiprocessing as mp +#import os +#os.chdir('../data') +from pyDIFRATE.data.data_class import data +#os.chdir('../iRED') +#from MDAnalysis.analysis.align import rotation_matrix +#from psutil import virtual_memory +#from fast_index import trunc_t_axis,get_count +from pyDIFRATE.iRED.fast_funs import S2calc,Ct,get_trunc_vec,align_mean +from pyDIFRATE.iRED.fast_index import trunc_t_axis + +def Ct2data(molecule,n=100,nr=10,**kwargs): + """ + data=Ct2data(molecule,n=100,nr=10,**kwargs) + Takes a molecule object (generated from an MD trajectory), and creates a + data object, where the data contains elements of the correlation function, + where the trajectory has been sparsely sampled (according to arguments n + and nr) + """ + + mol=molecule + if 'nt' in kwargs: + nt=np.min([mol.mda_object.trajectory.n_frames,kwargs.get('nt')]) + else: + nt=mol.mda_object.trajectory.n_frames + + index=trunc_t_axis(nt,n,nr) + + vec=get_trunc_vec(mol,index,**kwargs) + + Ctdata=vec2data(vec,molecule=mol,**kwargs) + return Ctdata + +def vec2data(vec,**kwargs): + """ + Takes a vector and creates the corresponding data object + + data=vec2data(vec,**kwargs) + """ + + if 'align_iRED' in kwargs and kwargs.get('align_iRED'): + vec=align_mean(vec) + + ct=Ct(vec,**kwargs) + S2=S2calc(vec) + Ctdata=data(Ct=ct,S2=S2,**kwargs) + + return Ctdata + +def Ct_S2(molecule,n=100,nr=10,**kwargs): + nt=molecule.mda_object.trajectory.n_frames + + index=trunc_t_axis(nt,n,nr) + + vec=get_trunc_vec(molecule,index,**kwargs) + + ct=Ct(vec,**kwargs) + + S2=S2calc(vec) + + return ct,S2 + +#def get_trunc_vec(molecule,index,**kwargs): +# """ +# vec=get_trunc_vec(molecule,index) +# """ +# +# if molecule._vf is not 
None: +# vf=molecule.vec_fun +# special=True +# else: +# sel1=molecule.sel1 +# sel2=molecule.sel2 +# sel1in=molecule.sel1in +# sel2in=molecule.sel2in +# +# "Indices to allow using the same atom more than once" +# if sel1in is None: +# sel1in=np.arange(sel1.n_atoms) +# if sel2in is None: +# sel2in=np.arange(sel2.n_atoms) +# +# if sel1.universe!=sel2.universe: +# print('sel1 and sel2 must be generated from the same MDAnalysis universe') +# return +# +# if np.size(sel1in)!=np.size(sel2in): +# print('sel1 and sel2 or sel1in and sel2in must have the same number of atoms') +# return +# special=False +# +# nt=np.size(index) #Number of time steps +# if special: +# na=vf().shape[1] +# else: +# na=np.size(sel1in) #Number of vectors +# +# X=np.zeros([nt,na]) +# Y=np.zeros([nt,na]) +# Z=np.zeros([nt,na]) +# t=np.zeros([nt]) +# +# uni=molecule.mda_object +# traj=uni.trajectory +# if 'dt' in kwargs: +# dt=kwargs.get('dt') +# else: +# dt=traj.dt/1e3 +## if traj.units['time']=='ps': #Convert time units into ns +## dt=dt/1e3 +## elif traj.units['time']=='ms': +## dt=dt*1e3 +# +# +# ts=iter(traj) +# for k,t0 in enumerate(index): +# try: +# traj[t0] #This jumps to time point t in the trajectory +# except: +# "Maybe traj[t] doesn't work, so we skip through the iterable manually" +# if k!=0: +# for _ in range(index[k]-index[k-1]): +# next(ts,None) +# +# if special: +# X0,Y0,Z0=vf() +# else: +# pos=sel1[sel1in].positions-sel2[sel2in].positions +# # pos=sel1.positions[sel1in]-sel2.positions[sel2in] +# X0=pos[:,0] +# Y0=pos[:,1] +# Z0=pos[:,2] +# +# length=np.sqrt(X0**2+Y0**2+Z0**2) +# X[k,:]=np.divide(X0,length) +# Y[k,:]=np.divide(Y0,length) +# Z[k,:]=np.divide(Z0,length) +# t[k]=dt*t0 +# if k%int(nt/100)==0 or k+1==nt: +# printProgressBar(k+1, nt, prefix = 'Loading:', suffix = 'Complete', length = 50) +# +# vec={'X':X,'Y':Y,'Z':Z,'t':t,'index':index} +# +# if not('alignCA' in kwargs and kwargs.get('alignCA').lower()[0]=='n'): +# "Default is always to align the molecule (usually with 
CA)" +# vec=align(vec,uni,**kwargs) +# +# return vec + + +#def Ct(vec,**kwargs): +# if'n_cores' in kwargs: +# nc=np.min([kwargs.get('n_cores'),nc]) +# else: +# nc=mp.cpu_count() +# +# nb=vec['X'].shape[1] +# +# v0=list() #Store date required for each core +# for k in range(nc): +# v0.append((vec['X'][:,range(k,nb,nc)],vec['Y'][:,range(k,nb,nc)],vec['Z'][:,range(k,nb,nc)],vec['index'])) +# +# if 'parallel' in kwargs and kwargs.get('parallel').lower()[0]=='n': +# ct0=list() +# for v in v0: +# ct0.append(Ct_par(v)) +# else: +# with mp.Pool(processes=nc) as pool: +# ct0=pool.map(Ct_par,v0) +# +# +# "Get the count of number of averages" +# index=vec['index'] +# N=get_count(index) +# +# i=N!=0 +# N=N[i] +# +# ct=np.zeros([np.size(N),nb]) +# N0=N +# +# for k in range(nc): +# N=np.repeat([N0],np.shape(ct0[k])[1],axis=0).T +# ct[:,range(k,nb,nc)]=np.divide(ct0[k][i],N) +# +# +# dt=(vec['t'][1]-vec['t'][0])/(vec['index'][1]-vec['index'][0]) +# t=np.linspace(0,dt*np.max(index),index[-1]+1) +# t=t[i] +# +# Ct={'t':t,'Ct':ct.T,'N':N0,'index':index} +# +# return Ct +# +#def Ct_par(v): +# index=v[3] +# X=v[0] +# Y=v[1] +# Z=v[2] +# +# n=np.size(index) +# c=np.zeros([np.max(index)+1,np.shape(X)[1]]) +# +# for k in range(n): +# c[index[k:]-index[k]]+=(3*(np.multiply(X[k:],X[k])+np.multiply(Y[k:],Y[k])\ +# +np.multiply(Z[k:],Z[k]))**2-1)/2 +## if k%int(n/100)==0 or k+1==n: +## printProgressBar(k+1, n, prefix = 'C(t) calc:', suffix = 'Complete', length = 50) +# return c +# +#def align(vec0,uni,**kwargs): +# """ +# Removes overall rotation from a trajectory, by aligning to a set of reference +# atoms. Default is protein backbone CA. +# """ +# if 'align_ref' in kwargs: +# uni0=uni.select_atoms(kwargs.get('align_ref')) +# else: +# uni0=uni.select_atoms('name CA') +# if uni0.n_atoms==0: +# uni0=uni.select_atoms('name C11') #Not sure about this. Alignment for lipids? 
+# if uni0.n_atoms==0: +# uni0=uni.select_atoms('name *') +# +# ref0=uni0.positions-uni0.atoms.center_of_mass() +# +# SZ=np.shape(vec0.get('X')) +# index=vec0['index'] +# "Pre-allocate the direction vector" +# vec={'X':np.zeros(SZ),'Y':np.zeros(SZ),'Z':np.zeros(SZ),'t':vec0.get('t'),'index':index} +# +# nt=vec0['t'].size +# +# +# traj=uni.trajectory +# ts=iter(traj) +# for k,t0 in enumerate(index): +# try: +# traj[t0] #This jumps to time point t in the trajectory +# except: +# "Maybe traj[t] doesn't work, so we skip through the iterable manually" +# if k!=0: +# for _ in range(index[k]-index[k-1]): +# next(ts,None) +# "CA positions" +# pos=uni0.positions-uni0.atoms.center_of_mass() +# +# "Rotation matrix for this time point" +# R,_=rotation_matrix(pos,ref0) +# vec['X'][k,:]=vec0['X'][k,:]*R[0,0]+vec0['Y'][k,:]*R[0,1]+vec0['Z'][k,:]*R[0,2] +# vec['Y'][k,:]=vec0['X'][k,:]*R[1,0]+vec0['Y'][k,:]*R[1,1]+vec0['Z'][k,:]*R[1,2] +# vec['Z'][k,:]=vec0['X'][k,:]*R[2,0]+vec0['Y'][k,:]*R[2,1]+vec0['Z'][k,:]*R[2,2] +# if k%int(np.size(index)/100)==0 or k+1==nt: +# printProgressBar(k+1, np.size(index), prefix = 'Aligning:', suffix = 'Complete', length = 50) +# +# return vec +# +#def S2calc(vec): +# v=[vec.get('X'),vec.get('Y'),vec.get('Z')] +# S2=np.zeros(np.shape(vec.get('X'))[1]) +# for k in v: +# for m in v: +# S2+=np.mean(k*m,axis=0)**2 +# +# S2=3/2*S2-1/2 +# +# return S2 +# +#def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'): +# """ +# Call in a loop to create terminal progress bar +# @params: +# iteration - Required : current iteration (Int) +# total - Required : total iterations (Int) +# prefix - Optional : prefix string (Str) +# suffix - Optional : suffix string (Str) +# decimals - Optional : positive number of decimals in percent complete (Int) +# length - Optional : character length of bar (Int) +# fill - Optional : bar fill character (Str) +# """ +# percent = ("{0:." 
+ str(decimals) + "f}").format(100 * (iteration / float(total))) +# filledLength = int(length * iteration // total) +# bar = fill * filledLength + '-' * (length - filledLength) +# print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r') +# # Print New Line on Complete +# if iteration == total: +# print() +# diff --git "a/pyDIFRATE/iRED/Icon\r" "b/pyDIFRATE/iRED/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/iRED/__init__.py b/pyDIFRATE/iRED/__init__.py new file mode 100644 index 0000000..e69de29 diff --git "a/pyDIFRATE/iRED/__pycache__/Icon\r" "b/pyDIFRATE/iRED/__pycache__/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/pyDIFRATE/iRED/fast_funs.py b/pyDIFRATE/iRED/fast_funs.py new file mode 100644 index 0000000..e3452b6 --- /dev/null +++ b/pyDIFRATE/iRED/fast_funs.py @@ -0,0 +1,590 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Wed Aug 28 10:24:19 2019 + +@author: albertsmith +""" +#import os +import numpy as np +import multiprocessing as mp +from pyDIFRATE.iRED.parCt import par_class as pct +from pyDIFRATE.iRED.fast_index import get_count +#os.chdir('../Struct') +import pyDIFRATE.Struct.vf_tools as vft +#os.chdir('../iRED') + +#%% Estimate the order parameter +def S2calc(vec): + """ + Calculates an estimate of the order parameter, according to + 3/2*(^2+^2+^2+2^2+2^2+2^2)-1/2 with averages performed + over the complete vector + + S2=S2calc(vec) + """ + if 'Y' in vec.keys(): + v=np.array([vec.get('X'),vec.get('Y'),vec.get('Z')]) + SZ=vec['X'].shape[1] + + else: + v=np.array([vec['Z']['X'],vec['Z']['Y'],vec['Z']['Z']]) + SZ=vec['Z']['X'].shape[1] + v=v/np.sqrt((v**2).sum(axis=0)) + + S2=np.zeros(SZ) + for k in v: + for m in v: + S2+=np.mean(k*m,axis=0)**2 + + S2=3/2*S2-1/2 + return S2 + +#%% Returns the correlation function defined by vec +def Ct(vec,**kwargs): + """ + Calculates the correlation functions for vectors with unequal spacing in the + time axis. By default, uses parallel processing (using all available cores) + Optional arguments are parallel, which determines whether or not to use + parallel processing ('y'/'n'), or optionally one may simply set parallel to + the desired number of cores (parallel=4, for example) + """ + + if 'parallel' in kwargs: + p=kwargs.get('parallel') + if isinstance(p,str) and p.lower()[0]=='n': + nc=1 + elif isinstance(p,int): + nc=p if p>0 else 1 #Check the # cores is bigger than 0 + else: #Default use parallel processing + nc=mp.cpu_count() #Use all available cores + else: + nc=mp.cpu_count() + + + if 'n_cores' in kwargs: + nc=np.min([kwargs.get('n_cores'),nc]) + print('Warning: n_cores argument will be removed in a later version. set parallel=n_cores') + "Optional second argument. 
Not documented- possibly will be removed" + + + if 'Y' not in vec.keys(): + nc=1 + print('Only series processing if eta is non-zero') + nb=vec['X']['X'].shape[1] + else: + nb=vec['X'].shape[1] + + if nc==1: + "Might later replace this with the code in place" + "But, should keep some variant- parallel version isn't necessarily stable" + v0=list() #Store date required for each core + k=0 + if 'Y' in vec.keys(): + v0.append((vec['X'][:,range(k,nb,nc)],vec['Y'][:,range(k,nb,nc)],vec['Z'][:,range(k,nb,nc)],vec['index'])) + else: + v0.append((vec['X']['X'][:,range(k,nb,nc)],vec['X']['Y'][:,range(k,nb,nc)],vec['X']['Z'][:,range(k,nb,nc)],\ + vec['Z']['X'][:,range(k,nb,nc)],vec['Z']['Y'][:,range(k,nb,nc)],vec['Z']['Z'][:,range(k,nb,nc)],\ + vec['eta'],vec['index'])) + if nc==1: #Series processing + ct0=list() + for v in v0: + ct0.append(Ct_par(v)) + else: + ref_num,v0=pct.store_vecs(vec,nc) + print('Success') + try: + with mp.Pool(processes=nc) as pool: +# ct=pool.map(ctpar.Ct,v0) + ct=pool.map(pct.Ct,v0) + ct=pct.returnCt(ref_num,ct) + finally: + pct.clear_data(ref_num) + + "Get the count of number of averages" + index=vec['index'] + N=get_count(index) + + "i finds all times points for which we have some averaging of the correlation function" + i=N!=0 + N=N[i] + + + N0=N + + if nc==1: + ct=np.zeros([np.size(N0),nb]) + for k in range(nc): + N=np.repeat([N0],np.shape(ct0[k])[1],axis=0).T #N with same shape as ct + ct[:,range(k,nb,nc)]=np.divide(ct0[k][i],N) #Normalize correlation function based on how many averages + + + dt=(vec['t'][1]-vec['t'][0])/(index[1]-index[0]) + t=np.linspace(0,dt*np.max(index),index[-1]+1) + t=t[i] + + Ct={'t':t,'Ct':ct.T,'N':N0,'index':index} + + return Ct + + +#%% Parallel function to calculate correlation functions +def Ct_par(v): + if len(v)==8: + X_X=v[0] + Y_X=v[1] + Z_X=v[2] + X_Z=v[3] + Y_Z=v[4] + Z_Z=v[5] + eta=v[6] + index=v[7] + + n=np.size(index) + c=np.zeros([np.max(index)+1,np.shape(X_X)[1]]) + + for k in range(n): + 
Cb2=(np.multiply(X_Z[k:],X_Z[k])+np.multiply(Y_Z[k:],Y_Z[k])+np.multiply(Z_Z[k:],Z_Z[k]))**2 + Ca2Sb2=(np.multiply(X_Z[k:],X_X[k])+np.multiply(Y_Z[k:],Y_X[k])+np.multiply(Z_Z[k:],Z_X[k]))**2 +# c[index[k:]-index[k]]+=Cb2*(3-eta)/2-eta*Ca2Sb2+(eta-1)/2 + c[index[k:]-index[k]]+=(3-eta)/2*Cb2-eta*Ca2Sb2+(eta-1)/2 + return c + else: + index=v[3] + X=v[0] + Y=v[1] + Z=v[2] + + n=np.size(index) + c=np.zeros([np.max(index)+1,np.shape(X)[1]]) + + for k in range(n): + c[index[k:]-index[k]]+=(3*(np.multiply(X[k:],X[k])+np.multiply(Y[k:],Y[k])\ + +np.multiply(Z[k:],Z[k]))**2-1)/2 + # if k%int(n/100)==0 or k+1==n: + # printProgressBar(k+1, n, prefix = 'C(t) calc:', suffix = 'Complete', length = 50) + return c + +#%% Load in the truncated vectors from the trajectory +def get_trunc_vec(molecule,index,**kwargs): + """ + vec=get_trunc_vec(molecule,index,**kwargs) + + Returns time-dependent vectors defined in the molecule object. Usually this + is vectors defined by atom selections in sel1 and sel2 (and possibly indexed + by sel1in and sel2in). Alternatively, if function-defined vectors are stored + in molecule._vf (molecule.vec_fun() returns vectors), then these will be + returned instead + + One must provide the molecule object, and an index determining which time + points to analyze. + + Optional arguments are dt, which re-defines the time step + (vs. the time step returned by MDAnalysis), and align, which can be set to + 'y' and will remove overall motion by aligning all frames to a reference + set of atoms. Default is CA in proteins. To change default, provide a second + argument, align_ref, which is an MDAnalysis selection string. This string + will select from all atoms in the trajectory, and align them. + + + """ + + + + if molecule._vf is not None and False: #De-activate this functionality. 
Replace with frames + vf=molecule.vec_fun + special=True + else: + sel1=molecule.sel1 + sel2=molecule.sel2 + sel1in=molecule.sel1in + sel2in=molecule.sel2in + + "Indices to allow using the same atom more than once" + if sel1in is None: + sel1in=np.arange(sel1.n_atoms) + if sel2in is None: + sel2in=np.arange(sel2.n_atoms) + + if sel1.universe!=sel2.universe: + print('sel1 and sel2 must be generated from the same MDAnalysis universe') + return + + if np.size(sel1in)!=np.size(sel2in): + print('sel1 and sel2 or sel1in and sel2in must have the same number of atoms') + return + special=False + + nt=np.size(index) #Number of time steps + if special: + na=vf().shape[1] + else: + na=np.size(sel1in) #Number of vectors + + X=np.zeros([nt,na]) + Y=np.zeros([nt,na]) + Z=np.zeros([nt,na]) + t=np.zeros([nt]) + + uni=molecule.mda_object + traj=uni.trajectory + if 'dt' in kwargs: + dt=kwargs.get('dt') + else: + dt=traj.dt/1e3 +# if traj.units['time']=='ps': #Convert time units into ns +# dt=dt/1e3 +# elif traj.units['time']=='ms': +# dt=dt*1e3 + + + ts=iter(traj) + for k,t0 in enumerate(index): + try: + traj[t0] #This jumps to time point t in the trajectory + except: + "Maybe traj[t] doesn't work, so we skip through the iterable manually" + if k!=0: + for _ in range(index[k]-index[k-1]): + next(ts,None) + + if special: + "Run the function to return vector" + X0,Y0,Z0=vf() + else: + "Else just get difference in atom positions" + v=sel1[sel1in].positions-sel2[sel2in].positions + "We correct here for vectors extended across the simulation box" + box=np.repeat([uni.dimensions[0:3]],v.shape[0],axis=0) + + i=v>box/2 + v[i]=v[i]-box[i] + + i=v<-box/2 + v[i]=v[i]+box[i] + + "Store the results" + X0=v[:,0] + Y0=v[:,1] + Z0=v[:,2] + + "Make sure length is one" + length=np.sqrt(X0**2+Y0**2+Z0**2) + if np.any(length>2): +# print(molecule.sel1[molecule.sel1in[length>3]].names) +# print(molecule.sel2[molecule.sel2in[length>3]].names) +# print(length[length>3]) + print(k) + 
X[k,:]=np.divide(X0,length) + Y[k,:]=np.divide(Y0,length) + Z[k,:]=np.divide(Z0,length) + "Keep track of the time axis" + t[k]=dt*t0 + if k%np.ceil(nt/100).astype(int)==0 or k+1==nt: + printProgressBar(k+1, nt, prefix = 'Loading:', suffix = 'Complete', length = 50) + + vec={'X':X,'Y':Y,'Z':Z,'t':t,'index':index} + + "Re-align vectors to some set of reference atoms" + if 'align' in kwargs and kwargs.get('align').lower()[0]=='y': + "Default does not align molecule" + vec=align(vec,uni,**kwargs) + +# "Re-align vectors so they all point along z" +# if 'align_iRED' in kwargs and kwargs.get('align_iRED').lower()[0]=='y': +# vec=align_mean(vec) + + return vec + +def align_mean(vec0,rank=2,align_type='ZDir'): + """ + Aligns the mean direction of a set of vectors along the z-axis. This can be + useful for iRED analysis, to mitigate the orientational dependence of the + iRED analysis procedure. + + vec = align_mean(vec0) + + Options are introduced for the rotation of third angle: + Type='ZDir' : Sets gamma = -alpha (only option for rank 1 calc) + Type='tensor' : Aligns the rank 2 tensor, including the asymmetry + Type='xy-motion' : Aligns the z-component of the rank-2 tensor, and + maximizes correlation of the x and y components to the previous bond + """ + + + """At some point, we should consider whether it would make sense to use a + tensor alignment instead of a vector alignment. + """ + vec=vec0.copy() #Just operate on the copy here, to avoid accidental edits + + X,Y,Z=vec['X'],vec['Y'],vec['Z'] #Coordinates + +# nt=X.shape[0] + + + + #%% Calculate sines and cosines of beta,gamma rotations + if rank==1: + "Mean direction of the vectors" + X0,Y0,Z0=X.mean(axis=0),Y.mean(axis=0),Z.mean(axis=0) + + "Normalize the length" + length=np.sqrt(X0**2+Y0**2+Z0**2) + X0,Y0,Z0=np.divide([X0,Y0,Z0],length) + + "beta" + cB,sB=Z0,np.sqrt(1-Z0**2) + + "gamma" + lXY=np.sqrt(X0**2+Y0**2) + i=lXY==0 + lXY[i]=1. + cA,sA=[X0,Y0]/lXY + cA[i]=1. 
+ cG,sG=cA,-sA + elif rank==2: + "Note, rank 2 also aligns the asymmetry of a motion so is a better alignment" + cossin=vft.getFrame([X,Y,Z]) #Get euler angles for this vector + D2c=vft.D2(*cossin) #Calculate Spherical components + D20=D2c.mean(axis=1) #Calculate average + sc=vft.Spher2pars(D20)[2:] #Get euler angles + cA,sA,cB,sB,cG,sG=vft.pass2act(*sc) + + "apply rotations" + X,Y=cA*X+sA*Y,-sA*X+cA*Y #Apply alpha + X,Z=cB*X-sB*Z,sB*X+cB*Z #Apply beta + if align_type.lower()[0]=='z': + X,Y=cA*X-sA*Y,sA*X+cA*Y #Rotate back by -alpha + else: #Tensor- default option (undone later if using xy-motion) + X,Y=cG*X+sG*Y,-sG*X+cG*Y #Apply gamma + + if rank==2: + "Make sure axis is pointing along +z (rotate 180 around y)" + i=Z.mean(axis=0)<0 + X[:,i],Z[:,i]=-X[:,i],-Z[:,i] + + + "Try to maximize the correlation by aligning X/Y deviations" + if align_type.lower()[0]=='x': + c,s=RMS2Dalign(X,Y) + X,Y=c*X+s*Y,-s*X+c*Y + + """ + Note, I'd eventually like to try aligning the vectors to maximimize correlation + in all three dimensions, that is, an RMS3Dalign function...maybe wouldn't + make such a difference since we already align the mean tensor directions along z, + but worth some consideration + """ + +# "Check that deviations from average direction go same way for all bonds " +# iX=(X[:,0]*X).mean(axis=0)-X[:,0].mean()*X.mean(axis=0)<0 +# iY=(Y[:,0]*Y).mean(axis=0)-Y[:,0].mean()*Y.mean(axis=0)<0 +# +# X[:,iX],Y[:,iY]=-X[:,iX],-Y[:,iY] +# +# iX,iY,iZ=X.mean(axis=0)<0,Y.mean(axis=0)<0,Z.mean(axis=0)<0 +# X[:,iX],Y[:,iY],Z[:,iZ]=-X[:,iX],-Y[:,iY],-Z[:,iZ] +# "Can we really do this? 
I think flipping the axes should not influence dynamics" +# "Check 1- uncommenting below still yields D2 along z" +# "Check 2- small changes to detector responses observed....less convincing" + +# "Check that rotation is correct (if uncommented, D20[1] and D20[3] should be ~zeros, and all elements ~real" +# cossin=vft.getFrame([X,Y,Z]) #Get euler angles for this vector +# D2c=vft.D2(*cossin) #Calculate Spherical components +# D20=D2c.mean(axis=1) #Calculate average +# print(D20) + + "return results" + vec['X'],vec['Y'],vec['Z']=X,Y,Z + return vec +# nt=X.shape[0] +# X0,Y0,Z0=X.mean(axis=0),Y.mean(axis=0),Z.mean(axis=0) #Coordinates +# length=np.sqrt(X0**2+Y0**2+Z0**2) +# X0,Y0,Z0=np.divide([X0,Y0,Z0],length) +# +# "Angle away from the z-axis" +# beta=np.arccos(Z0) +# +# "Angle of rotation axis away from y-axis" +# "Rotation axis is at (-Y0,X0): cross product of X0,Y0,Z0 and (0,0,1)" +# theta=np.arctan2(-Y0,X0) +# +# +# xx=np.cos(-theta)*np.cos(-beta)*np.cos(theta)-np.sin(-theta)*np.sin(theta) +# yx=-np.cos(theta)*np.sin(-theta)-np.cos(-theta)*np.cos(-beta)*np.sin(theta) +# zx=np.cos(-theta)*np.sin(-beta) +# +# X=np.repeat([xx],nt,axis=0)*vec0.get('X')+\ +# np.repeat([yx],nt,axis=0)*vec0.get('Y')+\ +# np.repeat([zx],nt,axis=0)*vec0.get('Z') +# +# xy=np.cos(-theta)*np.sin(theta)+np.cos(-beta)*np.cos(theta)*np.sin(-theta) +# yy=np.cos(-theta)*np.cos(theta)-np.cos(-beta)*np.sin(-theta)*np.sin(theta) +# zy=np.sin(-theta)*np.sin(-beta) +# +# Y=np.repeat([xy],nt,axis=0)*vec0.get('X')+\ +# np.repeat([yy],nt,axis=0)*vec0.get('Y')+\ +# np.repeat([zy],nt,axis=0)*vec0.get('Z') +# +# xz=-np.cos(theta)*np.sin(-beta) +# yz=np.sin(-beta)*np.sin(theta) +# zz=np.cos(-beta) +# +# Z=np.repeat([xz],nt,axis=0)*vec0.get('X')+\ +# np.repeat([yz],nt,axis=0)*vec0.get('Y')+\ +# np.repeat([zz],nt,axis=0)*vec0.get('Z') +# +# vec={'X':X,'Y':Y,'Z':Z,'t':vec0['t'],'index':vec0['index']} +# +# return vec + +def RMS2Dalign(X,Y,return_angles=False): + """ + Returns the optimal 2D rotation to bring a 
vector of X and Y coordinates onto + a reference set of coordinates (the X and Y may be a 2D matrix, where each + column is a new bond, for example). Returns, by default c and s, the cosine + and sine for the optimal rotation matrix (set return_angles=True to get the + angle directly) + """ + + "Consider eliminating for-loop with direct SVD calc" + "see: https://lucidar.me/en/mathematics/singular-value-decomposition-of-a-2x2-matrix/" + + c=list() + s=list() + xr,yr=X[:,0],Y[:,0] + for x,y in zip(X.T,Y.T): + H=np.array([[(x*xr).sum(),(x*yr).sum()],[(y*xr).sum(),(y*yr).sum()]]) + U,S,Vt=np.linalg.svd(H) + Ut,V=U.T,Vt.T + d=np.linalg.det(np.dot(V,Ut)) + R=np.dot(V,np.dot([[1,0],[0,d]],Ut)) + c.append(R[0,0]) + s.append(R[0,1]) + xr,yr=c[-1]*x+s[-1],-s[-1]*x+c[-1]*y +# print(R) + c=np.array(c) + s=np.array(s) + + if return_angles: + return np.arctan2(s,c) + else: + return c,s + + +#%% Removes +def align(vec0,uni,**kwargs): + """ + Removes overall rotation from a trajectory, by aligning to a set of reference + atoms. Default is protein backbone CA. If no CA found, try C11 for lipids + (possibly this isn't standard- shouldn't create problems for the time being). + Next try all carbons, and finally all atoms) + """ +# if 'align_ref' in kwargs: +# uni0=uni.select_atoms(kwargs.get('align_ref')) +# else: +# uni0=uni.select_atoms('name CA') #Standard alignment for proteins +# if uni0.n_atoms==0: +# uni0=uni.select_atoms('name C11') #Not sure about this. Alignment for lipids? +# if uni0.n_atoms==0: +# uni0=uni.select_atoms('type C') #Try for all carbons +# if uni0.n_atoms==0: +# uni0=uni.atoms #Take all atoms +# +# if uni0.n_segments>1: +# "DIfferent segments may be split up after unwrapping. 
We'll take the segment with the most atoms" +# count=list() +# for s in uni0.segments: +# count.append(s.atoms.n_atoms) +# uni0=uni0.segments[np.argmax(count)].atoms +# +# "Unwrap the segment before this calculation" +## make_whole(uni0) +# +# ref0=uni0.positions-uni0.atoms.center_of_mass() +# +# SZ=np.shape(vec0.get('X')) +# index=vec0['index'] +# "Pre-allocate the direction vector" +# vec={'X':np.zeros(SZ),'Y':np.zeros(SZ),'Z':np.zeros(SZ),'t':vec0.get('t'),'index':index} +# +# nt=vec0['t'].size +# +# +# traj=uni.trajectory +# ts=iter(traj) +# for k,t0 in enumerate(index): +# try: +# traj[t0] #This jumps to time point t in the trajectory +# except: +# "Maybe traj[t] doesn't work, so we skip through the iterable manually" +# if k!=0: +# for _ in range(index[k]-index[k-1]): +# next(ts,None) +# "Ref positions, first unwrapping the reference segment" +## make_whole(uni0) +# pos=uni0.positions-uni0.atoms.center_of_mass() +# +# "Rotation matrix for this time point" +# R,_=rotation_matrix(pos,ref0) +# "Apply the rotation matrix to the input vector" +# vec['X'][k,:]=vec0['X'][k,:]*R[0,0]+vec0['Y'][k,:]*R[0,1]+vec0['Z'][k,:]*R[0,2] +# vec['Y'][k,:]=vec0['X'][k,:]*R[1,0]+vec0['Y'][k,:]*R[1,1]+vec0['Z'][k,:]*R[1,2] +# vec['Z'][k,:]=vec0['X'][k,:]*R[2,0]+vec0['Y'][k,:]*R[2,1]+vec0['Z'][k,:]*R[2,2] +# +## vec['X'][k,:]=vec0['X'][k,:]*R[0,0]+vec0['Y'][k,:]*R[1,0]+vec0['Z'][k,:]*R[2,0] +## vec['Y'][k,:]=vec0['X'][k,:]*R[0,1]+vec0['Y'][k,:]*R[1,1]+vec0['Z'][k,:]*R[2,1] +## vec['Z'][k,:]=vec0['X'][k,:]*R[0,2]+vec0['Y'][k,:]*R[1,2]+vec0['Z'][k,:]*R[2,2] +# "Print out progress" +# if k%int(np.size(index)/100)==0 or k+1==nt: +# printProgressBar(k+1, np.size(index), prefix = 'Aligning:', suffix = 'Complete', length = 50) +# +# return vec + print('Warning: the align function has been removed- please pre-align the trajectory') + print('Use molecule.align(sel) prior to processing') + return vec0 + + + +#%% Progress bar for loading/aligning +def printProgressBar (iteration, total, prefix 
= '', suffix = '', decimals = 1, length = 100, fill = '█'): + """ + Call in a loop to create terminal progress bar + @params: + iteration - Required : current iteration (Int) + total - Required : total iterations (Int) + prefix - Optional : prefix string (Str) + suffix - Optional : suffix string (Str) + decimals - Optional : positive number of decimals in percent complete (Int) + length - Optional : character length of bar (Int) + fill - Optional : bar fill character (Str) + """ + percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) + filledLength = int(length * iteration // total) + bar = fill * filledLength + '-' * (length - filledLength) + print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r') + # Print New Line on Complete + if iteration == total: + print() \ No newline at end of file diff --git a/pyDIFRATE/iRED/fast_index.py b/pyDIFRATE/iRED/fast_index.py new file mode 100644 index 0000000..260abbc --- /dev/null +++ b/pyDIFRATE/iRED/fast_index.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Functions for efficicient sampling of an MD trajectory. 
+(separated from Ct_fast due to circular import issues) + +Created on Fri Aug 23 10:17:11 2019 + +@author: albertsmith +""" + +import numpy as np + +def trunc_t_axis(nt,n=100,nr=10,**kwargs): + """ + Calculates a log-spaced sampling schedule for an MD time axis. Parameters are + nt, the number of time points, n, which is the number of time points to + load in before the first time point is skipped, and finally nr is how many + times to repeat that schedule in the trajectory (so for nr=10, 1/10 of the + way from the beginning of the trajectory, the schedule will start to repeat, + and this will be repeated 10 times) + + """ + + n=np.array(n).astype('int') + nr=np.array(nr).astype('int') + + if n==-1: + index=np.arange(nt) + return index + + "Step size: this log-spacing will lead to the first skip after n time points" + logdt0=np.log10(1.50000001)/n + + index=list() + index.append(0) + dt=0 + while index[-1]= nt, eliminate repeats, and sort the index" + "(repeats in above line lead to unsorted axis, unique gets rid of repeats and sorts)" + index=index[index. + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Mon May 6 13:47:45 2019 + +@author: albertsmith +""" + +""" +We create a number of functions for performing the iRED analysis. These depend +on the input of one or two MDanalysis objects, that can be used to specify the +bond vector direction. 
+""" + +import numpy as np +import multiprocessing as mp +#import os +import MDAnalysis as md +from MDAnalysis.analysis import align +#os.chdir('../data') +from pyDIFRATE.data.data_class import data +#os.chdir('../iRED') + + +#%% Run the full iRED analysis +def iRED_full(sel1,sel2,rank,**kwargs): + if 'alignCA' not in kwargs: + kwargs['alignCA']='n' + "We don't need this functionality for the iRED analysis, although user can still force it" + + vec=get_vec(sel1,sel2,**kwargs) + + if 'align' in kwargs and kwargs.get('align').lower()[0]=='y': + vec0=vec + vec=align_vec(vec) + + + if not('refvecs' in kwargs and kwargs.get('refVecs').lower()[0]=='y'): + n_added_vecs=vec0.get('X').shape[0] + for k in vec.keys(): + if k!='t': + vec[k]=np.concatenate((vec.get(k),vec0.get(k)),axis=0) + aligned=True + else: + aligned=False + + if 'refVecs' in kwargs and kwargs.get('refVecs').lower()[0]=='y': + """If we align the vectors, we need reference vectors as well + This allows us to properly separate overall motion" + Otherwise, overall motion is gathered into 1 eigenvector, instead of 2*rank+1 vectors + """ + sel01=sel1.universe.select_atoms('protein and name CA and bonded name N') + sel02=sel1.universe.select_atoms('protein and name N and bonded name CA') + + if sel01.n_atoms==0: + sel01=sel1 + sel02=sel2 + + vec0=get_vec(sel01,sel02,**kwargs) + n_added_vecs=vec0.get('X').shape[0] + + for k in vec.keys(): + if k!='t': + vec[k]=np.concatenate((vec.get(k),vec0.get(k)),axis=0) + else: + n_added_vecs=0 + + M=Mmat(vec,rank) + Yl=Ylm(vec,rank) + aqt=Aqt(Yl,M) + + "Default is to use parallel processing" + if 'parallel' in kwargs and kwargs.get('parallel').lower()[0]=='n': + cqt=Cqt(aqt) + else: + cqt=Cqt_par(aqt,**kwargs) + + ct=Ct(cqt) + ctinf=CtInf(aqt) + dct=DelCt(ct,ctinf) + + if 'dt' in kwargs: + "mdanalysis seems to import the wrong time step in some instances." 
+ "This can be corrected by providing dt" + dt=kwargs.get('dt') + nt=np.size(vec.get('t')) + t=np.arange(0,dt*nt,dt) + vec['t']=t + + ired={'rank':rank,'M':M.get('M'),'lambda':M.get('lambda'),'m':M.get('m'),\ + 't':vec.get('t'),'Ct':ct.get('Ct'),'DelCt':dct.get('DelCt'),'CtInf':ctinf,\ + 'Aligned':aligned,'n_added_vecs':n_added_vecs} + + + return ired +#%% Create a data object from the iRED results (also runs the analysis) +def iRED2data(molecule,rank,**kwargs): + """Input a molecule object with selections already made, to get a full iRED + analysis, moved into a data object + """ + + if molecule.sel1in is None: + in1=np.arange(molecule.sel1.n_atoms) + else: + in1=molecule.sel1in + if molecule.sel2in is None: + in2=np.arange(molecule.sel2.n_atoms) + else: + in2=molecule.sel2in + + ired=iRED_full(molecule.sel1,molecule.sel2,rank,in1=in1,in2=in2,**kwargs) + + + Ctdata=data(iRED=ired,molecule=molecule) + Ctdata.sens.molecule=molecule + Ctdata.sens.molecule.set_selection() + Ctdata.detect.molecule=Ctdata.sens.molecule + + return Ctdata + +#%% Load in vectors for the iRED analysis +def get_vec(sel1,sel2,**kwargs): + "Gets vectors from an MDanalysis selection, returns X,Y,Z in dictionary" + a=sel1.universe + b=sel2.universe + + if 'in1' in kwargs: + in1=kwargs.get('in1') + else: + "Just changed this line. Could be wrong!!!" + in1=np.arange(sel1.n_atoms) + + if 'in2' in kwargs: + in2=kwargs.get('in2') + else: + "Also changed this line. 
" + in2=np.arange(sel2.n_atoms) + + + if a!=b: + print('sel1 and sel2 must be generated from the same MDAnalysis universe!') + return + + if sel1.n_atoms!=sel2.n_atoms and np.size(in1)!=np.size(in2): + print('sel1 and sel2 or indices sel1in and sel2in must have the same number of atoms') + return + + + if 'tstep'in kwargs: + tstep=kwargs.get('tstep') + print('Take every {0}th frame'.format(tstep)) + else: + tstep=1 + + + nt=int((a.trajectory.n_frames-1)/tstep)+1 + na=np.size(in1) + + X=np.zeros([na,nt]) + Y=np.zeros([na,nt]) + Z=np.zeros([na,nt]) + + k=0 + + + try: + for k in range(0,nt): + a.trajectory[k*tstep] + pos=sel1.positions[in1]-sel2.positions[in2] + + X0=pos[:,0] + Y0=pos[:,1] + Z0=pos[:,2] + + length=np.sqrt(X0**2+Y0**2+Z0**2) + + X[:,k]=np.divide(X0,length) + Y[:,k]=np.divide(Y0,length) + Z[:,k]=np.divide(Z0,length) + if k%int(nt/100)==0 or k+1==nt: + printProgressBar(k+1, nt, prefix = 'Loading:', suffix = 'Complete', length = 50) + except: + ts0=iter(a.trajectory) + + for ts in ts0: + for _ in range(tstep-1): + next(ts0,None) + pos=sel1.positions[in1]-sel2.positions[in2] + X0=pos[:,0] + Y0=pos[:,1] + Z0=pos[:,2] + + length=np.sqrt(X0**2+Y0**2+Z0**2) + + X[:,k]=np.divide(X0,length) + Y[:,k]=np.divide(Y0,length) + Z[:,k]=np.divide(Z0,length) + + k=k+1 + if k%int(nt/100)==0 or k+1==nt: + printProgressBar(k+1, nt, prefix = 'Loading:', suffix = 'Complete', length = 50) + dt=a.trajectory.dt*tstep + t=np.arange(0,nt*dt,dt) + + vec={'X':X,'Y':Y,'Z':Z,'t':t} + + if not('alignCA' in kwargs and kwargs.get('alignCA').lower()[0]=='n'): + "Default is to always align the CA" + vec=alignCA(vec,a,**kwargs) + + + + + return vec + +def alignCA(vec0,uni,tstep=1,**kwargs): + "reference CA positions" + + if 'align_ref' in kwargs: + uni0=uni.select_atoms(kwargs.get('align_ref')) + else: + uni0=uni.select_atoms('name CA') + + if uni0.n_atoms==0: + print('No atoms found for alignment, specify atom for alignment with align_ref') + return vec0 + + 
ref0=uni0.positions-uni0.atoms.center_of_mass() + + SZ=np.shape(vec0.get('X')) + "Pre-allocate the direction vector" + vec={'X':np.zeros(SZ),'Y':np.zeros(SZ),'Z':np.zeros(SZ),'t':vec0.get('t')} + + nt=vec0['t'].size + + for k in range(0,nt): + try: + uni.trajectory[k*tstep] + except: + if k!=0: + for _ in range(0,tstep): + uni.next() + "CA positions" + pos=uni0.positions-uni0.atoms.center_of_mass() + + "Rotation matrix for this time point" + R,_=align.rotation_matrix(pos,ref0) + "Apply rotation to vectors" + vec['X'][:,k]=vec0['X'][:,k]*R[0,0]+vec0['Y'][:,k]*R[0,1]+vec0['Z'][:,k]*R[0,2] + vec['Y'][:,k]=vec0['X'][:,k]*R[1,0]+vec0['Y'][:,k]*R[1,1]+vec0['Z'][:,k]*R[1,2] + vec['Z'][:,k]=vec0['X'][:,k]*R[2,0]+vec0['Y'][:,k]*R[2,1]+vec0['Z'][:,k]*R[2,2] + + if k%int(nt/100)==0 or k+1==nt: + printProgressBar(k+1, nt, prefix = 'Aligning positions:', suffix = 'Complete', length = 50) + + return vec + + +#%% Make all vectors point in the same directon (remove influence of orientation on analysis) +def align_vec(vec0): + "Aligns the mean direction of a set of vectors along the z-axis" + + nt=vec0.get('X').shape[1] + + "Mean direction of the vectors" + X0=vec0.get('X').mean(axis=1) + Y0=vec0.get('Y').mean(axis=1) + Z0=vec0.get('Z').mean(axis=1) + + length=np.sqrt(X0**2+Y0**2+Z0**2) + X0=np.divide(X0,length) + Y0=np.divide(Y0,length) + Z0=np.divide(Z0,length) + + "Angle away from the z-axis" + beta=np.arccos(Z0) + + "Angle of rotation axis away from y-axis" + "Rotation axis is at (-Y0,X0): cross product of X0,Y0,Z0 and (0,0,1)" + theta=np.arctan2(-Y0,X0) + + xx=np.cos(-theta)*np.cos(-beta)*np.cos(theta)-np.sin(-theta)*np.sin(theta) + yx=-np.cos(theta)*np.sin(-theta)-np.cos(-theta)*np.cos(-beta)*np.sin(theta) + zx=np.cos(-theta)*np.sin(-beta) + + X=np.repeat(np.transpose([xx]),nt,axis=1)*vec0.get('X')+\ + np.repeat(np.transpose([yx]),nt,axis=1)*vec0.get('Y')+\ + np.repeat(np.transpose([zx]),nt,axis=1)*vec0.get('Z') + + 
xy=np.cos(-theta)*np.sin(theta)+np.cos(-beta)*np.cos(theta)*np.sin(-theta) + yy=np.cos(-theta)*np.cos(theta)-np.cos(-beta)*np.sin(-theta)*np.sin(theta) + zy=np.sin(-theta)*np.sin(-beta) + + Y=np.repeat(np.transpose([xy]),nt,axis=1)*vec0.get('X')+\ + np.repeat(np.transpose([yy]),nt,axis=1)*vec0.get('Y')+\ + np.repeat(np.transpose([zy]),nt,axis=1)*vec0.get('Z') + + xz=-np.cos(theta)*np.sin(-beta) + yz=np.sin(-beta)*np.sin(theta) + zz=np.cos(-beta) + + Z=np.repeat(np.transpose([xz]),nt,axis=1)*vec0.get('X')+\ + np.repeat(np.transpose([yz]),nt,axis=1)*vec0.get('Y')+\ + np.repeat(np.transpose([zz]),nt,axis=1)*vec0.get('Z') + + +# "Some code here to make a specific pair of vectors anticorrelated" +# "DELETE ME" +# print('Making first and second bond anti-correlated') +# X[139,:]=-X[140,:] +# Y[139,:]=-Y[140,:] +# Z[139,:]=Z[140,:] +# + vec={'X':X,'Y':Y,'Z':Z,'t':vec0.get('t')} + + return vec + +def Mmat(vec,rank): + + nb=vec.get('X').shape[0] + + M=np.eye(nb) + + for k in range(0,nb-1): + x0=np.repeat([vec.get('X')[k,:]],nb-k-1,axis=0) + y0=np.repeat([vec.get('Y')[k,:]],nb-k-1,axis=0) + z0=np.repeat([vec.get('Z')[k,:]],nb-k-1,axis=0) + + dot=x0*vec.get('X')[k+1:,:]+y0*vec.get('Y')[k+1:,:]+z0*vec.get('Z')[k+1:,:] + + if rank==1: + val=np.mean(dot,axis=1) + elif rank==2: + val=np.mean((3*dot**2-1)/2,axis=1) + + M[k,k+1:]=val + M[k+1:,k]=val + + a=np.linalg.eigh(M) + return {'M':M,'lambda':a[0],'m':a[1],'rank':rank} + +def Mt(vec,rank,tstep): + nb=vec.get('X').shape[0] + + M=np.eye(nb) + for k in range(0,nb): + x0=np.repeat([vec.get('X')[k,tstep:]],nb,axis=0) + y0=np.repeat([vec.get('Y')[k,tstep:]],nb,axis=0) + z0=np.repeat([vec.get('Z')[k,tstep:]],nb,axis=0) + + if tstep!=0: + dot=x0*vec.get('X')[:,0:-tstep]+y0*vec.get('Y')[:,0:-tstep]+z0*vec.get('Z')[:,0:-tstep] + else: + dot=x0*vec.get('X')+y0*vec.get('Y')+z0*vec.get('Z') + + if rank==1: + val=np.mean(dot,axis=2) + elif rank==2: + val=np.mean((3*dot**2-1)/2,axis=1) + + M[k,:]=val + + return M + +def Ylm(vec,rank): + + 
X=vec.get('X') + Y=vec.get('Y') + Z=vec.get('Z') + + + Yl=dict() + if rank==1: + c=np.sqrt(3/(2*np.pi)) + Yl['1,0']=c/np.sqrt(2)*Z + a=(X+Y*1j) + b=np.sqrt(X**2+Y**2) + Yl['1,+1']=-c/2*b*a + Yl['1,-1']=c/2*b*a.conjugate() + elif rank==2: + c=np.sqrt(15/(32*np.pi)) + Yl['2,0']=c*np.sqrt(2/3)*(3*Z**2-1) + a=(X+Y*1j) + b=np.sqrt(X**2+Y**2) + Yl['2,+1']=2*c*Z*b*a + Yl['2,-1']=2*c*Z*b*a.conjugate() + a=np.exp(2*np.log(X+Y*1j)) + b=b**2 + Yl['2,+2']=c*b*a + Yl['2,-2']=c*b*a.conjugate() + + Yl['t']=vec.get('t') + + return Yl + +def Aqt(Yl,M): + "Project the Ylm onto the eigenmodes" + aqt=dict() + for k in Yl.keys(): + if k!='t': + aqt[k]=np.dot(M.get('m').T,Yl.get(k)) + + aqt['t']=Yl.get('t') + + return aqt + + +def Cqt(aqt): + "Get correlation functions for each spherical component" + cqt=dict() + for k in aqt.keys(): + if k!='t': + "Loop over each component" + nt=aqt.get(k).shape[1] + nb=aqt.get(k).shape[0] + for m in range(0,nt): + "Correlate the mth time point with all other time points" + a0=np.repeat(np.conj(np.transpose([aqt.get(k)[:,m]])),nt-m,axis=1) + if m==0: + c0=a0*aqt.get(k)+np.zeros([nb,nt])*1j #Make c0 complex + else: + c0[:,0:-m]+=a0*aqt.get(k)[:,m:] + + if m%int(nt/100)==0 or m+1==nt: + printProgressBar(m+1, nt, prefix = 'Ct({}):'.format(k), suffix = 'Complete', length = 50) + print() + "Divide to normalize for more time points at beginning than end" + cqt[k]=c0/np.repeat([np.arange(nt,0,-1)],nb,axis=0) + + + + cqt['t']=aqt['t'] + + return cqt + +def Cqt_par(aqt,**kwargs): + "Performs same operation as Cqt, but using parallel processing" + X=list() + + nc=mp.cpu_count() + if'n_cores' in kwargs: + nc=np.min([kwargs.get('n_cores'),nc]) + + + for k in range(0,nc): + X.append((aqt,k,nc)) + + + with mp.Pool(processes=nc) as pool: + X=pool.map(Cqt_parfun,X) + + cqt=dict() + + for k in aqt.keys(): + if k!='t': + nt=aqt.get(k).shape[1] + nb=aqt.get(k).shape[0] + cqt[k]=np.zeros([nb,nt])+0*1j + + for cqt0 in X: + for k in cqt0.keys(): + cqt[k]+=cqt0[k] + + for k 
in cqt.keys(): + cqt[k]=cqt[k]/np.repeat([np.arange(nt,0,-1)],nb,axis=0) + + cqt['t']=aqt['t'] + + return cqt + +def Cqt_parfun(X): + "Function to be run by Cqt_par in parallel" + aqt=X[0] + index=X[1] + nc=X[2] + + cqt0=dict() + for k in aqt.keys(): + if k!='t': + "Loop over each component" + nt=aqt.get(k).shape[1] + nb=aqt.get(k).shape[0] + c0=np.zeros([nb,nt])+0*1j + for l,m in enumerate(range(index,nt,nc)): + "Correlate the mth time point with all other time points" + a0=np.repeat(np.conj(np.transpose([aqt.get(k)[:,m]])),nt-m,axis=1) + if m==0: + c0=a0*aqt.get(k)+0*1j #Make c0 complex + else: + c0[:,0:-m]+=a0*aqt.get(k)[:,m:] + + cqt0[k]=c0 + + return cqt0 + +def Ct(cqt): + "Sum up all components to get the overall correlation function" + ct0=None + for k in cqt.keys(): + if k!='t': + if np.shape(ct0)==(): + ct0=cqt.get(k) + else: + ct0+=cqt.get(k) + + ct={'t':cqt.get('t'),'Ct':ct0.real} + + return ct + +def CtInf(aqt): + "Get final value of correlation function" + ctinf=None + for k in aqt.keys(): + if k!='t': + a=aqt.get(k).mean(axis=1) + if np.shape(ctinf)==(): + ctinf=np.real(a*a.conj()) + else: + ctinf+=np.real(a*a.conj()) + + return ctinf + +def DelCt(ct,ctinf): + "Get a normalized version of the correlation function (starts at 1, decays to 0)" + t=ct.get('t') + ct=ct.get('Ct') + nt=ct.shape[1] + ctinf=np.repeat(np.transpose([ctinf]),nt,axis=1) + ct0=np.repeat(np.transpose([ct[:,0]]),nt,axis=1) + delCt={'t':t,'DelCt':(ct-ctinf)/(ct0-ctinf)} + + return delCt + + +def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'): + """ + Call in a loop to create terminal progress bar + @params: + iteration - Required : current iteration (Int) + total - Required : total iterations (Int) + prefix - Optional : prefix string (Str) + suffix - Optional : suffix string (Str) + decimals - Optional : positive number of decimals in percent complete (Int) + length - Optional : character length of bar (Int) + fill - Optional : 
bar fill character (Str) + """ + percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) + filledLength = int(length * iteration // total) + bar = fill * filledLength + '-' * (length - filledLength) + print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r') + # Print New Line on Complete + if iteration == total: + print() \ No newline at end of file diff --git a/pyDIFRATE/iRED/iRED_fast.py b/pyDIFRATE/iRED/iRED_fast.py new file mode 100644 index 0000000..85cd841 --- /dev/null +++ b/pyDIFRATE/iRED/iRED_fast.py @@ -0,0 +1,545 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Wed Aug 28 10:06:35 2019 + +@author: albertsmith +""" + +import numpy as np +import multiprocessing as mp +#import os +#os.chdir('../data') +from pyDIFRATE.data.data_class import data +#os.chdir('../iRED') +from MDAnalysis.analysis.align import rotation_matrix +from psutil import virtual_memory +from pyDIFRATE.iRED.fast_funs import S2calc,Ct,get_trunc_vec,align_mean +from pyDIFRATE.iRED.fast_index import trunc_t_axis,get_count +from pyDIFRATE.iRED.par_iRED import par_class as ipc +from time import time + +#%% Run the full iRED analysis +def iRED_full(mol,rank=2,n=100,nr=10,align_iRED=False,refVecs=None,**kwargs): + """ + Runs the full iRED analysis for a given selection (or set of vec_special functions) + Arguments are the rank (0 or 1), the sampling (n,nr), whether to align the + vectors (align_iRED='y'/'n', and refVecs, which may be a dict containing + a vector, created by DIFRATE, a tuple of strings selecting two sets of atoms + defining bonds), or simply 'y', which will default to using the N-CA bonds in + a protein. 
+ + ired=iRED_full(mol,rank=2,n=100,nr=10,align_iRED='n',refVecs='n',**kwargs) + + """ + + if 'nt' in kwargs: + nt=np.min([mol.mda_object.trajectory.n_frames,kwargs.get('nt')]) + else: + nt=mol.mda_object.trajectory.n_frames + index=trunc_t_axis(nt,n,nr) + vec=get_trunc_vec(mol,index,**kwargs) + + if align_iRED: + if refVecs is not None: + vec0=refVecs + if isinstance(vec0,dict): + pass + elif len(vec0)==2 and isinstance(vec0[0],str) and isinstance(vec0[1],str): + mol1=mol.copy() + mol1.select_atoms(sel1=vec0[0],sel2=vec0[1]) + vec0=get_trunc_vec(mol1,index) + elif isinstance(vec0,str) and vec0.lower()[0]=='y': + s1='protein and name CA and around 1.6 N' + s2='protein and name N and around 1.6 CA' + mol1=mol.copy() + mol1.select_atoms(sel1=s1,sel2=s2) + vec0=get_trunc_vec(mol1,index) + else: + print('Warning: refVecs entry not valid, using input vectors as reference (without aligning)') + vec0=vec + else: + vec0=vec + else: + vec0=None + + ired=vec2iRED(vec,rank,align_iRED,refVecs=vec0,molecule=mol,**kwargs) + + return ired + +#%% Process with iRED from a vector +def vec2iRED(vec,rank=2,align_iRED=False,align_type='ZDir',refVecs=None,**kwargs): + """ + Takes a vector object and returns the iRED object (vec contains X,Y,Z,t, and + usually an index for sparse sampling of the time axis) + + If align_iRED is set to True, then by default, vec will be used aligned and + unaligned for a reference vector. 
The reference vectors (refVecs) may be + used to replace the unaligned input vector + + iRED=vec2iRED(vec,rank=2,align_iRED=False,**kwargs) + """ + + if refVecs is not None: + vec0=refVecs + n_added_vecs=vec0.get('X').shape[1] + elif align_iRED: + vec0=vec.copy() + n_added_vecs=vec0.get('X').shape[1] + else: + vec0=None + n_added_vecs=0 + + + if align_iRED: + vec=align_mean(vec,rank,align_type) + aligned=True + else: + aligned=False + + if vec0 is not None: + for k in ['X','Y','Z']: + vec[k]=np.concatenate((vec[k],vec0[k]),axis=1) + + M=Mmat(vec,rank) + Yl=Ylm(vec,rank) + aqt=Aqt(Yl,M) + + "parallel calculation of correlation functions" + ct=Cqt(aqt) + ctinf=CtInf(aqt) + dct=DelCt(ct,ctinf) + ired={'rank':rank,'M':M['M'],'lambda':M['lambda'],'m':M['m'],'t':ct['t'],\ + 'N':ct['N'],'index':ct['index'],'DelCt':dct['DelCt'].T,'CtInf':ctinf,\ + 'Aligned':aligned,'n_added_vecs':n_added_vecs} + + Ctdata=data(iRED=ired,**kwargs) +# Ctdata.sens.molecule=molecule +# Ctdata.detect.molecule=Ctdata.sens.molecule + + return Ctdata + +#%% Generate a data object with iRED results +def iRED2data(molecule,rank=2,**kwargs): + """Input a molecule object with selections already made, to get a full iRED + analysis, moved into a data object + """ + + + """ + Not sure what happened here. Looks like iRED_full performs all steps of this + calculation. Should get rid of one name or the other... 
+ """ + Ctdata=iRED_full(molecule,rank,**kwargs) +# ired=iRED_full(molecule,**kwargs) + +# Ctdata=data(iRED=ired,molecule=molecule,**kwargs) +# Ctdata.sens.molecule=molecule +## Ctdata.sens.molecule.set_selection() +# Ctdata.detect.molecule=Ctdata.sens.molecule +# + return Ctdata +#%% Calculate the iRED M matrix +def Mmat(vec,rank=2): + """Calculates the iRED M-matrix, yielding correlation of vectors at time t=0 + M = Mmat(vec,rank=2) + M is returned as dictionary object, including the matrix itself, and also + the + """ + + X=vec['X'].T + Y=vec['Y'].T + Z=vec['Z'].T + + nb=X.shape[0] + + M=np.eye(nb) + + for k in range(0,nb-1): + "These are the x,y,z positions for one bond" + x0=np.repeat([X[k,:]],nb-k-1,axis=0) + y0=np.repeat([Y[k,:]],nb-k-1,axis=0) + z0=np.repeat([Z[k,:]],nb-k-1,axis=0) + + "We correlate those positions with all bonds having a larger index (symmetry of matrix allows this)" + dot=x0*X[k+1:,:]+y0*Y[k+1:,:]+z0*Z[k+1:,:] + + if rank==1: + val=np.mean(dot,axis=1) + elif rank==2: + val=np.mean((3*dot**2-1)/2,axis=1) + + M[k,k+1:]=val + M[k+1:,k]=val + + Lambda,m=np.linalg.eigh(M) + return {'M':M,'lambda':Lambda,'m':m,'rank':rank} + +def Mlagged(vec,lag,rank=2): + """Calculates the iRED M-matrix, with a lag time, which is provided by an + index or range of indices (corresponding to the separation in time points) + M = Mlagged(vec,rank=2,lag) + + lag=10 + or + lag=[10,20] + + The first instance calculates M using time points separated by exactly the + lag index. 
The second takes all time points separated by the first argument, + up to one less the last argument (here, separated by 10 up to 19) + + """ + + X=vec['X'].T + Y=vec['Y'].T + Z=vec['Z'].T + + index0=vec['index'] + + if np.size(lag)==1: + lag=np.atleast_1d(lag) + elif np.size(lag)==2: + lag=np.arange(lag[0],lag[1]) + + "Calculate indices for pairing time points separated within the range given in lag" + index1=np.zeros(0,dtype=int) + index2=np.zeros(0,dtype=int) + for k in lag: + i=np.isin(index0+k,index0) + j=np.isin(index0,index0+k) + index1=np.concatenate((index1,np.where(i)[0])) + index2=np.concatenate((index2,np.where(j)[0])) + + nb=X.shape[0] + M=np.eye(nb) + + for k in range(0,nb): + "We correlate all times that have a second time within the lag range" + x0=np.repeat([X[k,index1]],nb,axis=0) + y0=np.repeat([Y[k,index1]],nb,axis=0) + z0=np.repeat([Z[k,index1]],nb,axis=0) + + dot=x0*X[:,index2]+y0*Y[:,index2]+z0*Z[:,index2] + + if rank==1: + val=np.mean(dot,axis=1) + elif rank==2: + val=np.mean((3*dot**2-1)/2,axis=1) + + M[k,:]=val + + return M +#%% Estimates cross-correlation of the eigenvectors of the M matrix +def Mrange(vec,rank,i0,i1): + """Estimates the Mmatrix for frames offset by a minimum distance of i0 and + a maximum distance of i1-1. 
All M-matrices are simply added together + M=Mrange(vec,rank,i0,i1) + """ + pass + +#%% Calculates the spherical tensor components for the individual bonds +def Ylm(vec,rank=2): + """ + Calculates the values of the rank-2 spherical components of a set of vectors + Yl=Ylm(vec,rank) + """ + X=vec.get('X') + Y=vec.get('Y') + Z=vec.get('Z') + + + Yl=dict() + if rank==1: + c=np.sqrt(3/(2*np.pi)) + Yl['1,0']=c/np.sqrt(2)*Z + a=(X+Y*1j) +# b=np.sqrt(X**2+Y**2) +# Yl['1,+1']=-c/2*b*a #a was supposed to equal exp(i*phi), but wasn't normalized (should be normalized by b) +# Yl['1,-1']=c/2*b*a.conjugate() #Correction below + Yl['1,+1']=-c/2*a + Yl['1,-1']=c/2*a.conjugate() + elif rank==2: + c=np.sqrt(15/(32*np.pi)) + Yl['2,0']=c*np.sqrt(2/3)*(3*Z**2-1) + a=(X+Y*1j) +# b=np.sqrt(X**2+Y**2) +# b2=b**2 +# b[b==0]=1 +# Yl['2,+1']=2*c*Z*b*a +# Yl['2,-1']=2*c*Z*b*a.conjugate() + Yl['2,+1']=2*c*Z*a + Yl['2,-1']=2*c*Z*a.conjugate() +# a=np.exp(2*np.log(X+Y*1j)) +# b=b**2 +# Yl['2,+2']=c*b*a +# Yl['2,-2']=c*b*a.conjugate() + a2=a**2 +# a2[a!=0]=np.exp(2*np.log(a[a!=0]/b[a!=0])) + Yl['2,+2']=c*a2 + Yl['2,-2']=c*a2.conjugate() + + Yl['t']=vec['t'] + Yl['index']=vec['index'] + return Yl + +def Aqt(Yl,M): + """ + Project the Ylm onto the eigenmodes + aqt=Aqt(Yl,M) + """ + aqt=dict() + for k,y in Yl.items(): + if k!='t' and k!='index': + aqt[k]=np.dot(M['m'].T,y.T).T + else: + aqt[k]=y + + return aqt + +def Cqt(aqt,**kwargs): + + "Get number of cores" + if 'parallel' in kwargs: + p=kwargs.get('parallel') + if isinstance(p,str) and p.lower()[0]=='n': + nc=1 + elif isinstance(p,int): + nc=p if p>0 else 1 #Check the # cores is bigger than 0 + else: #Default use parallel processing + nc=mp.cpu_count() #Use all available cores + else: + nc=mp.cpu_count() + + ref_num,v0=ipc.store_vecs(aqt,nc) + try: + t0=time() + with mp.Pool(processes=nc) as pool: + ct=pool.map(ipc.Ct,v0) +# print('t={0}'.format(time()-t0)) + ct=ipc.returnCt(ref_num,ct) + except: + print('Error in calculating correlation 
functions') + finally: + ipc.clear_data(ref_num) + + index=aqt['index'] + N=get_count(index) + dt=np.diff(aqt['t'][0:2])/np.diff(index[0:2]) + t=np.linspace(0,dt.squeeze()*np.max(index),index[-1]+1) + i=N!=0 + N=N[i] + t=t[i] + ct=dict({'Ct':ct,'t':t,'index':index,'N':N}) + + return ct + +def Cij_t(aqt,i,j,**kwargs): + """ + Calculates the cross correlation between modes in the iRED analysis, indexed + by i and j + (this function should later be improved using parallel processing for multiple + pairs of modes. Currently supports only one pair) + c_ij=Cij_t(aqt,i,j,**kwargs) + """ + + index=aqt['index'] + n=np.size(index) + + + for p,(name,a) in enumerate(aqt.items()): + if p==0: + ct=np.zeros(index[-1]+1)+0j + if name!='index' and name!='t': + for k in range(n): + ct[index[k:]-index[k]]+=np.multiply(a[k:,i],a[k,j].conjugate()) + N0=get_count(index) + nz=N0!=0 + N=N0[nz] + dt=np.diff(aqt['t'][0:2])/np.diff(index[0:2]) + t=np.linspace(0,dt.squeeze()*np.max(index),index[-1]+1) + t=t[nz] + ct=np.divide(ct[nz].real,N) + + ct=dict({'Ct':ct,'t':t,'index':index,'N':N}) + + return ct + +#%% Estimate the correlation function at t=infinity +def CtInf(aqt): + "Get final value of correlation function" + ctinf=None + for k in aqt.keys(): + if k!='t' and k!='index': + a=aqt.get(k).mean(axis=0) + if np.shape(ctinf)==(): + ctinf=np.real(a*a.conj()) + else: + ctinf+=np.real(a*a.conj()) + + return ctinf + +#%% Estimate the correlation function at t=infinity +def Cij_Inf(aqt,i,j): + "Get final value of correlation function" + ctinf=None + for k in aqt.keys(): + if k!='t' and k!='index': + a=aqt.get(k)[:,i].mean() + b=aqt.get(k)[:,j].mean() + if np.shape(ctinf)==(): + ctinf=np.real(a*b.conj()) + else: + ctinf+=np.real(a*b.conj()) + + return ctinf + +#%% Returns normalized correlation function +def DelCt(ct,ctinf): + "Get a normalized version of the correlation function (starts at 1, decays to 0)" + t=ct.get('t') + ct=ct.get('Ct') + nt=ct.shape[0] + ctinf=np.repeat([ctinf],nt,axis=0) + 
ct0=np.repeat([ct[0,:]],nt,axis=0) + delCt={'t':t,'DelCt':(ct-ctinf)/(ct0-ctinf)} + + return delCt + + + +def iRED2dist(bond,data,nbins=None,all_modes=False,Type='avg'): + """ + Estimates a distribution of correlation times for a given bond in the iRED + analysis. We calculate a correlation time for each mode (we fit detector + responses to a single mode). Then, we calculate the amplitude of each mode + on the selected bond. Finally, we calculate a histogram from the results. + + z,A=iRED2dist(bond,fit,nbins=None) + + Note, that fit needs to be the detector fit of the iRED modes, not the final + fit (resulting from fit.iRED2rho()) + """ + + "Get the best-fit correlation time for each mode" +# z0,_,_=fit2tc(data.R,data.sens.rhoz(),data.sens.z(),data.R_std) + if Type[0].lower()=='a': + z0=avgz(data.R,data.sens.z(),data.sens.rhoz()) + else: + z0,_,_=fit2tc(data.R,data.sens.rhoz(),data.sens.z()) + + if bond in data.label: + i=np.argwhere(bond==data.label).squeeze() + else: + i=bond + + m0=data.ired['m'].T + l0=data.ired['lambda'] + + A0=np.zeros(z0.shape) + + for k,(l,m) in enumerate(zip(l0,m0)): + A0[k]=m[i]**2*l + + + if nbins is None: + nbins=np.min([data.sens.z().size,z0.size/10]) + + #Axis for histogram + z=np.linspace(data.sens.z()[0],data.sens.z()[-1],nbins) + + i=np.digitize(z0,z)-1 + + if all_modes: + ne=-A0.size + else: + ne=data.ired['rank']*2+1 + + A=np.zeros(z.shape) + for k,a in enumerate(A0[:-ne]): + A[i[k]]+=a + + return z,A + +def avgz(R,z,rhoz): + """ + Estimates an "average" z for a set of detector responses, determined simply + by the weighted average of the z0 for each detector (weighted by the + detector responses). 
Note that we use max-normalized detectors for this + calculation + """ + nd,nz=np.shape(rhoz) + z0=np.sum(np.repeat([z],nd,axis=0)*rhoz,axis=1)/np.sum(rhoz,axis=1) + nb=R.shape[0] + norm=np.max(rhoz,axis=1) + + R=np.divide(R,np.repeat([norm],nb,axis=0)) + + z=np.divide(np.multiply(R,np.repeat([z0],nb,axis=0)).sum(axis=1),R.sum(axis=1)) + + return z + +def fit2tc(R,rhoz,tc,R_std=None): + """ + Estimates a single correlation time for a set of detector responses, based + on the sensitivities of thoses detectors (in principle, may be applied to + any sensitivity object, but with better performance for optimized detectors) + + tc,A=fit2tc(R,sens) + + R may be a 2D matrix, in which case each row is a separate set of detector + responses (and will be analyzed separately) + """ + + R=np.atleast_2d(R) #Make sure R is a 2D matrix + if R_std is None: + R_std=np.ones(R.shape) + + + nd,nz=rhoz.shape #Number of detectors, correlation times + nb=R.shape[0] #Number of bonds + + err=list() #Storage for error + A=list() #Storage for fit amplitudes + + + for X in rhoz.T: + R0=np.divide(R,R_std) + rho=np.divide(np.repeat([X],nb,axis=0),R_std) + A.append(np.divide(np.mean(np.multiply(rho,R0),axis=1),np.mean(rho**2,axis=1))) + err.append(np.power(R0-rho*np.repeat(np.transpose([A[-1]]),nd,axis=1),2).sum(axis=1)) + + A0=np.array(A) + err=np.array(err) + + i=err.argmin(axis=0) + tc=np.array(tc[i]) + + A=np.zeros(nb) + Rc=np.zeros(R.shape) + + for k in range(nb): + A[k]=A0[i[k],k] + Rc[k]=A[k]*rhoz[:,i[k]] + + return tc,A,Rc \ No newline at end of file diff --git a/pyDIFRATE/iRED/parCt.py b/pyDIFRATE/iRED/parCt.py new file mode 100644 index 0000000..d753af9 --- /dev/null +++ b/pyDIFRATE/iRED/parCt.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). 
+ +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + +Created on Thu Aug 29 12:36:42 2019 + +@author: albertsmith +""" +import numpy as np +#import traceback +from pyDIFRATE.iRED.fast_index import get_count + +#%% Parallel class for fast parallel calculation +class par_class(): + """ + We create this class to perform parallel calculation of correlation functions. + The variables required for calculation are stored as *class* variables, which + are dictionaries. Because they are class variables, these are available to + any instance of par_class (that is, if multiple par_class *objects* are + created by different processes, they will all have access to these dicts. To + make problems unlikely, we assign the dictionary keys using a random number. + That number is passed out to the parent process, and then used to find + the correct data later. 
In principle, although different processes are + looking into the same dictionaries, they should avoid using/editing the same + data) + """ + X=dict() + Y=dict() + Z=dict() + + ct=dict() + + keys=dict() + nb=dict() + nk=dict() + index=dict() + + @classmethod + def Ct(cls,v): + i,ref_num=v + index=cls.index[ref_num] + + "Use FT if more than 10% of data points are populated" + if index.size/(index[-1]+1)>.1: + return cls.CtFT(v) + + X=cls.X[i] + Y=cls.Y[i] + Z=cls.Z[i] + n=np.size(index) + + ct=np.zeros([index[-1]+1,np.shape(X)[1]]) + + for k in range(n): + ct[index[k:]-index[k]]+=((np.multiply(X[k:],X[k])+np.multiply(Y[k:],Y[k])\ + +np.multiply(Z[k:],Z[k]))**2) + + "Store results of correlation function calculation" +# cls.storeCt(i,ct) +# cls.ct[i]=ct + return ct + + @classmethod + def CtFT(cls,v): + print('Processing with FT') + i,ref_num=v + index=cls.index[ref_num] + SZ=[(index[-1]+1)*2,np.shape(cls.X[i])[1]] + X=np.zeros(SZ) + Y=np.zeros(SZ) + Z=np.zeros(SZ) + X[index]=cls.X[i] + Y[index]=cls.Y[i] + Z[index]=cls.Z[i] + ft_prod=np.zeros(SZ,dtype=complex) + + v=[X,Y,Z] + + for k in range(3): + for j in range(k,3): + ft0=np.fft.fft(v[k]*v[j],axis=0) + ft_prod+=ft0.conj()*ft0 if k==j else 2*ft0.conj()*ft0 + + return np.fft.ifft(ft_prod,axis=0)[:int(SZ[0]/2)].real + + @classmethod + def store_vecs(cls,vec,nc): + """Responsible for sorting out the vectors for each process. + Uses class variables, which are effectively global, but indexes them randomly + so that we shouldn't end up accessing the same variables in multiple processes + """ + + nk=nc #Maybe we should change this to reduce memory usage. Right now just nc steps + + """nc is the number of cores to be used, and nk the number of chunks to + do the calculation in. Currently equal. 
+ """ + + ref_num=np.random.randint(0,1e9) + + cls.keys[ref_num]=ref_num+np.arange(nk) #Keys where the data is stored + cls.nb[ref_num]=vec['X'].shape[1] #Number of correlation functions (n bonds) + cls.nk[ref_num]=nk #Number of chunks + cls.index[ref_num]=vec['index'] #Index of frames taken + nb=cls.nb[ref_num] + for k,i in enumerate(cls.keys[ref_num]): #Separate and store parts of the vector + cls.X[i]=vec['X'][:,range(k,nb,nk)] + cls.Y[i]=vec['Y'][:,range(k,nb,nk)] + cls.Z[i]=vec['Z'][:,range(k,nb,nk)] + + v0=list() + for i in cls.keys[ref_num]: + v0.append((i,ref_num)) + + return ref_num,v0 + + @classmethod + def returnCt(cls,ref_num,ct): + nk=cls.nk[ref_num] + index=cls.index[ref_num] + N0=get_count(index) + nz=N0!=0 + N0=N0[nz] + nb=cls.nb[ref_num] + +# ct=list() +# for i in cls.keys[ref_num]: +# ct.append(cls.ct[i]) + + ct0=np.zeros([np.size(N0),nb]) + for k,c in enumerate(ct): + N=np.repeat([N0],np.shape(c)[1],axis=0).T + ct0[:,range(k,nb,nk)]=np.divide(c[nz],N) + + return 3/2*ct0-1/2 + + @classmethod + def clear_data(cls,ref_num): + locs=['X','Y','Z','ct'] + if ref_num in cls.keys: + for ref0 in cls.keys[ref_num]: + for loc in locs: + if ref0 in getattr(cls,loc): + del getattr(cls,loc)[ref0] + else: + print('Data already deleted') + + locs=['keys','nb','nk','index'] + for loc in locs: + if ref_num in getattr(cls,loc): + del getattr(cls,loc)[ref_num] + + @classmethod + def _clear_all(cls): + locs=['X','Y','Z','ct'] + for loc in locs: + while len(getattr(cls,loc).keys())!=0: + try: + k=list(getattr(cls,loc).keys()) + cls.clear_data(k[0]) + except: + pass + diff --git a/pyDIFRATE/iRED/par_iRED.py b/pyDIFRATE/iRED/par_iRED.py new file mode 100644 index 0000000..c218fdf --- /dev/null +++ b/pyDIFRATE/iRED/par_iRED.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). 
+
+FTA is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+FTA is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with FTA. If not, see <https://www.gnu.org/licenses/>.
+
+
+Questions, contact me at:
+albert.smith-penzel@medizin.uni-leipzig.de
+
+Created on Thu Aug 29 12:36:42 2019
+
+@author: albertsmith
+"""
+import numpy as np
+import traceback
+from time import time
+from pyDIFRATE.iRED.fast_index import get_count
+
+#%% Parallel class for fast parallel calculation
+class par_class():
+    """
+    We create this class to perform parallel calculation of correlation functions.
+    The variables required for calculation are stored as *class* variables, which
+    are dictionaries. Because they are class variables, these are available to
+    any instance of par_class (that is, if multiple par_class *objects* are
+    created by different processes, they will all have access to these dicts. To
+    make problems unlikely, we assign the dictionary keys using a random number.
+    That number is passed out to the parent process, and then used to find
+    the correct data later.
In principle, although different processes are + looking into the same dictionaries, they should avoid using/editing the same + data) + """ + aqt=dict() + + keys=dict() + nb=dict() + nk=dict() + index=dict() + + @classmethod + def Ct(cls,v): + i,ref_num=v + index=cls.index[ref_num] + aqt=cls.aqt[i] + n=np.size(index) + + ct=dict() + + nb=cls.nb[ref_num] + + for p,a in enumerate(aqt.values()): + if p==0: + ct=np.zeros([index[-1]+1,a.shape[1]])+0j + for k in range(n): + ct[index[k:]-index[k]]+=np.multiply(a[k:],a[k].conjugate()) + return ct.real +# + @classmethod + def store_vecs(cls,aqt,nc): + """Responsible for sorting out the vectors for each process. + Uses class variables, which are effectively global, but indexes them randomly + so that we shouldn't end up accessing the same variables in multiple processes + """ + + nk=nc #Maybe we should change this to reduce memory usage. Right now just nc steps + + """nc is the number of cores to be used, and nk the number of chunks to + do the calculation in. Currently equal. 
+ """ + + ref_num=np.random.randint(0,1e9) + + cls.keys[ref_num]=ref_num+np.arange(nk) #Keys where the data is stored + if '1,0' in aqt: + cls.nb[ref_num]=aqt['1,0'].shape[1] + elif '2,0' in aqt: + cls.nb[ref_num]=aqt['2,0'].shape[1] + cls.nk[ref_num]=nk #Number of chunks + cls.index[ref_num]=aqt['index'] #Index of frames taken + + nb=cls.nb[ref_num] + + for k,i in enumerate(cls.keys[ref_num]): #Separate and store parts of the vector + cls.aqt[i]=dict() + for m,a in aqt.items(): + if m!='t' and m!='index': + cls.aqt[i][m]=a[:,range(k,nb,nk)] + + v0=list() + for i in cls.keys[ref_num]: + v0.append((i,ref_num)) + + return ref_num,v0 + + @classmethod + def returnCt(cls,ref_num,ct): + "Still needs updated" + nk=cls.nk[ref_num] + index=cls.index[ref_num] + N0=get_count(index) + nz=N0!=0 + N0=N0[nz] + nb=cls.nb[ref_num] + + ct0=np.zeros([np.size(N0),nb]) + for k,c in enumerate(ct): + N=np.repeat([N0],np.shape(c)[1],axis=0).T + ct0[:,range(k,nb,nk)]=np.divide(c[nz],N) + + return ct0 + + @classmethod + def clear_data(cls,ref_num): + locs=['aqt','ct'] + if ref_num in cls.keys: + for ref0 in cls.keys[ref_num]: + if ref0 in cls.aqt: + del cls.aqt[ref0] + else: + print('Data already deleted') + + locs=['keys','nb','nk','index'] + for loc in locs: + if ref_num in getattr(cls,loc): + del getattr(cls,loc)[ref_num] + + @classmethod + def _clear_all(cls): + while len(cls.aqt.keys())!=0: + try: + k=list(cls.aqt.keys()) + cls.clear_data(k[0]) + except: + pass + \ No newline at end of file diff --git a/pyDIFRATE/iRED/parallel_Ct.py b/pyDIFRATE/iRED/parallel_Ct.py new file mode 100644 index 0000000..7f40760 --- /dev/null +++ b/pyDIFRATE/iRED/parallel_Ct.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). 
+
+FTA is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+FTA is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with FTA. If not, see <https://www.gnu.org/licenses/>.
+
+
+Questions, contact me at:
+albert.smith-penzel@medizin.uni-leipzig.de
+
+Created on Thu Aug 29 12:36:42 2019
+
+@author: albertsmith
+"""
+import numpy as np
+import traceback
+
+#%% Parallel class for fast parallel calculation
+class par_class():
+    """
+    We create this class to perform parallel calculation of correlation functions.
+    The variables required for calculation are stored as *class* variables, which
+    are dictionaries. Because they are class variables, these are available to
+    any instance of par_class (that is, if multiple par_class *objects* are
+    created by different processes, they will all have access to these dicts. To
+    make problems unlikely, we assign the dictionary keys using a random number.
+    That number is passed out to the parent process, and then used to find
+    the correct data later.
In principle, although different processes are + looking into the same dictionaries, they should avoid using/editing the same + data) + """ + X=dict() + Y=dict() + Z=dict() + + ct=dict() + + keys=dict() + nb=dict() + nk=dict() + index=dict() + + def __init__(self,vec,nc): + "Random number for storage of data of this process" + self.ref_num=np.random.randint(0,1e9) + self.store_vecs(vec,nc,self.ref_num) + + def __enter__(self): + "Required for use in with statement" + return self + + def __exit__(self, exc_type, exc_value, tb): + self.clear_data(self.ref_num) #Clears data created by this instance (with self.ref_num) + if exc_type is not None: + traceback.print_exception(exc_type, exc_value, tb) + + def v0(self): + "Creates a list of tuples for parallel processing" + v0=list() + for k in self.keys[self.ref_num]: + v0.append((k,self.ref_num)) + return v0 + + @classmethod + def Ct(cls,v): + i,ref_num=v + index=cls.index[ref_num] + X=cls.X[i] + Y=cls.Y[i] + Z=cls.Z[i] + n=np.size(index) + + "Delete data out of the dictionary after stored here" + cls.clearXYZ(i) + + ct=np.zeros([index[-1]+1,np.shape(X)[1]]) + + for k in range(n): + ct[index[k:]-index[k]]+=(3*(np.multiply(X[k:],X[k])+np.multiply(Y[k:],Y[k])\ + +np.multiply(Z[k:],Z[k]))**2-1)/2 + + "Store results of correlation function calculation" +# cls.storeCt(i,ct) + cls.ct[i]=ct + return ct +# + @classmethod + def store_vecs(cls,vec,nc,ref_num): + """Responsible for sorting out the vectors for each process. + Uses class variables, which are effectively global, but indexes them randomly + so that we shouldn't end up accessing the same variables in multiple processes + """ + + nk=nc #Maybe we should change this to reduce memory usage. Right now just nc steps + + """nc is the number of cores to be used, and nk the number of chunks to + do the calculation in. Currently equal. 
+ """ + + cls.keys[ref_num]=ref_num+np.arange(nk) #Keys where the data is stored + cls.nb[ref_num]=vec['X'].shape[1] #Number of correlation functions (n bonds) + cls.nk[ref_num]=nk #Number of chunks + cls.index[ref_num]=vec['index'] #Index of frames taken + nb=cls.nb[ref_num] + for k,i in enumerate(cls.keys[ref_num]): #Separate and store parts of the vector + cls.X[i]=vec['X'][:,range(k,nb,nk)] + cls.Y[i]=vec['Y'][:,range(k,nb,nk)] + cls.Z[i]=vec['Z'][:,range(k,nb,nk)] + + return ref_num + + @classmethod + def clearXYZ(cls,i): + "Responsible for deleting vectors for a given job" + del cls.X[i] + del cls.Y[i] + del cls.Z[i] + +# @classmethod +# def storeCt(cls,ref0,ct): +# cls.ct[ref0]=ct + + def returnCt(self,ct): + ref_num=self.ref_num + nk=self.nk[ref_num] + keys=range(nk) + index=self.index[ref_num] + N0=get_count(index) + nz=N0!=0 + N0=N0[nz] + nb=self.nb[ref_num] + + ct0=np.zeros([np.size(N0),nb]) + for k,c in enumerate(ct): + N=np.repeat([N0],np.shape(c)[1],axis=0).T + ct0[:,range(k,nb,nk)]=np.divide(c[nz],N) + + return ct0 + + @classmethod + def clear_data(cls,ref_num): + locs=['X','Y','Z','ct'] + for ref0 in cls.keys[ref_num]: + for loc in locs: + if ref0 in getattr(cls,loc): + del getattr(cls,loc)[ref0] + + locs=['keys','nb','nk','index'] + for loc in locs: + if ref_num in getattr(cls,loc): + del getattr(cls,loc)[ref_num] + + +#%% Determine how many frame pairs are averaged into each time point +def get_count(index): + """ + Returns the number of averages for each time point in the sparsely sampled + correlation function + """ + N=np.zeros(index[-1]+1) + n=np.size(index) + + for k in range(n): + N[index[k:]-index[k]]+=1 + + return N \ No newline at end of file diff --git a/pyDIFRATE/plots/.DS_Store b/pyDIFRATE/plots/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 
zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0. + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Oct 10 14:23:32 2019 + +@author: albertsmith +""" + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors as colors + +def plot_cc(Rcc,lbl=None,ax=None,norm=True,index=None,**kwargs): + """"2D plot of the cross-correlation, given by a square matrix, and an axis label + plot_cc(Rcc,lbl,ax=None,norm='y',**kwargs) + """ + if ax==None: + fig=plt.figure() + ax=fig.add_subplot(111) + else: + fig=ax.figure + + + if norm: + dg=np.sqrt([np.diag(Rcc)]) + "Should we use abs here or not?" + x=np.abs(Rcc)/np.dot(dg.T,dg) + else: + x=Rcc + + if index is None: + index=np.arange(x.shape[0]) + x=x[index][:,index] + + + if lbl is not None and len(lbl)==x.shape[0]: + lbl=np.array(lbl)[index] + if isinstance(lbl[0],str): + xaxis_lbl=lbl.copy() + lbl=np.arange(np.size(lbl)) + else: + xaxis_lbl=None + else: + lbl=np.arange(0,Rcc.shape[0]) + xaxis_lbl=None + + sz=(np.max(lbl)+1)*np.array([1,1]) + mat=np.zeros(sz) + mat1=np.zeros([sz[0],sz[1],4]) + mat2=np.ones([sz[0],sz[1],4])*0.75 + mat2[:,:,3]=1 + + for i,k in enumerate(lbl): + mat[k][np.array(lbl)]=x[i,:] + mat1[k,k,3]=1 + mat2[k,np.array(lbl),3]=0 + +# mat1[:,:,3]=-(mat1[:,:,3]-1) + + if 'cmap' in kwargs: + cmap=kwargs.get('cmap') + elif mat.min()<0: + cmap='RdBu_r' + if norm:mat[0,0],mat[-1,-1]=1,-1 + else: + cmap='Blues' + + cax=ax.imshow(mat,interpolation=None,cmap=cmap) + if norm:ax.imshow(mat1,interpolation=None) + ax.imshow(mat2,interpolation=None) + fig.colorbar(cax) + + if 'axis_label' in kwargs: + axlbl=kwargs.get('axis_label') + else: + axlbl='Residue' + + ax.set_xlabel(axlbl) + ax.set_ylabel(axlbl) + + "Limit to 50 axis labels" + while xaxis_lbl is not None and len(lbl)>50: + xaxis_lbl=np.array(xaxis_lbl) + xaxis_lbl=xaxis_lbl[range(0,len(lbl),2)] + lbl=lbl[range(0,len(lbl),2)] + + if xaxis_lbl is not None: + 
ax.set_xticks(lbl) + ax.set_xticklabels(xaxis_lbl,rotation=90) + ax.set_yticks(lbl) + ax.set_yticklabels(xaxis_lbl,rotation=0) + ax.invert_yaxis() + fig.show() + + return ax + + +def plot_rho_series(data,fig=None,plot_sens=True,index=None,rho_index=None,errorbars=False,style='plot',**kwargs): + """ + Plots the full series of detector response (or a limited set, specified by rho_index) + """ + + + if fig is None: + fig=plt.figure() + + nd=data.R.shape[1] + + rho_index=np.atleast_1d(np.arange(nd) if rho_index is None else np.array(rho_index)) + + if hasattr(data.sens,'detect_par') and data.sens.detect_par['R2_ex_corr'] and\ + nd-1 in rho_index: + R2ex=True + else: + R2ex=False + + if plot_sens and data.sens is not None: + nplts=np.size(rho_index)+2 + ax0=fig.add_subplot(int(nplts/2)+1,1,1) + + temp=data.sens._rho(rho_index,bond=None) + if R2ex: + temp[-1][:]=0 + + hdl=ax0.plot(data.sens.z(),temp.T) + ax0.set_xlabel(r'$\log_{10}(\tau$ / s)') + ax0.set_ylabel(r'$\rho(z)$') + ax0.set_xlim(data.sens.z()[[0,-1]]) + mini=np.min(temp) + maxi=np.max(temp) + ax0.set_ylim([mini-(maxi-mini)*.05,maxi+(maxi-mini)*.05]) + + + color=[h.get_color() for h in hdl] + else: + nplts=np.size(rho_index) + color=plt.rcParams['axes.prop_cycle'].by_key()['color'] + + ax=list() + + if index is not None: + index=np.atleast_1d(index).astype(int) + else: + index=np.arange(data.R.shape[0]).astype(int) + + if np.size(data.label)==data.R.shape[0]: + lbl=np.array(data.label)[index] + if isinstance(lbl[0],str): + xaxis_lbl=lbl.copy() + lbl=np.arange(np.size(lbl)) + else: + xaxis_lbl=None + else: + lbl=np.arange(np.size(index)) + xaxis_lbl=None + + for k,ri in enumerate(rho_index): + if k==0: + ax.append(fig.add_subplot(nplts,1,k+nplts-np.size(rho_index)+1)) + else: + ax.append(fig.add_subplot(nplts,1,k+nplts-np.size(rho_index)+1,sharex=ax[0])) + + + if errorbars: + if data.R_l is None or data.R_u is None: + plot_rho(lbl,data.R[index,ri],data.R_std[:,ri],ax=ax[-1],\ + 
color=color[k],style=style,**kwargs) + else: + plot_rho(lbl,data.R[index,ri],[data.R_l[index,ri],data.R_u[index,ri]],ax=ax[-1],\ + color=color[k],style=style,**kwargs) + else: + plot_rho(lbl,data.R[index,ri],ax=ax[-1],color=color[k],style=style,**kwargs) + + + + ax[-1].set_ylabel(r'$\rho_'+str(k)+'^{(\\theta,S)}$') + + yl=ax[-1].get_ylim() + ax[-1].set_ylim([np.min([yl[0],0]),yl[1]]) + + + + if k50: + xaxis_lbl=xaxis_lbl[range(0,len(lbl),2)] + lbl=lbl[range(0,len(lbl),2)] + if xaxis_lbl is not None: + ax[-1].set_xticks(lbl) + ax[-1].set_xticklabels(xaxis_lbl,rotation=90) + if R2ex: + ax[-1].set_ylabel(r'$R_2^{ex} / s^{-1}$') + + fig.subplots_adjust(hspace=0.25) + fig.show() + return ax + +def plot_rho(lbl,R,R_std=None,style='plot',color=None,ax=None,split=True,**kwargs): + """ + Plots a set of rates or detector responses. + """ + + if ax is None: + ax=plt.figure().add_subplot(111) + + "We divide the x-axis up where there are gaps between the indices" + lbl1=list() + R1=list() + R_u1=list() + R_l1=list() + + lbl=np.array(lbl) #Make sure this is a np array + if not(np.issubdtype(lbl.dtype,np.number)): + split=False + lbl0=lbl.copy() + lbl=np.arange(len(lbl0)) + else: + lbl0=None + + if split: + s0=np.where(np.concatenate(([True],np.diff(lbl)>1,[True])))[0] + else: + s0=np.array([0,np.size(R)]) + + for s1,s2 in zip(s0[:-1],s0[1:]): + lbl1.append(lbl[s1:s2]) + R1.append(R[s1:s2]) + if R_std is not None: + if np.ndim(R_std)==2: + R_l1.append(R_std[0][s1:s2]) + R_u1.append(R_std[1][s1:s2]) + else: + R_l1.append(R_std[s1:s2]) + R_u1.append(R_std[s1:s2]) + else: + R_l1.append(None) + R_u1.append(None) + + "Plotting style (plot,bar, or scatter, scatter turns the linestyle to '' and adds a marker)" + if style.lower()[0]=='s': + if 'marker' not in kwargs: + kwargs['marker']='o' + if 'linestyle' not in kwargs: + kwargs['linestyle']='' + ebar_clr=color + elif style.lower()[0]=='b': + if 'linestyle' not in kwargs: + kwargs['linestyle']='' + ebar_clr='black' + else: + 
ebar_clr=color + + for lbl,R,R_u,R_l in zip(lbl1,R1,R_u1,R_l1): + if R_l is None: + ax.plot(lbl,R,color=color,**kwargs) + else: + ax.errorbar(lbl,R,[R_l,R_u],color=ebar_clr,capsize=3,**kwargs) + if style.lower()[0]=='b': + kw=kwargs.copy() + if 'linestyle' in kw: kw.pop('linestyle') + ax.bar(lbl,R,color=color,**kw) + if color is None: + color=ax.get_children()[0].get_color() + + if lbl0 is not None: + ax.set_xticks(lbl) + ax.set_xticklabels(lbl0,rotation=90) + + return ax + +#%% Plot the data fit +def plot_fit(lbl,Rin,Rc,Rin_std=None,info=None,index=None,exp_index=None,fig=None): + """ + Plots the fit of experimental data (small data sizes- not MD correlation functions) + Required inputs are the data label, experimental rates, fitted rates. One may + also input the standard deviation of the experimental data, and the info + structure from the experimental data. + + Indices may be provided to specify which residues to plot, and which + experiments to plot + + A figure handle may be provided to specifiy the figure (subplots will be + created), or a list of axis handles may be input, although this must match + the number of experiments + + plot_fit(lbl,Rin,Rc,Rin_std=None,info=None,index=None,exp_index=None,fig=None,ax=None) + + one may replace Rin_std with R_l and R_u, to have different upper and lower bounds + """ + + "Apply index to all data" + if index is not None: + lbl=lbl[index] + Rin=Rin[index] + Rc=Rc[index] + if Rin_std is not None: Rin_std=Rin_std[index] + + "Remove experiments if requested" + if exp_index is not None: + if info is not None: + info=info.loc[:,exp_index].copy + info.columns=range(Rin.shape[0]) + + Rin=Rin[:,exp_index] + Rc=Rc[:,exp_index] + if Rin_std is not None: Rin_std=Rin_std[:,exp_index] + + nexp=Rin.shape[1] #Number of experiments + + ax,xax,yax=subplot_setup(nexp,fig) + SZ=np.array([np.sum(xax),np.sum(yax)]) + #Make sure the labels are set up + """Make lbl a numpy array. If label is already numeric, then we use it as is. 
+ If it is text, then we replace lbl with a numeric array, and store the + original lbl as lbl0, which we'll label the x-axis with. + """ + lbl=np.array(lbl) #Make sure this is a np array + if not(np.issubdtype(lbl.dtype,np.number)): + split=False + lbl0=lbl.copy() + lbl=np.arange(len(lbl0)) + + + else: + lbl0=None + + "Use truncated labels if too many residues" + if lbl0 is not None and len(lbl0)>50/SZ[0]: #Let's say we can fit 50 labels in one figure + nlbl=np.floor(50/SZ[0]) + space=np.floor(len(lbl0)/nlbl).astype(int) + ii=range(0,len(lbl0),space) + else: + ii=range(0,len(lbl)) + + #Sweep through each experiment + clr=[k for k in colors.TABLEAU_COLORS.values()] #Color table + for k,a in enumerate(ax): + a.bar(lbl,Rin[:,k],color=clr[np.mod(k,len(clr))]) #Bar plot of experimental data + if Rin_std is not None: + a.errorbar(lbl,Rin[:,k],Rin_std[:,k],color='black',linestyle='',\ + capsize=3) #Errorbar + a.plot(lbl,Rc[:,k],linestyle='',marker='o',color='black',markersize=3) + if xax[k]: + if lbl0 is not None: + a.set_xticks(ii) + a.set_xticklabels(lbl0[ii],rotation=90) + else: + plt.setp(a.get_xticklabels(),visible=False) + if lbl0 is not None: + a.set_xticks(ii) + if yax[k]: + a.set_ylabel(r'R / s$^{-1}$') + + #Apply labels to each plot if we find experiment type in the info array + if info is not None and 'Type' in info.index.to_numpy(): + if info[k]['Type'] in {'R1','NOE','R2'}: + a.set_ylim(np.min(np.concatenate(([0],Rin[:,k],Rc[:,k]))),\ + np.max(np.concatenate((Rin[:,k],Rc[:,k])))*1.25) + i=info[k] + string=r'{0} {1}@{2:.0f} MHz'.format(i['Nuc'],i['Type'],i['v0']) + a.text(np.min(lbl),a.get_ylim()[1]*0.88,string,FontSize=8) + else: + a.set_ylim(np.min(np.concatenate(([0],Rin[:,k],Rc[:,k]))),\ + np.max(np.concatenate((Rin[:,k],Rc[:,k])))*1.45) + i=info[k] + string=r'{0} {1}@{2:.0f} MHz'.format(i['Nuc'],i['Type'],i['v0']) + a.text(np.min(lbl),a.get_ylim()[1]*0.88,string,FontSize=8) + string=r'$\nu_r$={0} kHz, $\nu_1$={1} kHz'.format(i['vr'],i['v1']) + 
a.text(np.min(lbl),a.get_ylim()[1]*0.73,string,FontSize=8) +# fig.show() + return ax + + +def plot_Ct(t,Ct,Ct_fit=None,ax=None,color=None,style='log',**kwargs): + """ + Plots correlation functions and fits of correlation functions + + ax=plot_Ct(t,Ct,Ct_ft=None,ax=None,color,**kwargs) + + Color specifies the color of the line color. One entry specifies only the + color of Ct, but if Ct_fit is included, one may use a list of two colors. + + Keyword arguments are passed to the plotting functions. + """ + if ax is None: + ax=plt.figure().add_subplot(111) + + if color is None: + color=[[.8,0,0],[0.3,0.3,0.3]] + elif len(color)!=2: + color=[color,[0.3,0.3,0.3]] + + if style[:2].lower()=='lo': + ax.semilogx(t,Ct,color=color[0],**kwargs) + else: + ax.plot(t,Ct,color=color[0],**kwargs) + if 'linewidth' not in kwargs: + kwargs['linewidth']=1 + if Ct_fit is not None: + if style[:2].lower()=='lo': + ax.semilogx(t,Ct_fit,color=color[1],**kwargs) + else: + ax.plot(t,Ct_fit,color=color[1],**kwargs) + + return ax + +def plot_all_Ct(t,Ct,Ct_fit=None,lbl=None,index=None,color=None,fig=None,style='log',**kwargs): + """ + Plots a series of correlation functions and their fits, using the plot_Ct + function + + plot_all_Ct(t,Ct,Ct_fit=None,lbl=None,linecolor=None,figure=None,**kwargs) + """ + + if index is not None: + index=np.atleast_1d(index).astype(int) + Ct=Ct[index] + if Ct_fit is not None: + Ct_fit=Ct_fit[index] + if lbl is not None: + lbl=lbl[index] + + nexp=Ct.shape[0] + ax,xax,yax=subplot_setup(nexp,fig) + fig=ax[0].figure + + if Ct_fit is None: + ylim=[np.min([0,Ct.min()]),Ct.max()] + else: + ylim=[np.min([Ct.min(),Ct_fit.min()]),np.max([Ct.max(),Ct_fit.max()])] + + if Ct_fit is None:Ct_fit=[None for k in range(nexp)] + + + for k,a in enumerate(ax): + plot_Ct(t,Ct[k],Ct_fit[k],ax=a,color=color,style=style,**kwargs) + if xax[k]: + plt.setp(a.get_xticklabels(),visible=True) + a.set_xlabel('t / ns') + else: + plt.setp(a.get_xticklabels(),visible=False) + + if yax[k]: + 
a.set_ylabel('C(t)') + plt.setp(a.get_yticklabels(),visible=True) + else: + plt.setp(a.get_yticklabels(),visible=False) + + a.set_xlim(t[0],t[-1]) + a.set_ylim(*ylim) + if lbl is not None: + a.set_title(lbl[k],y=1,pad=-6,FontSize=6) + + fig.show() + return ax + +def subplot_setup(nexp,fig=None): + """ + Creates subplots neatly distributed on a figure for a given number of + experments. Returns a list of axes, and two logical indices, xax and yax, + which specify whether the figure sits on the bottom of the figure (xax) or + to the left side of the figure (yax) + + Also creates the figure if none provided. + + subplot_setup(nexp,fig=None) + """ + if fig is None:fig=plt.figure() + + "How many subplots" + SZ=np.sqrt(nexp) + SZ=[np.ceil(SZ).astype(int),np.floor(SZ).astype(int)] + if np.prod(SZ)1 or exp_num!=None: + a=a[:,exp_num] + b=b[:,exp_num] + + if norm: + N=np.max(np.abs(a),axis=0) + a=a/np.tile(N,[np.size(sens.tc()),1]) + b=b/np.tile(N,[np.size(sens.tc()),1]) + + + if ax is None: + fig=plt.figure() + ax=fig.add_subplot(111) + hdl1=ax.plot(sens.z(),a,'k') + hdl2=ax.plot(sens.z(),b,'r--') +# hdl1=plt.plot(self.z(),a,'k') +# hdl2=plt.plot(self.z(),b,'r--') +# ax=hdl1[0].axes + else: + hdl1=ax.plot(sens.z(),a,'k') + hdl2=ax.plot(sens.z(),b,'r--') + + ax.set_xlabel(r'$\log_{10}(\tau$ / s)') + if norm: + ax.set_ylabel(r'$R(z)$ (normalized)') + else: + ax.set_ylabel(r'$R(z) / $s$^{-1}$') + ax.set_xlim(sens.z()[[0,-1]]) + ax.set_title('Rate Constant Reproduction') + + hdl=hdl1+hdl2 + + fig.show() + return hdl + +#%% Sets plot attributes from kwargs +def _set_plot_attr(hdl,**kwargs): + """ + Get properties for a list of handles. 
If values in kwargs are found in props, + then that attribute is set (ignores unmatched values) + """ + if not(hasattr(hdl,'__len__')): #Make sure hdl is a list + hdl=[hdl] + + props=hdl[0].properties().keys() + for k in kwargs: + if k in props: + for m in hdl: + getattr(m,'set_{}'.format(k))(kwargs.get(k)) \ No newline at end of file diff --git a/pyDIFRATE/r_class/.DS_Store b/pyDIFRATE/r_class/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..ae283df1fa7e3cb8c26f8ee1b796fe05fcdf8e17 GIT binary patch literal 6148 zcmeHKy-ou$47Q=ep)MU6^9mi_I5v. + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Wed Apr 10 16:49:23 2019 + +@author: albertsmith +""" + +import numpy as np +import pandas as pd +#import DIFRATE_funs as dff +#import matplotlib.pyplot as plt +import pyDIFRATE.r_class.mdl_sens as mdl +import os +#os.chdir('../iRED') +from pyDIFRATE.iRED.fast_index import trunc_t_axis,get_count +#os.chdir('../r_class') + +class Ct(mdl.model): + def __init__(self,tc=None,z=None,t=None,**kwargs): + + """Probably a better way to do this, but I need to identify which + child of mdl_sens is which later. Using isinstance requires me to + import the children into mdl_sens, but also import mdl_sens into its + children. This seems to create some strange dependence so that I can't + actually load any of the classes any more""" + + self._class='Ct' + self._origin='Ct' + + """The detectors class may have bond-specific sensitivities in _rho. We + need to know if this is the case for the mdl_sens class to work + properly + """ + self._BondSpfc='no' + + """Get user defined tc if provided. 
Options are to provide the tc + vector directly, to provide the log directly (define z instead of tc), + or to specify it as a start, end, and number of steps, which are then + log-spaced (3 entries for tc or z) + """ + + if tc is None: + if z is not None: + if np.size(z)==3: + self.__tc=np.logspace(z[0],z[1],z[2]) + else: + self.__tc=np.power(10,z) + "Allow users to input z instead of tc" + else: + self.__tc=np.logspace(-14,-3,200) + elif np.size(tc)==3: + self.__tc=np.logspace(np.log10(tc[0]),np.log10(tc[1]),tc[2]) + else: + self.__tc=np.array(tc) + """We don't allow editing of the tc vector; you must initialize a new + instance of rates if you want to change it""" + + + """If you want to edit the code to include new experiments, and these + require new variables, they MUST be added to one of these lists + """ + + "We need to initialize self.info" + self.info=None + + a=dict() + if t is not None: + if np.size(t)==3: + self.__t=np.arange(t[0],t[1],t[2]) + elif np.size(t)==2: + self.__t=np.arange(0,t[0],t[1]) + else: + self.__t=t + elif 'sparse' in kwargs: + "Include nt, n, nr and dt in dict object" + sparse=kwargs.get('sparse') + if 'dt' not in sparse or 'nt' not in sparse: + print('dt and nt are required arguments for generating a sparse sensitivity object') + return + index=trunc_t_axis(**sparse) + + "Get the count of number of averages" + N=get_count(index) + + t=sparse.get('dt')*np.arange(index[-1]+1) + i=N!=0 + N=N[i] + self.__t=t[i] + + if 'stdev' not in kwargs: + stdev=1/np.sqrt(N) + stdev[0]=1e-6 + kwargs.update({'stdev':stdev}) + else: + self.__t=np.arange(0,500.001,.005) + + a.update({'t' : self.__t}) + + nt=self.__t.size + + if 'stdev' in kwargs: + stdev=kwargs.get('stdev') + if np.size(stdev)==1: + vec=1/np.sqrt(np.arange(nt,0,-1)) + vec=vec/vec[0] + stdev=vec*stdev + stdev[0]=1e-6 + elif np.size(stdev)!=np.size(self.__t): + vec=1/np.sqrt(np.arange(nt,0,-1)) + stdev=vec/vec[-1] + stdev[0]=1e-6 + else: + vec=1/np.sqrt(np.arange(nt,0,-1)) + 
stdev=vec/vec[-1] + stdev[0]=1e-6 + + a.update({'stdev' : stdev}) + + if 'median_val' in kwargs: + median_val=kwargs.get('median_val') + if np.size(median_val)==1: + median_val=np.ones(nt)*median_val + else: + median_val=np.ones(nt) + a.update({'median_val' : median_val}) + + self.info=pd.DataFrame.from_dict(a).T + + + if 'S2' in kwargs: +# self.__R=np.exp(-1e-9*np.dot(np.atleast_2d(self.__t).T,1/np.atleast_2d(self.__tc)))\ +# -np.repeat([np.exp(-1e-9*self.__t[-1]/self.__tc)],self.__t.shape[0],axis=0) + "Note the new formula for sensitivity after S2 subtraction. Based on Poisson distribution" + T=self.__t[-1]*1e-9 #Length of the trajectory + Lambda=1./(2.*self.__tc) #Constant for Poisson distribution + self.__R=np.exp(-1e-9*np.dot(np.atleast_2d(self.__t).T,1/np.atleast_2d(self.__tc)))\ + -np.repeat([1./(T*Lambda)*(1-np.exp(-T*Lambda))],self.__t.shape[0],axis=0) + else: + self.__R=np.exp(-1e-9*np.dot(np.atleast_2d(self.__t).T,1/np.atleast_2d(self.__tc))) +# self.__R=np.exp(-1e-9*np.dot(np.transpose([self.__t]),np.divide(1,[self.__tc]))) + "Names of the experimental variables that are available" + self.__exper=['t','stdev'] + "Names of the spin system variables that are available" + self.__spinsys=[] + + super().__init__() + + def Ct(self,exp_num=None,**kwargs): + + if exp_num is None: + exp_num=self.info.columns.values + + "Make sure we're working with numpy array for exp_num" + if not isinstance(exp_num,np.ndarray): + exp_num=np.array(exp_num) + if exp_num.shape==(): + exp_num=np.array([exp_num]) + "Make sure we're working with numpy array" + if not isinstance(exp_num,np.ndarray): + exp_num=np.array(exp_num) + if exp_num.shape==(): + exp_num=np.array([exp_num]) + + return self.__R[exp_num,:] + + def t(self): + return self.__t + + def tc(self): + return self.__tc.copy() + + def z(self): + return np.log10(self.__tc) + + def retExper(self): + return self.__exper + + def retSpinSys(self): + return self.__spinsys + + #%% Hidden output of rates (semi-hidden, can be found 
if the user knows about it ;-) ) + def _rho(self,exp_num=None,bond=None): + """The different children of mdl_sens will have different names for + their sensitivities. For example, this class returns R, which are the + rate constant sensitivities, but the correlation function class returns + Ct, and the detector class returns rho. Then, we have a function, + __rho(self), that exists and functions the same way in all classes + """ + + if exp_num is None: + exp_num=self.info.columns.values + + R=self.Ct(exp_num) + + + return R + + def _rhoCSA(self,exp_num=None,bond=None): + + if exp_num is None: + exp_num=self.info.columns.values + + if bond==-1 & self.molecule.vXY.shape[0]>0: + nb=self.molecule.vXY.shape[0] + R=np.zeros([nb,np.size(exp_num),np.size(self.__tc)]) + else: + R=np.zeros([np.size(exp_num),np.size(self.__tc)]) + + + return R + + def Cteff(self,exp_num=None,mdl_num=0,bond=None): + R,R0=self._rho_eff(exp_num,mdl_num,bond) + + return R,R0 \ No newline at end of file diff --git a/pyDIFRATE/r_class/DIFRATE_funs.py b/pyDIFRATE/r_class/DIFRATE_funs.py new file mode 100755 index 0000000..7c740b3 --- /dev/null +++ b/pyDIFRATE/r_class/DIFRATE_funs.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright 2021 Albert Smith-Penzel

This file is part of Frames Theory Archive (FTA).  FTA is free software,
distributed WITHOUT ANY WARRANTY under the GNU General Public License v3
(or, at your option, any later version); see <https://www.gnu.org/licenses/>.

Questions, contact me at:
albert.smith-penzel@medizin.uni-leipzig.de

Created on Fri Mar 22 09:32:49 2019

@author: albertsmith

Collection of useful functions for DIFRATE.

Every experiment function here has the signature f(tc, exper), where tc is
an array of correlation times (s) and exper is one column of a pandas
DataFrame of experimental parameters (v0 in MHz; vr/v1/offset in kHz;
dipole couplings dXY and quadrupole coupling QC in Hz; CSA in ppm).  Each
returns the rate-constant sensitivity as a function of tc.
"""

import numpy as np

from pyDIFRATE.tools.DRtools import NucInfo


def J(tc, v):
    "Returns the spectral density at frequency v (Hz) for correlation time(s) tc (s)"
    return 2/5*tc/(1+(2*np.pi*v*tc)**2)


def rate(tc, exper):
    """Returns the sensitivity of an experiment, specified by exper (one
    column of a pandas array), for a given set of correlation times, tc.

    Dispatches on exper.loc['Type'] to the module-level function of the
    same name (R1, R1Q, R1p, R2, NOE, ccXY, ccZ, S2).  Prints a message and
    returns None if the type is unknown or the calculation fails.
    """
    try:
        if exper.loc['Type'] in globals():
            fun = globals()[exper.loc['Type']]
            return fun(tc, exper)
        print('Experiment type {0} was not recognized'.format(exper.loc['Type']))
        return
    except Exception:
        # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
        print('Calculation of experiment {0} failed. Check parameters.'.format(exper.loc['Type']))
        return


def S2(tc, exper):
    """
    Order parameter (note- one must provide 1-S2 into the data.R matrix!)

    Returns a uniform sensitivity, independent of correlation time.
    """
    return np.ones(np.shape(tc))


def R1(tc, exper):
    """Longitudinal relaxation rate constant.

    Dipolar relaxation includes MAS spinning terms, relevant for
    homonuclear couplings; CSA and (initial-rate) quadrupolar
    contributions are added when present.
    """
    v0 = exper['v0']*1e6        # 1H frequency in Hz
    vr = exper['vr']*1e3        # MAS frequency in Hz
    dXY = exper['dXY']          # dipole coupling(s), Hz
    Nuc = exper['Nuc']          # observed nucleus
    Nuc1 = exper['Nuc1']        # coupled nucleus (or list of nuclei)
    QC = exper['QC']            # quadrupole coupling, Hz
    eta = exper['eta']          # quadrupole asymmetry
    vX = NucInfo(Nuc)/NucInfo('1H')*v0
    CSA = exper['CSA']*vX/1e6   # CSA converted from ppm to Hz
    R = np.zeros(tc.shape)

    if Nuc1 is not None and dXY is not None:
        "Dipolar relaxation"
        if np.size(dXY) == 1:
            vY = NucInfo(Nuc1)/NucInfo('1H')*v0
            S = NucInfo(Nuc1, 'spin')
            sc = S*(S+1)*4/3    # Scaling factor depending on the spin, =1 for spin 1/2
            if vX == vY:
                # Homonuclear: spinning sidebands of the chemical-shift offset enter
                Delv = exper['CSoff']*vX/1e6
                R += sc*(np.pi*dXY/2)**2*(1/6*J(tc, Delv+2*vr)+1/6*J(tc, Delv-2*vr)
                        + 1/3*J(tc, Delv+vr)+1/3*J(tc, Delv-vr)+3*J(tc, vX)+6*J(tc, 2*vX))
            else:
                R += sc*(np.pi*dXY/2)**2*(J(tc, vX-vY)+3*J(tc, vX)+6*J(tc, vY+vX))
        else:
            # Multiple coupled spins: sum the contributions
            for k in range(np.size(dXY)):
                S = NucInfo(Nuc1[k], 'spin')
                sc = S*(S+1)*4/3
                vY = NucInfo(Nuc1[k])/NucInfo('1H')*v0
                if vX == vY:
                    Delv = exper['CSoff'][k]*vX/1e6
                    R += sc*(np.pi*dXY[k]/2)**2*(1/6*J(tc, Delv+2*vr)+1/6*J(tc, Delv-2*vr)
                            + 1/3*J(tc, Delv+vr)+1/3*J(tc, Delv-vr)+3*J(tc, vX)+6*J(tc, 2*vX))
                else:
                    R += sc*(np.pi*dXY[k]/2)**2*(J(tc, vX-vY)+3*J(tc, vX)+6*J(tc, vY+vX))

    "CSA relaxation"
    R += 3/4*(2*np.pi*CSA)**2*J(tc, vX)

    if QC != 0:
        """Quadrupolar relaxation.  These formulas give the initial rate of
        relaxation, i.e. the orientation-averaged rate; deviations due to
        multi-exponential relaxation are not included.
        """
        S = NucInfo(Nuc, 'spin')
        if S == 0.5:
            # FIX: deltaQ is now only computed for S>1/2; the original divided
            # by 2*S*(2*S-1)==0 before this check and crashed for S=1/2.
            print('No quadrupole coupling for S=1/2')
        else:
            deltaQ = 1/(2*S*(2*S-1))*QC*2*np.pi
            C = (deltaQ/2)**2*(1+eta**2/3)  # Constant that scales the relaxation
            if S == 1:
                R += C*(3*J(tc, vX)+12*J(tc, 2*vX))
            elif S == 1.5:
                R += C*(36/5*J(tc, vX)+144/5*J(tc, 2*vX))
            elif S == 2.5:
                R += C*(96/5*J(tc, vX)+384/5*J(tc, 2*vX))
            else:
                print('Spin={0} not implemented for quadrupolar relaxation'.format(S))

    return R


def R1Q(tc, exper):
    """This function calculates the relaxation rate constant for relaxation of
    quadrupolar order.  Returns zeros (with a message) for unsupported spins.
    """
    v0 = exper['v0']*1e6
    Nuc = exper['Nuc']
    QC = exper['QC']
    eta = exper['eta']
    vX = NucInfo(Nuc)/NucInfo('1H')*v0

    S = NucInfo(Nuc, 'spin')
    R = np.zeros(np.shape(tc))  # FIX: R was unbound on the error branches
    if S == 0.5:
        # FIX: also guards the 1/(2*S*(2*S-1)) division below (==0 for S=1/2)
        print('No quadrupole coupling for spin=1/2')
        return R

    deltaQ = 1/(2*S*(2*S-1))*QC*2*np.pi
    C = (deltaQ/2)**2*(1+eta**2/3)  # Constant scaling the relaxation
    if S == 1:
        R = C*9*J(tc, vX)
    elif S == 1.5:
        R = C*(36*J(tc, vX)+36*J(tc, 2*vX))
    elif S == 2.5:
        R = C*(792/7*J(tc, vX)+972/7*J(tc, 2*vX))
    else:
        print('Spin not implemented')

    return R


def R1p(tc, exper):
    """Rotating-frame (spin-lock) relaxation rate constant.

    Combines the R1 contributions with the transverse (R1rho) dipolar and
    CSA terms, weighted by the off-resonance tilt angle theta.
    Quadrupolar R1rho terms are not yet implemented.
    """
    v0 = exper['v0']*1e6
    dXY = exper['dXY']
    Nuc = exper['Nuc']
    Nuc1 = exper['Nuc1']
    vr = exper['vr']*1e3        # MAS frequency, Hz
    v1 = exper['v1']*1e3        # spin-lock field, Hz
    off = exper['offset']*1e3   # spin-lock offset, Hz
    vX = NucInfo(Nuc)/NucInfo('1H')*v0
    CSA = exper['CSA']*vX/1e6
    R = np.zeros(tc.shape)

    "Treat off-resonance spin-lock"
    ve = np.sqrt(v1**2+off**2)  # effective field
    if ve == 0:
        theta = np.pi/2
    else:
        theta = np.arccos(off/ve)

    R10 = R1(tc, exper)  # We do this first, because it includes all R1 contributions

    "Start here with the dipole contributions"
    if Nuc1 is not None:
        if np.size(dXY) == 1:
            vY = NucInfo(Nuc1)/NucInfo('1H')*v0
            S = NucInfo(Nuc1, 'spin')
            sc = S*(S+1)*4/3  # Scaling depending on spin of second nucleus
            R1del = sc*(np.pi*dXY/2)**2*(3*J(tc, vY)
                    + 1/3*J(tc, 2*vr-ve)+2/3*J(tc, vr-ve)+2/3*J(tc, vr+ve)+1/3*J(tc, 2*vr+ve))
        else:
            R1del = np.zeros(tc.shape)
            for k in range(np.size(dXY)):
                vY = NucInfo(Nuc1[k])/NucInfo('1H')*v0
                S = NucInfo(Nuc1[k], 'spin')
                sc = S*(S+1)*4/3
                R1del += sc*(np.pi*dXY[k]/2)**2*(3*J(tc, vY)
                        + 1/3*J(tc, 2*vr-ve)+2/3*J(tc, vr-ve)+2/3*J(tc, vr+ve)+1/3*J(tc, 2*vr+ve))
    else:
        R1del = np.zeros(tc.shape)

    "CSA contributions"
    R1del += 1/6*(2*np.pi*CSA)**2*(1/2*J(tc, 2*vr-ve)+J(tc, vr-ve)+J(tc, vr+ve)+1/2*J(tc, 2*vr+ve))
    "Here should follow the quadrupole treatment!!!"

    "Add together R1 and R1p contributions, depending on the offset"
    R += R10+np.sin(theta)**2*(R1del-R10/2)
    return R


def R2(tc, exper):
    """Transverse relaxation: R1p in the limit of vanishing spin-lock field.

    BUG FIX: the original set exper['off']=0, but R1p reads the offset from
    exper['offset'], so the offset was never actually zeroed.  We also work
    on a copy so the caller's parameter set is not silently modified.
    """
    exper = exper.copy()
    exper['offset'] = 0
    exper['v1'] = 0

    return R1p(tc, exper)


def NOE(tc, exper):
    """Heteronuclear cross-relaxation (sigma) between Nuc and Nuc1."""
    v0 = exper['v0']*1e6
    dXY = exper['dXY']
    Nuc = exper['Nuc']
    Nuc1 = exper['Nuc1']
    vX = NucInfo(Nuc)/NucInfo('1H')*v0
    R = np.zeros(tc.shape)

    if Nuc1 is not None:  # was "Nuc1!=None"; identity test is the correct idiom
        vY = NucInfo(Nuc1)/NucInfo('1H')*v0
        S = NucInfo(Nuc1, 'spin')
        sc = S*(S+1)*4/3  # Scaling factor depending on the spin, =1 for spin 1/2
        R += sc*(np.pi*dXY/2)**2*(-J(tc, vX-vY)+6*J(tc, vY+vX))

    return R


def ccXY(tc, exper):
    """
    CSA-dipole cross-correlated transverse relaxation.
    theta is the angle (deg) between the CSA and dipole tensors.
    """
    v0, dXY, Nuc, Nuc1, theta = exper['v0']*1e6, exper['dXY'], exper['Nuc'], exper['Nuc1'], exper['theta']*np.pi/180
    vX = NucInfo(Nuc)/NucInfo('1H')*v0
    CSA = exper['CSA']*vX/1e6

    if Nuc1 is not None:
        S = NucInfo(Nuc1, 'spin')
        if S != 0.5:
            print('Warning: Formulas for cross-correlated relaxation have only been checked for S=1/2')
        sc = S*(S+1)*4/3
        R = np.sqrt(sc)*1/8*(2*np.pi*dXY)*(2*np.pi*CSA)*(3*np.cos(theta)**2-1)/2.*(4*J(tc, 0)+3*J(tc, vX))
    else:
        R = np.zeros(tc.shape)
    return R


def ccZ(tc, exper):
    """
    CSA-dipole cross-correlated longitudinal relaxation.
    theta is the angle (deg) between the CSA and dipole tensors.
    """
    v0, dXY, Nuc, Nuc1, theta = exper['v0']*1e6, exper['dXY'], exper['Nuc'], exper['Nuc1'], exper['theta']*np.pi/180
    vX = NucInfo(Nuc)/NucInfo('1H')*v0
    CSA = exper['CSA']*vX/1e6

    if Nuc1 is not None:
        S = NucInfo(Nuc1, 'spin')
        if S != 0.5:
            print('Warning: Formulas for cross-correlated relaxation have only been checked for S=1/2')
        sc = S*(S+1)*4/3
        R = np.sqrt(sc)*1/8*(2*np.pi*dXY)*(2*np.pi*CSA)*(3*np.cos(theta)**2-1)/2.*6*J(tc, vX)
    else:
        R = np.zeros(tc.shape)
    return R
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright 2021 Albert Smith-Penzel

This file is part of Frames Theory Archive (FTA).  FTA is free software,
distributed WITHOUT ANY WARRANTY under the GNU General Public License v3
(or, at your option, any later version); see <https://www.gnu.org/licenses/>.

Questions, contact me at:
albert.smith-penzel@medizin.uni-leipzig.de

Created on Thu Apr 4 14:51:02 2019

@author: albertsmith

Here we store all models for motion.  To add a new model, add a definition
to this module: it must accept **kwargs (we always pass struct and direct,
whether needed or not) and return (tMdl, AMdl, BndSpfc) - correlation
times, amplitudes, and 'yes'/'no' for whether the model is bond-specific.
Bond-specific models return one array of amplitudes per bond.  The
structure, imported via MDAnalysis, is available through struct.
"""

import numpy as np
from numpy import inf


def ModelSel(Model, direct='dXY', struct=None, **kwargs):
    """
    General function to select the correct model.

    Model is the name of a function in this module; its output is coerced to
    1-D arrays and infinite correlation times are capped at 1000 s.
    Returns None (with a message) for unknown models or failed evaluations.
    """
    if Model == 'Combined':
        tMdl, AMdl, BndSpfc = Combined(tMdl1=kwargs.get('tMdl1'), AMdl1=kwargs.get('AMdl1'),
                                       tMdl2=kwargs.get('tMdl2'), AMdl2=kwargs.get('AMdl2'))
        BndSpfc = 'no'
    else:
        try:
            if Model in globals():
                fun = globals()[Model]
            else:
                print('Model "{0}" was not recognized'.format(Model))
                return
            tMdl, AMdl, BndSpfc = fun(struct=struct, direct=direct, **kwargs)
        except Exception:
            # was a bare "except:"; narrowed so SystemExit/KeyboardInterrupt propagate
            print('Model "{0}" failed. Check parameters'.format(Model))
            return

    "Make sure we return np arrays with a dimension"
    tMdl = np.atleast_1d(tMdl)
    AMdl = np.atleast_1d(AMdl)

    tMdl[tMdl == inf] = 1000  # cap infinite correlation times
    return tMdl, AMdl, BndSpfc


# %% Simple isotropic diffusion
def IsoDif(**kwargs):
    """Isotropic tumbling in solution.

    The correlation time may be given as tM, tm, tr or tR.  Not
    bond-specific; the single amplitude is 1.
    """
    tMdl = None
    for key in ('tM', 'tm', 'tr', 'tR'):  # accepted aliases, in priority order
        if key in kwargs:
            tMdl = kwargs[key]
            break
    if tMdl is None:
        # FIX: the original fell through with tMdl unbound (NameError);
        # raise an informative error instead (caught/reported by ModelSel).
        raise ValueError('IsoDif requires a correlation time (tM, tm, tr or tR)')

    AMdl = 1
    BndSpfc = 'no'
    return tMdl, AMdl, BndSpfc


# %% Simple fast motion
def FastMotion(S2=None, **kwargs):
    """Fast motion (too fast to be detected by relaxation, or occurring
    within the first point of a trajectory).

    The amplitude may be given directly (AMdl or A) or as 1-S2.  If S2 is
    given per-bond, its length must match the number of bonds selected in
    struct, and the model becomes bond-specific.
    """
    tMdl = 1e-14  # Arbitrarily short correlation time
    if 'AMdl' in kwargs:
        AMdl = kwargs.get('AMdl')
    elif 'A' in kwargs:
        AMdl = kwargs.get('A')
    elif S2 is None:
        print('You must provide S2 to define the FastMotion model')
        return
    else:
        AMdl = 1-S2

    if np.size(S2) != 1:
        # Per-bond S2: validate its length against the selection in struct
        # (assumes struct has sel1in/sel1 attributes - set by the caller)
        struct = kwargs.get('struct')
        if struct.sel1in is not None:
            nb = np.size(struct.sel1in)
        elif struct.sel1 is not None:
            nb = struct.sel1.n_atoms
        else:
            nb = None
        if nb is not None and np.size(S2) != nb:
            print('The size of S2 must be 1 or equal the number of bonds being analyzed')
            return
        else:
            BndSpfc = 'yes'
            AMdl = np.atleast_2d(AMdl).T
    else:
        BndSpfc = 'no'

    return tMdl, AMdl, BndSpfc


# %% Anisotropic diffusion
def AnisoDif(struct, direct='vXY', **kwargs):
    """Anisotropic rotational diffusion.

    The diffusion tensor may be given by its principal values (Dxx, Dyy,
    Dzz), or by the isotropic correlation time (tM/tm/tr/tR) plus
    optionally the anisotropy xi and asymmetry eta.  Euler angles ('euler',
    z-y-z) rotating the bond vectors into the diffusion frame are required.
    Returns 5 correlation times and per-bond amplitudes (bond-specific).
    """
    if 'Dxx' in kwargs and 'Dyy' in kwargs and 'Dzz' in kwargs:
        Dzz = kwargs.get('Dzz')
        Dxx = kwargs.get('Dxx')
        Dyy = kwargs.get('Dyy')
        Diso = 1/3*(Dxx+Dyy+Dzz)
        Dsq = (Dxx*Dyy+Dyy*Dzz+Dzz*Dxx)/3
    else:
        tM = None
        for key in ('tM', 'tm', 'tr', 'tR'):
            if key in kwargs:
                tM = kwargs[key]
                break
        if tM is None:
            # FIX: original left tM unbound (NameError) when no time was given
            raise ValueError('AnisoDif requires Dxx/Dyy/Dzz or a correlation time (tM)')
        xi = kwargs.get('xi', 1)    # anisotropy, default 1 (isotropic)
        eta = kwargs.get('eta', 0)  # asymmetry, default 0 (symmetric)

        Diso = 1/(6*tM)
        Dzz = 3*Diso*xi/(2+xi)
        Dxx = (3*Diso-(2/3*eta*(xi-1)/xi+1)*Dzz)/2
        Dyy = 2/3*eta*Dzz*(xi-1)/xi+Dxx
        Dsq = (Dxx*Dyy+Dyy*Dzz+Dzz*Dxx)/3

    "The relaxation rates of the five decay modes"
    D1 = 4*Dxx+Dyy+Dzz
    D2 = Dxx+4*Dyy+Dzz
    D3 = Dxx+Dyy+4*Dzz
    D4 = 6*Diso+6*np.sqrt(Diso**2-Dsq)
    D5 = 6*Diso-6*np.sqrt(Diso**2-Dsq)

    dx = (Dxx-Diso)/np.sqrt(Diso**2-Dsq)
    dy = (Dyy-Diso)/np.sqrt(Diso**2-Dsq)
    dz = (Dzz-Diso)/np.sqrt(Diso**2-Dsq)

    "We rotate the vectors in structure"
    if 'euler' in kwargs and direct == 'vXY':
        vec = RotVec(kwargs.get('euler'), struct.vXY)
    elif 'euler' in kwargs:
        # TODO: should use struct.vCSA - support for calculating the CSA
        # direction must be added first
        vec = RotVec(kwargs.get('euler'), struct.vXY)
    else:
        print('Euler angles are required')
        return

    n = vec.shape[0]
    tM = np.zeros([5])
    A = np.zeros([n, 5])

    for k in range(0, n):
        m = vec[k, :]
        res1 = (1/4)*(3*(m[0]**4+m[1]**4+m[2]**4)-1)
        res2 = (1/12)*(dx*(3*m[0]**4+6*m[1]**2*m[2]**2-1)
                       + dy*(3*m[1]**4+6*m[2]**2*m[0]**2-1)
                       + dz*(3*m[2]**4+6*m[0]**2*m[1]**2-1))

        A[k, 0] = 3*(m[1]**2)*(m[2]**2)
        A[k, 1] = 3*(m[0]**2)*(m[2]**2)
        A[k, 2] = 3*(m[0]**2)*(m[1]**2)
        A[k, 3] = res1-res2
        A[k, 4] = res1+res2

    tM[0] = 1/D1
    tM[1] = 1/D2
    tM[2] = 1/D3
    tM[3] = 1/D4
    tM[4] = 1/D5

    BndSpfc = 'yes'

    return tM, A, BndSpfc


# %% Combine two models
def Combined(tMdl1, AMdl1, tMdl2, AMdl2):
    """Combine two statistically independent motions into one model.

    The combined model contains the correlation times of both models plus
    all pairwise products t1*t2/(t1+t2); amplitudes are scaled by the
    residual order (1-sum A) of the other motion.  If either input is
    empty, the other is returned unchanged.
    """
    if np.ndim(tMdl1) == np.ndim(AMdl1) and np.ndim(tMdl2) == np.ndim(AMdl2):
        BndSpfc = 'no'
    else:
        BndSpfc = 'yes'

    nt1 = tMdl1.size
    nt2 = tMdl2.size
    if np.size(tMdl1) == 0:
        tMdl = tMdl2
        AMdl = AMdl2
    elif np.size(tMdl2) == 0:
        tMdl = tMdl1
        AMdl = AMdl1
    else:
        tMdl = np.zeros((nt1+1)*(nt2+1)-1)

        tMdl[0:nt1] = tMdl1
        tMdl[nt1:nt1+nt2] = tMdl2

        for k in range(0, nt1):
            for m in range(0, nt2):
                tMdl[nt1+nt2+m+k*nt2] = tMdl1[k]*tMdl2[m]/(tMdl1[k]+tMdl2[m])

        # Move the correlation-time axis to the front for the amplitude algebra
        AMdl1 = np.swapaxes(AMdl1, 0, -1)
        AMdl2 = np.swapaxes(AMdl2, 0, -1)

        # Broadcast the lower-dimensional amplitude array up to the other's shape
        if AMdl1.shape[1:] != AMdl2.shape[1:]:
            if AMdl1.ndim > AMdl2.ndim:
                for k in range(1, AMdl1.ndim):
                    AMdl2 = np.repeat(np.array([AMdl2.T]), AMdl1.shape[k], axis=k)
            else:
                for k in range(1, AMdl2.ndim):
                    AMdl1 = np.repeat(np.array([AMdl1.T]), AMdl2.shape[k], axis=k)

        S21 = 1-np.sum(AMdl1, axis=0)  # residual order of motion 1
        S22 = 1-np.sum(AMdl2, axis=0)  # residual order of motion 2

        AMdl = np.zeros(np.concatenate(([(nt1+1)*(nt2+1)-1], AMdl2.shape[1:])).astype(int))
        AMdl[0:nt1] = np.multiply(np.repeat([S22], AMdl1.shape[0], axis=0), AMdl1)
        AMdl[nt1:nt1+nt2] = np.multiply(np.repeat([S21], AMdl2.shape[0], axis=0), AMdl2)

        for k in range(0, nt1):
            for m in range(0, nt2):
                AMdl[nt1+nt2+m+k*nt2] = np.multiply(AMdl1[k], AMdl2[m])

        AMdl = np.swapaxes(AMdl, 0, -1)

    return tMdl, AMdl, BndSpfc


def RotVec(euler, vec):
    """Apply a z-y-z Euler rotation (alpha, beta, gamma) to the row
    vectors in vec (shape (n,3)); returns the rotated vectors, same shape.
    """
    def Rz(theta):
        return np.array([[np.cos(theta), np.sin(theta), 0],
                         [-np.sin(theta), np.cos(theta), 0],
                         [0, 0, 1]])

    def Ry(theta):
        return np.array([[np.cos(theta), 0, -np.sin(theta)],
                         [0, 1, 0],
                         [np.sin(theta), 0, np.cos(theta)]])

    return Rz(euler[2]).dot(Ry(euler[1]).dot(Rz(euler[0]).dot(vec.T))).T
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Thu Apr 11 20:32:25 2019 + +@author: albertsmith +""" +#import os +#cwd=os.getcwd() +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +#from matplotlib.patches import Polygon +import pyDIFRATE.r_class.mdl_sens as mdl +from numpy.linalg import svd +#from scipy.sparse.linalg import svds +from scipy.sparse.linalg import eigs +from scipy.optimize import linprog +from scipy.optimize import lsq_linear as lsqlin +from pyDIFRATE.tools.DRtools import linear_ex +import multiprocessing as mp +import warnings +#os.chdir('../plotting') +import pyDIFRATE.plots.plotting_funs as pf +#os.chdir(cwd) + +warnings.filterwarnings("ignore",r"Ill-conditioned matrix*") +warnings.filterwarnings("ignore",r"Solving system with option*") + +class detect(mdl.model): + def __init__(self,sens,exp_num=None,mdl_num=None): + """ We initiate the detectors class by giving it a sens/Ctsens class, from + which it extracts the specified experiments and models for each + experiment. + + I've replaced the normalization here with 1/max of the sensitivity, and + then using a relative standard deviation, as opposed to the absolute + standard deviation. The question is- do we want to minimize the + """ + + self.n=None; + self._class='detector' + self._origin=sens._origin + + self.__tc=sens.tc() + ntc=np.size(self.__tc) + """This is the maximum number of singular values to return if not + specified. Probably its higher than necessary. 
+ """ + self.__maxN=20; + + + if np.size(exp_num)==1 and exp_num is None: + exp_num=sens.info.columns.values + + "Make sure we're working with numpy array for exp_num" + exp_num=np.atleast_1d(exp_num) + + ne=np.size(exp_num) + + if mdl_num is None: + mdl_num=-1 + + "Make sure we're working with numpy array for mdl_num" + mdl_num=np.atleast_1d(mdl_num) + +# "If all mdl_num are the same, replace with a single entry" +# if np.size(mdl_num)>1 and np.all(mdl_num[0]==mdl_num): +# mdl_num=mdl_num[0] +# + "Delete detector used for R2 exchange correction" + if hasattr(sens,'detect_par') and sens.detect_par['R2_ex_corr']: + sens=sens.copy() #We don't want to edit the original sensitivy object + ne=ne-1 + exp_num=exp_num[exp_num!=sens.info.axes[1][-1]] + sens._remove_R2_ex() + + + "Store all the experiment and model information" + self.info_in=sens.info.loc[:,exp_num].copy() + self.MdlPar_in=sens.MdlPar.copy() + self.mdl_num=mdl_num.copy() + + + k=0 + nM=np.size(self.MdlPar_in) + while kk)[0] + self.mdl_num[in1[in2]]+=-1 + nM=nM-1 + else: + k=k+1 + + if np.all(self.mdl_num==-1): + self.mdl_num=[] + + "Determine if any models are bond specific" + self.BondSpfc='no' + if sens._rho(bond=-1).ndim==3: + self.BondSpfc='yes' #If the previously applied models are bond-specific, we need to maintain bond specificity + else: + for k in self.MdlPar_in: + if k.get('BondSpfc')=='yes': + self.BondSpfc='yes' + + "Pass the molecule object" + """Note that this is not a copy, but rather a pointer to the same object. + If you edit the object, it will be changed in the original sens object + Best to set the selections first with the sens object, and leave alone + here""" + self.molecule=sens.molecule + + + "How many bonds are there?" 
+ nb=self._nb() + + "Storage for the input rate constants" + self.__R=list() #Store experimental sensitivities + self.__R0=list() + self.__RCSA=list() #Store experimental sensitivities for CSA only + self.__R0CSA=list() + + "Load in the sensitivity of the selected experiments" + if np.size(mdl_num)==1: + if self.BondSpfc=='yes': +# for k in range(0,nb): +# a,b=sens._rho_eff(exp_num=exp_num,mdl_num=mdl_num[0],bond=k) +# self.__R.append(a) +# self.__R0.append(b) +# a,b=sens._rho_effCSA(exp_num=exp_num,mdl_num=mdl_num[0],bond=k) +# self.__RCSA.append(a) +# self.__R0CSA.append(b) + a,b=sens._rho_eff(exp_num=exp_num,mdl_num=mdl_num[0],bond=-1) + c,d=sens._rho_effCSA(exp_num=exp_num,mdl_num=mdl_num[0],bond=-1) + for k in range(0,nb): + self.__R.append(a[k]) + self.__R0.append(b[k]) + self.__RCSA.append(c[k]) + self.__R0CSA.append(d[k]) + elif mdl_num!=-1: + a,b=sens._rho_eff(exp_num=exp_num,mdl_num=mdl_num[0]) + self.__R.append(a) + self.__R0.append(b) + a,b=sens._rho_effCSA(exp_num=exp_num,mdl_num=mdl_num[0]) + self.__RCSA.append(a) + self.__R0CSA.append(b) + else: + self.__R.append(sens._rho(exp_num)) + self.__R0.append(np.zeros(self.__R[0].shape[0])) + self.__RCSA.append(sens._rhoCSA(exp_num)) + self.__R0CSA.append(np.zeros(self.__RCSA[0].shape[0])) + else: + "In this case, we have to get the experiments one at a time" + if self.BondSpfc=='yes': + for k in range(0,nb): + self.__R.append(np.zeros([ne,ntc])) + self.__R0.append(np.zeros(ne)) + self.__RCSA.append(np.zeros([ne,ntc])) + self.__R0CSA.append(np.zeros(ne)) + for m in range(0,ne): + a,b=sens._rho_eff(exp_num=exp_num[m],mdl_num=mdl_num[m],bond=k) + self.__R[k][m,:]=a + self.__R0[k][m]=b + a,b=sens._rho_effCSA(exp_num=exp_num[m],mdl_num=mdl_num[m],bond=k) + self.__RCSA[k][m,:]=a + self.__R0CSA[k][m]=b + else: + self.__R.append(np.zeros([ne,ntc])) + self.__R0.append(np.zeros(ne)) + self.__RCSA.append(np.zeros([ne,ntc])) + self.__R0CSA.append(np.zeros(ne)) + for m in range(0,ne): + 
a,b=sens._rho_eff(exp_num=exp_num[m],mdl_num=mdl_num[m]) + self.__R[0][m,:]=a + self.__R0[0][m]=b + a,b=sens._rho_effCSA(exp_num=exp_num[m],mdl_num=mdl_num[m]) + self.__RCSA[0][m,:]=a + self.__R0CSA[0][m]=b + + "Names of the experimental variables that are available" + self.__exper=['rho','z0','z0_std','Del_z','Del_z_std','stdev'] + "Names of the spin system variables that are available" + self.__spinsys=[] + "Initialize self.info" + self.info=None + + + "Some global defaults" + self.detect_par={'Normalization':'M', #Normalization of detectors + 'inclS2':False, + 'NegAllow':0.5, + 'R2_ex_corr':False} + + + + ####################Critical edits################ + "Pass the normalization" + a=self.info_in.loc['stdev'].to_numpy() + b=np.max(np.abs(np.mean(self.__R,axis=0)),axis=-1) #Mean over bonds, absolute value, max of abs val. + b[b==0]=1 #Doesn't really help- zeros in sensitivity doesn't work in SVD + """ + IMPORTANT EDITS HERE. Make Sure to document + """ + "Replace None with 1" + index=a==None + a[index]=1 + self.norm=np.divide(1,a*b).astype('float64') +# self.norm=np.divide(1,a).astype('float64') + ################################################## + "Storage for the detection vectors" + self.__r=[None]*nb #Store the detection vectors + self.__rho=[None]*nb #Store the detector sensitivities + self.__rhoAvg=None + self.__rAvg=None + self.__Rc=[None]*nb #Store the back-calculated sensitivities + self.__RcAvg=None + self.__rhoCSA=[None]*nb #CSA only sensitivities + + "Store SVD matrix for parallel function" +# self.__Vt=None + + self.z0=[None]*nb + self.Del_z=[None]*nb + self.stdev=[None]*nb + + self.SVD=list(np.zeros(nb)) + self.SVDavg=dict() + + "Store error for r_auto routine" + self.__r_auto=dict() + + super().__init__() + + "The previous line clears self.molecule, so we have to load it again :-/" + self.molecule=sens.molecule + + +#%% Performs and stores results of singular value decomposition of experimental sensitivities + def getSVD(self,bond=None,n=None): 
+ "Function to perform (and store) all singular value decomposition calculations" + ne=np.shape(self.__R)[1] + if n is None: + n=np.min([np.shape(self.__R)[1],self.__maxN]) + + if bond is None: + if 'S' in self.SVDavg.keys() and self.SVDavg['S'].size>=n: + U=self.SVDavg['U'][:,0:n] + S=self.SVDavg['S'][0:n] + Vt=self.SVDavg['Vt'][0:n,:] + VtCSA=0 + else: + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + U,S,Vt=svd0(np.multiply(np.mean(self.__R,axis=0),norm),n) + + self.SVDavg['U']=U + self.SVDavg['Vt']=Vt + self.SVDavg['S']=S + VtCSA=0 + + U=U[:,0:n] + S=S[0:n] + Vt=Vt[0:n,:] + else: + if self.SVD[bond]!=0 and self.SVD[bond]['S'].size>=n: + U=self.SVD[bond]['U'][:,0:n] + S=self.SVD[bond]['S'][0:n] + Vt=self.SVD[bond]['Vt'][0:n,:] + VtCSA=self.SVD[bond]['VtCSA'][0:n,:] + + else: + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + + U,S,Vt=svd0(np.multiply(self.Rin(bond),norm),n) + U=U[:,0:np.size(S)] + + + VtCSA=np.dot(np.diag(1/S),np.dot(U.T,np.multiply(self._RCSAin(bond),norm))) + + if self.SVD[bond]==0: + self.SVD[bond]=dict() + + self.SVD[bond]['U']=U + self.SVD[bond]['S']=S + self.SVD[bond]['Vt']=Vt + self.SVD[bond]['VtCSA']=VtCSA + + U=U[:,0:n] + S=S[0:n] + Vt=Vt[0:n,:] + VtCSA=VtCSA[0:n,:] + + return U,S,Vt,VtCSA +#%% Generate r matrix for fitting tests (detector sensitivies are not optimized- and not well-separated) + def r_no_opt(self,n,bond=None,R2_ex_corr=False,**kwargs): + + self.detect_par['inclS2']=False + self.detect_par['R2_ex_corr']=False + + self.n=n + nb=self._nb() + + if nb==1: + bond=0 + + if bond is not None and np.size(bond)==1 and np.atleast_1d(bond)[0]==-1: + bond=np.arange(0,nb) + + if bond is None: + U,S,Vt,VCSA=self.getSVD(None,n) + self.__rAvg=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),np.dot(U,np.diag(S))) + self.__rhoAvg=Vt + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__RcAvg=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + 
self.SVDavg['T']=np.eye(n) + self.SVDavg['stdev']=1/S + if 'sort_rho' not in kwargs: + kwargs['sort_rho']='n' + + self.__r_info(None,**kwargs) + else: + + bond=np.atleast_1d(bond) + + for k in bond: + U,S,Vt,VCSA=self.getSVD(k,n) + #Here, we try to control the sign returned for Vt + #(it would be nice if repeated runs of r_no_opt returned the same results) + sgn=np.sign(Vt.sum(axis=1)) + sgn[sgn==0]=Vt[sgn==0,:].max(axis=1) + Vt=(sgn*Vt.T).T + VCSA=(sgn*VCSA.T).T + U=sgn*U + self.__r[k]=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),np.dot(U,np.diag(S))) + self.__rho[k]=Vt + self.__rhoCSA[k]=VCSA + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__Rc[k]=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + self.SVD[k]['T']=np.eye(n) + + if R2_ex_corr: + self.R2_ex_corr(bond=k,**kwargs) + + self.__r_info(k,**kwargs) + + + if 'sort_rho' not in kwargs: + kwargs['sort_rho']='n' + self.__r_info(None,**kwargs) + +#%% Automatic generation of detectors from a set of sensitivities + def r_auto(self,n,Normalization='Max',inclS2=False,NegAllow=0.5,R2_ex_corr=False,bond=None,parallel=True,z0=None,**kwargs): + + assert n<=self.Rin().shape[0],'Number of detectors cannot be larger than the number of experiments' + + self.n=n + + self.detect_par['inclS2']=inclS2 + + "A little bit silly that the variable names changed...fix later" + Neg=NegAllow + R2ex=R2_ex_corr + + "Store some of the inputs" + self.detect_par.update({'Normalization':Normalization,'inclS2':inclS2,'R2_ex_corr':R2_ex_corr,'NegAllow':NegAllow}) + + + nb=self._nb() + "If bond set to -1, run through all orientations." 
+ if bond is None: + bonds=np.zeros(0) + elif np.size(bond)==1 and np.atleast_1d(bond)[0]==-1: + bond=None + bonds=np.arange(0,nb) + else: + bond=np.atleast_1d(bond) + bonds=bond[1:] + bond=bond[0] + + + if nb==1: + "If we only have one set of sensitivities (that is, no orientation dependence, then don't use averages" + bond=0 + + if bond is None: + "Here we operate on the average sensitivities" + U,S,Vt,VCSA=self.getSVD(None,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__RcAvg=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + else: + "We work on the first bond given, and use r_target for the remaining bonds" + U,S,Vt,VCSA=self.getSVD(bond,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__Rc[bond]=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + + ntc=np.size(self.__tc) #Number of correlation times + + + """ + In the follow lines (loop over ntc, or z0), we optimize detectors at + either every possible correlation time, or correlation times specified + by z0. + """ + + def true_range(k,untried): + """Finds the range around k in untried where all values are True + """ + i=np.nonzero(np.logical_not(untried[k:]))[0] + right=(k+i[0]) if len(i)!=0 else len(untried) + i=np.nonzero(np.logical_not(untried[:k]))[0] + left=(i[-1]+1) if len(i)!=0 else 0 + + return left,right + + def find_nearest(Vt,k,untried,error=None,endpoints=False): + """Finds the location of the best detector near index k. Note that the + vector untried indicates where detectors may still exist. k must fall + inside a range of True elements in untried, and we will only search within + that range. Note that by default, finding the best detector at the + end of that range will be disallowed, since the range is usually bound + by detectors that have already been identified. Exceptions are the first + and last positions. 
untried will be modified in-place + """ + + left,right=true_range(k,untried) + + maxi=100000 + test=k + while k!=maxi: + if not(np.any(untried[left:right])):return #Give up if the whole range of untried around k is False + k=test + rhoz0,x,maxi=det_opt(Vt,k) + error[k]=np.abs(k-maxi) + if k<=maxi:untried[k:maxi+1]=False #Update the untried index + else:untried[maxi:k+1]=False + test=maxi + + if (k<=left or k>=right-1) and not(endpoints): + return None #Don't return ends of the range unless 0 or ntc + else: + return rhoz0,x,k + + def biggest_gap(untried): + """Finds the longest range of True values in the untried index + """ + k=np.nonzero(untried)[0][0] + gap=0 + biggest=0 + while True: + left,right=true_range(k,untried) + if right-left>gap: + gap=right-left + biggest=np.mean([left,right],dtype=int) + i0=np.nonzero(untried[right:])[0] + if len(i0)>0: + k=right+np.nonzero(untried[right:])[0][0] + else: + break + return biggest + + + + def det_opt(Vt,k,target=None): + """Performs the optimization of a detectors having a value of 1 at the kth + correlation time, and minimized elsewhere. Target is the minimum allowed + value for the detector as a function of correlation time. Default is zeros + everywhere. + + Returns the optimized detector and the location of the maximum of that + detector + """ + ntc=Vt.shape[1] + target=target if target else np.zeros(ntc) + x=linprog(Vt.sum(1),-Vt.T,-target,[Vt[:,k]],1,bounds=(-500,500),\ + method='interior-point',options={'disp':False}) + rhoz=(Vt.T@x['x']).T + maxi=np.argmax(np.abs(rhoz)) + return rhoz,x['x'],maxi + + #Locate where the Vt are sufficiently large to have maxima + i0=np.nonzero(np.any(np.abs(Vt.T)>(np.abs(Vt).max(1)*.75),1))[0] + + untried=np.ones(ntc,dtype=bool) + untried[:i0[0]]=False + untried[i0[-1]+1:]=False + count=0 #How many detectors have we found? 
+ index=list() #List of indices where detectors are found + rhoz=list() #Optimized sensitivity + X=list() #Columns of the T-matrix + err=np.ones(ntc,dtype=int)*ntc #Keep track of error at all time points tried + + "Locate the left-most detector" + if untried[0]: + rhoz0,x,k=find_nearest(Vt,0,untried,error=err,endpoints=True) + rhoz.append(rhoz0) + X.append(x) + index.append(k) + count+=1 + "Locate the right-most detector" + if untried[-1] and n>1: + rhoz0,x,k=find_nearest(Vt,ntc-1,untried,error=err,endpoints=True) + rhoz.append(rhoz0) + X.append(x) + index.append(k) + count+=1 + "Locate remaining detectors" + while count0.95*np.max(rhoz[k,:]) or rhoz[k,-1]>0.95*np.max(rhoz[k,:])) and Neg!=0: + + reopt=True #Option to cancel the re-optimization in special cases + + if rhoz[k,0]>0.95*np.max(rhoz[k,:]): + pm=1; + else: + pm=-1; + + temp=rhoz[k,:] + "Locate maxima and minima in the detector" + mini=np.where((temp[2:]-temp[1:-1]>=0) & (temp[1:-1]-temp[0:-2]<=0))[0]+1 + maxi=np.where((temp[2:]-temp[1:-1]<=0) & (temp[1:-1]-temp[0:-2]>=0))[0]+1 + + """Filter out minima that occur at more than 90% of the sensitivity max, + since these are probably just glitches in the optimization. 
+ """ + if np.size(mini)>=2 and np.size(maxi)>=2: + mini=mini[(temp[mini]<.9) & (temp[mini]<.05*np.max(-pm*np.diff(temp[maxi])))] + elif np.size(mini)>=2: + mini=mini[temp[mini]<.9] + + if np.size(maxi)>=2: + maxi=maxi[(temp[maxi]<.9) & (temp[maxi]>0.0)] + # maxi=maxi[(temp[maxi]<.9) & (temp[maxi]>0.0*np.max(-pm*np.diff(temp[maxi])))] + + + if rhoz[k,0]>0.95*np.max(rhoz[k,:]): + + "Calculation for the first detection vector" + + if np.size(maxi)>=2 & np.size(mini)>=2: + step=int(np.round(np.diff(mini[0:2])/2)) + slope2=-(temp[maxi[-1]]-temp[maxi[0]])*Neg/(maxi[-1]-maxi[0]) + elif np.size(maxi)==1 and np.size(mini)>=1: + step=maxi[0]-mini[0] + slope2=temp[maxi[0]]*Neg/step + else: + reopt=False + + if reopt: + a=np.max([1,mini[0]-step]) + slope1=-temp[maxi[0]]/step*Neg + line1=np.arange(0,-temp[maxi[0]]*Neg-1e-12,slope1) + line2=np.arange(-temp[maxi[0]]*Neg,1e-12,slope2) + try: + target=np.concatenate((np.zeros(a),line1,line2,np.zeros(ntc-a-np.size(line1)-np.size(line2)))) + except: + reopt=False + + else: + "Calculation otherwise (last detection vector)" + if np.size(maxi)>=2 & np.size(mini)>=2: + step=int(np.round(np.diff(mini[-2:])/2)) + slope2=-(temp[maxi[0]]-temp[maxi[-1]])*Neg/(maxi[0]-maxi[-1]) + elif np.size(maxi)==1 and np.size(mini)>=1: + step=mini[-1]-maxi[0] + slope2=-temp[maxi[0]]*Neg/step + else: + reopt=False + + if reopt: + a=np.min([ntc,mini[-1]+step]) + slope1=temp[maxi[-1]]/step*Neg + + line1=np.arange(-temp[maxi[-1]]*Neg,1e-12,slope1) + line2=np.arange(0,-temp[maxi[-1]]*Neg-1e-12,slope2) + target=np.concatenate((np.zeros(a-np.size(line1)-np.size(line2)),line2,line1,np.zeros(ntc-a))) + + + if reopt: + Y=(Vt,pks[k],target) + + X=linprog_par(Y) + T[k,:]=X + rhoz[k,:]=np.dot(T[k,:],Vt) + except: + pass + + "Save the results into the detect object" +# self.r0=self.__r + if bond is None: + self.__rAvg=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T.T,np.diag(S)).T)) + self.__rhoAvg=rhoz + self.SVDavg['T']=T + 
self.__r_auto={'Error':err,'Peaks':pks,'rho_z':self.__rhoAvg.copy()} + if R2ex: + self.R2_ex_corr(bond,**kwargs) + self.__r_norm(bond,**kwargs) + if inclS2: + self.inclS2(bond=None,**kwargs) + self.__r_info(bond,**kwargs) + if np.size(bonds)>0: + if 'NT' in kwargs: #We don't re-normalize the results of detectors obtained with r_target + kwargs.pop('NT') + if 'Normalization' in kwargs: + kwargs.pop('Normalization') + self.r_target(n,bond=bonds,Normalization=None,**kwargs) + else: + + """This isn't correct yet- if more than one bond, we want to + use the result for the average calculation as a target for the + individual bonds, not loop over all bonds with the result here + """ + self.__r[bond]=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T.T,np.diag(S)).T)) + self.__rho[bond]=rhoz + self.__rhoCSA[bond]=np.dot(T,VCSA) + self.SVD[bond]['T']=T + self.__r_auto={'Error':err,'Peaks':pks,'rho_z':self.__rho[bond].copy()} + if R2ex: + self.R2_ex_corr(bond,**kwargs) + + self.__r_norm(bond,**kwargs) + if inclS2: + self.inclS2(bond=k,**kwargs) + self.__r_info(bond,**kwargs) + if np.size(bonds)>0: + if 'NT' in kwargs: #We don't re-normalize the results of detectors obtained with r_target + kwargs.pop('NT') + if 'Normalization' in kwargs: + kwargs.pop('Normalization') + self.r_target(n,self.__rho[bond],bonds,Normalization=None,**kwargs) + + def r_auto2(self,n,Normalization='Max',inclS2=False,NegAllow=0.5,R2_ex_corr=False,bond=None,parallel=True,z0=None,**kwargs): + + assert n<=self.Rin().shape[0],'Number of detectors cannot be larger than the number of experiments' + + self.n=n + + self.detect_par['inclS2']=inclS2 + + "A little bit silly that the variable names changed...fix later" + Neg=NegAllow + R2ex=R2_ex_corr + + "Store some of the inputs" + self.detect_par.update({'Normalization':Normalization,'inclS2':inclS2,'R2_ex_corr':R2_ex_corr,'NegAllow':NegAllow}) + + + nb=self._nb() + "If bond set to -1, run through all orientations." 
+ if bond is None: + bonds=np.zeros(0) + elif np.size(bond)==1 and np.atleast_1d(bond)[0]==-1: + bond=None + bonds=np.arange(0,nb) + else: + bond=np.atleast_1d(bond) + bonds=bond[1:] + bond=bond[0] + + + if nb==1: + "If we only have one set of sensitivities (that is, no orientation dependence, then don't use averages" + bond=0 + + if bond is None: + "Here we operate on the average sensitivities" + U,S,Vt,VCSA=self.getSVD(None,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__RcAvg=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + else: + "We work on the first bond given, and use r_target for the remaining bonds" + U,S,Vt,VCSA=self.getSVD(bond,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__Rc[bond]=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + + ntc=np.size(self.__tc) #Number of correlation times + err=np.zeros(ntc) #Error of fit + + + """ + In the follow lines (loop over ntc, or z0), we optimize detectors at + either every possible correlation time, or correlation times specified + by z0. + """ + + "Prepare data for parallel processing" + Y=list() + if z0 is None: + z0index=range(ntc) + else: + if n>len(z0): + print('z0 must have at least length n') + return + else: + z0index=list() + for z1 in z0: + z0index.append(np.argmin(np.abs(self.z()-z1))) + + for k in z0index: + Y.append((Vt,k)) + + "Default is parallel processing" + if not(parallel): + X=list() + for Y0 in Y: + X.append(linprog_par(Y0)) + else: + with mp.Pool() as pool: + X=pool.map(linprog_par,Y) + + """We optimized detectors at every correlation time (see __linprog_par), + which have values at 1 for the given correlation time. We want to keep + those detectors where the maximum is closest to the correlation time set + to 1. 
We search for those here: + """ + if z0 is None: + for k in range(0,ntc): + err[k]=np.abs(np.argmax(np.dot(Vt.T,X[k]))-k) + else: + err=np.ones(ntc)*ntc + for m,k in enumerate(z0index): + err[k]=np.abs(np.argmax(np.dot(Vt.T,X[m]))-k) + x0=X.__iter__() + X=[x0.__next__() if k in z0index else None for k in range(ntc)] + + + if 'Type' in self.info_in.index and 'S2' in self.info_in.loc['Type'].to_numpy() and z0 is None: + err[0]=0 #Forces a detector that is non-zero at the shortest correlation time if S2 included + "Possibly need to delete above two lines...not fully tested" + + """Ideally, the number of detectors equals the number of minima in err, + however, due to calculation error, this may not always be the case. We + start searching for values where err=0. If we don't have enough, we + raise this value in steps (looking for err=1, err=2), until we have + enough. If, in one step, we go from too few to too many, we eliminate, + one at a time, the peak that is closest to another peak. + """ + test=True + thresh=0 + while test: + pks=np.where(err<=thresh)[0] + if pks.size==n: + test=False + elif pks.sizen: + + while pks.size>n: + a=np.argsort(np.diff(pks)) +# pks=np.concatenate([pks[a[np.size(pks)-n:]],[pks[-1]]]) + pks=np.concatenate([pks[a[1:]],[pks[-1]]]) + pks.sort() + test=False + + "Save the linear combinations for the best detectors" + T=np.zeros([n,n]) + for k in range(0,n): + T[k,:]=X[pks[k]] + + rhoz=np.dot(T,Vt) + + """Detectors that are not approaching zero at the end of the range of + correlation times tend to oscillate where they do approach zero. 
We want + to push that oscillation slightly below zero + """ + + for k in range(0,n): + try: + if (rhoz[k,0]>0.95*np.max(rhoz[k,:]) or rhoz[k,-1]>0.95*np.max(rhoz[k,:])) and Neg!=0: + + reopt=True #Option to cancel the re-optimization in special cases + + if rhoz[k,0]>0.95*np.max(rhoz[k,:]): + pm=1; + else: + pm=-1; + + temp=rhoz[k,:] + "Locate maxima and minima in the detector" + mini=np.where((temp[2:]-temp[1:-1]>=0) & (temp[1:-1]-temp[0:-2]<=0))[0]+1 + maxi=np.where((temp[2:]-temp[1:-1]<=0) & (temp[1:-1]-temp[0:-2]>=0))[0]+1 + + """Filter out minima that occur at more than 90% of the sensitivity max, + since these are probably just glitches in the optimization. + """ + if np.size(mini)>=2 and np.size(maxi)>=2: + mini=mini[(temp[mini]<.9) & (temp[mini]<.05*np.max(-pm*np.diff(temp[maxi])))] + elif np.size(mini)>=2: + mini=mini[temp[mini]<.9] + + if np.size(maxi)>=2: + maxi=maxi[(temp[maxi]<.9) & (temp[maxi]>0.0)] + # maxi=maxi[(temp[maxi]<.9) & (temp[maxi]>0.0*np.max(-pm*np.diff(temp[maxi])))] + + + if rhoz[k,0]>0.95*np.max(rhoz[k,:]): + "Calculation for the first detection vector" + + if np.size(maxi)>=2 & np.size(mini)>=2: + step=int(np.round(np.diff(mini[0:2])/2)) + slope2=-(temp[maxi[-1]]-temp[maxi[0]])*Neg/(maxi[-1]-maxi[0]) + elif np.size(maxi)==1 and np.size(mini)>=1: + step=maxi[0]-mini[0] + slope2=temp[maxi[0]]*Neg/step + else: + reopt=False + + if reopt: + a=np.max([1,mini[0]-step]) + slope1=-temp[maxi[0]]/step*Neg + line1=np.arange(0,-temp[maxi[0]]*Neg-1e-12,slope1) + line2=np.arange(-temp[maxi[0]]*Neg,1e-12,slope2) + try: + target=np.concatenate((np.zeros(a),line1,line2,np.zeros(ntc-a-np.size(line1)-np.size(line2)))) + except: + reopt=False + + else: + "Calculation otherwise (last detection vector)" + if np.size(maxi)>=2 & np.size(mini)>=2: + step=int(np.round(np.diff(mini[-2:])/2)) + slope2=-(temp[maxi[0]]-temp[maxi[-1]])*Neg/(maxi[0]-maxi[-1]) + elif np.size(maxi)==1 and np.size(mini)>=1: + step=mini[-1]-maxi[0] + slope2=-temp[maxi[0]]*Neg/step + 
else: + reopt=False + + if reopt: + a=np.min([ntc,mini[-1]+step]) + slope1=temp[maxi[-1]]/step*Neg + + line1=np.arange(-temp[maxi[-1]]*Neg,1e-12,slope1) + line2=np.arange(0,-temp[maxi[-1]]*Neg-1e-12,slope2) + target=np.concatenate((np.zeros(a-np.size(line1)-np.size(line2)),line2,line1,np.zeros(ntc-a))) + + + if reopt: + Y=(Vt,pks[k],target) + + X=linprog_par(Y) + T[k,:]=X + rhoz[k,:]=np.dot(T[k,:],Vt) + except: + pass + + + "Save the results into the detect object" +# self.r0=self.__r + if bond is None: + self.__rAvg=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T.T,np.diag(S)).T)) + self.__rhoAvg=rhoz + self.SVDavg['T']=T + self.__r_auto={'Error':err,'Peaks':pks,'rho_z':self.__rhoAvg.copy()} + if R2ex: + self.R2_ex_corr(bond,**kwargs) + self.__r_norm(bond,**kwargs) + if inclS2: + self.inclS2(bond=None,**kwargs) + self.__r_info(bond,**kwargs) + if np.size(bonds)>0: + if 'NT' in kwargs: #We don't re-normalize the results of detectors obtained with r_target + kwargs.pop('NT') + if 'Normalization' in kwargs: + kwargs.pop('Normalization') + self.r_target(n,bond=bonds,Normalization=None,**kwargs) + else: + + """This isn't correct yet- if more than one bond, we want to + use the result for the average calculation as a target for the + individual bonds, not loop over all bonds with the result here + """ + self.__r[bond]=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T.T,np.diag(S)).T)) + self.__rho[bond]=rhoz + self.__rhoCSA[bond]=np.dot(T,VCSA) + self.SVD[bond]['T']=T + self.__r_auto={'Error':err,'Peaks':pks,'rho_z':self.__rho[bond].copy()} + if R2ex: + self.R2_ex_corr(bond,**kwargs) + + self.__r_norm(bond,**kwargs) + if inclS2: + self.inclS2(bond=k,**kwargs) + self.__r_info(bond,**kwargs) + if np.size(bonds)>0: + if 'NT' in kwargs: #We don't re-normalize the results of detectors obtained with r_target + kwargs.pop('NT') + if 'Normalization' in kwargs: + kwargs.pop('Normalization') + 
self.r_target(n,self.__rho[bond],bonds,Normalization=None,**kwargs) + + def r_target(self,target=None,n=None,bond=None,Normalization=None,inclS2=None,R2_ex_corr=None,parallel=True,**kwargs): + """Set sensitivities as close to some target function as possible + + Note, if no target given, this function updates bonds to match the r_auto + sensitivity results. Then, the settings R2_ex_corr, and inclS2 are taken + from the previous settings. Otherwise, these are set to False. + """ + + + + if target is None: + try: +# target=self.__r_auto.get('rho_z') + target=self.rhoz() + except: + print('No target provided, and no sensitivity from r_auto available') + return + + R2ex=self.detect_par['R2_ex_corr'] + inS2=self.detect_par['inclS2'] + target=self.rhoz(bond=None) + if R2ex: + target=target[:-1] + if inS2: + target=target[1:] + if R2_ex_corr is None: + R2_ex_corr=R2ex + if inclS2 is None: + inclS2=inS2 + + + target=np.atleast_2d(target) + + "Store some of the inputs" + self.detect_par.update({'Normalization':Normalization,'inclS2':inclS2,'R2_ex_corr':R2_ex_corr}) + + + + + if n is None: + n=target.shape[0] + + self.n=n + nb=self._nb() + + "If bond set to -1, run through all orientations." 
+ if bond is not None and np.size(bond)==1 and np.atleast_1d(bond)[0]==-1: + bond=np.arange(0,nb) + + if nb==1: + bond=0 + + + if bond is None: + "Here we operate on the average sensitivities" + U,S,Vt,VCSA=self.getSVD(None,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__RcAvg=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + + T=lsqlin_par((Vt,target)) + + rhoz=np.dot(T,Vt) + self.__rAvg=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T.T,np.diag(S)).T)) + self.__rhoAvg=rhoz + self.SVDavg['T']=T + if Normalization is not None: + self.__r_norm(None,**kwargs) + if ('inclS2' in kwargs and kwargs['inclS2']) or\ + self.detect_par['inclS2']: + self.inclS2(bond=None,**kwargs) + self.__r_info(bond,**kwargs) + else: + Y=list() + bond=np.atleast_1d(bond) + + for k in bond: + U,S,Vt,VCSA=self.getSVD(k,n) + norm=np.repeat(np.transpose([self.norm]),np.size(self.__tc),axis=1) + self.__Rc[k]=np.divide(np.dot(U,np.dot(np.diag(S),Vt)),norm) + Y.append((Vt,target)) + + "Default is parallel processing" + if not(parallel) or len(Y)==1: + T=[lsqlin_par(k) for k in Y] + else: + with mp.Pool() as pool: + T=pool.map(lsqlin_par,Y) + + for index,k in enumerate(bond): + U,S,Vt,VCSA=self.getSVD(k,n) + self.SVD[k]['T']=T[index] + self.__r[k]=np.multiply(np.repeat(np.transpose([1/self.norm]),n,axis=1),\ + np.dot(U,np.linalg.solve(T[index].T,np.diag(S)).T)) + self.__rho[k]=np.dot(T[index],Vt) + self.__rhoCSA[k]=np.dot(T[index],VCSA) + self.SVD[k]['T']=T[index] + if Normalization is not None: + self.__r_norm(k,**kwargs) + if self.detect_par['R2_ex_corr']: + self.R2_ex_corr(bond=k,**kwargs) + if self.detect_par['inclS2']: + self.inclS2(bond=k,**kwargs) + + + if 'sort_rho' not in kwargs: + kwargs['sort_rho']='n' + self.__r_info(bond,**kwargs) + + +# def r_IMPACT(self,n=None,tc=None,z=None,IMPACTbnds=True,inclS2=False,Normalization='MP',unidist_range=[-11,-8],**kwargs): +# """ +# Optimizes a set of detectors that behave as 
an IMPACT fit. That is, +# we set up an array of correlation times to fit data to. +# +# Options are to request a specific number of correlation times, in which +# case we will optimize the fit of a uniform distribution with n +# correlation times, where we vary the width and center of the array of +# correlation times. Otherwise, one may input the array of correlation +# times directly (set either n or tc as an array). +# +# Furthermore, by default we calculate the sensitivities for a single +# correlation time using bounds on each correlation time (min=0,max=1), +# and with the sum of all amplitudes adding to 1 (IMPACTbnds=True). This +# is the default IMPACT behavior, although it is not the recommended +# detectors methodology (detectors optimized to use the IMPACT methodology +# will not behave like true detectors!). Alternatively, we may set +# IMPACTbnds False for standard detectors behavior. +# +# Note that IMPACT required all amplitudes to sum to 1. We do not require +# this, although setting inclS2 to True, and Normalization to MP will add +# a detector that will cause the total amplitude to be 1. This is equivalent +# to adding a very short (~10 ps) correlation time to the correlation +# time array. +# +# IMPACT Reference: +# Khan, S. N., C. Charlier, R. Augustyniak, N. Salvi, V. Dejean, +# G. Bodenhausen, O. Lequin, P. Pelupessy, and F. Ferrage. +# “Distribution of Pico- and Nanosecond Motions in Disordered Proteins +# from Nuclear Spin Relaxation.” Biophys J, 2015. +# https://doi.org/10.1016/j.bpj.2015.06.069. 
+# """ +# +# +# assert not(n is None and tc is None and z is None),"Set n, tc, or z" +# +# self.detect_par['inclS2']=inclS2 +# if Normalization is not None:self.detect_par['Normalization']=Normalization +# if 'R2_ex_corr' in kwargs:self.detect_par['R2_ex_corr']=kwargs['R2_ex_corr'] +# +# if tc is None and z is None: +# dz=np.diff(self.z()[:2])[0] +# i1=np.argmin(np.abs(self.z()-unidist_range[0])) +# i2=np.argmin(np.abs(self.z()-unidist_range[1])) +# R=self.__R[0][:,i1:i2].sum(1)/(i2-i1) +## assert n<=R.size,"n must be less than or equal to the number of experiments" +# +# err=list() +# w=list() +# c=list() +# for width in np.linspace(2,8,30): +# zswp=self.z() +# zswp=np.linspace(self.z()[0]+width/2,self.z()[-1]-width/2,100) +# for center in zswp: +# z=np.linspace(-width/2,width/2,n)+center +# r=linear_ex(self.z(),self.__R[0],z) +# err.append(lsqlin(r,R,(0,1) if IMPACTbnds else (-np.inf,np.inf))['cost']) +# w.append(width) +# c.append(center) +# i=np.argmin(np.array(err)) +# z=np.linspace(-w[i]/2,w[i]/2,n)+c[i] +# +# +# z=np.array(z) if tc is None else np.log10(tc) +# print(z) +# assert len(z)<=self.__R[0].shape[0],\ +# "Number of correlation times must be less than or equal to the number of experiments" +# r=linear_ex(self.z(),self.__R[0],z) +# +# ntc=self.tc().size +# rhoz=np.zeros([len(z),ntc]) +# rhozCSA=np.zeros([len(z),ntc]) +# for k in range(ntc): +# rhoz[:,k]=lsqlin(r,self.__R[0][:,k],(0,1) if IMPACTbnds else (-np.inf,np.inf))['x'] +# rhozCSA[:,k]=lsqlin(r,self.__RCSA[0][:,k],(0,1) if IMPACTbnds else (-np.inf,np.inf))['x'] +# self.__rho[0]=rhoz +# self.__rhoCSA[0]=rhozCSA +# self.__r[0]=r +# self.n=self.__r[0].shape[1] +# self.SVD[0]={'U':None,'S':None,'Vt':None,'VtCSA':None,'T':np.eye(self.n)} +# if Normalization is not None: +# self.__r_norm(0,Normalization=Normalization) +# if self.detect_par['R2_ex_corr']: +# self.R2_ex_corr(bond=0,**kwargs) +# if self.detect_par['inclS2']: +# self.inclS2(bond=0,Normalization=Normalization) +# self.__r_info() + + + def 
__addS2(self,bond=None,**kwargs): + if 'NT' in kwargs: + NT=kwargs.get('NT') + elif 'Normalization' in kwargs: + NT=kwargs.get('Normalization') + else: + NT=self.detect_par.get('Normalization') + + + def R2_ex_corr(self,bond=None,v_ref=None,**kwargs): + """ + detect.R2_ex_corr(bond=None,v_ref=None,**kwargs) + Attempts to fit exchange contributions to R2 relaxation. Requires R2 + measured at at least two fields. By default, adds a detection vector which + corrects for exchange, and returns the estimated exchange contribution at + the lowest field at which R2 was measured. + """ + self.detect_par.update({'R2_ex_corr':True}) + + index=self.info_in.loc['Type']=='R2' + if np.where(index)[0].size<2: + print('Warning: At least 2 R2 experiments are required to perform the exchange correction') + return + + nb=self._nb() + if nb==1: + "If bond is not specified, and we don't have bond specificity, operate on bond 0" + bond=0 + + + + + r_ex_vec=np.zeros(self.info_in.shape[1]) + v0=np.atleast_1d(self.info_in.loc['v0'][index]) + if v_ref is None: + v_ref=np.min(v0) + + r_ex_vec[index]=np.divide(v0**2,v_ref**2) + + rhoz=np.zeros(self.tc().size) + rhoz[-1]=1e6 + if bond is None: + self.__rAvg=np.concatenate((self.__rAvg,np.transpose([r_ex_vec])),axis=1) + self.__rhoAvg=np.concatenate((self.__rhoAvg,[rhoz]),axis=0) + else: + bond=np.atleast_1d(bond) #Work with np arrays + for k in bond: + self.__r[k]=np.concatenate((self.__r[k],np.transpose([r_ex_vec])),axis=1) + self.__rho[k]=np.concatenate((self.__rho[k],[rhoz]),axis=0) + self.__rhoCSA[k]=np.concatenate((self.__rhoCSA[k],np.zeros([1,self.tc().size]))) + + def inclS2(self,bond=0,**kwargs): + """ + Adds an additional detector calculated from a measured order parameter. 
+ If using, one must include the last column of data.R as the order parameter + measurement (input as 1-S2) + """ + self.detect_par['inclS2']=True + + nb=self._nb() + if nb==1: + bond=0 #No bond specificity, operate on bond 0 + + "Put together pretty quickly- should review and verify CSA behavior is correct" + "Note that we don't really expect users to use CSA w/ S2...could be though" + + bond=np.atleast_1d(bond) + for k in bond: + if self.detect_par['Normalization'][:2].lower()=='mp': + wt=linprog(-(self.__rho[k].sum(axis=1)).T,self.__rho[k].T,np.ones(self.__rho[k].shape[1]),\ + bounds=(-500,500),method='interior-point',options={'disp' :False,})['x'] + rhoz0=[1-np.dot(self.__rho[k].T,wt).T] + rhoz0CSA=[1-np.dot(self.__rhoCSA[k].T,wt).T] + sc=np.atleast_1d(rhoz0[0].max()) + self.__rho[k]=np.concatenate((rhoz0/sc,self.__rho[k])) + self.__rhoCSA[k]=np.concatenate((rhoz0CSA/sc,self.__rhoCSA[k])) + mat1=np.concatenate((np.zeros([self.__r[k].shape[0],1]),self.__r[k]),axis=1) + mat2=np.atleast_2d(np.concatenate((sc,wt.T),axis=0)) + self.__r[k]=np.concatenate((mat1,mat2),axis=0) + elif self.detect_par['Normalization'][0].lower()=='m': + self.__r[k]=np.concatenate((\ + np.concatenate((np.zeros([self.__r[k].shape[0],1]),self.__r[k]),axis=1),\ + np.ones([1,self.__r[k].shape[1]+1])),axis=0) + self.__rho[k]=np.concatenate(([1-self.__rho[k].sum(axis=0)],\ + self.__rho[k]),axis=0) + self.__rhoCSA[k]=np.concatenate(([1-self.__rhoCSA[k].sum(axis=0)],\ + self.__rhoCSA[k]),axis=0) + elif self.detect_par['Normalization'][0].lower()=='i': + wt=linprog(-(self.__rho[k].sum(axis=1)).T,self.__rho[k].T,np.ones(self.__rho[k].shape[1]),\ + bounds=(-500,500),method='interior-point',options={'disp' :False,})['x'] + rhoz0=[1-np.dot(self.__rho[k].T,wt).T] + rhoz0CSA=[1-np.dot(self.__rhoCSA[k].T,wt).T] + sc=rhoz0[0].sum()*np.diff(self.z()[:2]) + self.__rho[k]=np.concatenate((rhoz0/sc,self.__rho[k])) + self.__rhoCSA[k]=np.concatenate((rhoz0CSA/sc,self.__rhoCSA[k])) + 
mat1=np.concatenate((np.zeros([self.__r[k].shape[0],1]),self.__r[k]),axis=1) + mat2=np.atleast_2d(np.concatenate((sc,wt.T),axis=0)) + self.__r[k]=np.concatenate((mat1,mat2),axis=0) + + def _remove_R2_ex(self): + """ + Deletes the R2 exchange correction from all bonds and from the average + sensitivity calculation. If the user has manually set detect_par['R2_ex_corr'] + to 'no', this function will do nothing (so don't edit this parameter + manually!) + detect._remove_R2_ex() + """ + + if not(self.detect_par['R2_ex_corr']): + return + else: + self.detect_par['R2_ex_corr']=False + if self.info is not None: + self.info=self.info.drop(self.info.axes[1][-1],axis=1) + if self.__rAvg is not None: + self.__rAvg=self.__rAvg[:,:-1] + self.__rhoAvg=self.__rhoAvg[:-1] +# nb=np.shape(self.__r)[0] + nb=self._nb() + + for k in range(nb): + if self.__r is not None and self.__r[k] is not None: + self.__r[k]=self.__r[k][:,:-1] + if self.__rho[k] is not None: + self.__rho[k]=self.__rho[k][:-1] + self.__rhoCSA[k]=self.__rhoCSA[k][:-1] + + def del_exp(self,exp_num): + + if self.info is not None and self.info_in is not None: + print('Deleting experiments from the detector object requires disabling the detectors') + self._disable() + print('Detectors now disabled') + + if np.size(exp_num)>1: #Multiple experiments: Just run this function for each experiment + exp_num=np.atleast_1d(exp_num) + exp_num[::-1].sort() #Sorts exp_num in descending order + for m in exp_num: + self.del_exp(m) + else: + if exp_num==self.n and self.detect_par['R2_ex_corr']: + self._remove_R2_ex() #In case we try to delete the last experient, which is R2 exchange, we remove this way + else: + if np.ndim(exp_num)>0: + exp_num=exp_num[0] + self.info=self.info.drop(exp_num,axis=1) + self.info.columns=range(len(self.info.columns)) + if self.__rhoAvg is not None: + self.__rhoAvg=np.delete(self.__rhoAvg,exp_num,axis=0) + nb=self._nb() + for k in range(nb): + self.__rho[k]=np.delete(self.__rho[k],exp_num,axis=0) + 
self.__rhoCSA[k]=np.delete(self.__rhoCSA[k],exp_num,axis=0) + + self.n+=-1 + + + def _disable(self): + """ + Clears many of the variables that allow a detectors object to be used + for fitting and for further detector optimization. This is useful when + passing the detector object as a sensitivity object resulting from a fit. + The reasoning is that the sensitivity stored in a fit should not be changed + for any reason, since the fit has already been performed and therefore + the detector sensitivities should not be changed (hidden, only intended + for internal use) + + Note, there is an added benefit that detectors generated for direct + application to MD-derived correlation functions may be rather large, and + so we save considerable memory here as well (esp. when saving) + """ + nb=self._nb() + + self.__R=None #Stores experimental sensitivities + self.__R0=list() + self.__RCSA=list() #Stores experimental sensitivities for CSA only + self.__R0CSA=list() + + self.__r=None + + self.__Rc=None #Stores the back-calculated sensitivities + self.__RcAvg=None + + self.SVD=None #Stores SVD results + self.SVDavg=None + +# self.MdlPar_in=None + if self.info_in.shape[1]>100000: #We cleared because the info_in is huge for MD data + self.info_in=None #Commenting this. Why did we clear these? + #Still, the question remains, why would we ever want to keep info_in? 
+ + self.norm=None + + def __r_norm(self,bond=None,**kwargs): + "Applies equal-max or equal-integral normalization" + if 'NT' in kwargs: + NT=kwargs.get('NT') + self.detect_par['Normalization']=NT + elif 'Normalization' in kwargs: + NT=kwargs.get('Normalization') + self.detect_par['Normalization']=NT + else: + NT=self.detect_par.get('Normalization') + + nb=self._nb() + if nb==1: + bond=0 + + rhoz=self.rhoz(bond) + + NT=self.detect_par.get('Normalization') + nd=self.n + + dz=np.diff(self.z()[0:2]) + + for k in range(0,nd): + if NT.upper()[0]=='I': + sc=np.sum(rhoz[k,:])*dz + elif NT.upper()[0]=='M': + sc=np.max(rhoz[k,:]) + else: + print('Normalization type not recognized (use "N" or "I"). Defaulting to equal-max') + sc=np.max(rhoz[k,:]) + + if bond is None: + self.__rhoAvg[k,:]=rhoz[k,:]/sc + self.SVDavg['T'][k,:]=self.SVDavg['T'][k,:]/sc + self.__rAvg[:,k]=self.__rAvg[:,k]*sc + else: + self.__rho[bond][k,:]=rhoz[k,:]/sc + self.__rhoCSA[bond][k,:]=self.__rhoCSA[bond][k,:]/sc + self.SVD[bond]['T'][k,:]=self.SVD[bond]['T'][k,:]/sc + self.__r[bond][:,k]=self.__r[bond][:,k]*sc + + + def __r_info(self,bond=None,**kwargs): + """ Calculates paramaters describing the detector sensitivities (z0, Del_z, + and standard deviation of the detectors). Also resorts the detectors by + z0, unless 'sort_rho' is set to 'no'. Does not return anything, but edits + internal values, z0, Del_z, stdev + """ + +# nb=np.shape(self.__R)[0] + nb=self._nb() + """Trying to determine how many detectors to characterize (possible that + not all bonds have same number of detectors. Note- this situation is not + allowed for data processing. + """ + + if np.ndim(bond)>0: + bond=bond[0] + + if bond is None: + if self.__rAvg is not None: + nd0=self.__rAvg.shape[1] + else: + cont=True + k=0 + while cont: + if self.__r[k] is not None: + cont=False + nd0=self.__r[k].shape[1] + else: + k=k+1 + if k==nb: + print('Warning: no detectors are defined. 
detect.info cannot be calculated') + return + elif self.__r[bond] is not None: + nd0=self.__r[bond].shape[1] + elif self.__rAvg is not None: + nd0=self.__rAvg.shape[1] + else: + cont=True + k=0 + while cont: + if self.__r[k] is not None: + cont=False + nd0=self.__r[k].shape[1] + else: + k=k+1 + if k==nb: + print('Warning: no detectors are defined. detect.info cannot be calculated') + return + + index=[False]*nb + for k in range(nb): + if self.__r[k] is not None: + z0,_,_=self.r_info(k) + nd=z0.shape[0] + if nd==nd0: + index[k]=True + + a=dict() + flds=['z0','Del_z','stdev'] + + if np.any(index): + for f in flds: + x=list() + x0=getattr(self,f) + for k in np.where(index)[0]: + x.append(x0[k]) + a.update({f : np.mean(x,axis=0)}) + if f!='stdev': + a.update({f+'_std':np.std(x,axis=0)}) + + else: + "Re-do calculation for average detectors" + z0,Del_z,stdev=self.r_info(bond=None) + a.update({'z0':z0,'Del_z':Del_z,'stdev':stdev}) + + self.info=pd.DataFrame.from_dict(a) + self.info=self.info.transpose() + + def r_info(self,bond=None,**kwargs): + """ + |Returns z0, Del_z, and the standard deviation of a detector + | + |z0,Del_z,stdev = detect.r_info(bond) + | + |If requested, these will be sorted with z0 ascending (set sort_rho='y' + |as argument). Note this will also sort the internal detector! 
+ """ + r=self.r(bond) + rhoz=self.rhoz(bond) + if r is not None: + nd=self.r(bond).shape[1] + z0=np.divide(np.sum(np.multiply(rhoz,\ + np.repeat([self.z()],nd,axis=0)),axis=1),\ + np.sum(self.rhoz(bond),axis=1)) + + iS2=self.detect_par['inclS2'] + R2ex=self.detect_par['R2_ex_corr'] + + if iS2 and R2ex: + i0=np.argsort(z0[1:-1]) + i=np.concatenate(([0],i0+1,[nd-1])) + elif iS2: + i0=np.argsort(z0[1:]) + i=np.concatenate(([0],i0+1)) + elif R2ex: + i0=np.argsort(z0[0:-1]) + i=np.concatenate((i0,[nd-1])) + else: + i0=np.argsort(z0) + i=i0 + if 'sort_rho' not in kwargs or kwargs.get('sort_rho')[0].lower()=='n': + i0=np.arange(np.size(i0)) + i=np.arange(np.size(i)) + + z0=z0[i] + rhoz=rhoz[i,:] + r=r[:,i] + Del_z=np.diff(self.z()[0:2])*np.divide(np.sum(rhoz,axis=1), + np.max(rhoz,axis=1)) + + if R2ex: + #Dummy value for z0 of R2ex + z0[-1]=0 + Del_z[-1]=0 + + + + + if self.detect_par['inclS2']: + st0=np.concatenate(([.1],self.info_in.loc['stdev'])) + stdev=np.power(np.dot(np.linalg.pinv(r)**2,st0**2),0.5) + else: + stdev=np.power(np.dot(np.linalg.pinv(r)**2,self.info_in.loc['stdev']**2),0.5) + + if bond is not None: + self.z0[bond]=z0 + self.SVD[bond]['T']=self.SVD[bond]['T'][i0,:] + self.__r[bond]=r + self.__rho[bond]=rhoz + self.Del_z[bond]=Del_z + self.stdev[bond]=stdev + else: + self.SVDavg['T']=self.SVDavg['T'][i0,:] + self.__rAvg=r + self.__rhoAvg=rhoz + + return z0,Del_z,stdev + else: + return + + def ___r_info(self,bond=None,**kwargs): + """Calculates some parameters related to the detectors generates, z0, + Del_z, and standard deviation of resulting detectors. 
Also resorts the + detectors according to z0 + """ + nb=self._nb() + + match=True + if self.__r[0].ndim==2: + nd0=np.shape(self.__r[0])[1] + else: + match=False + nd0=0 + + stdev=np.zeros(nd0) + + if bond is None: +# a=np.arange(0,nb) + a=np.arange(0,0) + match=False + else: + a=np.atleast_1d(bond) + + for k in a: + if self.__r[0].ndim==2: + nd=np.shape(self.__r[k])[1] + else: + nd=0 + + + if nd0!=nd: + match=False + + if nd>0: + z0=np.divide(np.sum(np.multiply(self.__rho[k],\ + np.repeat([self.z()],nd,axis=0)),axis=1),\ + np.sum(self.__rho[k],axis=1)) + + if 'sort_rho' in kwargs and kwargs.get('sort_rho').lower()[0]=='n': + i=np.arange(0,np.size(z0)) + i0=i + if self.detect_par['inclS2']: + i0=i0[1:] + if self.detect_par['R2_ex_corr']: + i0=i0[0:-1] + else: + if self.detect_par['inclS2'] and self.detect_par['R2_ex_corr']: + i0=np.argsort(z0[1:-1]) + i=np.concatenate(([0],i0,[np.size(z0)])) + elif self.detect_par['inclS2']: + i0=np.argsort(z0[1:]) + i=np.concatenate(([0],i0)) + elif self.detect_par['R2_ex_corr']: + i0=np.argsort(z0[0:-1]) + i=np.concatenate((i0,[np.size(z0)])) + else: + i0=np.argsort(z0) + i=i0 + + self.z0[k]=z0[i] + self.SVD[k]['T']=self.SVD[k]['T'][i0,:] + + self.__r[k]=self.__r[k][:,i] + self.__rho[k]=self.__rho[k][i,:] + self.Del_z[k]=np.diff(self.z()[0:2])*np.divide(np.sum(self.__rho[k],axis=1), + np.max(self.__rho[k],axis=1)) + stdev=np.sqrt(np.dot(self.SVD[k]['T']**2,1/self.SVD[k]['S'][0:np.size(i0)]**2)) + if self.detect_par['inclS2']: + "THIS IS WRONG. ADD STANDARD DEVIATION LATER!!!" 
+ stdev=np.concatenate(([0],stdev)) + if self.detect_par['R2_ex_corr']: + stdev=np.concatenate((stdev,[0])) + self.SVD[k]['stdev']=stdev + if match: + stdev+=self.SVD[k]['stdev'] + + if match: + a=dict() + a.update({'z0' : np.mean(self.z0,axis=0)}) + a.update({'Del_z' : np.mean(self.Del_z,axis=0)}) + a.update({'stdev' : stdev/nb}) + if nb>1: + a.update({'z0_std' : np.std(self.z0,axis=0)}) + a.update({'Del_z_std': np.std(self.Del_z,axis=0)}) + else: + a.update({'z0_std' : np.zeros(nd)}) + a.update({'Del_z_std' : np.zeros(nd)}) + + self.info=pd.DataFrame.from_dict(a) + self.info=self.info.transpose() + + + + + def r(self,bond=None): + nb=self._nb() + if nb==1: + bond=0 + + if bond is None: + if self.__rAvg is None: + print('First generate the detectors for the average sensitivities') + return + else: + return self.__rAvg + else: + if np.size(self.__r[bond])==1: + print('First generate the detectors for the selected bond') + return + else: + return self.__r[bond] + + def rhoz(self,bond=None): + nb=self._nb() + if nb==1: + bond=0 + + if bond is None: + if self.__rAvg is None: + print('First generate the detectors for the average sensitivities') + else: + return self.__rhoAvg.copy() + else: + if np.size(self.__rho[bond])==1: + print('First generate the detectors for the selected bond') + return + else: + if bond==-1: + return np.array(self.__rho).copy() + else: + return self.__rho[bond].copy() + + def Rc(self,bond=None): + nb=self._nb() + if nb==1: + bond=0 + + if bond is None: + if self.__RcAvg is None: + print('First generate the detectors to back-calculate rate constant sensitivities') + else: + return self.__RcAvg + else: + if np.size(self.__Rc[bond])==1: + print('First generate the detectors for the selected bond') + return + else: + return self.__Rc[bond] + + + + def Rin(self,bond=0): + nb=self._nb() + if nb==1: + bond=0 + return self.__R[bond] + + def R0in(self,bond=0): + nb=self._nb() + if nb==1: + bond=0 + return self.__R0[bond] + + def 
rho_eff(self,exp_num=None,mdl_num=0,bond=None,**kwargs): + rho_eff,_=self._rho_eff(exp_num,mdl_num,bond,**kwargs) + return rho_eff + + def rho0(self,exp_num=None,mdl_num=0,bond=None,**kwargs): + _,rho0=self._rho_eff(exp_num,mdl_num,bond,**kwargs) + return rho0 + + def _RCSAin(self,bond=0): + nb=self._nb() + if nb==1: + bond=0 + return self.__RCSA[bond] + + def _R0CSAin(self,bond=0): + nb=self._nb() + if nb==1: + bond=0 + return self.__R0CSA[bond] + + def retExper(self): + return self.__exper + + def retSpinSys(self): + return self.__spinsys + + def tc(self): + return self.__tc.copy() + + def z(self): + return np.log10(self.__tc) + + def _rho(self,exp_num=None,bond=None): + """The different children of mdl_sens will have different names for + their sensitivities. For example, this class returns rho_z, which are the + rate constant sensitivities, but the correlation function class returns + Ct, and the detector class returns rho. Then, we have a function, + _rho(self), that exists and functions the same way in all children + """ + + + + if bond is None or self._nb()==1: + bond=0 + + if np.size(self.__rho[bond])==1: + print('First generate the detectors for the selected bond') + return + + if exp_num is None: + exp_num=self.info.columns + + exp_num=np.atleast_1d(exp_num) + + + if bond==-1: + rhoz=self.rhoz(bond) + if rhoz.ndim==3: + rhoz=rhoz[:,exp_num,:] + elif rhoz.ndim==2: + rhoz=rhoz[exp_num,:] + else: + rhoz=self.rhoz(bond)[exp_num,:] + + return rhoz + + def _rhoCSA(self,exp_num=None,bond=None): + """The different children of mdl_sens will have different names for + their sensitivities. For example, this class returns R, which are the + rate constant sensitivities, but the correlation function class returns + Ct, and the detector class returns rho. 
Then, we have a function, + _rho(self), that exists and functions the same way in all children + """ + + if bond is None: + bond=0 + + if np.size(self.__rhoCSA[bond])==1: + print('First generate the detectors for the selected bond') + return + + + if exp_num is None: + exp_num=self.info.columns + + exp_num=np.atleast_1d(exp_num) + + + if bond==-1: + rhoz=np.array(self.__rhoCSA) + if rhoz.ndim==3: + rhoz=rhoz[:,exp_num,:] + elif rhoz.ndim==2: + rhoz=rhoz[exp_num,:] + + if rhoz.shape[0]==1: + rhoz=rhoz[0] + else: + rhoz=self.__rhoCSA[bond] + rhoz=rhoz[exp_num,:] + + + return rhoz + + def plot_rhoz(self,bond=None,rho_index=None,ax=None,norm=False,**kwargs): + """ + Plots the sensitivities. Options are to specify the bond, the rho_index, + the + """ + hdl=pf.plot_rhoz(self,bond=bond,index=rho_index,ax=ax,norm=norm,**kwargs) + ax=hdl[0].axes + ax.set_ylabel(r'$\rho_n(z)$') + ax.set_title('Detector Sensitivities') + return hdl + + + def plot_r_opt(self,fig=None): + if fig is None: + fig=plt.figure() + + ax1=fig.add_subplot(211) + ax1.plot(self.z(),self.__r_auto.get('Error')) + max_err=self.__r_auto.get('Error').max() + + ax2=fig.add_subplot(212) + hdls=ax2.plot(self.z(),self.__r_auto.get('rho_z').T) + + for index, k in enumerate(np.sort(self.__r_auto.get('Peaks'))): + ax1.plot(np.repeat(self.z()[k],2),[-max_err/20,0],linewidth=.5,color=[0,0,0]) + ax1.text(self.z()[k],-max_err*1/10,r'$\rho_{'+str(index+1)+'}$',horizontalalignment='center',\ + verticalalignment='center',color=hdls[index].get_color()) + ax2.text(self.z()[k],self.__r_auto.get('rho_z')[index,:].max()+.05,\ + r'$\rho_{'+str(index+1)+'}$',horizontalalignment='center',\ + verticalalignment='center',color=hdls[index].get_color()) + + ax1.set_xlabel(r'$\log_{10}(\tau$ / s)') + ax1.set_ylabel(r'Opt. 
Error, $\Delta$(max)') + ax1.set_xlim(self.z()[[0,-1]]) + ax1.set_ylim([-max_err*3/20,max_err*21/20]) + + ax2.set_xlabel(r'$\log_{10}(\tau$ / s)') + ax2.set_ylabel(r'$\rho_n(z)$') + min_rho=self.__r_auto.get('rho_z').min() + max_rho=self.__r_auto.get('rho_z').max() + + ax2.set_xlim(self.z()[[0,-1]]) + ax2.set_ylim([min_rho-.05,max_rho+.1]) + + return hdls + + + + def plot_Rc(self,exp_num=None,norm=True,bond=None,ax=None): + """ + Plots the input sensitivities compared to their reproduction by fitting to + detectors. Options are to specifiy experiments (exp_num), to normalize (norm), + to specify a specific bond (bond), and a specific axis to plot onto (ax). + + plot_Rc(exp_num=None,norm=True,bond=None,ax=None) + """ + + hdl=pf.plot_Rc(sens=self,exp_num=exp_num,norm=norm,bond=bond,ax=ax) + + return hdl + + def _nb(self): + # nb=np.shape(self.__R)[0] + if self.BondSpfc=='yes': + nb=self.molecule.vXY.shape[0] + else: + nb=1 + return nb + +def svd0(X,n): + if np.shape(X)[0]>np.shape(X)[1]: +# U,S,Vt=svds(X,k=n,tol=0,which='LM') #Large data sets use sparse svd to avoid memory overload +# U=U[:,-1::-1] #svds puts out eigenvalues in opposite order of svd +# S=S[-1::-1] +# Vt=Vt[-1::-1,:] + S2,V=eigs(np.dot(np.transpose(X),X),k=n) + S=np.sqrt(S2.real) + U=np.dot(np.dot(X,V.real),np.diag(1/S)) + Vt=V.real.T + else: + U,S,Vt=svd(X) #But, typically better results from full calculation + U=U[:,0:np.size(S)] #Drop all the empty vectors + Vt=Vt[0:np.size(S),:] + + return U,S,Vt + + + + +def linprog_par(Y): + """This function optimizes a detector sensitivity that has a value of 1 + at correlation time k, and cannot go below some specific value at all + other correlation times (usually 0). While satisfying these + requirements, the sensitivity is minimized. + """ + Vt=Y[0] + k=Y[1] + ntc=np.shape(Vt)[1] + + if np.size(Y)==3: + target=Y[2] + else: + target=np.zeros(ntc) + + try: +# if k. 
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + + +Created on Wed Apr 3 22:07:08 2019 + +@author: albertsmith +""" + + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.patches import Polygon +import copy +import pyDIFRATE.r_class.DynamicModels as dm +#import os +#os.chdir('../Struct') +from pyDIFRATE.Struct.structure import molecule +#os.chdir('../plotting') +from pyDIFRATE.plots.plotting_funs import plot_rhoz +#os.chdir('../r_class') +#import detectors +from scipy.interpolate import interp1d as interp + + +class model(object): + def __init__(self): + +# if self._class=='Ct': +# self.__Reff=np.zeros([0,np.size(self.t()),np.size(self.tc())]) +# self.__R0=np.zeros([0,np.size(self.t())]) +# else: +# self.__Reff=np.zeros([0,np.size(self.tc())]) +# self.__R0=np.zeros([0,1]) + +# self.__mdlinfo=pd.DataFrame(index=self.retExper()+self.retSpinSys()) +# self.__tMdl=list() +# self.__AMdl=list() + + self.__Reff=list() + self.__R0=list() + self.__ReffCSA=list() + self.__R0CSA=list() + + self.MdlPar=list() + self.tMdl=list() + self.AMdl=list() + self.molecule=molecule() + + + def new_mdl(self,tMdl=None,AMdl=None,Model=None,**kwargs): + + if tMdl is not None and AMdl is not None: + tMdl=np.atleast_1d(tMdl) + AMdl=np.atleast_1d(AMdl) + + if AMdl.ndim==3: + MdlPar=dict(Model='Direct',BondSpfc='yes') + elif AMdl.ndim==1: + MdlPar=dict(Model='Direct',BondSpfc='no') + else: + print('AMdl must be a single value, a 1D array, or a 3D array') + return + + self.MdlPar.append(MdlPar) + self.tMdl.append(tMdl) + self.AMdl.append(AMdl) + elif Model=='Combined' and 'mdl_nums' in kwargs: + + mdl_nums=kwargs.get('mdl_nums') + if not isinstance(mdl_nums,np.ndarray): + mdl_nums=np.array(mdl_nums) + if mdl_nums.shape==(): + mdl_nums=np.array([mdl_nums]) + + BndSpfc='no' + Models=list() + "Maybe we can determine bond specificity inside of the Combined function? 
(below)" + for k in mdl_nums: + Models.append(self.MdlPar[k]) + if self.MdlPar[k]['BondSpfc']=='yes': + BndSpfc='yes' + + MdlPar=dict(Model='Combined',BondSpfc=BndSpfc,SubModels=Models) + + + tMdl=np.array([]); #Empty model + AMdl=np.array([]); # + for k in mdl_nums: + tMdl,AMdl,_=dm.ModelSel('Combined',tMdl1=tMdl,AMdl1=AMdl,tMdl2=self.tMdl[k],AMdl2=self.AMdl[k]) + + self.MdlPar.append(MdlPar) + self.tMdl.append(tMdl) + self.AMdl.append(AMdl) + + else: +# if dm.ModelBondSpfc(Model) and self.molecule.vXY.size==0: +# print('Before defining an model with anisotropic motion, import a structure and select the desired bonds') +# else: + tMdl,AMdl,BndSp=dm.ModelSel(Model,'dXY',self.molecule,**kwargs) +# if BndSp=='yes' and self._class!='Ct': + if BndSp=='yes': + _,A,_=dm.ModelSel(Model,'dCSA',self.molecule,**kwargs) + AMdl=[AMdl,A] + AMdl=np.swapaxes(AMdl,0,1) + + MdlPar=dict(Model=Model,BondSpfc=BndSp,**kwargs) + + self.MdlPar.append(MdlPar) + self.tMdl.append(tMdl) + self.AMdl.append(AMdl) + + self.__Reff.append(None) + self.__R0.append(None) + self.__ReffCSA.append(None) + self.__R0CSA.append(None) + + + def del_mdl(self,mdl_num): + del self.AMdl[mdl_num] + del self.tMdl[mdl_num] + del self.MdlPar[mdl_num] + del self.__Reff[mdl_num] + del self.__R0[mdl_num] + del self.__ReffCSA[mdl_num] + del self.__R0CSA[mdl_num] + + def del_mdl_calcs(self): + self.__Reff=list(np.repeat(None,np.size(self.MdlPar))) + self.__R0=list(np.repeat(None,np.size(self.MdlPar))) + self.__ReffCSA=list(np.repeat(None,np.size(self.MdlPar))) + self.__R0CSA=list(np.repeat(None,np.size(self.MdlPar))) + + def _rho_eff(self,exp_num=None,mdl_num=0,bond=None,**kwargs): + """This function is mostly responsible for searching for a pre-existing + calculation of the model and experiment + """ + +# if bond==-1: +# bond=None + + if len(self.MdlPar)==0: #If no models present, set mdl_num to None + mdl_num=None + + if exp_num is not None: + exp_num=np.atleast_1d(exp_num) + + if (mdl_num is None) or (mdl_num==-1): 
+ R=self._rho(exp_num,bond) + R0=np.zeros(R.shape[0:-1]) + return R,R0 + + if self.__Reff[mdl_num] is None: + Reff,R0,ReffCSA,R0CSA=self.__apply_mdl(self.tMdl[mdl_num],self.AMdl[mdl_num]) + self.__Reff[mdl_num]=Reff + self.__R0[mdl_num]=R0 + self.__ReffCSA[mdl_num]=ReffCSA + self.__R0CSA[mdl_num]=R0CSA + + if np.shape(self.__Reff[mdl_num])[0]==1: + bond=None + + if exp_num is None and (bond is None or bond==-1): + R=self.__Reff[mdl_num] + R0=self.__R0[mdl_num] + elif exp_num is None: + R=self.__Reff[mdl_num][bond,:,:] + R0=self.__R0[mdl_num][bond,:] + elif bond is None or bond==-1: + R=self.__Reff[mdl_num][:,exp_num,:] + R0=self.__R0[mdl_num][:,exp_num] + else: + R=self.__Reff[mdl_num][bond,exp_num,:] + R0=self.__R0[mdl_num][bond,exp_num] + + if R.shape[0]==1: + R=R[0] + R0=R0[0] + + + return R.copy(),R0.copy() + + + def _rho_effCSA(self,exp_num=None,mdl_num=0,bond=None): + """Same as above, but only for the CSA interaction + """ + +# if bond==-1: +# bond=None + + if exp_num is not None: + exp_num=np.atleast_1d(exp_num) + + if (mdl_num is None) or (mdl_num==-1): + R=self._rhoCSA(exp_num,bond) + R0=np.zeros(R.shape[0:-1]) + return R,R0 + + if self.__ReffCSA[mdl_num] is None: + Reff,R0,ReffCSA,R0CSA=self.__apply_mdl(self.tMdl[mdl_num],self.AMdl[mdl_num]) + self.__Reff[mdl_num]=Reff + self.__R0[mdl_num]=R0 + self.__ReffCSA[mdl_num]=ReffCSA + self.__R0CSA[mdl_num]=R0CSA + + if np.shape(self.__ReffCSA[mdl_num])[0]==1: + bond=None + + if exp_num is None and (bond is None or bond==-1): + R=self.__ReffCSA[mdl_num] + R0=self.__R0CSA[mdl_num] + elif exp_num is None: + R=self.__ReffCSA[mdl_num][bond,:,:] + R0=self.__R0CSA[mdl_num][bond,:] + elif bond is None or bond==-1: + R=self.__ReffCSA[mdl_num][:,exp_num,:] + R0=self.__R0CSA[mdl_num][:,exp_num] + else: + R=self.__ReffCSA[mdl_num][bond,exp_num,:] + R0=self.__R0CSA[mdl_num][bond,exp_num] + + if R.shape[0]==1: + R=R[0] + R0=R0[0] + + + return R.copy(),R0.copy() + + def __apply_mdl(self,tMdl,A): + "tMdl is a list of 
correlation times in the model, and A the amplitudes" + "Note that if A does not add to 1, we assume that S2 is non-zero (S2=1-sum(A))" + + + "Get the experimental sensitivities" + R=self._rho(self.info.columns,bond=-1) + RCSA=self._rhoCSA(self.info.columns,bond=-1) + + R+=-RCSA #We operate on relaxation from dipole and CSA separately + + "Shapes of matrices, preallocation" + SZA=np.shape(A) + if np.size(SZA)>1: + SZA=SZA[0] + iso=False + else: + iso=True + SZA=1 + + "We repeat R and RCSA for every bond in A if R and RCSA are not already bond specific" + SZR=R.shape + + if np.size(SZR)==3: + if iso: + A=np.repeat([np.repeat([A],2,axis=0)],SZR[0],axis=0) + SZA=SZR[0] + iso=False + SZR=SZR[1:] + else: + R=np.repeat([R],SZA,axis=0) + RCSA=np.repeat([RCSA],SZA,axis=0) + + SZeff=np.concatenate([np.atleast_1d(SZA),np.atleast_1d(SZR)]) + SZ0=np.concatenate([np.atleast_1d(SZA),[SZR[0]]]) + + "Contributions to relaxation coming from model with non-zero S2" + if np.ndim(A)>1: + S2=1-np.sum(A[:,0,:],axis=1) + S2CSA=1-np.sum(A[:,1,:],axis=1) + else: + S2=[1-np.sum(A)] + S2CSA=S2 + + SZ1=[SZeff[0],np.prod(SZeff[1:])] + + """ + The order parameter of the input model yields the fraction of the model correlation that does not + change the internal effective correlation time. 
+ """ + Reff=np.multiply(np.repeat(np.transpose([S2]),SZ1[1],axis=1),np.reshape(R,SZ1)) + ReffCSA=np.multiply(np.repeat(np.transpose([S2CSA]),SZ1[1],axis=1),np.reshape(RCSA,SZ1)) + Reff=np.reshape(Reff,SZeff) + ReffCSA=np.reshape(ReffCSA,SZeff) + + R0=np.zeros(SZ0) + R0CSA=np.zeros(SZ0) + + "Loop over all correlation times in model" + for k,tc in enumerate(tMdl): + "Matrix to transform from z to zeff (or simply to evaluate at z=log10(tc) with M0)" + M,M0=self.z2zeff(tc) + + + SZ1=[np.prod(SZeff[0:2]),SZeff[2]] + R00=np.dot(M0,np.reshape(R,SZ1).T) + R0CSA0=np.dot(M0,np.reshape(RCSA,SZ1).T) + + Reff0=np.reshape(np.dot(M,np.reshape(R,SZ1).T).T-np.transpose([R00]),[1,np.prod(SZeff)]) + ReffCSA0=np.reshape(np.dot(M,np.reshape(RCSA,SZ1).T).T-np.transpose([R0CSA0]),[1,np.prod(SZeff)]) + if iso: + Reff+=A[k]*np.reshape(Reff0,SZeff) + R0+=A[k]*np.reshape(R00,SZ0) + ReffCSA+=A[k]*np.reshape(ReffCSA0,SZeff) + R0CSA+=A[k]*np.reshape(R0CSA0,SZ0) + else: + A0=A[:,0,k] + Reff+=np.reshape(np.multiply(np.repeat(np.transpose([A0]),np.prod(SZR)),Reff0),SZeff) + R0+=np.reshape(np.multiply(np.repeat(np.transpose([A0]),SZR[0]),R00),SZ0) + + A0=A[:,1,k] + ReffCSA+=np.reshape(np.multiply(np.repeat(np.transpose([A0]),np.prod(SZR)),ReffCSA0),SZeff) + R0CSA+=np.reshape(np.multiply(np.repeat(np.transpose([A0]),SZR[0]),R0CSA0),SZ0) + + + Reff+=ReffCSA + R0+=R0CSA + return Reff,R0,ReffCSA,R0CSA + + def z2zeff(self,tc): + + z=self.z() + zeff=z+np.log10(tc)-np.log10(tc+10**z) #Calculate the effective log-correlation time + zeff[zeff<=z[0]]=z[0]+1e-12 #Cleanup: no z shorter than z[0] + zeff[zeff>=z[-1]]=z[-1]-1e-12 #Cleanup: no z longer than z[-1] + i=np.digitize(zeff,z,right=False)-1 #Index to find longest z such that zrho_eff transform + + dz=z[1:]-z[0:-1] + wt=(z[i+1]-zeff)/dz[i] + M[np.arange(0,sz),i]=wt + M[np.arange(0,sz),i+1]=1-wt + + zi=np.log10(tc) #Calculate the log of input tc + if zi<=z[0]: + zi=z[0]+1e-12 #Cleanup: no z shorter than z[0] + if zi>=z[-1]: + zi=z[-1]-1e-12 + 
i=np.digitize(zi,z,right=False)-1 #Index to find longest z such that zrho_eff transform + + wt=(z[i+1]-zi)/dz[i] + M0[i]=wt + M0[i+1]=1-wt + + return M,M0 + +# def detect(self,exp_num=None,mdl_num=None): +# """ +# r=self.detect(exp_num=None,mdl_num=None) +# Creates a detector object from the current sensitivity object. can +# specifiy particular experiments and models to use (default is all +# experiments) and no model +# """ +# +# r=detectors.detect(self,exp_num,mdl_num) +# +# return r + + def __temp_exper(self,exp_num,inter): + """When we calculate dipole/CSA relaxation under a bond-specific model + (ex. Anisotropic diffusion), we actually need to apply a different + model to the motion of the CSA and dipole. To do this, we create a new + experiment without the dipole, or without the CSA, calculate its + sensitivity, and then delete the experiment from the users's scope + after we're done with it. This gets passed back to _rho_eff, where the + new model is applied to experiments with CSA and dipole separately""" + + exper=self.info.loc[:,exp_num].copy() + if inter=='dXY': + exper.at['CSA']=0 + else: + exper.at['dXY']=0 + exper.at['QC']=0 + """We should make sure the quadrupole doesn't count twice. 
I guess + this shouldn't matter, because we usually neglect dipole and CSA + relaxation when a quadrupole is present, but if the user puts them + in for some reason, it would result in a double-counting of the + quadrupole relaxation""" + self.new_exp(info=exper) #Add the new experiment + n=self.info.columns.values[-1] #Index of the new experiment + R=self._rho(n) + + self.del_exp(n) #Delete the experiment to hide this operation from the user + + return R + + def _clear_stored(self,exp_num=None): + "Unfortunately, we only have methods to apply models to all experiments at once" + "This means a change in the experiment list requires recalculation of all models for all experiments" + "This function deletes all model calculations" + + + if exp_num is None: + for m in self.__Reff: + m=None + for m in self.__ReffCSA: + m=None + for m in self.__R0: + m=None + for m in self.__R0CSA: + m=None + else: + for k,m in enumerate(self.__Reff): + if m is not None: + self.__Reff[k]=np.delete(m,exp_num,axis=1) + for k,m in enumerate(self.__ReffCSA): + if m is not None: + self.__ReffCSA[k]=np.delete(m,exp_num,axis=1) + for k,m in enumerate(self.__R0): + if m is not None: + self.__R0[k]=np.delete(m,exp_num,axis=1) + for k,m in enumerate(self.__R0CSA): + if m is not None: + self.__R0CSA[k]=np.delete(m,exp_num,axis=1) + + + def zeff(self,t,tau=None): + if tau==None: + return self.z()+np.log10(t)-np.log10(10**self.z()+t) + else: + return np.log10(t)+np.log10(tau)-np.log10(t+tau) + +# def plot_eff(self,exp_num=None,mdl_num=0,bond=None,ax=None,**kwargs): +# +# if bond==-1: +# bond=None +# +# a,b=self._rho_eff(exp_num,mdl_num,bond) +# +# if bond is None and np.size(a.shape)==3: +# maxi=np.max(a,axis=0) +# mini=np.min(a,axis=0) +# a=np.mean(a,axis=0) +# pltrange=True +# maxi=maxi.T +# mini=mini.T +# else: +# pltrange=False +# +# a=a.T +# +# if 'norm' in kwargs and kwargs.get('norm')[0].lower()=='y': +# norm=np.max(np.abs(a),axis=0) +# a=a/np.tile(norm,[np.size(self.tc()),1]) +# +# if 
pltrange: +# maxi=maxi/np.tile(norm,[np.size(self.tc()),1]) +# mini=mini/np.tile(norm,[np.size(self.tc()),1]) +# +# if ax==None: +# fig=plt.figure() +# ax=fig.add_subplot(111) +# hdl=ax.plot(self.z(),a) +## hdl=plt.plot(self.z(),a) +## ax=hdl[0].axes +# else: +# hdl=ax.plot(self.z(),a) +# +# if pltrange: +# x=np.concatenate([self.z(),self.z()[-1::-1]],axis=0) +# for k in range(0,a.shape[1]): +# y=np.concatenate([mini[:,k],maxi[-1::-1,k]],axis=0) +# xy=np.concatenate(([x],[y]),axis=0).T +# patch=Polygon(xy,facecolor=hdl[k].get_color(),edgecolor=None,alpha=0.5) +# ax.add_patch(patch) +# +# +# ax.set_xlabel(r'$\log_{10}(\tau$ / s)') +# if 'norm' in kwargs and kwargs.get('norm')[0].lower()=='y': +# ax.set_ylabel(r'$R$ (normalized)') +# else: +# ax.set_ylabel(r'$R$ / s$^{-1}$') +# ax.set_xlim(self.z()[[0,-1]]) +# ax.set_title('Sensitivity for Model #{0}'.format(mdl_num)) +# +# return hdl + + def plot_eff(self,exp_num=None,mdl_num=0,bond=None,ax=None,norm=False,**kwargs): + if bond==-1: + bond=None + + hdl=plot_rhoz(self,index=exp_num,mdl_num=mdl_num,norm=norm,ax=ax,bond=bond,**kwargs) + ax=hdl[0].axes + ax.set_title('Sensitivity for Model #{0}'.format(mdl_num)) + + return hdl + + def _set_plot_attr(self,hdl,**kwargs): + props=hdl[0].properties().keys() + for k in kwargs: + if k in props: + for m in hdl: + getattr(m,'set_{}'.format(k))(kwargs.get(k)) + + def copy(self,type='deep'): + """ + | + |Returns a copy of the object. 
Default is deep copy (all objects except the molecule object) + | obj = obj0.copy(type='deep') + |To also create a copy of the molecule object, set type='ddeep' + |To do a shallow copy, set type='shallow' + """ + if type=='ddeep': + out=copy.deepcopy(self) + elif type!='deep': + out=copy.copy(self) + else: + mol=self.molecule + self.molecule=None + out=copy.deepcopy(self) + self.molecule=mol + out.molecule=mol + + return out + + \ No newline at end of file diff --git a/pyDIFRATE/r_class/parallel_funs.py b/pyDIFRATE/r_class/parallel_funs.py new file mode 100755 index 0000000..dda6ea5 --- /dev/null +++ b/pyDIFRATE/r_class/parallel_funs.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Copyright 2021 Albert Smith-Penzel + +This file is part of Frames Theory Archive (FTA). + +FTA is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FTA is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FTA. If not, see . 
def linprog_par(Y):
    """Optimize a single detector sensitivity (worker for a parallel map).

    Parameters
    ----------
    Y : sequence [Vt, k]
        Vt : (n, ntc) array whose rows span the detector space (singular
             vectors) and whose columns run over the correlation-time grid.
        k  : int, column index at which the detector response is pinned to 1.

    Returns
    -------
    x : 1-D array of length n
        Coefficients of the linear combination of the rows of Vt that
        minimizes the total integral of the detector, subject to
        rho(z) = x @ Vt >= 0 everywhere and rho(z_k) = 1.  If the solver
        fails for any reason, a vector of ones is returned so the parallel
        map never raises.
    """
    Vt = Y[0]
    k = Y[1]
    ntc = np.shape(Vt)[1]

    c = np.sum(Vt, axis=1)       # objective: total integral of the detector
    A_ub = -Vt.T                 # -rho(z) <= 0  <=>  rho(z) >= 0
    b_ub = np.zeros(ntc)
    A_eq = [Vt[:, k]]            # equality constraint: rho(z_k) == 1

    try:
        res = linprog(c, A_ub, b_ub, A_eq, 1, bounds=(-500, 500),
                      method='interior-point', options={'disp': False})
        x = res['x']
    except ValueError:
        # 'interior-point' was deprecated in SciPy 1.9 and removed in 1.11;
        # retry with the default solver (HiGHS), which returns the same LP
        # optimum.  Previously this ValueError was swallowed by a bare
        # except, so on modern SciPy the optimization silently never ran.
        try:
            res = linprog(c, A_ub, b_ub, A_eq, 1, bounds=(-500, 500),
                          options={'disp': False})
            x = res['x']
        except Exception:
            x = np.ones(Vt.shape[0])
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate out of worker processes.
        x = np.ones(Vt.shape[0])
    return x
+ + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Tue Apr 2 21:41:57 2019 + +@author: albertsmith +""" + +import numpy as np +import pandas as pd +import pyDIFRATE.r_class.DIFRATE_funs as dff +import matplotlib.pyplot as plt +import pyDIFRATE.r_class.mdl_sens as mdl +#import sens +from pyDIFRATE.tools.DRtools import dipole_coupling +#os.chdir('../plotting') +from pyDIFRATE.plots.plotting_funs import plot_rhoz +#os.chdir('../r_class') + +class rates(mdl.model): + def __init__(self,tc=None,z=None,**kwargs): + + """Probably a better way to do this, but I need to identify which + child of mdl_sens is which later. Using isinstance requires me to + import the children into mdl_sens, but also import mdl_sens into its + children. This seems to create some strange dependence so that I can't + actually load any of the classes any more""" + + self._class='rates' + self._origin='rates' + """The detectors class may have bond-specific sensitivities in _rho. We + need to know if this is the case for the mdl_sens class to work + properly + """ + self._BondSpfc='no' + + """Get user defined tc if provided. 
Options are to provide the tc + vector directly, to provide the log directly (define z instead of tc), + or to specify it as a start, end, and number of steps, which are then + log-spaced (3 entries for tc or z) + """ + if tc is None: + if z is not None: + if np.size(z)==3: + self.__tc=np.logspace(z[0],z[1],z[2]) + else: + self.__tc=np.power(10,z) + "Allow users to input z instead of tc" + else: + self.__tc=np.logspace(-14,-3,200) + + elif np.size(tc)==3: + self.__tc=np.logspace(np.log10(tc[0]),np.log10(tc[1]),tc[2]) + else: + self.__tc=np.array(tc) + """We don't allow editing of the tc vector; you must initialize a new + instance of rates if you want to change it""" + + + """If you want to edit the code to include new experiments, and these + require new variables, they MUST be added to one of these lists + """ + + "Names of the experimental variables that are available" + self.__exper=['Type','v0','v1','vr','offset','stdev'] + "Names of the spin system variables that are available" + self.__spinsys=['Nuc','Nuc1','dXY','CSA','CSoff','QC','eta','theta'] + + "Initialize storage for rate constant calculation" + self.__R=list() + self.__RCSA=list() + + "We need to initialize self.info" + self.info=None + + + "Initialize some storage for rate constant calculation" +# self.__R=np.zeros([0,np.size(self.__tc)]) + +# self.__info=pd.DataFrame(index=self.__exper+self.__spinsys) + + super().__init__() + "Here we feed in all the information on the experiments" + self.new_exp(**kwargs) + + def new_exp(self,info=None,**kwargs): + """Adds new experiments to a sensitivity object. Options are to input + info as a pandas array, with the appropriate values (usually from another) + sensitivity object), or list the variables directly. + + Experiment: Type, v0, v1, vr, offset, stdev. These may be lists of values, + in which case multiple experiments will be created. + + Spin system: Nuc, Nuc1, dXY, CSA, QC, eta, theta. These must be the same + for all simultaneously entered experiments. 
Nuc1 and dXY may have multiple + values if the Nuc is coupled to multiple other nuclei. + + """ + + + + if info is None: + "Count how many experiments are given" + ne=0 + for k in self.__exper: + if k in kwargs: + ne=np.max([ne,np.size(kwargs.get(k))]) + + "Add a None type element in self.__R for each new experiment" + for k in range(0,ne): + self.__R.append(None) + self.__RCSA.append(None) + + "Move all input variables to __sys and __exp" + "Defaults that don't depend on the observed nucleus can be set here" + self.__exp=dict() + for k in self.__exper: + if k in kwargs: + self.__exp.update({k : kwargs.get(k)}) + else: + self.__exp.update({k : None}) + + self.__sys=dict() + for k in self.__spinsys: + if k in kwargs: + self.__sys.update({k : kwargs.get(k)}) + elif k=='Nuc': + self.__sys.update({k : '15N'}) + else: + self.__sys.update({k : None}) + + + self.__cleanup(ne) + self.__set_defaults(ne) + + "Create the new pandas array" + + info=pd.concat([pd.DataFrame.from_dict(self.__exp),pd.DataFrame.from_dict(self.__sys)],axis=1).T + else: + ne=info.shape[1] + for k in range(0,ne): + self.__R.append(None) + self.__RCSA.append(None) +# try: #I don't like using try....but not sure what to do here +# info=pd.concat([info0,info.T],axis=1,ignore_index=True) +# except: +# info=info.T + + if not isinstance(self.info,pd.DataFrame): + self.info=info + else: + self.info=pd.concat([self.info,info],axis=1,ignore_index=True) + + self.del_mdl_calcs() + +#%% Make sure inputs all are the correct type (numpy arrays) + "Function to make sure all inputs are arrays, and have the correct sizes" + def __cleanup(self,ne): + "Check that all experimental variables can be treated as arrays" + + for k in self.__exper: + a=np.atleast_1d(self.__exp.get(k)) + rep=np.ceil(ne/np.size(a)) + a=np.repeat(a,rep) + a=a[0:ne] +# a=self.__exp.get(k) +# if not isinstance(a,(list,np.ndarray,pd.DataFrame)): +# a=[a]*ne +# elif np.size(a)!=ne: +# "We tile the output if the number of experiments doesn't match up" 
+# a=a*int(np.ceil(ne/np.size(a))) +# a=a[0:ne] +# else: +# if not isinstance(a,np.ndarray): +# a=np.array(a) + self.__exp.update({k:a}) + + for k in self.__spinsys: + a=np.atleast_1d(self.__sys.get(k)) + + if (k=='dXY' or k=='Nuc1' or k=='CSoff') and np.size(a)>1: + a=[a]*ne + else: + a=[a[0]]*ne + +# a=self.__sys.get(k) +# +# if not isinstance(a,(list,np.ndarray)): +# a=[a]*ne +# elif k=='dXY' or k=='Nuc1': +# b=np.array([None]*ne) +# for m in range(0,ne): +# b[m]=a +# a=b +# if not isinstance(a,np.ndarray): +# a=np.array(a) +# +# if a.dtype.str[0:2]=='1: + exp_num=np.array(np.atleast_1d(exp_num)) + exp_num[::-1].sort() #Crazy, but this sorts exp_num in descending order + "delete largest index first, because otherwise the indices will be wrong for later deletions" + for m in exp_num: + self.del_exp(m) + else: + if np.ndim(exp_num)>0: + exp_num=np.array(exp_num[0]) + self.info=self.info.drop(exp_num,axis=1) + del self.__R[exp_num] + del self.__RCSA[exp_num] + + self.info.set_axis(np.arange(np.size(self.info.axes[1])),axis=1,inplace=True) + self._clear_stored(exp_num) + +#%% Adjust a parameter + "We can adjust all parameters of a given type, or just one with the experiment index" + def set_par(self,type,value,exp_num=None): + if exp_num is None: + if hasattr(value,'__len__') and len(value)!=1: + for k in range(self.info.shape[1]): + self.info.at[type,k]=value + else: + self.info.at[type,:]=value + self.__R[:]=[None]*len(self.__R) + else: + self.info.at[type,exp_num]=value + self.__R[exp_num]=None + self.__RCSA[exp_num]=None + + self._clear_stored() + + self._reset_exp(exp_num) + +#%% Correlation time axes + "Return correlation times or log of correlation times" + def tc(self): + return self.__tc.copy() + + def z(self): + return np.log10(self.__tc) + +#%% Rate constant calculations + "Calculate rate constants for given experiment" + def _rho(self,exp_num=None,bond=None): + + if exp_num is None: + exp_num=self.info.axes[1] + + "Make sure we're working with numpy 
array" + exp_num=np.atleast_1d(exp_num) + + ntc=self.__tc.size + ne=exp_num.size + R=np.zeros([ne,ntc]) + for k in range(0,ne): + "Look to see if we've already calculated this sensitivity, return it if so" + if self.__R[exp_num[k]] is not None: + R[k,:]=self.__R[exp_num[k]] + else: + "Otherwise, calculate the new sensitivity, and store it" + R[k,:]=dff.rate(self.__tc,self.info.loc[:,exp_num[k]]) + self.__R[exp_num[k]]=R[k,:] +# self.__R=np.vstack([self.__R,R[k,:]]) +# self.__info=pd.concat([self.__info,self.info.loc[:,exp_num[k]]],axis=1,ignore_index=True) + + + return R.copy() + + def _reset_exp(self,exp_num=None): + """ + Deletes sensitivity data for a given experiment, or for all experiments. + Should be run in case a parameter for an experiment is updated. + """ + if exp_num is None: + for m in self.__R: + m=None + else: + for k,m in enumerate(self.__R): + if m is not None: + self.__R[k]=np.delete(m,exp_num,axis=1) + + def Reff(self,exp_num=None,mdl_num=0,bond=None,**kwargs): + R,_=self._rho_eff(exp_num,mdl_num,bond,**kwargs) + return R + + def R0(self,exp_num=None,mdl_num=None,bond=None,**kwargs): + _,R0=self._rho_eff(exp_num,mdl_num,bond,**kwargs) + return R0 + + def _rhoCSA(self,exp_num,bond=None): + """Calculates relaxation due to CSA only. We need this function to + allow application of anisotropic models, which then have different + influence depending on the direction of the interaction tensor. 
CSA + points in a different direction (slighlty) than the dipole coupling + """ + "Make sure we're working with numpy array" + exp_num=np.atleast_1d(exp_num) + + + ntc=self.__tc.size + ne=exp_num.size + R=np.zeros([ne,ntc]) + for k in range(0,ne): + "Get the information for this experiment" + exper=self.info.loc[:,exp_num[k]].copy() + "Turn off other interactions" + exper.at['Nuc1']=None + exper.at['dXY']=0 + exper.at['QC']=0 + + "Look to see if we've already calculated this sensitivity, return it if so" +# count=0 +# test=False +# n=self.__R.shape[0] +# while count0: + nb=self.molecule.vXY.shape[0] + R=np.repeat([R],nb,axis=0) + return R.copy() + + +##%% Plot the rate constant sensitivites +# def plot_R(self,exp_num=None,ax=None,**kwargs): +# +# if exp_num is None: +# exp_num=self.info.columns.values +# +# a=self.R(exp_num).T +# if 'norm' in kwargs and kwargs.get('norm')[0].lower()=='y': +# norm=np.max(a,axis=0) +# a=a/np.tile(norm,[np.size(self.tc()),1]) +# +# if ax is None: +# fig=plt.figure() +# ax=fig.add_subplot(111) +# hdl=ax.plot(self.z(),a) +## ax=hdl[0].axes +# else: +# hdl=ax.plot(self.z(),a) +# +# self._set_plot_attr(hdl,**kwargs) +# +# +# ax.set_xlabel(r'$\log_{10}(\tau$ / s)') +# if 'norm' in kwargs and kwargs.get('norm')[0].lower()=='y': +# ax.set_ylabel(r'$R$ (normalized)') +# else: +# ax.set_ylabel(r'$R$ / s$^{-1}$') +# ax.set_xlim(self.z()[[0,-1]]) +# ax.set_title('Rate Constant Sensitivity (no model)') +# +# return hdl + + def plot_R(self,exp_num=None,norm=False,ax=None,**kwargs): + """ + Plots the sensitivites of the experiments. Default plots all experiments + without normalization. Set norm=True to normalize all experiments to 1. + Specify exp_num to only plot selected experiments. 
Set ax to specify the + axis on which to plot + + plot_R(exp_num=None,norm=False,ax=None,**kwargs) + """ + hdl=plot_rhoz(self,index=exp_num,norm=norm,ax=ax,**kwargs) + ax=hdl[0].axes + ax.set_ylabel(r'$R / s^{-1}$') + ax.set_title('Experimental Sensitivities') + return hdl + + +#%% Return the names of the experiment and sys variables + def retSpinSys(self): + return self.__spinsys + def retExper(self): + return self.__exper + +#%% Hidden output of rates (semi-hidden, can be found if the user knows about it ;-) ) + def R(self,exp_num=None): + """The different children of mdl_sens will have different names for + their sensitivities. For example, this class returns R, which are the + rate constant sensitivities, but the correlation function class returns + Ct, and the detector class returns rho. Then, we have a function, + _rho(self), that exists and functions the same way in all children + """ + return self._rho(exp_num) + + def Reff(self,exp_num=None,mdl_num=0,bond=None): + R,R0=self._rho_eff(exp_num,mdl_num,bond) + + return R,R0 \ No newline at end of file diff --git a/pyDIFRATE/tools/.DS_Store b/pyDIFRATE/tools/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0. + + +Questions, contact me at: +albert.smith-penzel@medizin.uni-leipzig.de + + +Created on Fri Nov 8 13:44:13 2019 + +@author: albertsmith +""" + +import os +import numpy as np +import pandas as pd +import re +from scipy.stats import mode + +#%% Some useful tools (Gyromagnetic ratios, spins, dipole couplings) +def NucInfo(Nuc=None,info='gyro'): + """ Returns the gyromagnetic ratio for a given nucleus. 
Usually, should be + called with the nucleus and mass number, although will default first to + spin 1/2 nuclei if mass not specified, and second to the most abundant + nucleus. A second argument, info, can be specified to request the + gyromagnetic ratio ('gyro'), the spin ('spin'), the abundance ('abund'), or + if the function has been called without the mass number, one can return the + default mass number ('mass'). If called without any arguments, a pandas + object is returned containing all nuclear info ('nuc','mass','spin','gyro', + 'abund') + """ + + Nucs=[] + MassNum=[] + spin=[] + g=[] + Abund=[] + + dir_path = os.path.dirname(os.path.realpath(__file__)) + + with open(dir_path+"/GyroRatio") as f: + data=f.readlines() + for line in data: + line=line.strip().split() + MassNum.append(int(line[1])) + Nucs.append(line[3]) + spin.append(float(line[5])) + g.append(float(line[6])) + Abund.append(float(line[7])) + + NucData=pd.DataFrame({'nuc':Nucs,'mass':MassNum,'spin':spin,'gyro':g,'abund':Abund}) + + + if Nuc==None: + return NucData + else: + + if Nuc=='D': + Nuc='2H' + + mass=re.findall(r'\d+',Nuc) + if not mass==[]: + mass=int(mass[0]) + + + Nuc=re.findall(r'[A-Z]',Nuc.upper()) + + if np.size(Nuc)>1: + Nuc=Nuc[0].upper()+Nuc[1].lower() + else: + Nuc=Nuc[0] + + + + NucData=NucData[NucData['nuc']==Nuc] + + if not mass==[]: #Use the given mass number + NucData=NucData[NucData['mass']==mass] + elif any(NucData['spin']==0.5): #Ambiguous input, take spin 1/2 nucleus if exists + NucData=NucData[NucData['spin']==0.5] #Ambiguous input, take most abundant nucleus + elif any(NucData['spin']>0): + NucData=NucData[NucData['spin']>0] + + NucData=NucData[NucData['abund']==max(NucData['abund'])] + + + h=6.6260693e-34 + muen=5.05078369931e-27 + + NucData['gyro']=float(NucData['gyro'])*muen/h +# spin=float(NucData['spin']) +# abund=float(NucData['abund']) +# mass=float(NucData['spin']) + if info[:3]=='all': + return NucData + else: + return float(NucData[info]) + +def 
dipole_coupling(r,Nuc1,Nuc2): + """ Returns the dipole coupling between two nuclei ('Nuc1','Nuc2') + separated by a distance 'r' (in nm). Result in Hz (gives full anisotropy, + not b12, that is 2x larger than b12) + """ + + gamma1=NucInfo(Nuc1) + gamma2=NucInfo(Nuc2) + + h=6.6260693e-34 #Plancks constant in J s + mue0 = 12.56637e-7 #Permeability of vacuum [T^2m^3/J] + + return h*2*mue0/(4*np.pi*(r/1e9)**3)*gamma1*gamma2 + +def corr_SVD_switching(data0,data_in): + """ + Identifies sign switching in data objects that have been processed only + with un-optimized r matrices (r_no_opt). Will resort the R data, and + average the results + """ + rhoz0=data0.sens._rho_eff(mdl_num=None)[0] + + R=list() + Rvar=list() + + for d in data_in: + mat=np.dot(rhoz0,d.sens._rho_eff(mdl_num=None)[0].T) + R.append(np.dot(mat,d.R.T).T) + Rvar.append(np.dot(mat,d.R_std.T**2).T) + + return np.array(R),np.array(Rvar) + +#%% Tool for averaging data classes +def avg_data(data_in,weighted=True,weight=None,r_no_opt=False): + """ + Averages together a list of data objects generated by pyDIFRATE. Performs + a quick check that the sensitivities are the same for each object, and will + perform sign flips for detectors that don't match (this may happen for + detectors generated using the r_no_opt option- the sign of the SVD is not + numerically stable). If detectors exhibit a mismatch, a warning will + be printed (no warning will occur for a difference in sign). If weighting + is included, then data will be averaged, considering the standard deviation + of the data. + + Data produced with unoptimized r matrices may have sign swaps and occasional + scrambling of the detectors. 
This can be corrected, set r_no_opt=True + """ + if isinstance(data_in,dict): + data_in,_=dict2list(data_in) + + try: + data=data_in[0].copy() + except: + data=data_in[0].__class__() + data.sens=data_in[0].sens + data.R=data_in[0].R.copy() + print('Warning: deep copy failed') + + + + + if r_no_opt: + R,Rvar=corr_SVD_switching(data,data_in) + else: + R=list() + Rvar=list() + SZ=data.R.shape + + sign=sens_sign_check(data,data_in) + + for k,(d,s) in enumerate(zip(data_in,sign)): + R.append(s*d.R) + Rvar.append((d.R_std**2)) + + R[-1]=R[-1].reshape(np.prod(SZ)) + Rvar[-1]=Rvar[-1].reshape(np.prod(SZ)) + + R=np.array(R) + Rvar=np.array(Rvar) + if weighted: + if weight is None: + wt=1/Rvar + wt=(wt/wt.sum(axis=0)) + else: + wt=np.array(weight) + wt=wt/wt.sum(axis=0) + else: + wt=1/R.shape[0] + R=(R*wt).sum(axis=0) + Rvar=(Rvar*(wt**2)).sum(axis=0) + + if r_no_opt: + data.R=R + data.R_std=np.sqrt(Rvar) + else: + data.R=R.reshape(SZ) + data.R_std=np.sqrt(Rvar.reshape(SZ)) + data.R_u=None + data.R_l=None + + return data + +#%% Appends data classes together +def append_data(data_in,labels=None,index=None): + """ + Appends a list of data objects. A second argument, labels, may contain a list, + the same length as data_in, which will be appended to the existing labels + for an object. + + One may also input a dictionary, containing data objects. In this case, the + keys will be used as labels, unless the user inputs their own labels (or sets + labels='' to override this functionality) + + One may re-sort the result by providing an index to re-order all data. 
+ """ + + if isinstance(data_in,dict): + data_in,dict_label=dict2list(data_in) + if labels is None and labels!='': + labels=dict_label + elif labels=='': + labels=None + + data=data_in[0].copy() + + sign=sens_sign_check(data,data_in) + + + flds=['R','R_std','R_u','R_l','Rc','Rin','Rin_std','S2','S2in','S2in_std','S2c'] + R=dict() + label=list() + for f in flds:R[f]=list() + + for k,(d,s) in enumerate(zip(data_in,sign)): + for f in flds: + x=getattr(d,f) + if x is not None: R[f].append(x) + if labels is None: + label.append(d.label) + else: + label.append([str(labels[k])+str(l) for l in d.label]) + + + for f in flds: + if len(R[f])!=0: + try: + setattr(data,f,np.concatenate(R[f],axis=0)) + except: + print('Warning: Data sizes for "{0}" do not match and have been omitted'.format(f)) + data.label=np.concatenate(label,axis=0) + + if index is not None: + for f in flds: + x=getattr(data,f) + if x is not None: + x0=np.zeros(x.shape) + x0[index]=x + setattr(data,f,x0) + if data.label is not None: + lbl=np.empty(data.label.shape,dtype=data.label.dtype) + lbl[index]=data.label + data.label=lbl +# R=list() +# R_std=list() +# R_u=list() +# R_l=list() +# Rc=list() +# Rin=list() +# Rin_std=list() +# label=list() + +# for k,(d,s) in enumerate(zip(data_in,sign)): +# R.append(s*d.R) +# R_std.append(d.R_std) +# if d.Rc is not None: +# Rc.append(d.Rc) +# if d.R_u is not None: +# R_u.append(d.R_u) +# if d.R_l is not None: +# R_l.append(d.R_l) +# if d.Rin is not None: +# Rin.append(d.Rin) +# if d.Rin_std is not None: +# +# if labels is None: +# label.append(d.label) +# else: +# label.append([str(labels[k])+str(l) for l in d.label]) + +# data.R=np.concatenate(R,axis=0) +# data.R_std=np.concatenate(R_std,axis=0) +# if len(Rc)>0: +# data.Rc=np.concatenate(Rc,axis=0) +# if len(R_u)>0: +# data.R_u=np.concatenate(R_u,axis=0) +# if len(R_l)>0: +# data.R_l=np.concatenate(R_l,axis=0) +# data.label=np.concatenate(label,axis=0) + + return data + +def dict2list(data_in): + """ + If data is 
provided in a dictionary, this function returns all instances + of data found in that dictionary, and also returns labels based on the keys + + """ + + labels=list() + data=list() + + for l,d in data_in.items(): + if hasattr(d,'R') and hasattr(d,'R_std') and hasattr(d,'label'): + labels.append(l) + data.append(d) + return data,labels + +def sens_sign_check(data0,data_in): + """ + Compares the sensitivities for a reference data object and a list of other + objects. Returns a list of "signs" which indicate how to switch the sign + on the detector responses, in case the sign on the sensitivities is switched. + + Also returns warnings if the sensitivities cannot be resolved between two + data objects. + """ + + if data0.sens is not None: + "We'll use the sensitivity of the first object to compare to the rest, and switch signs accordingly" + rhoz0=data0.sens._rho_eff(mdl_num=None)[0] + step=np.array(rhoz0.shape[1]/10).astype(int) + rhoz0=rhoz0[:,::step] + z0=data0.sens.tc()[[0,-1]] + else: + rhoz0=None + + sign=list() + nd=data0.R.shape[1] + + for k,d in enumerate(data_in): + if rhoz0 is not None and d.sens is not None: + rhoz=d.sens._rho_eff(mdl_num=None)[0][:,::step] + + if rhoz0.shape==rhoz.shape and np.all(z0==d.sens.tc()[[0,-1]]): + test=rhoz0/rhoz + + if np.any(np.abs(np.abs(test)-1)>1e-3): + print('Sensitivities disagree for the {}th data object'.format(k)) + sign.append(np.ones(nd)) + else: + + sign.append(np.squeeze(mode(np.sign(test),axis=1)[0])) + """ + #Just in case we come up with some nan, we use mode, + which gets rid of them. 
+ Also, could fix average out spurious sign changes near 0 + """ + else: + print('Sensitivity shapes or ranges do not match for the {}th data object'.format(k)) + sign.append(np.ones(nd)) + + else: + sign.append(np.ones(nd)) + sign[-1][np.isnan(sign[-1])]=1 + + + return sign + +#%% Take the product of two data classes (in correlation function space) +def prod_mat(r1,r2=None,r=None): + """ + Calculates a matrix that allows one to take the product of two sets of + detectors. + + That is, suppose C(t)=C0(t)*C1(t). We have detector analyzes of C1(t) and + C2(t), where C0(t) and C1(t) are of the same resolution, and therefore have + r matrices of the same size + + Given the detector analysis of C1(t) and C2(t), we calculate a product matrix: + + p01= + [[p0_0*p1_0,p0_1*p1_0,p0_2*p1_0,...], + [p0_1*p1_0,p0_1*p1_1,p0_2*p1_1,...], + [p0_2*p1_0,p0_1*p1_2,p0_2*p1_2,...], + ...] + + Expanding this into a single vector, say p01, we may multiply + p=np.dot(pr,p01) + Where p is the detector analysis result of C(t) + + Note 1: p01=np.dot(np.atleast_2d(p1).T,np.atleast(2d(p0))).reshape(n**2), + where n is the number of detectors + + Note 2: The matrices need to be the same size, but do not need to be the + same matrix. If they are the same, one argument is required. If they are + different, two arguments are required. 
One may finally provide a 3rd r + if the final set of detectors is different than the initial set (the third + r matrix will be th final matrix) + + pr = calc_prod_mat(r1,r2=None,r=None) + """ + + if hasattr(r1,'r'): + r1=r1.r() + if r2 is None: + r2=r1 + elif hasattr(r2,'r'): + r2=r2.r() + if r is None: + r=r1 + elif hasattr(r,'r'): + r=r.r() + + n1=r1.shape[1] + n2=r2.shape[1] + pr0=np.array([np.dot(np.atleast_2d(row2).T,np.atleast_2d(row1)).reshape(n1*n2)\ + for row1,row2 in zip(r1,r2)]) + + pr=np.linalg.lstsq(r,pr0,rcond=None)[0] +# pr=np.dot(np.linalg.pinv(r),pr0) + + return pr + +def calc_prod(data,r,nf=None): + """ + Takes the product of a data sets, where two or more detector analyses are + assumed to be analyzing individual correlation functions such that + C(t)=C0(t)*C1(t) + + One may input a list of data objects, where the product of all is returned + or one may input a single object, for which multiple analyzes are contained + (usually, this is the result of an iRED/frame analysis) + + In case a single data object is used, the user must provide the number of + different frames used (nf) + + In case the list of data has different detectors, the detectors from the + first element of the list will be used (all data must come from correlation + functions having the same resolution) + + out=calc_prod(data_list) + + or + + out=calc_prod(data,nf) + + """ + + "User input check" + if not(isinstance(data,list)) and nf is None: + print('If a list of data is not provided, then the number of frames must be given') + return + + + R=list() + R_std=list() + "I wish that we didn't have to provide r manually, especially " +# r0=list() + + "Prepare the data" + if nf is None: + out=data[0].copy() +# r=data[0].detect + for d in data: + R.append(d.R) + R_std.append(d.R_std) +# r0.append(d.detect) + else: + out=data.copy() +# r=data.detect + n=int(data.R.shape[0]/nf) + for k in range(nf): + R.append(data.R[k*n:(k+1)*n,:]) + R_std.append(data.R_std[k*n:(k+1)*n,:]) +# 
r0.append(data.detect) + + + "Make sure r0 is a list" + if isinstance(r,list): + r0=r + else: + r0=[r for k in range(len(R))] + + "If r is detector object, get out the r matrix itself" + r0=[r.r() if hasattr(r,'r') else r for r in r0] + r=r0[0] + + R1=R.pop() + r1=r0.pop() + R1_var=R_std.pop()**2 + + + while len(R)>0: + "Calculate the product matrix" + r2=r0.pop() + n1=r1.shape[1] + n2=r2.shape[1] + pr=prod_mat(r1,r2,r) + r1=r + "Calculate the product of detectors" + R2=R.pop() + p1p2=np.array([np.dot(np.atleast_2d(row2).T,np.atleast_2d(row1)).reshape(n1*n2)\ + for row1,row2 in zip(R1,R2)]).T + R1=np.dot(pr,p1p2).T + + "Calculate the variance of the product of detector responses" + R2_var=R_std.pop()**2 + p1p2_var=list() + for row1,row2,var1,var2,f in zip(R1,R2,R1_var,R2_var,p1p2.T): + + x=np.atleast_2d(var1/row1).T.repeat(n2,axis=1).reshape(n1*n2) + y=np.atleast_2d(var2/row2).repeat(n1,axis=0).reshape(n1*n2) + + p1p2_var.append(f**2*(x+y)) + + "Calculate the variance of the product matrix with individual variances" + R1_var=np.dot(pr,np.array(p1p2_var).T).T + + out.R=R1 + out.R_std=np.sqrt(R1_var) + + return out + + +def linear_ex(x0,I0,x,dim=None,mode='last_slope'): + """ + Takes some initial data, I0, that is a function a function of x0 in some + dimension of I0 (by default, we search for a matching dimension- if more than + one dimension match, then the first matching dimension will be used) + + Then, we extrapolate I0 between the input points such that we return a new + I with axis x. + + This is a simple linear extrapolation– just straight lines between points. + If points in x fall outside of points in x0, we will use the two end points + to calculate a slope and extrapolate from there. + + x0 must be sorted in ascending or descending order. x does not need to be sorted. + + If values of x fall outside of the range of x0, by default, we will take the + slope at the ends of the given range. 
Alternatively, set mode to 'last_value' + to just take the last value in x0 + """ + + assert all(np.diff(x0)>=0) or all(np.diff(x0)<=0),"x0 is not sorted in ascending/descending order" + + + + x0=np.array(x0) + I0=np.array(I0) + ndim=np.ndim(x) + x=np.atleast_1d(x) + + "Determine what dimension we should extrapolate over" + if dim is None: + i=np.argwhere(x0.size==np.array(I0.shape)).squeeze() + assert i.size!=0,"No dimensions of I0 match the size of x0" + dim=i if i.ndim==0 else i[0] + + + "Swap dimensions of I0" + I0=I0.swapaxes(0,dim) + if np.any(np.diff(x0)<0): +# i=np.argwhere(np.diff(x0)<0)[0,0] +# x0=x0[:i] +# I0=I0[:i] + x0,I0=x0[::-1],I0[::-1] + + "Deal with x being extend beyond x0 limits" + if x.min()<=x0[0]: + I0=np.insert(I0,0,np.zeros(I0.shape[1:]),axis=0) + x0=np.concatenate(([x.min()-1],x0),axis=0) + if mode.lower()=='last_slope': + run=x0[2]-x0[1] + rise=I0[2]-I0[1] + slope=rise/run + I0[0]=I0[1]-slope*(x0[1]-x0[0]) + else: + I0[0]=I0[1] + if x.max()>=x0[-1]: + I0=np.concatenate((I0,[np.zeros(I0.shape[1:])]),axis=0) + x0=np.concatenate((x0,[x.max()+1]),axis=0) + if mode.lower()=='last_slope': + run=x0[-3]-x0[-2] + rise=I0[-3]-I0[-2] + slope=rise/run + I0[-1]=I0[-2]-slope*(x0[-2]-x0[-1]) + else: + I0[-1]=I0[-2] + + "Index for summing" + i=np.digitize(x,x0) + + I=((I0[i-1].T*(x0[i]-x)+I0[i].T*(x-x0[i-1]))/(x0[i]-x0[i-1])).T + + if ndim==0: + return I[0] + else: + return I.swapaxes(0,dim) + + + +#%% Some classes for making nice labels with units and unit prefixes + +class Default2Parent(object): + def __init__(self,varname): + self.value=list() + self.varname=varname + def __get__(self,instance,owner): + if not(self.varname in instance.index): + instance.index[self.varname]=len(self.value) + self.value.append(None) + i=instance.index[self.varname] + if self.value[i] is not None:return self.value[i] + return getattr(instance.parent,self.varname) + def __set__(self,instance,value): + if not(self.varname in instance.index): + 
instance.index[self.varname]=len(self.value) + self.value.append(None) + i=instance.index[self.varname] + self.value[i]=value + def __repr__(self): + return str(self.__get__()) + +class NiceStr(): + unit=Default2Parent('unit') + include_space=Default2Parent('include_space') + no_prefix=Default2Parent('no_prefix') + + def __init__(self,value,parent): + self.value=value + self.parent=parent + self.range=False + self.index={} + +# self.unit +# self.include_space +# self.no_prefix + + def __repr__(self): + return self.value + + def prefix(self,value): + if value==0: + return '',0,1 + pwr=np.log10(np.abs(value)) + x=np.concatenate((np.arange(-15,18,3),[np.inf])) + pre=['a','f','p','n',r'$\mu$','m','','k','M','G','T'] + #Probably the mu doesn't work + for x0,pre0 in zip(x,pre): + if pwr