Merge pull request #1481 from veg/develop

BUSTED fixes
veg · May 26, 2022 · d6d2d3e · d6d2d3e · kjlevitz · May 26, 2022
2 parents d562d1b + 39b86b7
commit d6d2d3e
Show file tree

Hide file tree

Showing 4 changed files with 64 additions and 22 deletions.
diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
@@ -33,7 +33,7 @@ for branch-site variation in synonymous substitution rates. Version 3.1 adds HMM
 Version 4.0 adds support for multiple hits, ancestral state reconstruction saved to JSON, and profiling of branch-site level support for selection / multiple hits.
 ",
                                terms.io.version : "4.0",
-                               terms.io.reference : "*Gene-wide identification of episodic selection*, Mol Biol Evol. 32(5):1365-71",
+                               terms.io.reference : "*Gene-wide identification of episodic selection*, Mol Biol Evol. 32(5):1365-71, *Synonymous Site-to-Site Substitution Rate Variation Dramatically Inflates False Positive Rates of Selection Analyses: Ignore at Your Own Peril*, Mol Biol Evol. 37(8):2430-2439",
                                terms.io.authors : "Sergei L Kosakovsky Pond",
                                terms.io.contact : "[email protected]",
                                terms.io.requirements : "in-frame codon alignment and a phylogenetic tree (optionally annotated with {})"
@@ -368,7 +368,7 @@ if (busted.multi_hit != "None") {
     if (None != busted.triple_hit_parameter) {
         busted._mh_parameters [busted.triple_hit_parameter] = "branch_level_th";
         busted._mh_to_local [busted.triple_hit_parameter] = 1;
-        busted._mh_parameter_map [^'terms.parameters.triple_hit_rate'] = busted._mh_parameters [busted.triple_hit_rate];
+        busted._mh_parameter_map [^'terms.parameters.triple_hit_rate'] = busted._mh_parameters [busted.triple_hit_parameter];
     }
 
     function busted.export_mh_er_model () {        
@@ -412,6 +412,11 @@ if (busted.do_srv) {
     } 
 }
 
+if (busted.multi_hit != "None" && busted.has_background) {
+     models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, ^'terms.parameters.multiple_hit_rate');
+     models.BindGlobalParameters ({"0" : busted.test.bsrel_model, "1" : busted.background.bsrel_model}, ^'terms.parameters.triple_hit_rate');
+}
+
 busted.distribution = models.codon.BS_REL.ExtractMixtureDistribution(busted.test.bsrel_model);
 
 
@@ -712,6 +717,8 @@ if (busted.do_srv) {
 
 }
 
+busted.stashLF = estimators.TakeLFStateSnapshot (busted.full_model[terms.likelihood_function]);
+
 if (busted.has_selection_support) {
 
     utility.ToggleEnvVariable ("KEEP_OPTIMAL_ORDER", TRUE);
@@ -740,7 +747,8 @@ if (busted.has_selection_support) {
             }
         }
 
-        busted.tested_branches = Rows (busted.tested_branches);
+        busted.tested_branches = Rows (busted.tested_branches);    
+
         utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE);
         SetParameter ( busted.tested_branches , MODEL, ^busted.er_model);
         utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None);
@@ -830,10 +838,10 @@ if (busted.has_selection_support) {
         //console.log ("\n\n" + (exp_counter2-exp_counter) + "\n\n");
 
         LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE);
-        utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE);
 	    SetParameter ( busted.tested_branches , MODEL, busted.test);
-        utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None);
-
+
+        estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF);
+
 
         selection.io.json_store_branch_attribute(busted.json, busted.ER, terms.json.branch_annotations, busted.display_orders[busted.ER],
                                              _partition_,
@@ -868,9 +876,6 @@ if (utility.Array1D (busted._mh_parameters)) {
         for (_partition_, _selection_; in; busted.selected_branches) {
             busted.branch_site_level_ER = {};
 
-            for (_key_, _value_; in; busted.mixture_weights_parameters) {
-                busted.current_weights  [_value_] = Eval (_key_);
-            }
 
             busted.tested_branches = {};
             busted.full_to_short = {};
@@ -929,10 +934,10 @@ if (utility.Array1D (busted._mh_parameters)) {
                     ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}});
                     busted.branch3H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1);
                     if (busted.do2H) {
-                        busted.er_parameter =  (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']);
-                        ^busted.er_parameter  = 0;
+                        busted.er2_parameter =  (_b_ + "." + busted._mh_parameter_map[^'terms.parameters.multiple_hit_rate']);
+                        ^busted.er2_parameter  = 0;
                         ConstructCategoryMatrix (busted.siteLLConstrained, ^(busted.full_model[terms.likelihood_function]) , SITE_LOG_LIKELIHOODS, {{+_partition_}});
-                        ^busted.er_parameter  = ^busted.double_hit_parameter;
+                        ^busted.er2_parameter  = ^busted.double_hit_parameter;
                         busted.branch23H [busted.full_to_short [_b_]] = busted.FilteredEvidenceRatios (busted.siteLL, busted.siteLLConstrained, 1);
                     }
 
@@ -943,9 +948,8 @@ if (utility.Array1D (busted._mh_parameters)) {
 
             LFCompute (^(busted.full_model[terms.likelihood_function]),LF_DONE_COMPUTE);
 
-            utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", TRUE);
             SetParameter ( busted.tested_branches , MODEL, busted.test);
-            utility.ToggleEnvVariable ("SET_MODEL_KEEP_LOCALS", None);
+            estimators.RestoreLFStateFromSnapshot (busted.full_model[terms.likelihood_function], busted.stashLF);
 
             if (busted.do2H) {
                 selection.io.json_store_branch_attribute(busted.json, busted.ER2H, terms.json.branch_annotations, busted.display_orders[busted.ER],
@@ -1152,8 +1156,8 @@ lfunction busted._renormalize (v, distro, mean) {
 //------------------------------------------------------------------------------
 
 lfunction busted.get_multi_hit (model_fit) {
-    params = selection.io.extract_global_MLE_re (model_fit, ^'terms.parameters.multiple_hit_rate');
-    for (k,v; in; selection.io.extract_global_MLE_re (model_fit, ^'terms.parameters.triple_hit_rate')) {
+    params = selection.io.extract_global_MLE_re (model_fit, '^' + ^'terms.parameters.multiple_hit_rate');
+    for (k,v; in; selection.io.extract_global_MLE_re (model_fit, '^' + ^'terms.parameters.triple_hit_rate')) {
         params + v;
     }
     return params;

diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
@@ -4998,7 +4998,7 @@ _Matrix*        _LikelihoodFunction::Optimize (_AssociativeList const * options)
                 BufferToConsole (buffer);
                 snprintf (buffer, sizeof(buffer),"\nSmoothing term: %g", smoothingTerm);
                 BufferToConsole (buffer);
-                snprintf (buffer, sizeof(buffer),"\nExponential count: %d", smoothingTerm);
+                snprintf (buffer, sizeof(buffer),"\nExponential count: %d", matrix_exp_count);
                 BufferToConsole (buffer);
             }
 
@@ -5940,9 +5940,9 @@ HBLObjectRef   _LikelihoodFunction::CovarianceMatrix (_SimpleList* parameterList
 
         hyFloat locH = 1./131072.; //*(t1>10.?exp(log(10.)*((long)log(t1)/log(10.))):1.);
 
-        if (locH<1e-7) {
+        /*if (locH<1e-7) {
             locH = 1e-7;
-        }
+        }*/
 
         /*hyFloat locH = 1./1024.,
                    tryH = locH * 0.25,

diff --git a/src/core/matrix.cpp b/src/core/matrix.cpp
@@ -3979,10 +3979,48 @@ void    _Matrix::Multiply  (_Matrix& storage, _Matrix const& secondArg) const
                         long col_offset = 0L;
                         for (long c = 0; c < 20L; c++, ti++, col_offset += 20L) {
                                 float64x2_t A4 = vdupq_n_f64(theData[ti]);
-                            //for (long k = 0; k < 20L; k+=4) {
+
+
+                                /*float64x2x4_t B, D, B_2, D_2;
+
+                                D = vld1q_f64_x4(dest + row_offset);
+                                B = vld1q_f64_x4(secondArg.theData + col_offset);
+                                D_2 = vld1q_f64_x4(dest + row_offset + 8);
+                                B_2 = vld1q_f64_x4(secondArg.theData + col_offset + 8);
+
+                                float64x2x2_t B2, D2;
+                                D2 = vld1q_f64_x2(dest + row_offset + 16);
+                                B2 = vld1q_f64_x2(secondArg.theData + col_offset + 16);
+
+
+
+
+
+
+                                D.val[0] = vfmaq_f64 (D.val[0], A4, B.val[0]);
+                                D.val[1] = vfmaq_f64 (D.val[1], A4, B.val[1]);
+                                D.val[2] = vfmaq_f64 (D.val[2], A4, B.val[2]);
+                                D.val[3] = vfmaq_f64 (D.val[3], A4, B.val[3]);
+
+
+                                D_2.val[0] = vfmaq_f64 (D_2.val[0], A4, B_2.val[0]);
+                                D_2.val[1] = vfmaq_f64 (D_2.val[1], A4, B_2.val[1]);
+                                D_2.val[2] = vfmaq_f64 (D_2.val[2], A4, B_2.val[2]);
+                                D_2.val[3] = vfmaq_f64 (D_2.val[3], A4, B_2.val[3]);
+
+                                D2.val[0] = vfmaq_f64 (D2.val[0], A4, B2.val[0]);
+                                D2.val[1] = vfmaq_f64 (D2.val[1], A4, B2.val[1]);
+
+                                vst1q_f64_x4 (dest + row_offset, D);
+                                vst1q_f64_x4 (dest + row_offset + 8, D_2);
+                                vst1q_f64_x2 (dest + row_offset + 16, D2);*/
+
+
+
                                 float64x2_t D4_1, D4_2, D4_3, D4_4, D4_5, D4_6, D4_7, D4_8, D4_9, D4_10;
                                 float64x2_t B4_1, B4_2, B4_3, B4_4, B4_5, B4_6, B4_7, B4_8, B4_9, B4_10;
 
+
                                 DO_GROUP_OP1 (D4_1,B4_1,0);
                                 DO_GROUP_OP1 (D4_2,B4_2,2);
                                 DO_GROUP_OP1 (D4_3,B4_3,4);

diff --git a/src/core/variablecontainer.cpp b/src/core/variablecontainer.cpp
@@ -401,7 +401,7 @@ _VariableContainer::~_VariableContainer(void) {
         delete gVariables;
     }
     if (templateFormulaClone) {
-        delete templateFormulaClone;
+        delete [] templateFormulaClone;
     }
 }
 
@@ -827,7 +827,7 @@ void      _VariableContainer::Clear(void) {
         gVariables = nil;
     }
     if (templateFormulaClone) {
-        delete templateFormulaClone;
+        delete [] templateFormulaClone;
         templateFormulaClone = nil;
     }
 }