From bc4cf96386a07c46a0497e8f468b45a03f889b57 Mon Sep 17 00:00:00 2001 From: Chi Zhang Date: Sun, 22 Nov 2015 14:52:39 +0000 Subject: [PATCH] finishing the command compareref --- src/command.c | 6 +- src/mcmc.c | 90 +++++++++++++---- src/sumpt.c | 266 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 315 insertions(+), 47 deletions(-) diff --git a/src/command.c b/src/command.c index f2d58759..3f8467da 100644 --- a/src/command.c +++ b/src/command.c @@ -12947,9 +12947,9 @@ else if (!strcmp(helpTkn, "Set")) MrBayesPrint (" tree distance values. \n"); MrBayesPrint (" \n"); MrBayesPrint (" Note that the \"Sumt\" command provides a different set of convergence diag- \n"); - MrBayesPrint (" nostics tools that you may also want to explore. Unlike \"Comparetree\", \"Sumt\"\n"); - MrBayesPrint (" can compare more than two tree samples and will calculate consensus trees and \n"); - MrBayesPrint (" split frequencies from the pooled samples. \n"); + MrBayesPrint (" nostics tools that you may also want to explore. Unlike \"Comparetree\", \n"); + MrBayesPrint (" \"Sumt\" can compare more than two tree samples and will calculate consensus \n"); + MrBayesPrint (" trees and split frequencies from the pooled samples. \n"); MrBayesPrint (" \n"); MrBayesPrint (" Options: \n"); MrBayesPrint (" \n"); diff --git a/src/mcmc.c b/src/mcmc.c index 7469ef75..2336a8fd 100644 --- a/src/mcmc.c +++ b/src/mcmc.c @@ -507,12 +507,12 @@ int AddTreeSamples (int from, int to, int saveToList) { do { if (fgets (lineBuf, longestLine, fp) == NULL) - { - SafeFclose (&fp); - free (lineBuf); - free (tempStr); + { + SafeFclose (&fp); + free (lineBuf); + free (tempStr); return ERROR; - } + } word = strtok (lineBuf, " "); } while (strcmp (word, "tree") != 0); if (k>=from) @@ -1708,7 +1708,7 @@ void CalcPartFreqStats (PFNODE *p, STATS *stat) n = chainParams.numRuns; min = (int)(chainParams.minPartFreq * stat->numSamples); - if (((MrBFlt)min) != chainParams.minPartFreq * stat->numSamples) + if ((MrBFlt)min != chainParams.minPartFreq * stat->numSamples) min++; /* recursively compute partition frequencies for all subpartitions */ @@ -1817,6 +1817,56 @@ void CalcTopoConvDiagn (int numSamples) } +/* used in DoCompRefTree */ +void PartFreq (PFNODE *p, STATS *stat, int *ntrees) +{ + int i, n = chainParams.numRuns; + MrBFlt f, sum, sumsq, stdev; + + /* recursively compute partition frequencies for all subpartitions */ + if (p->left != NULL) + PartFreq (p->left, stat, ntrees); + if (p->right != NULL) + PartFreq (p->right, stat, ntrees); + + sum = sumsq = 0.0; + for (i=0; icount[i]) / (MrBFlt)ntrees[i]; + sum += f; + sumsq += f * f; + } + + f = (sumsq - sum * sum / n) / (n - 1); + if (f < 0.0) + stdev = 0.0; + else + stdev = sqrt (f); + + stat->sum += stdev; + if (stat->max < stdev) + stat->max = stdev; + + stat->numPartitions++; +} +void CalcTopoConvDiagn2 (int *nTrees) +{ + int n; + STATS *stat; + + for (n=0; nnumPartitions = 0.0; + stat->sum = stat->max = 0.0; + + PartFreq (partFreqTreeRoot[n], stat, nTrees); + + stat->avgStdDev = stat->sum / stat->numPartitions; + } +} + + int CheckTemperature (void) { if (chainParams.userDefinedTemps == YES) @@ -16102,16 +16152,15 @@ int RunChain (RandLong *seed) for (i=0; i 1 - && ((n > 0 && chainParams.relativeBurnin == YES && - (chainParams.isSS == NO || (n > chainParams.burninSS * chainParams.sampleFreq && (n-lastStepEndSS) > numGenInStepBurninSS))) - || (n >= chainParams.chainBurnIn * chainParams.sampleFreq && chainParams.relativeBurnin == NO))) + if (chainParams.numRuns > 1 && + ((n > 0 && chainParams.relativeBurnin == YES && (chainParams.isSS == NO || (n > chainParams.burninSS * chainParams.sampleFreq && (n-lastStepEndSS) > numGenInStepBurninSS))) + || (n >= chainParams.chainBurnIn * chainParams.sampleFreq && chainParams.relativeBurnin == NO))) { /* we need some space for coming output */ MrBayesPrint ("\n"); @@ -16992,7 +17042,7 @@ int RunChain (RandLong *seed) } if (chainParams.isSS == YES) splitfreqSS[i*chainParams.numStepsSS+chainParams.numStepsSS-stepIndexSS-1] = f; - if (chainParams.stat[i].numPartitions == 0 && f > chainParams.stopVal) + if (chainParams.stat[i].numPartitions == 0 || f > chainParams.stopVal) stopChain = NO; } if (n < chainParams.numGen - chainParams.printFreq && (chainParams.stopRule == NO || stopChain == NO)) diff --git a/src/sumpt.c b/src/sumpt.c index 30e82b66..f959470d 100644 --- a/src/sumpt.c +++ b/src/sumpt.c @@ -93,6 +93,11 @@ extern int DoUserTree (void); extern int DoUserTreeParm (char *parmName, char *tkn); extern int SafeFclose(FILE **); +extern int SetUpPartitionCounters (void); +extern int AddTreeToPartitionCounters (Tree *tree, int treeId, int runId); +extern void CalcTopoConvDiagn2 (int *nTrees); +extern void FreeChainMemory (void); + /* local prototypes */ int CompareModelProbs (const void *x, const void *y); int PrintModelStats (char *fileName, char **headerNames, int nHeaders, ParameterSample *parameterSamples, int nRuns, int nSamples); @@ -3188,7 +3193,7 @@ void CalculateTreeToTreeDistance (Tree *tree1, Tree *tree2, MrBFlt *d1, MrBFlt * } -/* ConTree: Construct consensus tree FIXME: numTreeParts is not used*/ +/* ConTree: Construct consensus tree */ int ConTree (PartCtr **treeParts, int numTreeParts) { int i, j, targetNode, nBits, isCompat, numTerminalsEncountered; @@ -3840,7 +3845,7 @@ int DoCompareTree (void) /* Read file 2 for real */ if ((fp = OpenTextFileR(comptreeParams.comptFileName2)) == NULL) - return ERROR; + goto errorExit; /* ...and fast forward to beginning of last tree block. */ for (i=0; i= tFileInfo.numTreesInLastBlock) + { + MrBayesPrint ("%s Burnin should be smaller than the total number of trees\n", spacer); + goto errorExit; + } + + /* open the ref tree file */ + if ((fpTre = OpenTextFileR (inRefName)) == NULL) + goto errorExit; + /* ...and fast forward to beginning in last tree block */ + for (i=0; i <= tFileInfo.lastTreeBlockBegin; i++) + { + if (fgets(lineBuf, longestL-2, fpTre) == NULL) + goto errorExit; + } + + /* process each ref tree */ + for (i=1; i <= tFileInfo.numTreesInLastBlock; i++) + { + do { + if (fgets (lineBuf, longestL-2, fpTre) == NULL) + goto errorExit; + s = strtok (lineBuf, " "); + } + while (strcmp (s, "tree") != 0); + + /* add reference trees to partition counters, discarding burnin */ + if (i > burnin) + { + s = strtok (NULL, ";"); + while (*s != '(') + s++; + StripComments(s); + + if (ResetTopology (t, s) == ERROR) + goto errorExit; + if (AddTreeToPartitionCounters (t, 0, n) == ERROR) + goto errorExit; + nTre[n]++; + } + } + + /* close the tree file */ + SafeFclose (&fpTre); + } + /* end reference tree files */ + + /* open output file */ + strcpy (outName, comptreeParams.comptOutfile); + strcat (outName, ".sdsf"); + if ((fpOut = OpenNewMBPrintFile (outName)) == NULL) + goto errorExit; + /* print stamp and header */ + if ((int)strlen(stamp) > 1) + MrBayesPrintf (fpOut, "[ID: %s]\n", stamp); + if (chainParams.diagnStat == AVGSTDDEV) + MrBayesPrintf (fpOut, "Gen\tASDSF\n"); + else // MAXSTDDEV + MrBayesPrintf (fpOut, "Gen\tMSDSF\n"); + + /* Examine the tree file to be compared, save info to tFileInfo */ + strcpy(inName, comptreeParams.comptFileName1); + if (ExamineSumtFile(inName, &tFileInfo, treeName, &sumtParams.brlensDef) == ERROR) + goto errorExit; + if (longestL < tFileInfo.longestLineLength) + { + longestL = tFileInfo.longestLineLength; + lineBuf = (char *) SafeRealloc (lineBuf, (size_t)longestL * sizeof(char)); + if (!lineBuf) + { + MrBayesPrint ("%s Problem allocating string for reading tree file\n", spacer); + goto errorExit; + } + } + + /* open the tree file to be compared */ + if ((fpTre = OpenTextFileR (inName)) == NULL) + goto errorExit; + /* ...and fast forward to beginning in last tree block */ + for (i=0; i <= tFileInfo.lastTreeBlockBegin; i++) + { + if (fgets(lineBuf, longestL-2, fpTre) == NULL) + goto errorExit; + } + + /* process each tree to be compared and print SDSF to file */ + for (i=1; i <= tFileInfo.numTreesInLastBlock; i++) + { + do { + if (fgets (lineBuf, longestL-2, fpTre) == NULL) + goto errorExit; + s = strtok (lineBuf, " "); + } + while (strcmp (s, "tree") != 0); + + s = strtok (NULL, ";"); + gen = atoi(s+4); // 4 is offset to get rid of "rep." in tree name + while (*s != '(') + s++; + StripComments(s); + + /* add the tree to partition counters */ + if (ResetTopology (t, s) == ERROR) + goto errorExit; + if (AddTreeToPartitionCounters (t, 0, nRefRun) == ERROR) + goto errorExit; + nTre[nRefRun]++; + + /* calculate and write stdev of split freq */ + CalcTopoConvDiagn2 (nTre); + if (chainParams.stat[0].numPartitions == 0) + { + MrBayesPrintf (fpOut, "%d\tNA\n", gen); + } + else if (chainParams.diagnStat == AVGSTDDEV) + { + MrBayesPrintf (fpOut, "%d\t%lf\n", gen, chainParams.stat[0].avgStdDev); + } + else // MAXSTDDEV + { + MrBayesPrintf (fpOut, "%d\t%lf\n", gen, chainParams.stat[0].max); + } + } + + /* change back to the actual numRuns, end of hack */ + chainParams.numRuns -= 1; + + /* close tree file */ + SafeFclose (&fpTre); + /* close output file */ + SafeFclose (&fpOut); + /* free memory */ + free(lineBuf); + FreeTree (t); + FreeChainMemory(); + + return (NO_ERROR); + + /* error exit */ +errorExit: + MrBayesPrint ("%s Error in DoCompRefTree\n", spacer); + + chainParams.numRuns -= 1; + SafeFclose (&fpTre); + SafeFclose (&fpOut); + + FreeTree (t); + FreeChainMemory(); + free(lineBuf); + + return (ERROR); } @@ -5505,25 +5723,25 @@ int DoSumt (void) MrBayesPrint ("\n\n"); } - /* Exclude trivial splits when calculating average standard deviation of split frequencies. */ - avgStdDev = sumStdDev / (numTreePartsToPrint-sumtParams.numTaxa-1); - avgPSRF = sumPSRF / numPSRFSamples; - - if (sumtParams.numRuns > 1 && sumtParams.summary == YES) - { - MrBayesPrint ("%s Summary statistics for partitions with frequency >= %1.2lf in at least one run:\n", spacer, sumtParams.minPartFreq); - MrBayesPrint ("%s Average standard deviation of split frequencies = %1.6lf\n", spacer, avgStdDev); - MrBayesPrint ("%s Maximum standard deviation of split frequencies = %1.6lf\n", spacer, maxStdDev); - } - if (sumtParams.brlensDef == YES && sumtParams.numRuns > 1 && sumtParams.summary == YES) - { - MrBayesPrint ("%s Average PSRF for parameter values (excluding NA and >10.0) = %1.3lf\n", spacer, avgPSRF); - if (maxPSRF == 10) - MrBayesPrint ("%s Maximum PSRF for parameter values = NA\n", spacer); - else - MrBayesPrint ("%s Maximum PSRF for parameter values = %1.3lf\n", spacer, maxPSRF); - } - MrBayesPrint ("\n"); + /* Exclude trivial splits when calculating average standard deviation of split frequencies. */ + avgStdDev = sumStdDev / (numTreePartsToPrint-sumtParams.numTaxa-1); + avgPSRF = sumPSRF / numPSRFSamples; + + if (sumtParams.numRuns > 1 && sumtParams.summary == YES) + { + MrBayesPrint ("%s Summary statistics for partitions with frequency >= %1.2lf in at least one run:\n", spacer, sumtParams.minPartFreq); + MrBayesPrint ("%s Average standard deviation of split frequencies = %1.6lf\n", spacer, avgStdDev); + MrBayesPrint ("%s Maximum standard deviation of split frequencies = %1.6lf\n", spacer, maxStdDev); + } + if (sumtParams.brlensDef == YES && sumtParams.numRuns > 1 && sumtParams.summary == YES) + { + MrBayesPrint ("%s Average PSRF for parameter values (excluding NA and >10.0) = %1.3lf\n", spacer, avgPSRF); + if (maxPSRF == 10) + MrBayesPrint ("%s Maximum PSRF for parameter values = NA\n", spacer); + else + MrBayesPrint ("%s Maximum PSRF for parameter values = %1.3lf\n", spacer, maxPSRF); + } + MrBayesPrint ("\n"); SortPartCtr (treeParts, 0, numUniqueSplitsFound-1); /* We sort again but this time we sort all partitions instead of just first numTreePartsToPrintNow */ /* make the majority rule consensus tree */