From 1e1015d50875eb8779293295790ba654c01cdc6d Mon Sep 17 00:00:00 2001
From: JohannesGawron <johannes.gawron@bsse.ethz.ch>
Date: Mon, 10 Jun 2024 17:22:17 +0200
Subject: [PATCH] cleanup: remove Rcode/markdowns

---
 Rcode/markdowns/Br11.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br16.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br16_AC.Rmd      | 181 -------------------------------
 Rcode/markdowns/Br16_AC_max1.Rmd | 172 -----------------------------
 Rcode/markdowns/Br16_AC_max2.Rmd | 180 ------------------------------
 Rcode/markdowns/Br16_AC_max3.Rmd | 180 ------------------------------
 Rcode/markdowns/Br16_AC_max4.Rmd | 180 ------------------------------
 Rcode/markdowns/Br16_B.Rmd       | 181 -------------------------------
 Rcode/markdowns/Br16_B_max1.Rmd  | 172 -----------------------------
 Rcode/markdowns/Br16_B_max2.Rmd  | 180 ------------------------------
 Rcode/markdowns/Br16_B_max3.Rmd  | 180 ------------------------------
 Rcode/markdowns/Br16_B_max4.Rmd  | 180 ------------------------------
 Rcode/markdowns/Br16_C.Rmd       | 181 -------------------------------
 Rcode/markdowns/Br16_C_max1.Rmd  | 172 -----------------------------
 Rcode/markdowns/Br16_C_max2.Rmd  | 180 ------------------------------
 Rcode/markdowns/Br16_C_max3.Rmd  | 180 ------------------------------
 Rcode/markdowns/Br23.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br26.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br30.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br37.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br38.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br39.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br44.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br45.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br46.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br53.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br57.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br61.Rmd         | 181 -------------------------------
 Rcode/markdowns/Br7.Rmd          | 181 -------------------------------
 Rcode/markdowns/Brx50.Rmd        | 181 -------------------------------
 Rcode/markdowns/LM2.Rmd          | 181 -------------------------------
 Rcode/markdowns/Lu2.Rmd          | 181 -------------------------------
 Rcode/markdowns/Lu7.Rmd          | 181 -------------------------------
 Rcode/markdowns/Ov8.Rmd          | 181 -------------------------------
 Rcode/markdowns/Pr6.Rmd          | 181 -------------------------------
 Rcode/markdowns/Pr9.Rmd          | 181 -------------------------------
 Rcode/markdowns/template.Rmd     | 181 -------------------------------
 37 files changed, 6662 deletions(-)
 delete mode 100644 Rcode/markdowns/Br11.Rmd
 delete mode 100644 Rcode/markdowns/Br16.Rmd
 delete mode 100644 Rcode/markdowns/Br16_AC.Rmd
 delete mode 100644 Rcode/markdowns/Br16_AC_max1.Rmd
 delete mode 100644 Rcode/markdowns/Br16_AC_max2.Rmd
 delete mode 100644 Rcode/markdowns/Br16_AC_max3.Rmd
 delete mode 100644 Rcode/markdowns/Br16_AC_max4.Rmd
 delete mode 100644 Rcode/markdowns/Br16_B.Rmd
 delete mode 100644 Rcode/markdowns/Br16_B_max1.Rmd
 delete mode 100644 Rcode/markdowns/Br16_B_max2.Rmd
 delete mode 100644 Rcode/markdowns/Br16_B_max3.Rmd
 delete mode 100644 Rcode/markdowns/Br16_B_max4.Rmd
 delete mode 100644 Rcode/markdowns/Br16_C.Rmd
 delete mode 100644 Rcode/markdowns/Br16_C_max1.Rmd
 delete mode 100644 Rcode/markdowns/Br16_C_max2.Rmd
 delete mode 100644 Rcode/markdowns/Br16_C_max3.Rmd
 delete mode 100644 Rcode/markdowns/Br23.Rmd
 delete mode 100644 Rcode/markdowns/Br26.Rmd
 delete mode 100644 Rcode/markdowns/Br30.Rmd
 delete mode 100644 Rcode/markdowns/Br37.Rmd
 delete mode 100644 Rcode/markdowns/Br38.Rmd
 delete mode 100644 Rcode/markdowns/Br39.Rmd
 delete mode 100644 Rcode/markdowns/Br44.Rmd
 delete mode 100644 Rcode/markdowns/Br45.Rmd
 delete mode 100644 Rcode/markdowns/Br46.Rmd
 delete mode 100644 Rcode/markdowns/Br53.Rmd
 delete mode 100644 Rcode/markdowns/Br57.Rmd
 delete mode 100644 Rcode/markdowns/Br61.Rmd
 delete mode 100644 Rcode/markdowns/Br7.Rmd
 delete mode 100644 Rcode/markdowns/Brx50.Rmd
 delete mode 100644 Rcode/markdowns/LM2.Rmd
 delete mode 100644 Rcode/markdowns/Lu2.Rmd
 delete mode 100644 Rcode/markdowns/Lu7.Rmd
 delete mode 100644 Rcode/markdowns/Ov8.Rmd
 delete mode 100644 Rcode/markdowns/Pr6.Rmd
 delete mode 100644 Rcode/markdowns/Pr9.Rmd
 delete mode 100755 Rcode/markdowns/template.Rmd

diff --git a/Rcode/markdowns/Br11.Rmd b/Rcode/markdowns/Br11.Rmd
deleted file mode 100644
index 9d4db6d..0000000
--- a/Rcode/markdowns/Br11.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br11"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br11"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16.Rmd b/Rcode/markdowns/Br16.Rmd
deleted file mode 100644
index e4bcf7f..0000000
--- a/Rcode/markdowns/Br16.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br16"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_AC.Rmd b/Rcode/markdowns/Br16_AC.Rmd
deleted file mode 100644
index a828586..0000000
--- a/Rcode/markdowns/Br16_AC.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br16_AC"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_AC"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_AC_max1.Rmd b/Rcode/markdowns/Br16_AC_max1.Rmd
deleted file mode 100644
index 35c4fce..0000000
--- a/Rcode/markdowns/Br16_AC_max1.Rmd
+++ /dev/null
@@ -1,172 +0,0 @@
----
-title: "Br16_AC_max1"
-author: "Johannes Gawron"
-date: "2024-023-04"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_AC_max1"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal))
-    }
-  })
-}
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, nClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, nClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_AC_max2.Rmd b/Rcode/markdowns/Br16_AC_max2.Rmd
deleted file mode 100644
index 329c7f2..0000000
--- a/Rcode/markdowns/Br16_AC_max2.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_AC_max2"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_AC_max2"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_AC_max3.Rmd b/Rcode/markdowns/Br16_AC_max3.Rmd
deleted file mode 100644
index 3ddb701..0000000
--- a/Rcode/markdowns/Br16_AC_max3.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_AC_max3"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_AC_max3"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_AC_max4.Rmd b/Rcode/markdowns/Br16_AC_max4.Rmd
deleted file mode 100644
index 5103cd3..0000000
--- a/Rcode/markdowns/Br16_AC_max4.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_AC_max4"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_AC_max4"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_B.Rmd b/Rcode/markdowns/Br16_B.Rmd
deleted file mode 100644
index 7923d92..0000000
--- a/Rcode/markdowns/Br16_B.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br16_B"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_B"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_B_max1.Rmd b/Rcode/markdowns/Br16_B_max1.Rmd
deleted file mode 100644
index 7172010..0000000
--- a/Rcode/markdowns/Br16_B_max1.Rmd
+++ /dev/null
@@ -1,172 +0,0 @@
----
-title: "Br16_B_max1"
-author: "Johannes Gawron"
-date: "2024-023-04"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_B_max1"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal))
-    }
-  })
-}
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, nClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, nClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_B_max2.Rmd b/Rcode/markdowns/Br16_B_max2.Rmd
deleted file mode 100644
index 9000389..0000000
--- a/Rcode/markdowns/Br16_B_max2.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_B_max2"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_B_max2"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_B_max3.Rmd b/Rcode/markdowns/Br16_B_max3.Rmd
deleted file mode 100644
index ee68ad7..0000000
--- a/Rcode/markdowns/Br16_B_max3.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_B_max3"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_B_max3"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_B_max4.Rmd b/Rcode/markdowns/Br16_B_max4.Rmd
deleted file mode 100644
index 429498f..0000000
--- a/Rcode/markdowns/Br16_B_max4.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_B_max4"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_B_max4"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_C.Rmd b/Rcode/markdowns/Br16_C.Rmd
deleted file mode 100644
index f878018..0000000
--- a/Rcode/markdowns/Br16_C.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br16_C"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_C"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_C_max1.Rmd b/Rcode/markdowns/Br16_C_max1.Rmd
deleted file mode 100644
index c43890b..0000000
--- a/Rcode/markdowns/Br16_C_max1.Rmd
+++ /dev/null
@@ -1,172 +0,0 @@
----
-title: "Br16_C_max1"
-author: "Johannes Gawron"
-date: "2024-023-04"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_C_max1"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal))
-    }
-  })
-}
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, nClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, nClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_C_max2.Rmd b/Rcode/markdowns/Br16_C_max2.Rmd
deleted file mode 100644
index a91a921..0000000
--- a/Rcode/markdowns/Br16_C_max2.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_C_max2"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_C_max2"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br16_C_max3.Rmd b/Rcode/markdowns/Br16_C_max3.Rmd
deleted file mode 100644
index d122632..0000000
--- a/Rcode/markdowns/Br16_C_max3.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "Br16_C_max3"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br16_C_max3"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br23.Rmd b/Rcode/markdowns/Br23.Rmd
deleted file mode 100644
index 565ed4d..0000000
--- a/Rcode/markdowns/Br23.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br23"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br23"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br26.Rmd b/Rcode/markdowns/Br26.Rmd
deleted file mode 100644
index 0e03473..0000000
--- a/Rcode/markdowns/Br26.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br26"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br26"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br30.Rmd b/Rcode/markdowns/Br30.Rmd
deleted file mode 100644
index 6a62b5d..0000000
--- a/Rcode/markdowns/Br30.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br30"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br30"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br37.Rmd b/Rcode/markdowns/Br37.Rmd
deleted file mode 100644
index 41878cf..0000000
--- a/Rcode/markdowns/Br37.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br37"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br37"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br38.Rmd b/Rcode/markdowns/Br38.Rmd
deleted file mode 100644
index 41908d3..0000000
--- a/Rcode/markdowns/Br38.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br38"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br38"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br39.Rmd b/Rcode/markdowns/Br39.Rmd
deleted file mode 100644
index b3cd1ad..0000000
--- a/Rcode/markdowns/Br39.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br39"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br39"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br44.Rmd b/Rcode/markdowns/Br44.Rmd
deleted file mode 100644
index d3b7201..0000000
--- a/Rcode/markdowns/Br44.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br44"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br44"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br45.Rmd b/Rcode/markdowns/Br45.Rmd
deleted file mode 100644
index a7f440b..0000000
--- a/Rcode/markdowns/Br45.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br45"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br45"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br46.Rmd b/Rcode/markdowns/Br46.Rmd
deleted file mode 100644
index b334cd6..0000000
--- a/Rcode/markdowns/Br46.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br46"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br46"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br53.Rmd b/Rcode/markdowns/Br53.Rmd
deleted file mode 100644
index 59a2d09..0000000
--- a/Rcode/markdowns/Br53.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br53"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br53"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br57.Rmd b/Rcode/markdowns/Br57.Rmd
deleted file mode 100644
index 6bd3fab..0000000
--- a/Rcode/markdowns/Br57.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br57"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br57"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br61.Rmd b/Rcode/markdowns/Br61.Rmd
deleted file mode 100644
index f91766d..0000000
--- a/Rcode/markdowns/Br61.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br61"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br61"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Br7.Rmd b/Rcode/markdowns/Br7.Rmd
deleted file mode 100644
index 9fcaa9c..0000000
--- a/Rcode/markdowns/Br7.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Br7"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Br7"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Brx50.Rmd b/Rcode/markdowns/Brx50.Rmd
deleted file mode 100644
index 625c68f..0000000
--- a/Rcode/markdowns/Brx50.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Brx50"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Brx50"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/LM2.Rmd b/Rcode/markdowns/LM2.Rmd
deleted file mode 100644
index 1c3842a..0000000
--- a/Rcode/markdowns/LM2.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "LM2"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "LM2"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Lu2.Rmd b/Rcode/markdowns/Lu2.Rmd
deleted file mode 100644
index 5949e05..0000000
--- a/Rcode/markdowns/Lu2.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Lu2"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Lu2"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Lu7.Rmd b/Rcode/markdowns/Lu7.Rmd
deleted file mode 100644
index b8940f1..0000000
--- a/Rcode/markdowns/Lu7.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Lu7"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Lu7"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Ov8.Rmd b/Rcode/markdowns/Ov8.Rmd
deleted file mode 100644
index 2ed18ec..0000000
--- a/Rcode/markdowns/Ov8.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Ov8"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Ov8"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Pr6.Rmd b/Rcode/markdowns/Pr6.Rmd
deleted file mode 100644
index 0aea058..0000000
--- a/Rcode/markdowns/Pr6.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Pr6"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Pr6"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/Pr9.Rmd b/Rcode/markdowns/Pr9.Rmd
deleted file mode 100644
index 05a0f34..0000000
--- a/Rcode/markdowns/Pr9.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "Pr9"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "Pr9"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-
diff --git a/Rcode/markdowns/template.Rmd b/Rcode/markdowns/template.Rmd
deleted file mode 100755
index a9aa374..0000000
--- a/Rcode/markdowns/template.Rmd
+++ /dev/null
@@ -1,181 +0,0 @@
----
-title: "__tree__"
-author: "Johannes Gawron"
-date: "2024-03-07"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-
-## Splitting statistics
-
-This code analyses splitting statistics for CTC-clusters.
-
-The analysis takes a list of trees sampled from its posterior distribution as input and samples mutations placements for each of the trees.
-
-
-## Configure the script
-```{r config}
-inputFolder <- "../../../input_folder"
-simulationInputFolder <- "../../../simulations/simulations2"
-treeName <- "__tree__"
-nTreeSamplingEvents <- 100
-nMutationSamplingEvents <- 100
-```
-
-## Loading data
-```{r load}
-source("../functions.R")
-
-
-input <- load_data(inputFolder, treeName)
-
-postSampling <- input$postSampling
-nClusters <- input$nClusters
-ClusterID <- input$clusterID
-nCells <- input$nCells  
-nMutations <- input$nMutations
-nClusters <- input$nClusters
-alleleCount <- input$alleleCount
-mutatedReadCounts <- input$mutatedReadCounts
-totalReadCounts <- input$totalReadCounts
-sampleDescription <- input$sample_description
-```
-
-
-## Sample description 
-
-Each row corresponds to a cell.
-Column description:
-  - Cluster: An number indicating which sample the cell belongs to.
-  - ClusterName: The name of the sample in the nodeDescription.tsv file
-  - WBC: a binary vector indicating whether the cell is a white blood cell (1) or not (0).
-  - color: Indicates the color of the cluster in the tree, as described in the nodeDescription.tsv
-          file.
-  
-```{r Describe samples}
-print(sampleDescription)
-```
-
-
-
-Get null distributions of relevant statistics, stratified by sample:
-```{r}
-cutoffsSplittingProbs <- data.frame(clusterSize = vector(), Cutoff = vector())
-cutoffsBranchingProbabilities <- data.frame(clusterSize = vector(), Cutoff = vector())
-
-for (clusterSize in 2:5){
-  try(
-  {treeNameSimulated <- paste(treeName, clusterSize, sep = '_')
-
-
-  inputSimulated <- load_data(simulationInputFolder, treeNameSimulated)
-
-  postSamplingSimulated <- inputSimulated$postSampling
-  nClustersSimulated <- inputSimulated$nClusters
-  ClusterIDSimulated <- inputSimulated$clusterID
-  nCellsSimulated <- inputSimulated$nCells  
-  nMutationsSimulated <- inputSimulated$nMutations
-  nClustersSimulated <- inputSimulated$nClusters
-  alleleCountSimulated <- inputSimulated$alleleCount
-  mutatedReadCountsSimulated <- inputSimulated$mutatedReadCounts
-  totalReadCountsSimulated <- inputSimulated$totalReadCounts
-  sampleDescriptionSimulated <- inputSimulated$sample_description
-  
-  distance <- computeClusterSplits(sampleDescriptionSimulated, postSamplingSimulated, treeNameSimulated, nCellsSimulated,
-                     nMutationsSimulated, nClustersSimulated,
-                     alleleCountSimulated,
-                     mutatedReadCountsSimulated, totalReadCountsSimulated,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c("orchid", "orchid1", "orchid2",
-                                           "orchid3", "orchid4", "darkorchid",
-                                           "darkorchid1","darkorchid2", "darkorchid3",
-                                           "darkorchid4", "purple", "purple1",
-                                           "purple2", "purple3", "purple4"))
-
-  
-
-  plot(ggplot(distance$splittingProbs, aes(x = "Values", y = Splitting_probability, fill = 'Splitting_probability')) +
-    geom_boxplot())
-  cutoffsSplittingProbs <- rbind(cutoffsSplittingProbs, data.frame(clusterSize = clusterSize, Cutoff = mean(distance$splittingProbs$Splitting_probability) + 2 * sd(distance$splittingProbs$Splitting_probability) ))
-  
-  ##Note that the way the aggregatedBranchingProbabilities are computed all pairs of cells from the same cluster are
-  ## taken into account. This has the effect that clusters with more cells would be counted more often and contribute more
-  ## to the shape of the final distribution. This is no problem right now as we only aggregate counts from clusters
-  ## of the same size, it is however the potential source of a future bug!!
-  
-  plot(ggplot(data.frame(x = distance$aggregatedBranchingProbabilities), aes(x = x)) +
-    geom_histogram(binwidth = 0.01))
-  print(data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  cutoffsBranchingProbabilities <- rbind(cutoffsBranchingProbabilities, data.frame(clusterSize = clusterSize, Cutoff = quantile(distance$aggregatedBranchingProbabilities, probs = 0.95, names = FALSE)[1] ))
-  })
-}
-```
-
-
-
-Get the relevant statistics for each of the clusters of a dataset and output numbers of oligoclonal clusters:
-```{r}
-nTumorClusters <- 0
-nOligoclonalClusters1 <- 0
-nOligoclonalClusters2 <- 0
-splittingSummary1 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-splittingSummary2 <- data.frame(Color = vector(), Oligoclonal = vector(), ClusterSize = vector())
-
-for(clusterSize in 2:5){
-  try({
-    clusterColor <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() == clusterSize) %>%
-    pull(color) %>%
-    unique() 
-    
-    for(color in clusterColor){
-      distance <- computeClusterSplits(sampleDescription, postSampling, treeName, nCells,
-                     nMutations, nClusters,
-                     alleleCount,
-                     mutatedReadCounts, totalReadCounts,
-                     nMutationSamplingEvents = nMutationSamplingEvents, nTreeSamplingEvents = nTreeSamplingEvents,
-                     cellPairSelection = c(color))
-
-      splittingProbs <- mean(distance$splittingProbs$Splitting_probability)
-      branchingProbs <- mean(distance$aggregatedBranchingProbabilities)
-    
-      nTumorClusters <- nTumorClusters + 1
-      oligoclonal <- FALSE
-      print(clusterSize)
-      print(cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])
-      if(splittingProbs > (cutoffsSplittingProbs[(cutoffsSplittingProbs$clusterSize == clusterSize), 2])){
-        nOligoclonalClusters1 <- nOligoclonalClusters1 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary1 <- rbind(splittingSummary1, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-      oligoclonal <- FALSE
-      if(branchingProbs > cutoffsBranchingProbabilities[(cutoffsBranchingProbabilities$clusterSize == clusterSize), 2]){
-        nOligoclonalClusters2 <- nOligoclonalClusters2 + 1
-        oligoclonal <- TRUE
-      }
-      splittingSummary2 <- rbind(splittingSummary2, data.frame(Color = color, Oligoclonal = oligoclonal, ClusterSize = clusterSize))
-    }
-  })
-}
-
-
-numberOfCancerClusters <- sampleDescription %>%
-    filter(WBC ==0 &  color != 'gray93') %>%
-    group_by(color) %>%
-    filter(n() > 1) %>%
-    pull(color) %>%
-    unique() %>% length() 
-
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 1', nOligoclonalClusters1, numberOfCancerClusters, treeName))
-print(sprintf('%d out of %d clusters were found to be oligoclonal in %s, using method 2', nOligoclonalClusters2, numberOfCancerClusters, treeName))
-print(splittingSummary1)
-print(splittingSummary2)
-```
-
-
-