From e0da1d64c9a51e8826c02a2505ebacb380161e29 Mon Sep 17 00:00:00 2001
From: Federico Lopez <flopezo84@gmail.com>
Date: Sat, 19 Sep 2020 11:35:40 +0100
Subject: [PATCH] Delete amel_bter_queen_brains_correlation.Rmd

---
 .../amel_bter_queen_brains_correlation.Rmd    | 410 ------------------
 1 file changed, 410 deletions(-)
 delete mode 100644 04-species_comparison/amel_bter_queen_brains_correlation.Rmd

diff --git a/04-species_comparison/amel_bter_queen_brains_correlation.Rmd b/04-species_comparison/amel_bter_queen_brains_correlation.Rmd
deleted file mode 100644
index 0fe0011..0000000
--- a/04-species_comparison/amel_bter_queen_brains_correlation.Rmd
+++ /dev/null
@@ -1,410 +0,0 @@
----
-title: "Comparison of _A. mellifera_ and _B. terrestris_ queen brains"
-author: "Alicja Witwicka, Federico Lopez"
-date: '`r Sys.Date()`'
-output:
-  github_document:
-    toc: yes
-  pdf_document: 
-    fig_caption: yes
-    toc: yes
-  html_document:
-    toc: yes
-editor_options: 
-  chunk_output_type: console
-geometry: margin = 1cm
----
-
-```{r setup, echo = FALSE}
-knitr::opts_chunk$set(
-  echo = TRUE,
-  message = FALSE,
-  warning = FALSE,
-  cache.lazy = FALSE,
-  include = FALSE,
-  out.height = "\textheight",
-  out.width = "\textwidth"
-)
-```
-
-# Load libraries
-```{r}
-load_cran_pkgs <- function(pkg) {
-  sapply(pkg, require, character.only = TRUE)
-}
-cran_pkgs <- c(
-  "BiocManager", "tidyverse", "GGally", "RColorBrewer", "pheatmap", "styler",
-  "here"
-)
-load_cran_pkgs(cran_pkgs)
-
-load_bioconductor_pkgs <- function(pkg) {
-  sapply(pkg, require, character.only = TRUE)
-}
-bioconductor_pkgs <- c(
-  "biomaRt", "rhdf5", "tximport", "DESeq2", "vsn", "hexbin"
-)
-load_bioconductor_pkgs(bioconductor_pkgs)
-```
-
-# Set a custom `ggplot` theme
-```{r}
-# Set a custom ggplot theme
-library(ggthemr)
-custom_palette <- c(
-  "#9E9E9E", "#59BDEF", "#EBCC2A", "#E1AF00", "#F27C8D", "#7B73D1", "#7B9FE0",
-  "#F5CC7F", "#66C2A5", "#28A7B6", "#F2CB57", "#F2A057", "#F2845C"
-)
-# ggthemr::colour_plot(custom_palette)
-
-custom_theme <- ggthemr::define_palette(
-  swatch = custom_palette,
-  gradient = c(lower = "#59BDEF", upper = "#FF6B5A")
-)
-ggthemr::ggthemr(custom_theme)
-```
-
-# Associate transcripts to genes
-I used biomaRt to associate transcripts to genes (from Ensembl Metazoa) 
-seperately for both species.
-```{r}
-biomaRt::listMarts(host = "metazoa.ensembl.org")
-# biomaRt::listMarts(host = "eg40-metazoa.ensembl.org") # version 40
-
-amel_mart <- biomaRt::useMart(
-  biomart = "metazoa_mart",
-  dataset = "amellifera_eg_gene",
-  host = "metazoa.ensembl.org"
-)
-
-bter_mart <- biomaRt::useMart(
-  biomart = "metazoa_mart",
-  dataset = "bterrestris_eg_gene",
-  host = "metazoa.ensembl.org"
-)
-
-amel_transcript_to_gene <- biomaRt::getBM(
-  attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
-  mart = amel_mart
-)
-
-bter_transcript_to_gene <- biomaRt::getBM(
-  attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
-  mart = bter_mart
-)
-```
-
-# Create vectors of nAChR gene identifiers
-```{r}
-amel_nachrs <- c(
-  "GB42850", "GB42644", "GB47845", "GB43275", "GB50159", "GB43416", "GB53053",
-  "GB40923", "GB53427", "GB53055", "GB53428"
-)
-
-bter_nachrs <- c(
-  "LOC100643274", "LOC100643282", "LOC100645032", "LOC100646787",
-  "LOC100647301", "LOC100647350", "LOC100647624", "LOC100648987",
-  "LOC100649515", "LOC100649612", "LOC100649796"
-)
-```
-
-# Introduction
-## Questions
-1. Are there differences in the use of nAChR subunits in the queen brains 
-of _A. mellifera_ and _B. terrestris_?
-
-Datasets used:  
-Manfredini et al. 2015 (amel)  
-Manfredini et al. 2018 (bter)  
-
-# Import `kallisto` quantifications
-I imported the  `kallisto` quantifications using `tximport`. Then, I used the
-accompanying metadata to generate `DESeqDataSet` objects.
-
-## _Apis mellifera_
-```{r}
-# Set sample identifiers using the names of the kallisto output directories
-amel_sample_ids <- dir(here::here(
-  "2015-manfredini_queen_brains", "2020-06-18-kallisto", "results"
-))
-
-# Read metadata table
-amel_samples_table <- read.csv(
-  here::here("amel_metadata", "2015-manfredini_queen_brains_metadata.csv"),
-  header = TRUE,
-  stringsAsFactors = TRUE
-)
-
-# Read .h5 files in
-amel_input_files <- here::here(
-  "2015-manfredini_queen_brains", "2020-06-18-kallisto", "results",
-  amel_samples_table$sample,
-  "abundance.h5"
-)
-
-# Assign names to files
-names(amel_input_files) <- amel_samples_table$sample
-
-# Check that sample names match
-all(amel_sample_ids %in% amel_samples_table$sample)
-
-# Check that all files exist
-all(file.exists(amel_input_files))
-
-# Import kallisto quantifications
-txi_kallisto_amel <- tximport::tximport(amel_input_files,
-  type = "kallisto",
-  tx2gene = amel_transcript_to_gene,
-  txOut = FALSE
-)
-```
-
-## _Bombus terrestris_
-```{r}
-# Set sample identifiers using the names of the kallisto output directories
-bter_sample_ids <- dir(here::here(
-  "2017-manfredini_queen_brain", "2020-08-08-kallisto", "results"
-))
-
-# Read metadata table
-bter_samples_table <- read.csv(
-  here::here("bter_metadata", "2017-manfredini_queen_brain_metadata.csv"),
-  header = TRUE,
-  stringsAsFactors = TRUE
-)
-
-# Read .h5 files in
-bter_input_files <- here::here(
-  "2017-manfredini_queen_brain", "2020-08-08-kallisto", "results",
-  bter_samples_table$sample,
-  "abundance.h5"
-)
-
-# Assign names to files
-names(bter_input_files) <- bter_samples_table$sample
-
-# Check that sample names match
-all(bter_sample_ids %in% bter_samples_table$sample)
-
-# Check that all files exist
-all(file.exists(bter_input_files))
-
-# Import kallisto quantifications
-bter_txi_kallisto <- tximport::tximport(bter_input_files,
-  type = "kallisto",
-  tx2gene = bter_transcript_to_gene,
-  txOut = FALSE
-)
-```
-
-# Create `DESeqDataSet` objects
-```{r}
-amel_dds <- DESeq2::DESeqDataSetFromTximport(txi_kallisto_amel,
-  colData = amel_samples_table,
-  design = ~1
-)
-
-bter_dds <- DESeq2::DESeqDataSetFromTximport(bter_txi_kallisto,
-  colData = bter_samples_table,
-  design = ~1
-)
-```
-
-# Normalise counts and calculate expression proportions
-```{r}
-# Pre-filter low count genes
-amel_dds <- DESeq2::estimateSizeFactors(amel_dds)
-dim(amel_dds)
-keep <- rowSums(DESeq2::counts(amel_dds) >= 10) >= 4
-amel_dds <- amel_dds[keep, ]
-dim(amel_dds)
-
-bter_dds <- DESeq2::estimateSizeFactors(bter_dds)
-dim(bter_dds)
-keep <- rowSums(DESeq2::counts(bter_dds) >= 10) >= 4
-bter_dds <- bter_dds[keep, ]
-dim(bter_dds)
-
-# Create data frames of normalised counts and calculate expression proportions
-# Discard data from divergent subunits
-divergent_subunits <- c("alpha9", "beta2")
-
-# Apis mellifera
-amel_counts <- DESeq2::counts(amel_dds, normalized = TRUE)
-amel_counts <- amel_counts %>%
-  as.data.frame() %>%
-  tibble::rownames_to_column(var = "ens_gene") %>%
-  tidyr::gather(key = "sample", value = "norm_count", -ens_gene) %>%
-  dplyr::mutate_if(is.numeric, round, digits = 4) %>%
-  dplyr::filter(ens_gene %in% amel_nachrs) %>%
-  dplyr::group_by(sample) %>%
-  dplyr::mutate(proportion = norm_count / sum(norm_count)) %>%
-  dplyr::mutate(subunit = case_when(
-    ens_gene == "GB42850" ~ "alpha1",
-    ens_gene == "GB42644" ~ "alpha2",
-    ens_gene == "GB47845" ~ "alpha3",
-    ens_gene == "GB43275" ~ "alpha4",
-    ens_gene == "GB50159" ~ "alpha5",
-    ens_gene == "GB43416" ~ "alpha6",
-    ens_gene == "GB53053" ~ "alpha7",
-    ens_gene == "GB40923" ~ "alpha8",
-    ens_gene == "GB53427" ~ "alpha9",
-    ens_gene == "GB53055" ~ "beta1",
-    ens_gene == "GB53428" ~ "beta2"
-  )) %>%
-  dplyr::mutate(species = "amel") %>%
-  dplyr::filter(!subunit %in% divergent_subunits) # Remove divergent subunits
-
-bter_counts <- DESeq2::counts(bter_dds, normalized = TRUE)
-bter_counts <- bter_counts %>%
-  as.data.frame() %>%
-  tibble::rownames_to_column(var = "ens_gene") %>%
-  tidyr::gather(key = "sample", value = "norm_count", -ens_gene) %>%
-  dplyr::mutate_if(is.numeric, round, digits = 4) %>%
-  dplyr::filter(ens_gene %in% bter_nachrs) %>%
-  dplyr::filter(ens_gene != "LOC100645032") %>% # Exclude low-expression subunit
-  dplyr::group_by(sample) %>%
-  dplyr::mutate(proportion = norm_count / sum(norm_count)) %>%
-  dplyr::mutate(subunit = case_when(
-    ens_gene == "LOC100647624" ~ "alpha1",
-    ens_gene == "LOC100647301" ~ "alpha2",
-    ens_gene == "LOC100647350" ~ "alpha3",
-    ens_gene == "LOC100648987" ~ "alpha4e5",
-    ens_gene == "LOC100649515" ~ "alpha5",
-    ens_gene == "LOC100643274" ~ "alpha6",
-    ens_gene == "LOC100649796" ~ "alpha7",
-    ens_gene == "LOC100643282" ~ "alpha8",
-    ens_gene == "LOC100649612" ~ "beta1",
-    ens_gene == "LOC100646787" ~ "beta2"
-  )) %>%
-  dplyr::mutate(species = "bter") %>%
-  dplyr::filter(!subunit %in% divergent_subunits) # Remove divergent subunits
-
-# Join the data frames
-counts_nachrs <- dplyr::bind_rows(amel_counts, bter_counts)
-
-# Recode subunit "alpha4" in Bombus terrestris
-counts_nachrs$subunit <- dplyr::recode(counts_nachrs$subunit,
-  "alpha4e5" = "alpha4"
-)
-
-# Plot proportion densities
-ggplot(counts_nachrs, aes(x = proportion, color = species)) +
-  geom_density()
-
-# Plot densities of log counts
-# Add pseudocount equal to 1
-ggplot(counts_nachrs, aes(x = norm_count + 1, color = species)) +
-  geom_density() +
-  scale_x_continuous(trans = "log10")
-
-# Plot the proportions to compare each subunit between species
-# Match the colors assigned to subunits in other figures (Fig. 1)
-# Create vector of colors for subunits
-# Although included here, the colors of divergent subunits will not be plotted
-subunit_breaks <- c(
-  "alpha1", "alpha2", "alpha3", "alpha4", "alpha5", "alpha6", "alpha7",
-  "alpha8", "alpha9", "beta1", "beta2"
-)
-subunit_colors <- c(
-  "#59BDEF", "#EBCC2A", "#E1AF00", "#F27C8D", "#7B73D1", "#7B9FE0",
-  "#F5CC7F", "#66C2A5", "#28A7B6", "#F2CB57", "#F2A057"
-)
-
-ggplot(counts_nachrs, aes(
-  x = species, y = proportion, color = subunit, shape = subunit
-)) +
-  geom_point(size = 2, stroke = 1) +
-  scale_shape_manual(values = 0:10) +
-  scale_color_manual(breaks = subunit_breaks, values = subunit_colors) +
-  theme(
-    plot.title = element_text(hjust = 0.5, size = 16, face = "plain"),
-    strip.text = element_text(size = 12, face = "plain", hjust = 0.5),
-    legend.position = "bottom",
-    axis.title = element_text(size = 14),
-    axis.text = element_text(size = 12),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    legend.text = element_text(size = 12)
-  ) +
-  facet_wrap(~subunit, ncol = 10)
-
-ggsave(
-  filename = here::here("results", "amel_bter_queens_brains_stripchart.pdf"),
-  width = 8,
-  height = 5,
-  units = "in"
-)
-```
-
-# Use the median of subunit proportions to test for correlation between species
-For each subunit per species, calculate the median over replicates. Use the 
-median values to test for the association in subunit expression between species.
-```{r}
-median_proportions <- counts_nachrs %>%
-  dplyr::ungroup() %>% # Remove grouping variable previously created
-  dplyr::select(species, subunit, proportion) %>%
-  dplyr::group_by(species, subunit) %>%
-  dplyr::summarise_at(vars(-group_cols()), ~ median(.)) %>%
-  dplyr::rename(median_proportion = proportion)
-
-# Verify that the median values match the expected results
-counts_nachrs %>%
-  dplyr::filter(subunit == "alpha1" & species == "amel") %>%
-  dplyr::pull(var = proportion)
-median(c(0.0213, 0.0264, 0.0161, 0.0310))
-# 0.02385
-
-counts_nachrs %>%
-  dplyr::filter(subunit == "alpha2" & species == "amel") %>%
-  dplyr::pull(var = proportion)
-median(c(0.123, 0.187, 0.205, 0.201))
-# 0.194
-
-# Change to wide format to run the correlation test
-median_proportions_wide <- median_proportions %>%
-  tidyr::spread(key = species, value = median_proportion)
-
-# Run correlation test using the Spearman method
-cor.test(
-  median_proportions_wide$amel,
-  median_proportions_wide$bter,
-  method = "spearman"
-)
-
-# Create a scatterplot with the median values
-ggplot(median_proportions_wide, aes(
-  x = amel, y = bter, color = subunit, fill = subunit, shape = subunit
-)) +
-  geom_point(size = 4, stroke = 2) +
-  scale_shape_manual(values = c(3, 4, 7:9, 12, 21:25)) +
-  scale_color_manual(breaks = subunit_breaks, values = subunit_colors) +
-  scale_fill_manual(breaks = subunit_breaks, values = subunit_colors) +
-  theme(
-    plot.title = element_text(hjust = 0.5, size = 16, face = "plain"),
-    strip.text = element_text(size = 12, face = "plain", hjust = 0.5),
-    legend.position = "bottom",
-    axis.title = element_text(size = 14),
-    axis.text = element_text(size = 12),
-    legend.text = element_text(size = 12)
-  ) +
-  labs(
-    title = "Proportion of nAChR subunit expression in queen brains",
-    x = expression(paste(
-      "Proportion of expression in ", 
-      italic("A. mellifera")
-    )),
-    y = expression(paste(
-      "Proportion of expression in ", 
-      italic("B. terrestris")
-    ))
-  )
-
-ggsave(
-  filename = here::here(
-    "results", "amel_bter_queens_brains_scatterplot_median_proportions.pdf"
-  ),
-  width = 7,
-  height = 5,
-  units = "in"
-)
-```