From 4a01dc16774d728359fd116e994a20422b7db7da Mon Sep 17 00:00:00 2001
From: Federico Lopez <flopezo84@gmail.com>
Date: Sat, 19 Sep 2020 11:38:39 +0100
Subject: [PATCH] Delete amel_bter_workers_nAChRs_comparison.Rmd

---
 .../amel_bter_workers_nAChRs_comparison.Rmd   | 301 ------------------
 1 file changed, 301 deletions(-)
 delete mode 100755 04-species_comparison/amel_bter_workers_nAChRs_comparison.Rmd
diff --git a/04-species_comparison/amel_bter_workers_nAChRs_comparison.Rmd b/04-species_comparison/amel_bter_workers_nAChRs_comparison.Rmd
deleted file mode 100755
index 65f85c9..0000000
--- a/04-species_comparison/amel_bter_workers_nAChRs_comparison.Rmd
+++ /dev/null
@@ -1,301 +0,0 @@
----
-title: "workers_species_comparison"
-author: "Alicja Witwicka"
-date: "16/06/2020"
-output:
-  github_document:
-    toc: yes
-  pdf_document: 
-    fig_caption: yes
-    toc: yes
-  html_document:
-    toc: yes
-number_sections: true
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-```
-```{r libraries, echo = FALSE, message = FALSE, warning = FALSE}
-#renv::snapshot()
-```
-```{r libraries, echo = FALSE, message = FALSE, warning = FALSE}
-#Load all packages
-library("tximport")
-library("readr")
-library("ggplot2")
-library("DESeq2")
-library("dplyr")
-library("tibble")
-library("MASS")
-library("e1071")
-for (package in (.packages()) ) {
- print(paste("Package", package, "version", packageVersion(package)))
-}
-
-# Set colour palletes 
-custom_palette <- c(
-  "#9E9E9E", "#59BDEF", "#EBCC2A", "#E1AF00", "#F27C8D", "#7B73D1", "#7B9FE0",
-  "#F5CC7F", "#66C2A5", "#28A7B6", "#F2CB57", "#F2A057", "#F2845C"
-)
-custom_theme <- ggthemr::define_palette(
-  swatch = custom_palette,
-  gradient = c( upper = "#59BDEF", lower = "#E1AF00" ))
-ggthemr::ggthemr(custom_theme)
-ggthemr::colour_plot(custom_palette)
-
-```
-## Introduction
-
-# <b>Questions:</b>
-  1. Are there differences in the usage of nAChR subunits in the workers brains of A. mellifera and B. terrestris?
-Datasets used:
-<br>-Jasper et al. 2015 (apis)
-<br>-Porath et al. 2019 (bter)
-## Load data and prepare it fo analysis
-- The data was loaded using tximport, metadata (.csv) was composed and imported
-
-```{r import_files_&_metadata, include = FALSE}
-# Read kallisto output and metadata #
-#############
-### AMEL ###
-############
-# Set sample identifiers using the names of the kallisto output directories
-sample_amel <- dir(file.path("results/2015-jasper_worker_tissues/2020-06-16-kallisto/results/"))
-sample_amel <- tail(sample_amel, 6)
-# Read metadata table
-samples_amel <- read.csv(
-  file.path("amel_metadata/2015-jasper_worker_tissues_metadata.csv"),
-     header = TRUE,
-       stringsAsFactors = TRUE)
-samples_amel <- tail(samples_amel, 6)
-
-# Read .h5 files in
-filenames_amel <- file.path(
-  "results/2015-jasper_worker_tissues/2020-06-16-kallisto/results/",
-  samples_amel$sample,
-  "abundance.h5")
-
-# Assign names to files
-names(filenames_amel) <- samples_amel$sample
-
-# check that all files were assigned 
-all(file.exists(filenames_amel)) 
-#############
-### BTER ###
-############
-# Set sample identifiers using the names of the kallisto output directories
-sample_bter <- dir(file.path("results/2019-porath_worker_brain/2020-07-14-kallisto/results"))
-
-# Read metadata table
-samples_bter <- read.csv(
-  file.path("bter_metadata/2019-porath_worker_brains_metadata.csv"),
-     header = TRUE,
-       stringsAsFactors = TRUE)
-
-# Read .h5 files in
-filenames_bter <- file.path(
-  "results/2019-porath_worker_brain/2020-07-14-kallisto/results",
-  samples_bter$sample,
-  "abundance.h5")
-
-# Assign names to files
-names(filenames_bter) <- samples_bter$sample
-
-# check that all files were assigned 
-all(file.exists(filenames_bter)) 
-```
-- I used biomaRt to associate transcripts to genes (from ensmbl metazoa) seperately for both species...
-- I imported Kallisto output
-
-```{r biomaRt_tximport, include = FALSE}
-#### Associate transcripts to genes ####
-
-# Remove all biomaRt cached files
-#biomaRt::biomartCacheInfo()
-#biomaRt::biomartCacheClear()
-#biomaRt::listAttributes(metazoa_mart_bter)
-#biomaRt::listAttributes(metazoa_mart_amel)
-
-biomaRt::listMarts(host = "metazoa.ensembl.org")
-# biomaRt::listMarts(host = "eg40-metazoa.ensembl.org") # version 40
-# biomaRt::listEnsemblArchives()
-metazoa_mart_amel <- biomaRt::useMart(
-  biomart = "metazoa_mart",
-  dataset = "amellifera_eg_gene",
-  host = "metazoa.ensembl.org")
-#
-metazoa_mart_bter <- biomaRt::useMart(
-  biomart = "metazoa_mart",
-  dataset = "bterrestris_eg_gene",
-  host = "metazoa.ensembl.org")
-
-transcript_to_gene_amel <- biomaRt::getBM(
-  attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
-  mart = metazoa_mart_amel)
-#
-transcript_to_gene_bter <- biomaRt::getBM(
-  attributes = c("ensembl_transcript_id", "ensembl_gene_id"),
-  mart = metazoa_mart_bter)
-
-# Import kallisto quantifications
-txi_kallisto_amel <- tximport::tximport(filenames_amel,
-  type = "kallisto",
-  tx2gene = transcript_to_gene_amel,
-  txOut = FALSE)
-#
-txi_kallisto_bter <- tximport::tximport(filenames_bter,
-  type = "kallisto",
-  tx2gene = transcript_to_gene_bter,
-  txOut = FALSE)
-```
-# 1.Extracting abundances from the data and making a data frame for both species. 
-# 2.Extracting genes of interest (seperate).
-
-```{r ratios, include = FALSE}
-dds_amel <- DESeq2::DESeqDataSetFromTximport(txi_kallisto_amel,
-                                        colData = samples_amel,
-                                        design = ~ 1)
-dds_bter <- DESeq2::DESeqDataSetFromTximport(txi_kallisto_bter,
-                                        colData = samples_bter,
-                                        design = ~ 1)
-
-dds_amel <- DESeq2::estimateSizeFactors(dds_amel)
-dds_amel <- DESeq2::counts(dds_amel, normalized = TRUE)
-dim(dds_amel)
-keep <- rowSums(dds_amel>= 10) >= 4
-dds_amel <- dds_amel[keep,]
-dim(dds_amel)
-
-dds_bter <- DESeq2::estimateSizeFactors(dds_bter)
-dds_bter <- DESeq2::counts(dds_bter, normalized = TRUE)
-dim(dds_bter)
-keep <- rowSums(dds_bter >= 10) >= 4
-dds_bter <- dds_bter[keep,]
-dim(dds_bter)
-
-# Extract genes of interest
-
-amel_nachrs <- c("GB40923", "GB53053", "GB42644", 
-             "GB53055", "GB47845", "GB43416", 
-             "GB42850", "GB43275", "GB53427", 
-             "GB53428", "GB50159")
-names_achrs_amel <- c("alpha8","alpha7","alpha2","beta1","alpha3",
-                 "alpha6","alpha1","alpha4","alpha9","beta2","alpha5")
-gene_amel <- dds_amel[amel_nachrs,]
-row.names(gene_amel) <- names_achrs_amel
-
-
-bter_nachrs <- c(
-  "LOC100643274", "LOC100643282", 
-  "LOC100647301", "LOC100647350", "LOC100647624", "LOC100648987",
-  "LOC100649515", "LOC100649612", "LOC100649796", "LOC100646787"
-  )
-names_achrs_bter <- c(
-  "alpha6", "alpha8", "alpha2", "alpha3", "alpha1", "alpha4", "alpha5", "beta1", "alpha7", "beta2"
-  )
-gene_bter <- dds_bter[bter_nachrs,]
-row.names(gene_bter) <- names_achrs_bter
-```
-
-```{r normalisation, message = FALSE}
-# Calculate sums of each collumn
-col_sum_amel <- as.data.frame(gene_amel) %>% dplyr::summarize_if(is.numeric, sum)
-col_sum_amel
-col_sum_bter <- as.data.frame(gene_bter) %>% dplyr::summarize_if(is.numeric, sum)
-col_sum_bter
-
-
-ratios_amel <- mapply('/', as.data.frame(gene_amel), col_sum_amel)
-row.names(ratios_amel) <- names_achrs_amel
-
-ratios_bter <- mapply('/', as.data.frame(gene_bter), col_sum_bter)
-row.names(ratios_bter) <- names_achrs_bter
-
-```
-
-```{r}
-ratios_amel
-ratios_bter
-
-# Transform amel
-ratios_amel_median <- apply(ratios_amel, 1, FUN = median)
-
-# Transform bter
-ratios_bter_median <- apply(ratios_bter, 1, FUN = median)
-
-# Remove divergent subunits amel
-ratios_amel_median <- ratios_amel_median %>% as.data.frame() %>% tibble::rownames_to_column()
-ratios_amel_median <- subset(ratios_amel_median, rowname!="alpha9" & rowname!="beta2")
-
-# Remove divergent subunits bter
-ratios_bter_median <- ratios_bter_median %>% as.data.frame() %>% tibble::rownames_to_column()
-ratios_bter_median <- subset(ratios_bter_median, rowname!="alpha9" & rowname!="beta2")
-
-
-```
-
-```{r}
-
-# Sort by nAChRs and sample
-ratios_amel <- ratios_amel_median[with(ratios_amel_median, order(ratios_amel_median$rowname)),]
-colnames(ratios_amel) <- c("nAChRs_amel", "value_amel")
-ratios_bter <- ratios_bter_median[with(ratios_bter_median, order(ratios_bter_median$rowname)),]
-colnames(ratios_bter) <- c("nAChRs_bter", "value_bter")
-# Join the data frames
-ratios_both <- dplyr::bind_cols(ratios_bter, ratios_amel)
-sapply(ratios_both, mode)
-```
-
-
-
-
-```{r test, message = FALSE}
-
-# Checking for distribution 
-hist(ratios_both$value_amel)
-hist(ratios_both$value_bter)
-
-dens_amel <- density(ratios_both$value_amel)
-dens_bter <- density(ratios_both$value_bter)
-plot(dens_amel)
-plot(dens_bter)
-
-
-# Testing non-parametric Spearman's correlation
-cor.test( ~ value_amel + value_bter,
-         data=ratios_both,
-         method = "spearman",
-         continuity = FALSE,
-         conf.level = 0.95)
-
-
-```
-
-
-
-```{r plots, message = FALSE}
-pch_nachrs <- c(15,16,17,18,19,0,1,2,5,8,3)
-
-#plot
-ggplot2::ggplot(ratios_both) + geom_abline(colour = "grey", linetype = "dashed") + 
-  geom_point(size = 2, aes(x = value_bter, y = value_amel, colour=nAChRs_amel, pch=nAChRs_amel)) + 
-  geom_smooth(formula = y ~ x, colour = "black", size = 0.7, method="lm", se = FALSE, aes(x = value_bter, y = value_amel)) + #facet_wrap(~caste, scales = "free_y") +
-  xlab("Proportion of nAChRs expression in"~italic("B. terrestris")) +
-  ylab("Proportion of nAChRs expression in"~italic("A. mellifera")) +
-  theme(axis.title.x = element_text(colour="gray20", size=10),
-        axis.title.y = element_text(colour="gray20", size=10),
-        axis.text.x = element_text(colour="gray20", size=10),
-        axis.text.y = element_text(colour="gray20", size=10)) + geom_segment(x = 0, y = 0.4, xend = 0, yend = 0.7, colour = "grey", linetype = "dashed") + scale_shape_manual(values = pch_nachrs)
-
-
-
-
-amel_side <- ggplot(species, aes(x= amel_tpm, fill=nAChRs...1)) + 
-geom_histogram() 
-amel_side
-bter_side <- ggplot(species, aes(x= bter_tpm, fill=nAChRs...1)) + 
-geom_histogram() 
-bter_side
-```
-``` {r}