Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pair programming #67

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 43 additions & 35 deletions inst/rmd/scratch_gimap_GI_review.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ devtools::load_all()
## Get the gimap GI results

```{r}
gimap_dataset <- get_example_data("gimap") %>%
gimap_filter() %>%
gimap_annotate(cell_line = "HELA") %>%
gimap_normalize(
timepoints = "day") %>%
gimap_dataset <- get_example_data("gimap") %>%
gimap_filter() %>%
gimap_annotate(cell_line = "HELA") %>%
gimap_normalize(
timepoints = "day"
) %>%
calc_crispr() %>%
calc_gi()
```
Expand Down Expand Up @@ -61,7 +62,6 @@ Columns in the GI Mapping results not in the gimap results seem to include model
### Target overlap

```{r}
length(unique(gimap_dataset$normalized_log_fc$pgRNA_target))
length(unique(old_gi_results$pgRNA_target))
Expand Down Expand Up @@ -90,45 +90,53 @@ While the replicate values themselves can't be completely synced like we've done

```{r}
# Preview the first rows of old_gi_results before joining it with the gimap scores
head(old_gi_results)
```

```{r}
joindf <- dplyr::full_join(old_gi_results, gimap_dataset$gi_scores,
by = c("pgRNA_target" = "pgRNA_target_double", "rep"),
suffix = c("_old", "_new"))
by = c("pgRNA_target" = "pgRNA_target_double", "rep"),
suffix = c("_old", "_new")
)
```

## Split out the comparisons we want to make

Have to drop NA's because the gimap results include Day05_RepA_early datapoints while the GI Mapping doesn't. Also have to drop NAs because there are GI Mapping targets that aren't represented in the gimap targets?
We have to drop NAs because the gimap results include `Day05_RepA_early` datapoints while the GI Mapping results don't. We may also have to drop NAs because there appear to be GI Mapping targets that aren't represented in the gimap targets (to be confirmed).

```{r}
# Stack three per-target_type comparisons of old_gi_results (mean_GI_score)
# against gimap_dataset$gi_scores into one long data frame. Each piece is
# renamed positionally via `colnames<-`, so the column order produced by the
# preceding select()/mutate() matters.
joined_df <- rbind(
# gene_ctrl: build a "<gene1>_ctrl" key from pgRNA_target_double and pair
# mean_GI_score with single_target_gi_score_1.
# NOTE(review): this join also matches paralog_pair to pgRNA_target_double,
# which the ctrl_gene join below does not -- confirm the difference is intended.
full_join(
old_gi_results %>% filter(target_type == "gene_ctrl") %>% select(c("pgRNA_target", "paralog_pair", "mean_GI_score", "rep")),
gimap_dataset$gi_scores %>% separate(pgRNA_target_double, c("gene1", "gene2"), sep="_", remove = FALSE) %>% mutate(ctrl = "ctrl") %>% unite("pgRNA_target_summary", c("gene1", "ctrl"), sep="_") %>% select(c("pgRNA_target_summary", "pgRNA_target_double", "single_target_gi_score_1", "rep")),
by = c("pgRNA_target" = "pgRNA_target_summary", "paralog_pair"="pgRNA_target_double", "rep"),
suffix = c("_old", "_new")
) %>% mutate(target_type = "gene_ctrl") %>% distinct() %>% select(c("pgRNA_target", "mean_GI_score", "rep", "single_target_gi_score_1", "target_type")) %>% `colnames<-`(c("pgRNA_target", "GI_Mapping_GI_score", "rep", "gimap_GI_score", "target_type")) %>% drop_na(),
# ctrl_gene: build a "ctrl_<gene2>" key from pgRNA_target_double and pair
# mean_GI_score with single_target_gi_score_2 (joined on the key and rep only).
full_join(
old_gi_results %>% filter(target_type == "ctrl_gene") %>% select(c("pgRNA_target", "paralog_pair", "mean_GI_score", "rep")),
gimap_dataset$gi_scores %>% separate(pgRNA_target_double, c("gene1", "gene2"), sep="_", remove=FALSE) %>% mutate(ctrl = "ctrl") %>% unite("pgRNA_target_summary", c("ctrl", "gene2"), sep="_") %>% select(c("pgRNA_target_summary", "pgRNA_target_double", "single_target_gi_score_2", "rep")),
by = c("pgRNA_target" = "pgRNA_target_summary", "rep"),
suffix = c("_old", "_new")
) %>% mutate(target_type = "ctrl_gene") %>% distinct() %>% select(c("pgRNA_target", "mean_GI_score", "rep", "single_target_gi_score_2", "target_type")) %>% `colnames<-`(c("pgRNA_target", "GI_Mapping_GI_score", "rep", "gimap_GI_score", "target_type")) %>% drop_na(),
# gene_gene: pgRNA_target is matched directly to pgRNA_target_double, pairing
# mean_GI_score with double_target_gi_score; no key rebuilding is needed here.
full_join(
old_gi_results %>% filter(target_type == "gene_gene") %>% select(c("pgRNA_target", "mean_GI_score", "rep")),
gimap_dataset$gi_scores %>% select(c("pgRNA_target_double", "double_target_gi_score", "rep")),
by = c("pgRNA_target" = "pgRNA_target_double", "rep"),
suffix = c("_old", "_new")
) %>% mutate(target_type = "gene_gene") %>% `colnames<-`(c("pgRNA_target", "GI_Mapping_GI_score", "rep", "gimap_GI_score", "target_type")) %>% drop_na()
)
# Spread the GI Mapping scores so that every target_type value becomes its own
# column of mean_GI_score, keyed by paralog_pair and rep.
old_gi_results_wide <- tidyr::pivot_wider(
  dplyr::select(old_gi_results, paralog_pair, target_type, mean_GI_score, rep),
  names_from = target_type,
  values_from = mean_GI_score
)

# Pair the gimap scores with the widened GI Mapping scores per target and
# replicate so the two sets of scores can be plotted against each other.
joined_df <- dplyr::full_join(
  gimap_dataset$gi_scores,
  old_gi_results_wide,
  by = c("pgRNA_target_double" = "paralog_pair", "rep" = "rep")
)

# Discard rows whose replicate label contains "Day05".
joined_df <- dplyr::filter(joined_df, !grepl("Day05", rep))
```

```{r}
joined_df %>% ggplot(aes(x=gimap_GI_score, y=GI_Mapping_GI_score, color=target_type)) + geom_point() + facet_wrap(rep~target_type) + theme(legend.position = "none")
```
# Scatter of gimap single_target_gi_score_1 against the GI Mapping gene_ctrl
# score, one panel per replicate.
ggplot(joined_df, aes(x = single_target_gi_score_1, y = gene_ctrl)) +
  geom_point() +
  facet_wrap(~rep) +
  theme(legend.position = "none")
```

```{r}
# Scatter of gimap single_target_gi_score_2 against the GI Mapping ctrl_gene
# score, one panel per replicate.
ggplot(joined_df, aes(x = single_target_gi_score_2, y = ctrl_gene)) +
  geom_point() +
  facet_wrap(~rep) +
  theme(legend.position = "none")
```

```{r}
# Scatter of gimap double_target_gi_score against the GI Mapping gene_gene
# score, one panel per replicate.
ggplot(joined_df, aes(x = double_target_gi_score, y = gene_gene)) +
  geom_point() +
  facet_wrap(~rep) +
  theme(legend.position = "none")
```
Loading