From 458a265dfeebf690217698f12c73d9fcd675849d Mon Sep 17 00:00:00 2001
From: Matthieu Haudiquet <matthieu.haudiquet@pasteur.fr>
Date: Tue, 1 Oct 2019 19:23:24 +0200
Subject: [PATCH] tkt

---
 .Rhistory        | 771 +++++++++++++++++++++++++++++++----------------
 R/plot_dna_ref.R |   6 +-
 2 files changed, 513 insertions(+), 264 deletions(-)

diff --git a/.Rhistory b/.Rhistory
index 8b71011..ac469bb 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,263 +1,512 @@
-library(readxl)
+x_lims_list <- to_xlims$lim
+genoPlotR::plot_gene_map(dna_segs = list(ref_seq, query_seq), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+comparison_table$col <- apply_color_scheme(comparison_table$pident, color_scheme = "grey")
+class(comparison_table) <-  c("comparison", "data.frame")
+to_xlims <- bind_rows(ref_seq, query_seq, .id = "id") %>%
+group_by(id) %>%
+summarise(
+lim1 = 1,
+lim2 = pmax( max(start), max(end))
+) %>%
+select(id, lim1, lim2) %>%
+group_by(id) %>%
+mutate(
+lim = ifelse(test = reverse_query==T & id==2, list(c(lim2,lim1)), list(c(lim1,lim2)) )
+)
+x_lims_list <- to_xlims$lim
+genoPlotR::plot_gene_map(dna_segs = list(ref_seq, query_seq), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+ref <- "../data-raw/NC_024711_ori_RepL_function.gbk"
+query <- "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv"
+comp <- "../data-raw/BBH_18_49_Crass_V2.txt"
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80")
+ref_seq <- read_dna_seg_from_genbank(file = query, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80")
+try(
+ref_seq <- read_dna_seg_from_genbank(file = query, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80")
+)
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80")
+)
+try(
+ref_seq <- read_dna_seg_from_tab(file = ref, header = T, gene_type = "arrows", col = "black", fill = "grey80")
+)
+query_seq <- read_dna_seg_from_tab(file = query, header = T, gene_type = "arrows", col = "black", fill = "grey80")
+try(
+ref_seq <- read_dna_seg_from_tab(file = ref, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# Try either gbk or table for the ref
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+ref_seq <- read_dna_seg_from_tab(file = ref, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# Try either gbk or table for the query
+try(
+query_seq <- read_dna_seg_from_tab(file = query, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+query_seq <- read_dna_seg_from_genbank(file = query, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+ref_seq <- read_dna_seg_from_tab(file = ref, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# Try either gbk or table for the query
+try(
+query_seq <- read_dna_seg_from_tab(file = query, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+query_seq <- read_dna_seg_from_genbank(file = query, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# BBH table. Would be better to change header names I think
+comparison_bbh <- read.table(file = comp, col.names = c("prot_element_1","prot_element_2","element_2","element_1","pid"), stringsAsFactors = F) %>%
+select(prot_element_1, prot_element_2, pid)
+# Extract position
+bbh_position_table <- bind_rows(ref_seq, query_seq) %>%
+select(name, start, end)
+# Bind the positions for the comparison table. Reverse the coords if the query is gonna be reversed.
+comparison_table <- comparison_bbh %>%
+left_join(bbh_position_table, by = c("prot_element_1"="name")) %>%
+left_join(bbh_position_table, by = c("prot_element_2"="name"), suffix = c("1", "2")) %>%
+group_by(prot_element_2) %>%
+mutate(
+pident = floor(pid),
+start22 = ifelse(reverse_query==T, end2, start2),
+end22 = ifelse(reverse_query==T, start2, end2)
+) %>%
+ungroup() %>%
+select(start1, end1, start22, end22, pident) %>%
+rename(start2=start22, end2=end22)
+# Apply grey color scheme
+comparison_table$col <- apply_color_scheme(comparison_table$pident, color_scheme = "grey")
+class(comparison_table) <-  c("comparison", "data.frame")
+# Reverse the query if necessary
+to_xlims <- bind_rows(ref_seq, query_seq, .id = "id") %>%
+group_by(id) %>%
+summarise(
+lim1 = 1,
+lim2 = pmax( max(start), max(end))
+) %>%
+select(id, lim1, lim2) %>%
+group_by(id) %>%
+mutate(
+lim = ifelse(test = reverse_query==T & id==2, list(c(lim2,lim1)), list(c(lim1,lim2)) )
+)
+x_lims_list <- to_xlims$lim
+# Plot
+genoPlotR::plot_gene_map(dna_segs = list(ref_seq, query_seq), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+View(query_seq)
+View(ref_seq)
+# Try either gbk or table for the ref
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+View(ref_seq)
+# Try either gbk or table for the ref
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+View(ref_seq)
+# Try either gbk or table for the ref
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+View(ref_seq)
+# BBH table. Would be better to change header names I think
+comparison_bbh <- read.table(file = comp, col.names = c("prot_element_1","prot_element_2","element_2","element_1","pid"), stringsAsFactors = F) %>%
+select(prot_element_1, prot_element_2, pid)
+# Init the colors
+set.seed(42)
+install.packages("randomcoloR")
+library(randomcoloR)
+View(query_seq)
+category <- c(unique(query_seq$category, unique(ref_seq$product)))
+category
+category <- unique(c(query_seq$category, ref_seq$product))
+category
+library(RColorBrewer)
+fill <- c(brewer.pal(n = length(category), name = "Paired"))
+category <- unique(c(query_seq$category, ref_seq$product))
+fill <- c(brewer.pal(n = length(category), name = "Paired"))
+col_categories <- as.data.frame(cbind(category, fill))
+View(col_categories)
+View(query_seq)
+View(col_categories)
+View(col_categories)
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+product = category
+)
+View(col_categories)
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+product = category
+) %>%
+select(category, product, fill)
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)
+ref_annotated <- ref_seq %>%
+select(-fill) %>%
+left_join(col_categories)
+ref_annotated <- ref_seq %>%
+select(-fill) %>%
+left_join(col_categories) %>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+View(ref_annotated)
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)%>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(query_annotated) <- c("dna_seg", "data.frame")
+ref_annotated <- ref_seq %>%
+select(-fill) %>%
+left_join(col_categories) %>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(ref_annotated) <- c("dna_seg", "data.frame")
+# Plot
+genoPlotR::plot_gene_map(dna_segs = list(ref_annotated, query_annotated), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+View(col_categories)
+# Plot legend
+apply_color_scheme(seq(min(all_blast_res$pident),100))
+# Plot legend
+apply_color_scheme(seq(min(comp$pident),100))
+# Plot legend
+apply_color_scheme(seq(min(comparison_table$pident),100))
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = apply_color_scheme(min(comparison_table$pident)), high = "#40404080")
+library(ggplot2)
+library(ggplot2, ggpubr)
 library(ggpubr)
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = apply_color_scheme(min(comparison_table$pident)), high = "#40404080")
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = apply_color_scheme(min(comparison_table$pident)), high = apply_color_scheme(max(comparison_table$pident)))
+apply_color_scheme(max(comparison_table$pident)
+)
+apply_color_scheme(min(comparison_table$pident))
+min(comparison_table$pident)
+max(comparison_table$pident)
+# Plot legend
+grey <- apply_color_scheme(seq(min(comparison_table$pident),100))
+# Plot legend
+grey <- apply_color_scheme(seq(min(comparison_table$pident),100))
+grey
+grey[,1]
+grey[1,]
+grey[1]
+grey[length(grey)]
+# Plot legend
+greyscale <- apply_color_scheme(seq(min(comparison_table$pident),100))
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = greyscale[1], high = greyscale[length(greyscale)])
+get_legend(
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = greyscale[1], high = greyscale[length(greyscale)])
+)
+plot(get_legend(
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = greyscale[1], high = greyscale[length(greyscale)])
+))
+category <- sort(unique(c(query_seq$category, ref_seq$product)))
+fill <- c(brewer.pal(n = length(category), name = "Paired"))
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+product = category
+) %>%
+select(category, product, fill)
+# Put the colors
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)%>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(query_annotated) <- c("dna_seg", "data.frame")
+ref_annotated <- ref_seq %>%
+select(-fill) %>%
+left_join(col_categories) %>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(ref_annotated) <- c("dna_seg", "data.frame")
+# BBH table. Would be better to change header names I think
+comparison_bbh <- read.table(file = comp, col.names = c("prot_element_1","prot_element_2","element_2","element_1","pid"), stringsAsFactors = F) %>%
+select(prot_element_1, prot_element_2, pid)
+# Extract position
+bbh_position_table <- bind_rows(ref_seq, query_seq) %>%
+select(name, start, end)
+# Bind the positions for the comparison table. Reverse the coords if the query is gonna be reversed.
+comparison_table <- comparison_bbh %>%
+left_join(bbh_position_table, by = c("prot_element_1"="name")) %>%
+left_join(bbh_position_table, by = c("prot_element_2"="name"), suffix = c("1", "2")) %>%
+group_by(prot_element_2) %>%
+mutate(
+pident = floor(pid),
+start22 = ifelse(reverse_query==T, end2, start2),
+end22 = ifelse(reverse_query==T, start2, end2)
+) %>%
+ungroup() %>%
+select(start1, end1, start22, end22, pident) %>%
+rename(start2=start22, end2=end22)
+# Apply grey color scheme
+comparison_table$col <- apply_color_scheme(comparison_table$pident, color_scheme = "grey")
+class(comparison_table) <-  c("comparison", "data.frame")
+# Reverse the query if necessary
+to_xlims <- bind_rows(ref_seq, query_seq, .id = "id") %>%
+group_by(id) %>%
+summarise(
+lim1 = 1,
+lim2 = pmax( max(start), max(end))
+) %>%
+select(id, lim1, lim2) %>%
+group_by(id) %>%
+mutate(
+lim = ifelse(test = reverse_query==T & id==2, list(c(lim2,lim1)), list(c(lim1,lim2)) )
+)
+x_lims_list <- to_xlims$lim
+# Plot
+genoPlotR::plot_gene_map(dna_segs = list(ref_annotated, query_annotated), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+# Plot legend
+greyscale <- apply_color_scheme(seq(min(comparison_table$pident),100))
+plot(get_legend(
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = greyscale[1], high = greyscale[length(greyscale)])
+))
+col_categories %>%
+ggplot(aes(x = category))+
+geom_bar(aes(fill = category))
+col_categories %>%
+ggplot(aes(x = category))+
+geom_bar(aes(fill = category)) +
+scale_fill_manual(values = col_categories$fill,
+breaks = col_categories$category)
+# Plot
+genoPlotR::plot_gene_map(dna_segs = list(ref_annotated, query_annotated), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+col_categories$fill
+plot(col_categories$fill)
+plot(col_categories$fill, col = col_categories$fill)
+View(comparison_table)
+View(comparison_table)
+category
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+category = as.character(category),
+product = as.character(category),
+fill = as.character(fill)
+) %>%
+select(category, product, fill)
+category <- sort(unique(c(query_seq$category, ref_seq$product)))
+fill <- c(brewer.pal(n = length(category), name = "Paired"))
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+category = as.character(category),
+product = as.character(category),
+fill = as.character(fill)
+) %>%
+select(category, product, fill)
+# Put the colors
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)%>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+col_categories %>%
+ggplot(aes(x = category))+
+geom_bar(aes(fill = category)) +
+scale_fill_manual(values = col_categories$fill,
+breaks = col_categories$category)
+plot(get_legend(
+col_categories %>%
+ggplot(aes(x = category))+
+geom_bar(aes(fill = category)) +
+scale_fill_manual(values = col_categories$fill,
+breaks = col_categories$category)
+))
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+plot_dna_ref <- function(ref, query, comp, reverse_query = F){
+# Try either gbk or table for the ref
+try(
+ref_seq <- read_dna_seg_from_genbank(file = ref, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+ref_seq <- read_dna_seg_from_tab(file = ref, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# Try either gbk or table for the query
+try(
+query_seq <- read_dna_seg_from_tab(file = query, header = T, gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+try(
+query_seq <- read_dna_seg_from_genbank(file = query, tagsToParse = c("CDS"), gene_type = "arrows", col = "black", fill = "grey80"),
+silent = T
+)
+# Pick the colors
+set.seed(42)
+category <- sort(unique(c(query_seq$category, ref_seq$product)))
+fill <- c(brewer.pal(n = length(category), name = "Paired"))
+col_categories <- as.data.frame(cbind(category, fill)) %>%
+mutate(
+category = as.character(category),
+product = as.character(category),
+fill = as.character(fill)
+) %>%
+select(category, product, fill)
+# Put the colors
+query_annotated <- query_seq %>%
+select(-fill) %>%
+left_join(col_categories)%>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(query_annotated) <- c("dna_seg", "data.frame")
+ref_annotated <- ref_seq %>%
+select(-fill) %>%
+left_join(col_categories) %>%
+mutate(
+col = ifelse(feature == "tRNA", "#ff5145", col)
+) %>%
+as.data.frame()
+class(ref_annotated) <- c("dna_seg", "data.frame")
+# BBH table. Would be better to change header names I think
+comparison_bbh <- read.table(file = comp, col.names = c("prot_element_1","prot_element_2","element_2","element_1","pid"), stringsAsFactors = F) %>%
+select(prot_element_1, prot_element_2, pid)
+# Extract position
+bbh_position_table <- bind_rows(ref_seq, query_seq) %>%
+select(name, start, end)
+# Bind the positions for the comparison table. Reverse the coords if the query is gonna be reversed.
+comparison_table <- comparison_bbh %>%
+left_join(bbh_position_table, by = c("prot_element_1"="name")) %>%
+left_join(bbh_position_table, by = c("prot_element_2"="name"), suffix = c("1", "2")) %>%
+group_by(prot_element_2) %>%
+mutate(
+pident = floor(pid),
+start22 = ifelse(reverse_query==T, end2, start2),
+end22 = ifelse(reverse_query==T, start2, end2)
+) %>%
+ungroup() %>%
+select(start1, end1, start22, end22, pident) %>%
+rename(start2=start22, end2=end22)
+# Apply grey color scheme
+comparison_table$col <- apply_color_scheme(comparison_table$pident, color_scheme = "grey")
+class(comparison_table) <-  c("comparison", "data.frame")
+# Reverse the query if necessary
+to_xlims <- bind_rows(ref_seq, query_seq, .id = "id") %>%
+group_by(id) %>%
+summarise(
+lim1 = 1,
+lim2 = pmax( max(start), max(end))
+) %>%
+select(id, lim1, lim2) %>%
+group_by(id) %>%
+mutate(
+lim = ifelse(test = reverse_query==T & id==2, list(c(lim2,lim1)), list(c(lim1,lim2)) )
+)
+x_lims_list <- to_xlims$lim
+# Plot
+genoPlotR::plot_gene_map(dna_segs = list(ref_annotated, query_annotated), comparisons = list(comparison_table),
+xlims = x_lims_list, dna_seg_labels = c(basename(ref), basename(query)), dna_seg_label_cex = 0.3)
+# Plot legend
+greyscale <- apply_color_scheme(seq(min(comparison_table$pident),100))
+plot(get_legend(
+comparison_table %>%
+ggplot(aes(x = pident, y= pident, fill = pident)) +
+geom_point() +
+scale_fill_gradient(low = greyscale[1], high = greyscale[length(greyscale)])
+))
+plot(get_legend(
+col_categories %>%
+ggplot(aes(x = category))+
+geom_bar(aes(fill = category)) +
+scale_fill_manual(values = col_categories$fill,
+breaks = col_categories$category)
+))
+}
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+usethis::use_package(ggpubr)
+usethis::use_package("ggpubr")
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+cairo_pdf(filename = "test.pdf")
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+dev.off()
+cairo_pdf(onefile = T)
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+dev.off()
+library(plotDNA)
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+usethis::use_data("DATASET")
+library(plotDNA)
+setwd("~/Documents/_Dev/plotDNA/data-raw")
+setwd("~/Documents/_Dev/plotDNA/data-raw")
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+devtools::install_git(url = "https://gitlab.pasteur.fr/mahaudiq/plot-dna/")
+library(devtools)
+install_git(url = "https://gitlab.pasteur.fr/mahaudiq/plot-dna/")
+install_git(url = "https://gitlab.pasteur.fr/mahaudiq/plot-dna/")
+install.packages("dplyr")
+if (!require('devtools')) install.packages('devtools'); library('devtools')
+library(plotDNA)
+plot_dna_ref(ref ="../data-raw/NC_024711_ori_RepL_function.gbk", query =  "../data-raw/ALL_18_ALL_0000049.prt.details.fixed.tsv", comp =  "../data-raw/BBH_18_49_Crass_V2.txt", reverse_query = T)
+args <- c(1,2,3)
+reverse_or_not <- ifelse( args[4] == 1, T, F)
+reverse_or_not
+reverse_or_not <- if_else( args[4] == 1, T, F, F)
 library(dplyr)
-data <- read_excel("Shld1-mice-phenotype-2.xlsx")
-View(data)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-stat_compare_means()
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_pubclean()+
-stat_compare_means()
-theme_bw(+
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-stat_compare_means()
-data %>%
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-stat_compare_means()
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means()
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x.npc = 1)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x.npc = 0.11)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x.npc = 0.01)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x.npc = 0.0001)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary()
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = "mean")
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = "mean", geom = "line")
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = "mean", geom = "crossbar")
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar")
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.5)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.2)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-geom_point()+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15, alpha = 0.8)
-geom_point(aes(alpha = 0.9))+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point(aes(alpha = 0.9))+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number, alpha = 0.9))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point(aes(alpha = 0.9))+
-theme_bw()+
-ggplot2::stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means()
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 1, label.y = 100)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 1, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = -1, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.1, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.3, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.5, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 200)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 180)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 190)
-class(data)
-class(data$Genotype)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 190) +
-scale_x_discrete(limits=c("WT", "Shld1")))
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 190) +
-scale_x_discrete(limits=c("WT", "Shld1"))
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-scale_x_discrete(limits=c("WT", "Shld1"))+
-stat_compare_means(label.x = 0.6, label.y = 190)
-data$Genotype <- as.factor(data$Genotype)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 190)
-data$Genotype<- factor(data$Genotype,levels = c("WT", "Shld1"))
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-stat_compare_means(label.x = 0.6, label.y = 190)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-ylab("Splenocyte number")+
-stat_compare_means(label.x = 0.6, label.y = 190)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-ylab("Splenocyte number")+
-labs(title = "plot")
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-ylab("Splenocyte number")+
-labs(title = "plot")+
-stat_compare_means(label.x = 0.6, label.y = 190)
-data %>%
-ggplot(aes(x=Genotype, y = Splenocyte_number))+
-stat_summary(fun.y = mean, fun.ymax = mean, fun.ymin = mean, geom = "crossbar", width = 0.15)+
-geom_point()+
-theme_bw()+
-coord_cartesian(ylim = c(0, 200))+
-ylab("Splenocyte number")+
-labs(title = "plot")+
-stat_compare_means(label.x = 0.6, label.y = 190) +
-ggsave(filename = "plot.png", dpi = "retina")
+reverse_or_not <- if_else( args[4] == 1, T, F, F)
+reverse_or_not
+sort("Lysis", "Others", "DNA_metabolism-Regulation-Recombination", "Packaging,Injection&Assembly", "structure", "Structure-lysis", "Unknown")
+sort(c("Lysis", "Others", "DNA_metabolism-Regulation-Recombination", "Packaging,Injection&Assembly", "structure", "Structure-lysis", "Unknown"))
+sort(c("Lysis", "Others", "DNA_metabolism-Regulation-Recombination", "Packaging-Injection-Assembly", "Structure", "Structure-lysis", "Unknown"))
diff --git a/R/plot_dna_ref.R b/R/plot_dna_ref.R
index 90215f1..0f3ba0f 100644
--- a/R/plot_dna_ref.R
+++ b/R/plot_dna_ref.R
@@ -42,15 +42,15 @@ plot_dna_ref <- function(ref, query, comp, reverse_query = F){
   )
 
   # Pick the colors
-  set.seed(42)
-
+  set.seed(123)
   category <- sort(unique(c(query_seq$category, ref_seq$product)))
   fill <- c(brewer.pal(n = length(category), name = "Paired"))
+
   col_categories <- as.data.frame(cbind(category, fill)) %>%
     mutate(
       category = as.character(category),
       product = as.character(category),
-      fill = as.character(fill)
+      fill = ifelse( category == "Unknown", "grey80", as.character(fill))
     ) %>%
     select(category, product, fill)
 
-- 
GitLab