diff --git a/article/.RData b/article/.RData index 8d8946a10c93df29096639fa1dd7d3ddc1517b26..d0cb02c81f66f20f0e5e408879fa136f4acdae00 100644 Binary files a/article/.RData and b/article/.RData differ diff --git a/article/.RDataTmp1 b/article/.RDataTmp1 deleted file mode 100644 index d23c4208a969b4a3126b84ec16d8327603163176..0000000000000000000000000000000000000000 Binary files a/article/.RDataTmp1 and /dev/null differ diff --git a/article/article_figures.Rmd b/article/article_figures.Rmd index 72b21cb9636131194449735823279fc40769c39e..3dc06022829b9e490f30f502bae6a19e0367753c 100644 --- a/article/article_figures.Rmd +++ b/article/article_figures.Rmd @@ -178,65 +178,6 @@ ggsave("figures/fig2.pdf",grid,width=5,height=8) rm(na_plot,prop_plot) ``` -## Table alleles different between parental strains and F2s - -```{r allele} -#investigation of the role of mark_allele function -#prove that some marker with non corresponding alleles between parents and F2 - -#keep only markers that are exlcuded with mark_allele -allele <- tab2%>% - filter(exclude_allele==1&exclude_poly==0&exclude_prop==0) -strains_allele <- strns_ref %>% filter(marker %in% allele$marker) - -#join with strains genotypes to have parental strains -allele <- left_join(allele,strains_allele,by=c("marker"="marker")) - - -#most of markers excluded with mark_allele that were not excluded with other functions have N/H as genotype for parents -#keep only those with non missing/heterozygous genotypes -allele %<>% filter(parent1 != "N" & parent2 != "N") -allele %<>% select(marker,parent1,parent2,allele_1,allele_2) - -#number of markers in such situation -count(tab2%>% - filter(exclude_allele==1)) - -#keep only beggining of the table -allele <- allele[1:6,] - -print(allele) - -print(xtable::xtable(allele, type = "latex"), file = "tables/tab_alleles.tex",include.rownames=FALSE) -rm(allele,strains_allele) -``` - -## Graph number of markers kept after each function - -```{r barplot} -none <- tab2 %>% nrow() -match <- tab2 
%>% filter(exclude_match==0) %>% nrow() -allele <- tab2 %>% filter(exclude_match==0&exclude_allele==0) %>% nrow() -naf <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0) %>% nrow() -poly <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0) %>% nrow() -prop <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0&exclude_prop==0) %>% nrow() - - -functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) + - geom_bar(stat="identity",width=0.6) + - geom_text(aes(label=markers), hjust=1.3, color="white", size=3.5) + - scale_y_discrete(limits=c("prop","poly", "na", "allele","match","none")) + - theme(aspect.ratio=0.7) + - labs(title="Number of markers kept after each step", - x="Number of markers", - y="Function used") + - theme_classic() + - theme(plot.title = element_text(hjust = 0.4,face="bold",size=14)) - -functions_plot -rm(none,allele,match,poly,prop,barplot_df) -``` - ## Graph before after ### Before: creation of Rqtl csv file @@ -431,11 +372,71 @@ dif <- full_join(strains,strns_ref,by=c("marker","chr","cM_cox")) %>% dif %>% filter(dif==1) %>% count() ``` +## Table alleles different between parental strains and F2s + +```{r allele} +#investigation of the role of the mark_allele function +#show that some markers have alleles that do not correspond between parents and F2 + +#keep only markers that are excluded by mark_allele and by none of the poly, prop, na and estmap filters +allele <- tab2%>% + filter(exclude_allele==1&exclude_poly==0&exclude_prop==0&exclude_na==0&exclude_estmap==0) +strains_allele <- strains %>% filter(marker %in% allele$marker) + +#number of markers not excluded by the other functions (poly, prop, na, estmap) +tab2 %>% + filter(exclude_poly==0&exclude_prop==0&exclude_na==0&exclude_estmap==0) %>% nrow() + +#join with strains genotypes to get the parental strains' genotypes +allele <- left_join(allele,strains_allele,by=c("marker"="marker")) + + +#most markers excluded by mark_allele but not by the other functions have N/H as genotype for the parents 
+#keep only markers whose two parental genotypes are not "N" (NOTE(review): "H" is not filtered here - confirm it cannot occur) +allele %<>% filter(parent1 != "N" & parent2 != "N") +allele %<>% select(marker,parent1,parent2,allele_1,allele_2) + + +print(allele) + +print(xtable::xtable(allele), type = "latex", file = "tables/tab_alleles.tex",include.rownames=FALSE) +rm(allele,strains_allele) +``` + ```{r dif_table} table_dif <- dif %>% filter(dif==1) %>% select(marker,parent1_ref=parent1.y,parent1_geno=parent1.x,parent2_ref=parent2.y,parent2_geno=parent2.x) %>% head() knitr::kable(table_dif) ``` +## Number of markers kept after each function + +```{r barplot} +none <- tab2 %>% nrow() +match <- tab2 %>% filter(exclude_match==0) %>% nrow() +allele <- tab2 %>% filter(exclude_match==0&exclude_allele==0) %>% nrow() +naf <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0) %>% nrow() +poly <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0) %>% nrow() +prop <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0&exclude_prop==0) %>% nrow() +estmap <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0&exclude_prop==0&exclude_estmap==0) %>% nrow() +#one row per step: markers still kept once that filter and all previous ones are applied +functions_df <- tibble(fct=c("none","match","allele","na","poly","prop","estmap"), + markers=c(none,match,allele,naf,poly,prop,estmap)) +#theme_classic() is a complete theme and must come before the theme() tweaks, otherwise it discards them +functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) + + geom_col(width=0.6) + + geom_text(aes(label=markers), hjust=1.3, color="white", size=3.5) + + scale_y_discrete(limits=c("estmap","prop","poly", "na", "allele","match","none")) + + theme_classic() + + labs(title="Number of markers kept after each step", + x="Number of markers", + y="Function used") + + theme(aspect.ratio=0.7) + + theme(plot.title = element_text(hjust = 0.4,face="bold",size=14)) +#display the plot, then drop the temporaries created in this chunk +functions_plot +rm(none,match,allele,naf,poly,prop,estmap,functions_df) +``` + # Pheno data format ```{r pheno} format_pheno <- phenos[1:6,] diff --git a/article/tables/tab_alleles.tex 
b/article/tables/tab_alleles.tex new file mode 100644 index 0000000000000000000000000000000000000000..a3b393a4572c90e815405f68c25c7e22d1c8e0b7 --- /dev/null +++ b/article/tables/tab_alleles.tex @@ -0,0 +1,17 @@ +% latex table generated in R 4.0.4 by xtable 1.8-4 package +% Tue Feb 15 09:50:40 2022 +\begin{table}[ht] +\centering +\begin{tabular}{lllll} + \hline +marker & parent1 & parent2 & allele\_1 & allele\_2 \\ + \hline +S6J017555686 & C & C & T & C \\ + S6J113080150 & G & G & A & G \\ + gJAX00038569 & C & C & T & C \\ + mUNC21540855 & C & C & A & C \\ + gUNC21555204 & T & T & T & C \\ + gUNC21596600 & A & A & A & G \\ + \hline +\end{tabular} +\end{table}