diff --git a/article/.RData b/article/.RData index a664efc58c763b9f841bd59d020adcf0a278cc6a..c0be9003d44b0570135bb255d8094d5c06fa8b9f 100644 Binary files a/article/.RData and b/article/.RData differ diff --git a/article/article_figures.Rmd b/article/article_figures.Rmd index 0d6d24e6550143d29602c578ec73c9332f1e6b24..f22df32c0c4ac8a6bfc4333741b3cff224b5a199 100644 --- a/article/article_figures.Rmd +++ b/article/article_figures.Rmd @@ -44,6 +44,7 @@ Data frame from stuart with phenotypes of 176 F2 individuals for a quantitative data(phenos) summary(phenos) ``` + ## annotation file Annotation file from K.Broman: https://kbroman.org/MUGAarrays/mini_annotations.html @@ -246,26 +247,6 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05" pheno_before_plot ``` -```{r before_ann} -chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") -ann_dat_text<-data.frame( - chr=factor(chrs, - levels=chrs), - lod=c(NA,7.5,NA,NA,15,NA,10.5,10.5,NA,5.5,NA,NA,18.5,NA,NA,NA,NA,NA,NA,NA), - label=c(NA,"1",NA,NA,"2",NA,"3","4",NA,"5",NA,NA,"6",NA,NA,NA,NA,NA,NA,NA), - x=c(NA,17,NA,NA,27,NA,70,10,NA,30,NA,NA,33,NA,NA,NA,NA,NA,NA,NA) -) - -pheno_before_plot + geom_text( - # the new dataframe for annotating text - data = ann_dat_text, - mapping = aes(x = x, - y = lod, - label = label, - color="blue") -) -``` - ```{r before_plot} @@ -378,7 +359,19 @@ pheno_after_plot <- qtl_plot(pheno_after,lod=data.frame(group = c("alpha=0.05", theme(legend.position = "none") + ggtitle("") pheno_after_plot + +qtl_plot(pheno_after,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_after[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="10") + + theme(legend.position = "none") + + ggtitle("") ``` + ### Grid after ```{r grid fig1, fig.height = 7, fig.width = 13, fig.align = "center"} @@ -490,14 +483,58 @@ find_linked_markers(estrf_matrix_after,mark="S6J011498219",annot=annot_mini) Investigation of high lod score peaks +```{r before_ann} +chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") +ann_dat_text<-data.frame( + chr=factor(chrs, + levels=chrs), + lod=c(NA,7.5,NA,NA,15,NA,10.5,10.5,NA,5.5,NA,NA,18.3,NA,NA,NA,NA,NA,NA,NA), + label=c(NA,"1",NA,NA,"2",NA,"3","4",NA,"5",NA,NA,"6",NA,NA,NA,NA,NA,NA,NA), + x=c(NA,17,NA,NA,27,NA,70,10,NA,30,NA,NA,33,NA,NA,NA,NA,NA,NA,NA) +) + +pheno_before_annot <- pheno_before_plot + geom_text( + # the new dataframe for annotating text + data = ann_dat_text, + mapping = aes(x = x, + y = lod, + label = label, + color="blue") +) +pheno_before_annot + +rm(ann_dat_text) +rm(chrs) +``` + + ### Peak 1 -1 peak on 1 pseudomarker : c2.loc10, postionned between gUNC2731905 and gUNCHS004244. +```{r peak1_zoom} +peak1 <- qtl_plot(pheno_before, + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="2", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("Peak 1") +peak1 +``` + + +1 peak on chromosome 2 1 pseudomarker : c2.loc10, postionned between gUNC2731905 and gUNCHS004244. Here are the infos on genotype counts for these markers: ```{r summary_geno_chr2} -tab_before %>% filter(marker %in% c("gUNC2731905","gUNCHS004244")) %>% select(marker:n_NA) +peak1_tab <- tab_before %>% filter(marker %in% c("gUNC2731905","gUNCHS004244")) %>% select(marker:n_NA) +peak1_tab ``` For gUNC2731905, all individuals except 1 are homozygous so this marker should be removed. The proportions for gUNCHS004244 seem correct. @@ -505,7 +542,7 @@ For gUNC2731905, all individuals except 1 are homozygous so this marker should b Graph: -```{r geno_plot_chr2} +```{r geno_plot_peak1} phenotypes <- cross_before[["pheno"]] map <- cross_before[["geno"]][["2"]][["map"]] map <- tibble(marker=names(map),pos=map) @@ -529,6 +566,8 @@ geno_plot2 <- pgmap %>% filter(pos > 10 & pos < 20) %>% theme(plot.margin = unit(c(1, 1, 1, 1), "lines"), axis.title.x = element_text(margin = margin(t = 50))) geno_plot2 + +rm(pgmap,phenotypes,map,genotypes,phenogeno) ``` This region contains many markers with non Mendelian proportions (many have an excess of homozygous). This leads to the creation of a pseudomarker between gUNC2731905 and gUNCHS004244 with incorrect genotypic probabilities which leads to a spurious peak. @@ -542,12 +581,31 @@ Recombination fractions between adjacent markers in this regions are indeed high ### Peak 2 -1 peak on 1 marker : mUNC050096588 +```{r peak7_zoom} +peak2 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="5", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak2 +``` + +1 peak on chromosome 2 on 1 marker : mUNC050096588 Here are the infos on genotype counts for these markers: ```{r summary_geno_chr5} -tab_before %>% filter(marker %in% c("mUNC050096588")) %>% select(marker:n_NA) +peak2_tab <- tab_before %>% filter(marker %in% c("mUNC050096588")) %>% select(marker:n_NA) +peak2_tab ``` All individuals heterozygous so this marker should be removed. @@ -584,16 +642,36 @@ test_plot <- pgmap %>% filter(pos > 20 & pos < 30) %>% theme(plot.margin = unit(c(1, 1, 1, 1), "lines"), axis.title.x = element_text(margin = margin(t = 50))) test_plot +rm(pgmap,phenotypes,map,genotypes,phenogeno) ``` ### Peak 3 -2 peaks, one on 1 pseudomarker : c7.loc74, between UNC13823755 and S3J075374098; and one on 1 marker and 1 pseudomarker : c7.loc82 and gUNC13998623, c7.loc82 being between gUNC13979374 and gUNC13998623. +```{r peak3_zoom} +peak3 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="7", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak3 +``` + +2 peaks on chromosome 7, one on 1 pseudomarker : c7.loc74, between UNC13823755 and S3J075374098; and one on 1 marker and 1 pseudomarker : c7.loc82 and gUNC13998623, c7.loc82 being between gUNC13979374 and gUNC13998623. Here are the infos on genotype counts for these markers: -```{r summary_geno_chr7} -tab_before %>% filter(marker %in% c("UNC13823755", "S3J075374098","gUNC13979374","gUNC13998623")) %>% select(marker:n_NA) +```{r summary_geno_peak3} +peak3_tab <- tab_before %>% filter(marker %in% c("UNC13823755", "S3J075374098","gUNC13979374","gUNC13998623")) %>% select(marker:n_NA) +peak3_tab ``` There are no homozygous individuals for one allele for S3J075374098, gUNC13979374 and gUNC13998623 so these markers should be removed. Proportions seem correct for UNC13823755. @@ -634,12 +712,31 @@ test_plot ### Peak 4 -1 peaks, one on 1 marker : mbackupJAX00158395 +```{r peak4_zoom} +peak4 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="8", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak4 +``` + +1 peak on chromosome 8 one on 1 marker : mbackupJAX00158395 Here are the infos on genotype counts for this marker: -```{r summary_geno_chr8} -tab_before %>% filter(marker %in% c("mbackupJAX00158395")) %>% select(marker:n_NA) +```{r summary_geno_peak4} +peak4_tab <- tab_before %>% filter(marker %in% c("mbackupJAX00158395")) %>% select(marker:n_NA) +peak4_tab ``` All individuals are homozygous except one so this marker should be removed. @@ -681,17 +778,34 @@ test_plot ### Peak 5 -1 peaks, one on 1 marker : S6J102311553 +```{r peak5_zoom} +peak5 <- qtl_plot(pheno_before, + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="10", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("Peak 5") +peak5 +``` + +1 peak on chromosome 10 on 1 marker : S6J102311553 Here are the infos on genotype counts for this marker: -```{r summary_geno_chr10} +```{r summary_geno_peak5} tab_before %>% filter(marker %in% c("S6J102311553")) %>% select(marker:n_NA) ``` The genotypic proportions for this marker seem correct. -```{r parents_geno_chr10} +```{r parents_geno_peak5} strns_ref %>% filter(marker %in% c("S6J102311553")) strains %>% filter(marker %in% c("S6J102311553")) ``` @@ -700,7 +814,7 @@ The alleles of parental strains seemed correct too and are the same in the refer Graph: -```{r geno_plot_chr10} +```{r geno_plot_peak5} phenotypes <- cross_before[["pheno"]] map <- cross_before[["geno"]][["10"]][["map"]] map <- tibble(marker=names(map),pos=map) @@ -728,6 +842,11 @@ test_plot S6J102311553 seem to be surrounded by markers with an increased proportion of homozygous individuals. +```{r summary_geno_peak5} +peak5_tab <-tab_before %>% filter(marker %in% c("SSR102275149","gUNC18022023","S6J102311553","gUNC18048671","B10100061261")) %>% select(marker:n_NA) +peak5_tab +``` + ```{r} newmap_before[["10"]][80:110] @@ -737,12 +856,30 @@ Indeed, there is not an increased distance between S6J102311553 and the adjacent ### Peak 6 -1 peak, one on 1 marker : SAC132487883 +```{r peak6_zoom} +peak6 <- qtl_plot(pheno_before, + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="13", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("Peak 6") +peak6 +``` + +1 peak on chromosome 13 on 1 marker : SAC132487883 Here are the infos on genotype counts for this marker: ```{r summary_geno_chr13} -tab_before %>% filter(marker %in% c("SAC132487883")) %>% select(marker:n_NA) +peak6_tab <- tab_before %>% filter(marker %in% c("SAC132487883")) %>% select(marker:n_NA) +peak6_tab ``` All individuals are heterozygous at this loci so this marker should be removed. @@ -847,3 +984,307 @@ mean(compar_pos_after$dif_prev,na.rm=TRUE) sd(compar_pos_after$dif_prev,na.rm=TRUE) ``` + + +```{r} +# #pgm: non +# cross_test <- cross_before +# cross_test[["pheno"]][["pgm"]] <- cross_after2[["pheno"]][["pgm"]] +# identical(cross_test[["pheno"]][["pgm"]],cross_after2[["pheno"]][["pgm"]]) +# +# +# pheno_test <- scanone(cross=cross_test, chr=c("5"), pheno.col="Pheno", model="normal", method="em") +# pheno_before5 <- scanone(cross=cross_before, chr=c("5"), pheno.col="Pheno", model="normal", method="em") +# +# +# identical(pheno_test,pheno_before5) +# +# qtl_plot(pheno_test, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# qtl_plot(pheno_before, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# # pheno identical ? Yes +# identical(cross_test[["pheno"]][["Pheno"]],cross_after2[["pheno"]][["Pheno"]]) +# +# # geno identical ? Yes +# identical(cross_test[["geno"]][["10"]][["data"]][,"S6J102311553"],cross_after2[["geno"]][["10"]][["data"]][,"S6J102311553"]) +# +# # strain ref genotypes identical ? Yes +# identical(strns_ref %>% filter(marker=="S6J102311553"),strains %>% filter(marker=="S6J102311553")) +# +# +# ## Test only this marker : low LOD +# +# tab_test <- tab2 %>% filter(marker=="S6J102311553") +# +# # write_rqtl(geno=genos,pheno=phenos,tab=tab_before,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_before.csv") +# # write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster2/cross_after2.csv") +# +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("5"), pheno.col="Pheno", model="normal", method="em") +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Add 1 marker on the right +# +# tab_test <- tab_before %>% filter(marker %in% c("S6J102311553","gUNC18048671")) +# +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("5"), pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Add 1 marker on the right & one on the left +# +# tab_test <- tab_before %>% filter(marker %in% c("S6J102311553","gUNC18048671","gUNC18022023")) +# +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("5"), pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Add 1 marker on the right & 2 on the left +# +# tab_test <- tab_before %>% filter(marker %in% c("S6J102311553","gUNC18048671","gUNC18022023","B10100061261")) +# +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Remove all bad markers on the left: no peak +# tab_test <- tab_before %>% filter(!marker %in% c("gbackupJAX00288802","gJAX00017599","gbackupJAX00017686","gbackupJAX00017686b", +# "S2T101968115","gUNC17926662","gUNC17926662b","gUNCJPD007765", +# "SSR102275149","gUNC18022023")) +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("10"), pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Remove all bad markers on the right : no peak +# tab_test <- tab_before %>% filter(!marker %in% c("gUNC18048671","B10100061261","S2C102505843","UNC18095276", +# "gJAX00290978","UNC18103051","UNC18106657","UNC18109171", +# "gUNC18119184","S2T102642258")) +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("10"), pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# x.label = element_blank(), +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +# +# ## Remove all bad markers on the left except one and all bad markers on the right except one +# tab_test <- tab_before %>% filter(!marker %in% c("gbackupJAX00288802","gJAX00017599","gbackupJAX00017686","gbackupJAX00017686b", +# "S2T101968115","gUNC17926662","gUNC17926662b","gUNCJPD007765", +# "SSR102275149", +# "B10100061261","S2C102505843","UNC18095276", +# "gJAX00290978","UNC18103051","UNC18106657","UNC18109171", +# "gUNC18119184","S2T102642258")) +# write_rqtl(geno=genos,pheno=phenos,tab=tab_test,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="./test.csv") +# +# cross_test2 <- read.cross(format="csv",file="./test.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# +# pheno_test2 <- scanone(cross=cross_test2, chr=c("10"), pheno.col="Pheno", model="normal", method="em") +# pheno_test2 +# +# qtl_plot(pheno_test2, +# ylab="LOD score", +# title="QTL mapping", +# size=0.6, +# strip.pos="bottom", +# chr="10", +# rug=TRUE) + +# theme(legend.position = "none", +# strip.background = element_blank(), +# strip.text.x = element_blank()) + +# xlab("Position (cM)") + +# ggtitle("") +``` + +### markers with unmatching alleles that lead to distortion of genetic map + +```{r} +tab2 %>% filter(exclude_allele==1 & exclude_prop==0) %>% select(marker:n_NA) %>% left_join(.,strains) %>% filter(!parent1 %in% c("N","H") & !parent2 %in% c("N","H")) + +# gJAX00038569 +newmap_before[["12"]][c("gUNC21523346","gJAX00038569","gUNCHS034222")] #50cM on each side + +# mUNC21540855 +newmap_before[["12"]][c("gUNCHS034222","mUNC21540855","SFJ123443466")] #50cM on each side + +# gUNC21555204 +newmap_before[["12"]][c("SFJ123443466","gUNC21555204","gJAX00340618")] #30-60cM on each side +``` + +```{r} +tab2 %>% filter(exclude_na==1 & exclude_poly==0) %>% select(marker:n_NA) + +# gJAX00038569 +newmap_before[["12"]][c("gUNC21523346","gJAX00038569","gUNCHS034222")] #50cM on each side + +# mUNC21540855 +newmap_before[["12"]][c("gUNCHS034222","mUNC21540855","SFJ123443466")] #50cM on each side + +# gUNC21555204 +newmap_before[["12"]][c("SFJ123443466","gUNC21555204","gJAX00340618")] #30-60cM on each side +``` + +```{r} +load("data2/data2_peaks.rda") +load("data3/data3_peaks.rda") +load("data4/data4_peaks.rda") + +colnames(peak1_tab) <- c("marker","chr","pos","allele\n1","allele\n2", + "n\nHM1","n\nHM2","n\nHT","n\nNA") +colnames(peak6_tab) <- c("marker","chr","pos","allele\n1","allele\n2", + "n\nHM1","n\nHM2","n\nHT","n\nNA") +colnames(peak11_tab) <- c("marker","chr","pos","allele\n1","allele\n2", + "n\nHM1","n\nHM2","n\nHT","n\nNA") +colnames(peak5_tab) <- c("marker","chr","pos","allele\n1","allele\n2", + "n\nHM1","n\nHM2","n\nHT","n\nNA") + + +narrow_grid <- ggdraw() + + draw_plot(pheno_before_annot,0,.8,.5,.16) + + draw_plot(pheno_before_annot_data2,.5,.8,.5,.16) + + draw_plot(pheno_before_data3,0,.64,.5,.16) + + draw_plot(pheno_before_annot_data4,.5,.64,.5,.16) + + draw_plot(peak1,0,.48,.3,.16) + + draw_plot(tableGrob(peak1_tab,rows=NULL),.3,.48,.7,.16) + + draw_plot(peak6,0,.32,.3,.16) + + draw_plot(tableGrob(peak6_tab,rows=NULL),.3,.32,.7,.16) + + draw_plot(peak11,0,.16,.3,.16) + + draw_plot(tableGrob(peak11_tab,rows=NULL),.3,.16,.7,.16) + + draw_plot(peak5,0,0,.3,.16) + + draw_plot(tableGrob(peak5_tab,rows=NULL),.3,0,.7,.16) + + draw_plot_label(c("A", "B","C","D","E","F","G","H","I","J","K","L"), + c(0,.5,0,.5,0,.35,0,.35,.01,.35,0,.35),c(.96,.96,.8,.8,.64,.615,.48,.45,.32,.30,.16,.162)) + +narrow_grid +ggsave(narrow_grid,file="narrow_grid.png",width=10,height=17) + +``` + diff --git a/article/data2/.RData b/article/data2/.RData index a927b648399c261a1f7ac6b98c65a591677efd80..eda0e03052997e2a704fbe6c3976e57ba3c2c2f7 100644 Binary files a/article/data2/.RData and b/article/data2/.RData differ diff --git a/article/data2/data2.Rmd b/article/data2/data2.Rmd index 091637ce0bc696a0c58bd5047d58457e51e10383..698f48b319645719a0f58a9e07706483f5eeeec0 100644 --- a/article/data2/data2.Rmd +++ b/article/data2/data2.Rmd @@ -83,26 +83,6 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05" pheno_before_plot ``` -```{r before_ann} -chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") -ann_dat_text<-data.frame( - chr=factor(chrs, - levels=chrs), - lod=c(rep(NA,10),8,rep(NA,9)), - label=c(rep(NA,10),7,rep(NA,9)), - x=c(rep(NA,10),22,rep(NA,9)) -) - -pheno_before_plot + geom_text( - # the new dataframe for annotating text - data = ann_dat_text, - mapping = aes(x = x, - y = lod, - label = label, - color="blue") -) -``` - ## create file for parental strains genotyped @@ -214,21 +194,66 @@ rm(none,allele,match,poly,prop,functions_df) Investigation of high lod score peaks + +```{r before_ann} +chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") +ann_dat_text<-data.frame( + chr=factor(chrs, + levels=chrs), + lod=c(rep(NA,10),8,rep(NA,9)), + label=c(rep(NA,10),7,rep(NA,9)), + x=c(rep(NA,10),22,rep(NA,9)) +) + +pheno_before_annot_data2 <- pheno_before_plot + geom_text( + # the new dataframe for annotating text + data = ann_dat_text, + mapping = aes(x = x, + y = lod, + label = label, + color="blue") +) +pheno_before_annot_data2 + +rm(ann_dat_text) +rm(chrs) +``` + + ### Peak 7 -1 peak on 1 pseudomarker : c11.loc18, postionned between SNT111392585 and mJAX00308021. +```{r peak7_zoom} +peak7 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="11", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak7 +``` + +1 peak on chromosome 11 on 1 pseudomarker : c11.loc18, postionned between SNT111392585 and mJAX00308021. Here are the infos on genotype counts for these markers: -```{r summary_geno_chr2} -tab_before %>% filter(marker %in% c("SNT111392585","mJAX00308021")) %>% select(marker:n_NA) +```{r summary_geno_peak7} +peak7_tab <- tab_before %>% filter(marker %in% c("SNT111392585","mJAX00308021")) %>% select(marker:n_NA) +peak7_tab ``` For SNT111392585, all individuals except 1 are homozygous so this marker should be removed. The proportions for mJAX00308021 seem correct. Graph: -```{r geno_plot_chr2} +```{r geno_plot_peak7} phenotypes <- cross_before[["pheno"]] map <- cross_before[["geno"]][["11"]][["map"]] map <- tibble(marker=names(map),pos=map) @@ -238,7 +263,7 @@ phenogeno <- cbind(phenotypes,genotypes) phenogeno %<>% pivot_longer(mbackupUNC110000218:gUNC20538837,names_to="marker",values_to="genotype") pgmap <- full_join(phenogeno,map,by="marker") -geno_plot2 <- pgmap %>% filter(pos > 15 & pos < 25) %>% +geno_plot7 <- pgmap %>% filter(pos > 15 & pos < 25) %>% filter(id %in% sample(phenotypes$id,10)) %>% ggplot(aes(x=pos,y=as.factor(id))) + geom_point(aes(color=as.factor(genotype))) + @@ -251,7 +276,14 @@ geno_plot2 <- pgmap %>% filter(pos > 15 & pos < 25) %>% theme_bw() + theme(plot.margin = unit(c(1, 1, 1, 1), "lines"), axis.title.x = element_text(margin = margin(t = 50))) -geno_plot2 +geno_plot7 + +rm(pgmap,phenotypes,map,genotypes,phenogeno) +``` + +The two markers before the pseudomarker have an excess of homozygous. + +```{r save_narrow} +save(pheno_before_annot_data2,peak7,peak7_tab,file="data2_peaks.rda") ``` -The two markers before the pseudomarker have an excess of homozygous. \ No newline at end of file diff --git a/article/data2/data2_peaks.rda b/article/data2/data2_peaks.rda new file mode 100644 index 0000000000000000000000000000000000000000..de853d4d43b28d13b745be526e1593ec79652946 Binary files /dev/null and b/article/data2/data2_peaks.rda differ diff --git a/article/data3/.RData b/article/data3/.RData index 9c7a53a6af9454f73ec7c5887b37cecb26760db8..8be84ee35d189aebb25b3ea652a46f118edafee1 100644 Binary files a/article/data3/.RData and b/article/data3/.RData differ diff --git a/article/data3/data3.Rmd b/article/data3/data3.Rmd index ab72582edd8ce1450f1c35981cbdbeccc4878354..d525b578241e8973eaa1a6713090f9613ee97ed2 100644 --- a/article/data3/data3.Rmd +++ b/article/data3/data3.Rmd @@ -199,4 +199,11 @@ functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) + functions_plot rm(none,allele,match,poly,prop) -``` \ No newline at end of file +``` + + +```{r} +pheno_before_data3 <- pheno_before_plot +save(pheno_before_data3,file="data3_peaks.rda") +``` + diff --git a/article/data3/data3_peaks.rda b/article/data3/data3_peaks.rda new file mode 100644 index 0000000000000000000000000000000000000000..0def3724bf1a7190592cd4ebf84b371d34e0d425 Binary files /dev/null and b/article/data3/data3_peaks.rda differ diff --git a/article/data4/.RData b/article/data4/.RData index 5fbfda8e7679ec0598f0472bf1461ab10406d80d..9da838548717419c9e723960988b2a3672bf1503 100644 Binary files a/article/data4/.RData and b/article/data4/.RData differ diff --git a/article/data4/data4.Rmd b/article/data4/data4.Rmd index f3715c8b837d0b658b463e22e9326c9aa6f19c50..1b6938eb82c6d82211608c835f8b9faf23503a37 100644 --- a/article/data4/data4.Rmd +++ b/article/data4/data4.Rmd @@ -82,25 +82,6 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05" pheno_before_plot ``` -```{r before_ann} -chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") -ann_dat_text<-data.frame( - chr=factor(chrs, - levels=chrs), - lod=c(9,NA,NA,NA,11,NA,NA,NA,NA,13,NA,NA,8.5,NA,NA,NA,NA,7,NA,NA), - label=c(8,NA,NA,NA,9,NA,NA,NA,NA,10,NA,NA,11,NA,NA,NA,NA,12,NA,NA), - x=c(45,NA,NA,NA,20,NA,NA,NA,NA,27,NA,NA,30,NA,NA,NA,NA,35,NA,NA) -) - -pheno_before_plot + geom_text( - # the new dataframe for annotating text - data = ann_dat_text, - mapping = aes(x = x, - y = lod, - label = label, - color="blue") -) -``` ```{r} #our genotypes @@ -225,11 +206,50 @@ rm(none,allele,match,poly,prop) ## Narrow peaks +```{r before_ann} +chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X") +ann_dat_text<-data.frame( + chr=factor(chrs, + levels=chrs), + lod=c(9,NA,NA,NA,11,NA,NA,NA,NA,13,NA,NA,8.5,NA,NA,NA,NA,7,NA,NA), + label=c(8,NA,NA,NA,9,NA,NA,NA,NA,10,NA,NA,11,NA,NA,NA,NA,12,NA,NA), + x=c(45,NA,NA,NA,20,NA,NA,NA,NA,27,NA,NA,30,NA,NA,NA,NA,35,NA,NA) +) + +pheno_before_annot_data4 <- pheno_before_plot + geom_text( + # the new dataframe for annotating text + data = ann_dat_text, + mapping = aes(x = x, + y = lod, + label = label, + color="blue") +) +pheno_before_annot_data4 +``` + Investigation of high lod score peaks ### Peak 8 -1 peak on 1 pseudomarker : c1.loc42, postionned between gUNC1177319 and S6J013976867. +```{r peak8_zoom} +peak8 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="1", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak8 +``` + +1 peak on chromosome 1 on 1 pseudomarker : c1.loc42, postionned between gUNC1177319 and S6J013976867. Here are the infos on genotype counts for these markers: @@ -270,7 +290,25 @@ geno_plot8 ### Peak 9 -1 peak on 1 pseudomarker : c5.loc16, in a region with very few markers, postionned between S6J050685107 and mUNC050096588. +```{r peak9_zoom} +peak9 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="5", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak9 +``` + +1 peak on chromosome 5 on 3 pseudomarkers : c5.loc16, c5.loc18, c5.loc20 in a region with very few markers, postionned between S6J050685107 and mUNC050096588. Here are the infos on genotype counts for these markers: @@ -311,7 +349,25 @@ geno_plot9 ### Peak 10 -1 peak on 1 pseudomarker : c10.loc30, in a region with very few markers, postionned between SAH102097335 and S2C102505843. +```{r peak10_zoom} +peak10 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="10", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak10 +``` + +1 peak on 1 marker and one pseudomarker : S2C102505843 and c10.loc30 in a region with very few markers, postionned between SAH102097335 and S2C102505843. Here are the infos on genotype counts for these markers: @@ -353,12 +409,30 @@ geno_plot10 ### Peak 11 -1 peak on 1 pseudomarker : c13.loc28, postionned between S6J132182752 and SAC132487883. +```{r peak11_zoom} +peak11 <- qtl_plot(pheno_before, + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="13", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("Peak 11") +peak11 +``` + +1 peak on 1 marker and 1 pseudomarker : SAC132487883 and c13.loc28, postionned between S6J132182752 and SAC132487883. Here are the infos on genotype counts for these markers: ```{r summary_geno_peak11} -tab_before %>% filter(marker %in% c("S6J132182752","SAC132487883")) %>% select(marker:n_NA) +peak11_tab <- tab_before %>% filter(marker %in% c("S6J132182752","SAC132487883")) %>% select(marker:n_NA) +peak11_tab ``` @@ -394,6 +468,24 @@ geno_plot11 ### Peak 12 +```{r peak12_zoom} +peak12 <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"), + lod = threshold_before[1:3]), + ylab="LOD score", + title="QTL mapping", + x.label = element_blank(), + size=0.6, + strip.pos="bottom", + chr="18", + rug=TRUE) + + theme(legend.position = "none", + strip.background = element_blank(), + strip.text.x = element_blank()) + + xlab("Position (cM)") + + ggtitle("") +peak12 +``` + 1 peak on 1 marker : S3C182557441 Here are the infos on genotype counts for these markers: @@ -432,3 +524,9 @@ geno_plot12 <- pgmap %>% filter(pos > 32 & pos < 42) %>% axis.title.x = element_text(margin = margin(t = 50))) geno_plot12 ``` + + +```{r} +save(pheno_before_annot_data4,peak11,peak11_tab,file="data4_peaks.rda") +``` + diff --git a/article/data4/data4_peaks.rda b/article/data4/data4_peaks.rda new file mode 100644 index 0000000000000000000000000000000000000000..19bfea9468c611336dfe3970ce06b40961b4e1ed Binary files /dev/null and b/article/data4/data4_peaks.rda differ diff --git a/article/narrow_grid.png b/article/narrow_grid.png new file mode 100644 index 0000000000000000000000000000000000000000..1ce02cb7ea540b3514775e187c0a64df74feeba9 Binary files /dev/null and b/article/narrow_grid.png differ