Commit db393eb1 authored by Marie  BOURDON's avatar Marie BOURDON
Browse files

Merge branch 'develop' into 'master'

Develop

See merge request !9
parents e62ef101 80a06a4a
{
"path" : "~/Documents/PhD/stuart_package/stuart/R",
"path" : "~/Documents/PhD/stuart_package/stuart",
"sortOrder" : [
{
"ascending" : true,
......
{
"left" : {
"panelheight" : 689,
"splitterpos" : 180,
"panelheight" : 1402,
"splitterpos" : 363,
"topwindowstate" : "NORMAL",
"windowheight" : 727
"windowheight" : 1440
},
"right" : {
"panelheight" : 689,
"splitterpos" : 434,
"panelheight" : 1402,
"splitterpos" : 880,
"topwindowstate" : "NORMAL",
"windowheight" : 727
"windowheight" : 1440
}
}
\ No newline at end of file
{
"TabSet1" : 0,
"TabSet2" : 0,
"TabSet2" : 3,
"TabZoom" : {
}
}
\ No newline at end of file
build-last-errors="[]"
build-last-errors-base-dir="~/stuart_package/stuart/"
build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
build-last-errors-base-dir="~/Documents/PhD/stuart_package/stuart/"
build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
......
/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpZZZ4WE/preview-31873a42c16d.dir/stuaRt.html
/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/Rtmp3VMULh/preview-1ced48fe920c.dir/stuaRt.html
/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpbddlPL/preview-1b515493e142.dir/stuart.html
/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpNme5vw/preview-4c4321234d03.dir/stuaRt.html
/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpZZZ4WE/preview-3187564b41ab.dir/stuaRt.html
......
%2FVolumes%2F%40Mouselab%2FZika%2FBackcross%2FGeno%2FBC_01.71_stuart.Rmd="73DD6D1F"
%2FVolumes%2F%40Mouselab%2FZika%2FF2%2FF2_B6.CC001%2FQUGA%2FQUGA.Rmd="D7285907"
~%2FDocuments%2FPhD%2Fstuart_R%2Fstuart%2FDESCRIPTION="A58355B1"
~%2FDocuments%2FPhD%2Fstuart_R%2Fstuart%2FR%2Fgeno_strains.R="430FE7D7"
~%2FDocuments%2FPhD%2Fstuart_R%2Fstuart%2FR%2Fmark_allele.R="58D83345"
......@@ -11,13 +13,18 @@
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FDESCRIPTION="DB43CCAB"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fgeno_strains.R="746D5550"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_allele.R="94A0A47C"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_estmap.R="122CE4C2"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_match.R="C03D9873"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_na.R="BC725065"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_poly.R="E392A021"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_prop.R="65449E3B"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="5D74CC67"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="38BAAAF9"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="9A1DD653"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fman%2Fmark_na.Rd="E059577D"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fman%2Fmark_prop.Rd="A5177778"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="5BDF5DBA"
~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fvignettes%2Fstuart.Rmd="4069DBDF"
~%2Fstuart_package%2Fstuart%2FDESCRIPTION="BEB7232"
~%2Fstuart_package%2Fstuart%2FNAMESPACE="AE613167"
~%2Fstuart_package%2Fstuart%2FR%2Fgeno_strains.R="8F7B714A"
......
/home/marie/Documents/stuart/stuart_package/stuart/vignettes/stuart.Rmd="EE74038B"
/mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="6CD0FCD2"
/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="E9861925"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/DESCRIPTION="5140B408"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/geno_strains.R="EBA82473"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/mark_allele.R="A0A182D3"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/mark_estmap.R="57E1825F"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/mark_match.R="FFA1BE52"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/mark_prop.R="30FA9E8C"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/tab_mark.R="F0B2417"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/write_rqtl.R="F07CE16C"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/man/mark_na.Rd="C8CA0D4F"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/man/mark_prop.Rd="893B273D"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/vignettes/stuaRt.Rmd="DA6206CB"
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/vignettes/stuart.Rmd="54D793B9"
/Volumes/@Mouselab/Zika/Backcross/Geno/BC_01.71_stuart.Rmd="E9F16055"
......@@ -5,3 +5,4 @@ Meta
/Meta/
.Rhistory
.Rhistory
inst/doc
Package: stuart
Title: Sort markers of lab strains genotyping results
Version: 1.0.5
Version: 1.0.6
Authors@R:
person(given = "Marie",
family = "Bourdon",
......@@ -23,7 +23,7 @@ Imports:
Suggests:
knitr,
rmarkdown,
markdown
markdown,
VignetteBuilder: knitr
Depends:
R (>= 3.5.0)
......@@ -4,6 +4,7 @@ export(geno_strains)
export(mark_allele)
export(mark_estmap)
export(mark_match)
export(mark_na)
export(mark_poly)
export(mark_prop)
export(tab_mark)
......
#' @title Exclude markers depending on estimated genetic map
#'
#' @description This function uses a "map" object producted by qtl::est.map() and returns markers which have a high recombination proportion with adjacent markers.
#'
#' High recombination fractions are identified by lookind at differences and ratios between the known and calculated distance with the previous marker.
#' Thus groups of markers (i.e. small calculated recombination fraction with each other but high with other groups) are identified. Groups with a small number of markers (most often even one) separating groups with high number of markers correspond to the markers with incorrect recombination fractions that must be removed.
#'
#' @param tab data frame obtained with tab_mark function.
#' @param map object of class map
#' @param dist a value to identify markers with odd recombination fractions. Default is 100.
#' @param annot data frame with the annotation of the array used
#' @param d a value to identify groups of markers: d is the difference between calculated and known distance with the previous marker, used to identify a new group of markers. Default is 20.
#' @param r a value to identify groups of markers: r is the ratio between calculated and known distance with the previous marker, used to identify a new group of markers. Default is 5.
#' @param n a value to identify which group of markers must be removed: n is the maximum size of a group of markers: markers with incorrect recombination fraction must be isolated or in very small groups. Default is 5.
#'
#' @import dplyr
#'
#' @export
#'
#### mark_estmap ####
mark_estmap <- function(tab,map,dist=100){
mark_estmap <- function(tab,map,annot,d=20,r=5,n=5){
#initialize variables
marker <- c()
mark <- c()
chr <- c()
pos <- c()
place <- c()
previous <- c()
follow <- c()
kn_previous <- c()
kn_follow <- c()
#get information in newmap
for(i in names(map)){
marker <- c(marker,names(map[[i]]))
mark <- c(mark,names(map[[i]]))
chr <- c(chr,rep(i,times=length(map[[i]])))
pos <- c(pos,unname(map[[i]]))
place <- c(place,"first",rep("middle",times=(length(map[[i]])-2)),"last")
......@@ -31,21 +40,82 @@ mark_estmap <- function(tab,map,dist=100){
follow <- c(follow,fol)
}
tab_map <- tibble(marker = marker,
#get information in annot
annot <- annot %>% filter(marker %in% mark)
kn_pos <- annot$cM_cox
kn_prev <- c(NA,annot[1:(nrow(annot)-1),"cM_cox"])
kn_previous <- c(kn_previous,kn_prev)
kn_fol <- c(annot[2:nrow(annot),"cM_cox"],NA)
kn_follow <- c(kn_follow,kn_fol)
#create tab with positions
tab_map <- tibble(marker = mark,
chr = chr,
place = place,
pos = pos,
previous = pos-previous,
follow = follow-pos)
follow = follow-pos,
kn_pos = kn_pos,
kn_previous = kn_pos - kn_previous,
kn_follow = kn_follow - kn_pos)
#remove previous position for first marker and following position for last
tab_map <- tab_map %>% mutate(kn_previous=case_when(is.na(previous)==TRUE ~ NA_real_,
T ~ kn_previous))
tab_map <- tab_map %>% mutate(kn_follow=case_when(is.na(follow)==TRUE ~ NA_real_,
T ~ kn_follow))
#calculate difference and ratio
tab_map <- tab_map %>% mutate(dif_prev = previous - kn_previous)
tab_map <- tab_map %>% mutate(rat_prev = previous/kn_previous)
tab_map <- tab_map %>% mutate(exclude=case_when(place == "first" & follow > dist ~ 1,
place == "middle" & previous > dist & follow > dist ~ 1,
place == "last" & previous > dist ~ 1,
T ~ 0))
#identify new groups of markers
tab_map <- tab_map %>% mutate(group=case_when(is.na(dif_prev)==TRUE ~ "1",
dif_prev > d & rat_prev > r ~ "new"))
bad_markers <- tab_map %>% filter(exclude == 1) %>% pull(marker)
#recode in group number and identify groups to remove
list_map <- split(tab_map,tab_map$chr)
mark_remove <- tibble()
for(i in 1:length(list_map)){
tab_chr <- list_map[[i]]
tab_chr <- tab_chr %>% mutate(row=row_number())
new_pos <- match("new",tab_chr$group)
g <- 2
while(is.na(new_pos)==FALSE){
g_chr <- as.character(g)
tab_chr <- tab_chr %>% mutate(group=case_when(row==new_pos ~ g_chr,
T ~ group))
new_pos <- match("new",tab_chr$group)
g <- g+1
}
tab_chr <- tab_chr %>% tidyr::fill(group)
tab_chr <- tab_chr %>% mutate(group=as.numeric(group)) %>% select(-row)
summa <- tab_chr %>% group_by(group) %>% summarise(N=n())
summa <- summa %>% mutate(n_bef=lag(N,default=9))
summa <- summa %>% mutate(n_af=lead(N,default=9))
summa <- summa %>% mutate(excl=case_when(N<n ~ 1,
T ~ 0))
summa <- summa %>% mutate(ex_bef=lag(excl,default=9))
summa <- summa %>% mutate(ex_af=lead(excl,default=9))
gremove <- summa %>% filter(excl==1 & ((ex_bef!=1 & ex_af!=1) | (ex_bef!=9 & ex_af!=9))) %>% select(group) %>% pull()
mark_remove <- bind_rows(mark_remove,tab_chr %>% filter(group %in% gremove))
}
tab <- tab %>% mutate(exclude_estmap = case_when(marker %in% bad_markers ~ 1,
#stock information in tab
tab <- tab %>% mutate(exclude_estmap = case_when(marker %in% mark_remove$marker ~ 1,
T ~ 0))
#add dif/ratio with following markers
mark_remove <- mark_remove %>% mutate(dif_follow = follow - kn_follow)
mark_remove <- mark_remove %>% mutate(ratio_follow = follow/kn_follow)
#print results
print("Removed marker with known and calculated positions:")
print(mark_remove %>% select(marker,chr,place,
calc_pos=pos,calc_previous=previous,calc_follow=follow,
know_pos=kn_pos,know_previous=kn_previous,know_follow=kn_follow,
dif_prev,ratio_prev=rat_prev))
return(tab)
}
#' @title Exclude markers depending on proportion of missing genotypes
#'
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes.
#'
#' @param tab data frame obtained with tab_mark function.
#' @param na proportion of non-genotyped individuals above which the marker is excluded.
#'
#' @import dplyr
#' @import tidyselect
#'
#' @export
#'
#### mark_prop ####
## excludes markers depending on proportions of homo/hetorozygous
mark_na <- function(tab,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#stop with prop of na
#calculate proportion
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
#exclusion
tab <- tab %>%
mutate(exclude_na=case_when(p_NA >= na ~ 1,
T ~ 0))
tab <- tab %>% select(-c(n_geno,p_NA))
return(tab)
}
#' @title Exclude markers depending on Mendelian proportions
#'
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype.
#' You can define these proportions thanks to the arguments of the function.
......@@ -25,7 +26,7 @@
#### mark_prop ####
## excludes markers depending on proportions of homo/hetorozygous
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NULL,heteroX=NULL,na=0.5){
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NULL,heteroX=NULL){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
......@@ -58,8 +59,7 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NUL
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
mutate(exclude_prop=0)
#stock colnames to join
names <- colnames(tab)
......@@ -72,8 +72,7 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NUL
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
!chr %in% c("X","Y","M") & cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
mutate(exclude_prop=case_when(!chr %in% c("X","Y","M") & cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
!chr %in% c("X","Y","M") & cross=="N2" & ((p_HM2 == 0 & p_HM1 < homo) |
(p_HM1 == 0 & p_HM2 < homo) |
(p_HT < hetero) |
......@@ -87,8 +86,8 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NUL
mutate(exclude_prop=case_when(chr == "X" & p_HM1 >= p_HM2 & !between(p_HM1,homo1X[1],homo1X[2]) ~ 1,
chr == "X" & p_HM2 > p_HM1 & !between(p_HM2,homo1X[1],homo1X[2]) ~ 1,
T ~ exclude_prop
)
)
)
)
}
if(is.null(homo2X)==FALSE){
......@@ -96,16 +95,16 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NUL
mutate(exclude_prop=case_when(chr == "X" & p_HM1 >= p_HM2 & !between(p_HM2,homo2X[1],homo2X[2]) ~ 1,
chr == "X" & p_HM2 > p_HM1 & !between(p_HM1,homo2X[1],homo2X[2]) ~ 1,
T ~ exclude_prop
)
)
)
)
}
if(is.null(heteroX)==FALSE){
tab <- tab %>%
mutate(exclude_prop=case_when(chr == "X" & p_HM1 >= p_HM2 & !between(p_HT,heteroX[1],heteroX[2]) ~ 1,
T ~ exclude_prop
)
)
)
)
}
#exclude with pval chisq.test
......
......@@ -2,8 +2,8 @@
#'
#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals.
#' @param geno data frame with the genotyping results for your cross
#' @param ref data frame with the annotation of the array used. It must contain a "chr" column and a column with positions. X, Y and mitochondrial chromosomes must be coded as: "X", "Y" and "M" in all caps.
#' @param ref data frame with the annotation of the array used
#' @param annot data frame with the annotation of the array used
#' @param pos the name of the column with position informations in the annot data frame
#'
#' @import dplyr
#' @import tidyr
......@@ -14,6 +14,15 @@
#### tab_mark function ####
## create table with markers and counts
tab_mark <- function(geno,annot,pos){
#stop if no annot or pos
if(rapportools::is.empty(annot)==FALSE){
stop("No annotation data")
}
if(rapportools::is.empty(pos)==FALSE){
stop("No position data")
}
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
......
No preview for this file type
......@@ -102,7 +102,7 @@ tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0
tab2 <- mark_allele(tab2,ref=strns_ref,cross="F2",par1="parent1",par2="parent2")
data(stuart_newmap)
tab2 <- mark_estmap(tab2,map=stuart_newmap,dist=100)
tab2 <- mark_estmap(tab=tab2,map=stuart_newmap,annot=annot_mini)
```
# Graphs
......@@ -112,7 +112,7 @@ tab2 <- mark_estmap(tab2,map=stuart_newmap,dist=100)
### Graph missing genotypes
```{r graph_NA}
na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
na_plot <- tab2 %>% mutate(prop_NA=n_NA/176) %>% ggplot(aes(x=prop_NA)) +
geom_histogram() +
scale_y_log10() +
theme_bw() +
......@@ -125,22 +125,37 @@ na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
na_plot
```
Proportions of markers with more than 75% of missing genotypes:
```{r prop_missing}
tab2 %>% mutate(prop_NA=n_NA/176) %>% filter(prop_NA > 0.50) %>% summarise(p=n()/count(tab2)%>%pull())
```
### Graph proportion of genotypes
```{r graph_prop}
prop_plot <- tab2 %>% filter(n_NA<88) %>% ggplot(aes(x=n_HM1/(n_HM1+n_HM2+n_HT),y=n_HM2/(n_HM1+n_HM2+n_HT),color=as.factor(exclude_prop))) +
prop_plot <- tab2 %>% filter(n_NA<88) %>% filter(!chr %in% c("M","X","Y")) %>%
ggplot(aes(x=n_HM1/(n_HM1+n_HM2+n_HT),y=n_HM2/(n_HM1+n_HM2+n_HT),color=as.factor(exclude_prop))) +
geom_point() +
scale_color_manual(values=c("#66bd63","#b2182b")) +
scale_color_manual(values=c("#66bd63","#b2182b"),labels = c("Retained", "Excluded")) +
#geom_text(aes(label=ifelse(exclude_prop=="1",SNP.Name,'')),hjust=0, vjust=0,size=2) +
labs(title="Exclusion of markers with mark_prop()",
x="Proportion of homozygous individuals AA",
y="Proportion of homozygous individuals BB",
color="Exclusion") +
theme_classic() +
theme(
aspect.ratio=0.8,
legend.position=c(0.8,0.8)) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
theme(aspect.ratio=0.8,
legend.position=c(0.8,0.8),
legend.title = element_blank()) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14)) +
geom_segment(x = 0.2, y = 0.82,
xend = 0.25, yend = 0.72,
color = "#1d91c0",
arrow = arrow(type="closed",length=unit(6,"points"))) +
geom_segment(x = 0.39, y = 0.62,
xend = 0.44, yend = 0.52,
color = "#1d91c0",
arrow = arrow(type="closed",length=unit(6,"points")))
prop_plot
```
### Grid
......@@ -158,7 +173,7 @@ grid <- plot_grid(na_plot,prop_plot,
grid
ggsave("figures/naprop.pdf",grid,width=5,height=8)
ggsave("figures/fig2.pdf",grid,width=5,height=8)
rm(na_plot,prop_plot)
```
......@@ -181,7 +196,12 @@ allele <- left_join(allele,strains_allele,by=c("marker"="marker"))
#most of markers excluded with mark_allele that were not excluded with other functions have N/H as genotype for parents
#keep only those with non missing/heterozygous genotypes
allele %<>% filter(parent1 != "N" & parent2 != "N")
allele %<>% select(marker,parent1,parent2,allele_1,allele_2,n_HM1,n_HM2,n_HT)
allele %<>% select(marker,parent1,parent2,allele_1,allele_2)
#number of markers in such situation
count(tab2%>%
filter(exclude_allele==1))
#keep only beggining of the table
allele <- allele[1:6,]
......@@ -196,19 +216,16 @@ rm(allele,strains_allele)
```{r barplot}
none <- tab2 %>% nrow()
match <- tab2 %>% filter(exclude_match==0) %>% nrow()
poly <- tab2 %>% filter(exclude_match==0&exclude_poly==0) %>% nrow()
prop <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0) %>% nrow()
allele <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0&exclude_allele==0) %>% nrow()
allele <- tab2 %>% filter(exclude_match==0&exclude_allele==0) %>% nrow()
naf <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0) %>% nrow()
poly <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0) %>% nrow()
prop <- tab2 %>% filter(exclude_match==0&exclude_allele==0&exclude_na==0&exclude_poly==0&exclude_prop==0) %>% nrow()
barplot_df <- tibble(
fct = c("none","match","poly","prop","allele"),
markers = c(none, match, poly, prop, allele)
)
functions_plot <- barplot_df %>% ggplot(aes(x=markers,y=fct)) +
functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) +
geom_bar(stat="identity",width=0.6) +
geom_text(aes(label=markers), hjust=1.3, color="white", size=3.5) +
scale_y_discrete(limits=c("allele", "prop", "poly","match","none")) +
scale_y_discrete(limits=c("prop","poly", "na", "allele","match","none")) +
theme(aspect.ratio=0.7) +
labs(title="Number of markers kept after each step",
x="Number of markers",
......@@ -217,8 +234,6 @@ functions_plot <- barplot_df %>% ggplot(aes(x=markers,y=fct)) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
functions_plot
ggsave("figures/functions.pdf",functions_plot,width=5,height=3)
rm(none,allele,match,poly,prop,barplot_df)
```
......@@ -228,8 +243,8 @@ rm(none,allele,match,poly,prop,barplot_df)
```{r cross_before}
# filter at minima: remove non polymorphic and NA>0.5
tab_before <- mark_poly(stuart_tab)
tab_before <- mark_prop(tab_before,cross="F2",homo=0,hetero=0)
tab_before <- mark_na(stuart_tab)
tab_before <- mark_poly(tab_before)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab_before,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_before.csv")
......@@ -251,7 +266,7 @@ load("files/cluster/newmap_before.rda")
# plot
plotMap(cross_before,newmap_before,shift=TRUE)
plotmap_before <- ~plotMap(cross_before,newmap_before,shift=TRUE,main="Before stuart")
plotmap_before <- ~plotMap(cross_before,newmap_before,shift=TRUE,main="Before stuart", ylab='')
```
### Before: plot genome scan
......@@ -286,12 +301,34 @@ pheno_before_plot
```
```{r before_scan}
pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
title="QTL mapping",
chrs = "13",
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_before_zoom
```
### Before figure
```{r grid_before}
grid_before <- plot_grid(as_grob(plotmap_before),pheno_before_plot,pheno_before_zoom,ncol=1,labels=c("A","B","C"),label_size=20)
ggsave("figures/fig1.pdf",grid_before,width=7,height = 15)
```
### After: creation of Rqtl csv file
```{r cross_after}
# filter with stuart functions: use the good data for parental strains (strains df)
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 <- mark_poly(tab2)
tab2 <- mark_na(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strains,cross="F2",par1="parent1",par2="parent2")
......@@ -322,43 +359,42 @@ plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart
```{r after_estmap}
#filter with mark_estmap
tab2 <- mark_estmap(tab2,newmap_after)
tab2 <- mark_estmap(tab2,newmap_after,annot_mini)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster2/cross_after.csv")
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster2/cross_after2.csv")
```
### After: plot estimated map 2
```{r after_map2}
# import cross
cross_after <- read.cross(format="csv",file="files/cluster2/cross_after.csv",
cross_after2 <- read.cross(format="csv",file="files/cluster2/cross_after2.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster2/newmap_after.rda")
load("files/cluster2/newmap_after2.rda")
# plot
plotMap(cross_after,newmap_after,shift=TRUE)
plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart")
plotMap(cross_after2,newmap_after2,shift=TRUE)
plotmap_after <- ~plotMap(cross_after2,newmap_after2,shift=TRUE,main="After stuart")
```
### After: plot genome scan
```{r after_scan}
# load rda with perm
load("files/cluster2/after_1000p.rda")
# load("files/after_1000p.rda")
load("files/cluster2/after_1000p2.rda")
# calculate thresholds
threshold_after <- summary(after_1000p,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
threshold_after <- summary(after_1000p2,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
# scanone
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
cross_after <- calc.genoprob(cross_after2, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
pheno_after <- scanone(cross=cross_after, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
pheno_after <- scanone(cross=cross_after2, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
summary(pheno_after)
# Plot
......@@ -372,13 +408,12 @@ pheno_after_plot <- qtl_plot(pheno_after,lod=data.frame(group = c("alpha=0.05",