diff --git a/.RData b/.RData new file mode 100644 index 0000000000000000000000000000000000000000..951e6f7d3fcc7411d35165162293d38666f0b82d Binary files /dev/null and b/.RData differ diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index e184220c7709a0a8b3093338a2879e3fecb7a980..c1fb968de0b842a57eec7c888ae94bf101d95354 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -13,5 +13,3 @@ /home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220" /home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1" /home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2" -/mnt/zeus/zeus_mouselab/marie/map_before_us/map_before_us.R="2F43B141" -/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="000583A9" diff --git a/R/mark_prop.R b/R/mark_prop.R index ad37df1efe4d7d130ad00a2aaa229bf017c2dc47..7d077741567dc90c0391793bbd973e1e063377d1 100755 --- a/R/mark_prop.R +++ b/R/mark_prop.R @@ -1,6 +1,14 @@ -#' @title Exclude markers depending on proportions of homo/hetorozygous +#' @title Exclude markers depending on Mendelian proportions +#' +#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. +#' You can define these proportions thanks to the arguments of the function. +#' Specific function arguments can be used to handle genotype proportions for markers on X chromosome. +#' +#' For autosomes, use the homo and hetero arguments or the pval argument. With homo and hetero, mark_prop() excludes markers if the proportion of homozygous or heterozygous individuals is inferior to the value assigned to each argument. +#' With pval, mark_prop() sorts markers with a Chi2 test. If the p-value of the test is significative and inferior to the value assigned to pval argument, the marker is excluded. +#' For X chromosomes, the arguments homo1X, homo2X and heteroX are vectors with two values that correspond to the lower and the upper limit of the proportion of each genotype. +#' homo1X corresponds to the homozygous genotype with the highest proportion while homo2X corresponds to the homozygous genotype with the lowest genotype. If a genotype must not be found, use c(0,0) as limits. #' -#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. The filter on genotype proportions applies to autosomes only (and not the chromosomes encoded as "X", "Y" and "M") #' @param tab data frame obtained with tab_mark function. #' @param cross F2 or N2. #' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2. @@ -18,7 +26,7 @@ #### mark_prop #### ## excludes markers depending on proportions of homo/hetorozygous -mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,heteroX=NA,na=0.5){ +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NULL,heteroX=NULL,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) @@ -34,8 +42,15 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he } #warn if chrX not used - if(is.na(homo1X)==TRUE & is.na(homo2X)==TRUE & is.na(heteroX)==TRUE){ - warning("Arguments homo1X, homo1X and heteroX are not specified: proportions for markers on X chromosomes will not be tested") + if(is.null(homo1X)==TRUE & is.null(homo2X)==TRUE & is.null(heteroX)==TRUE){ + message("Arguments homo1X, homo2X and heteroX are not specified: proportions for markers on X chromosomes will not be tested") + } + + #stop is homo1X | homo2X | heteroX is not a vector with two values + if((is.null(homo1X)==FALSE & length(homo1X)!=2) | + (is.null(homo2X)==FALSE & length(homo2X)!=2) | + (is.null(heteroX)==FALSE & length(heteroX)!=2)){ + stop("Arguments homo1X, homo2X and heteroX must be vectors with two values") } @@ -64,15 +79,14 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he (p_HM1 == 0 & p_HM2 < homo) | (p_HT < hetero) | (p_HM1 !=0 & p_HM2 != 0)) ~ 1, - chr == "X" & p_HM1 >= p_HM2 & (!between(p_HM1,homo1X[1],homo1X[2]) | - !between(p_HM2,homo2X[1],homo2X[2])) ~ 1, - chr == "X" & p_HM2 > p_HM1 & (!between(p_HM2,homo1X[1],homo1X[2]) | - !between(p_HM1,homo2X[1],homo2X[2])) ~ 1, - chr == "X" & !between(p_HT,heteroX[1],heteroX[2]) ~ 1, + chr == "X" & p_HM1 >= p_HM2 & ((is.null(homo1X)==FALSE & !between(p_HM1,homo1X[1],homo1X[2])) | + (is.null(homo2X)==FALSE & !between(p_HM2,homo2X[1],homo2X[2]))) ~ 1, + chr == "X" & p_HM2 > p_HM1 & ((is.null(homo1X)==FALSE & !between(p_HM2,homo1X[1],homo1X[2])) | + (is.null(homo2X)==FALSE & !between(p_HM1,homo2X[1],homo2X[2]))) ~ 1, + chr == "X" & is.null(heteroX)==FALSE & !between(p_HT,heteroX[1],heteroX[2]) ~ 1, T ~ exclude_prop )) - print(tab) #exclude with pval chisq.test } else if(is.na(pval)==FALSE){ diff --git a/data/stuart_cross.rda b/data/stuart_cross.rda index 50caf3b9d5a7530d868a0e4f2b913fc611ed97ad..32478bffe066f55be2d88e31892d510d8b3c1e06 100644 Binary files a/data/stuart_cross.rda and b/data/stuart_cross.rda differ diff --git a/man/mark_prop.Rd b/man/mark_prop.Rd index aba4964071c238acb137af8531463c7bb0e30e92..b2ff00629645304859a9da155102f2e53f810b2a 100755 --- a/man/mark_prop.Rd +++ b/man/mark_prop.Rd @@ -2,9 +2,19 @@ % Please edit documentation in R/mark_prop.R \name{mark_prop} \alias{mark_prop} -\title{Exclude markers depending on proportions of homo/hetorozygous} +\title{Exclude markers depending on Mendelian proportions} \usage{ -mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5) +mark_prop( + tab, + cross, + homo = NA, + hetero = NA, + pval = NA, + homo1X = NULL, + homo2X = NULL, + heteroX = NULL, + na = 0.5 +) } \arguments{ \item{tab}{data frame obtained with tab_mark function.} @@ -15,8 +25,21 @@ mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5) \item{hetero}{proportion of heterozygous individuals under which the marker is excluded.} +\item{homo1X}{a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the highest expected proportion.} + +\item{homo2X}{a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the lowest expected proportion.} + +\item{heteroX}{a vector of two numbers. The lower and upper limits for the proportion of heterozygous individuals for markers on X chromosome.} + \item{na}{proportion of non-genotyped individuals above which the marker is excluded.} } \description{ -This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. The filter on genotype proportions applies to autosomes only (and not the chromosomes encoded as "X", "Y" and "M") +This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. +You can define these proportions thanks to the arguments of the function. +Specific function arguments can be used to handle genotype proportions for markers on X chromosome. + +For autosomes, use the homo and hetero arguments or the pval argument. With homo and hetero, mark_prop() excludes markers if the proportion of homozygous or heterozygous individuals is inferior to the value assigned to each argument. +With pval, mark_prop() sorts markers with a Chi2 test. If the p-value of the test is significative and inferior to the value assigned to pval argument, the marker is excluded. +For X chromosomes, the arguments homo1X, homo2X and heteroX are vectors with two values that correspond to the lower and the upper limit of the proportion of each genotype. +homo1X corresponds to the homozygous genotype with the highest proportion while homo2X corresponds to the homozygous genotype with the lowest genotype. If a genotype must not be found, use c(0,0) as limits. } diff --git a/stuart_1.0.3.9000.pdf b/stuart_1.0.3.9000.pdf index f78485af52cd186921d4b9b0119c3cf2436b2e41..d7150624bb958dc837d557326ef1722caf6c4d8b 100644 Binary files a/stuart_1.0.3.9000.pdf and b/stuart_1.0.3.9000.pdf differ diff --git a/stuart_1.0.3.9000.tar.gz b/stuart_1.0.3.9000.tar.gz index cf32f0a98445ba8093781f6ce11323f517ef3882..15f523c76277f3633e67df71fdaf3a48cd6fabc1 100644 Binary files a/stuart_1.0.3.9000.tar.gz and b/stuart_1.0.3.9000.tar.gz differ diff --git a/vignettes/stuart.Rmd b/vignettes/stuart.Rmd index bfb48b06b6b72a59a0af291d91c5244c817ae02b..1c1288bc081f5993f04503f76280244a8dc396b4 100755 --- a/vignettes/stuart.Rmd +++ b/vignettes/stuart.Rmd @@ -126,14 +126,11 @@ head(tab2) %>% print.data.frame() ### mark_prop -The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 and we use `homo=0.1, hetero=0.1` so the function will exclude all markers with less than 10% of each genotype. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. +The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 and we use `homo=0.1, hetero=0.1` so the function will exclude all markers with less than 10% of each genotype. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. For chromosome X, we use the `homo=0.1, hetero=0.1` ```{r mark_prop_ex_homo} -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1)) head(tab2) %>% print.data.frame() - - - ``` We could also use the `pval` argument which allows to exclude markers by performing a Chi2 test comparing observed distribution with Mendelian proportions. By using `pval=0.5` we would exclude all markers with a Chi2 p-value inferior to 0.05. However, for some markers, Chi2 approximation may be incorrect. @@ -178,6 +175,10 @@ The cross object was saved in stuart. Here we can load it as well as the newmap ```{r load_cross} library(qtl) +# stuart_cross <- read.cross(format="csv",file="../stuart_cross.csv", +# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) +# save(stuart_cross,file="../stuart_cross.rda") + data(stuart_cross) summary(stuart_cross)