diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index d0341e9dff234d84ca97b4718e79d653c06c1b85..e184220c7709a0a8b3093338a2879e3fecb7a980 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,13 +1,17 @@ +/home/marie/Documents/stuart_package/stuart/NAMESPACE="2ECA567D" /home/marie/Documents/stuart_package/stuart/R/geno_strains.R="AF4CFAD2" /home/marie/Documents/stuart_package/stuart/R/genos-data.R="1CBE4D7A" /home/marie/Documents/stuart_package/stuart/R/mark_allele.R="32D90792" /home/marie/Documents/stuart_package/stuart/R/mark_estmap.R="2E43F0E2" /home/marie/Documents/stuart_package/stuart/R/mark_match.R="39634C04" /home/marie/Documents/stuart_package/stuart/R/mark_poly.R="D19CB401" +/home/marie/Documents/stuart_package/stuart/R/mark_prop.R="31DD9C22" /home/marie/Documents/stuart_package/stuart/R/phenos-data.R="753628A1" /home/marie/Documents/stuart_package/stuart/R/stuart_cross-data.R="2A7132AD" /home/marie/Documents/stuart_package/stuart/R/stuart_newmap-data.R="86A2EB27" /home/marie/Documents/stuart_package/stuart/R/stuart_tab-data.R="D77DF427" +/home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220" +/home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1" /home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2" /mnt/zeus/zeus_mouselab/marie/map_before_us/map_before_us.R="2F43B141" /mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="000583A9" diff --git a/R/mark_prop.R b/R/mark_prop.R index 07ca8408639f4fa831ba15436308d87c1ee521b7..ad37df1efe4d7d130ad00a2aaa229bf017c2dc47 100755 --- a/R/mark_prop.R +++ b/R/mark_prop.R @@ -5,6 +5,9 @@ #' @param cross F2 or N2. #' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2. #' @param hetero proportion of heterozygous individuals under which the marker is excluded. +#' @param homo1X a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the highest expected proportion. +#' @param homo2X a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the lowest expected proportion. +#' @param heteroX a vector of two numbers. The lower and upper limits for the proportion of heterozygous individuals for markers on X chromosome. #' @param na proportion of non-genotyped individuals above which the marker is excluded. #' #' @import dplyr @@ -15,7 +18,7 @@ #### mark_prop #### ## excludes markers depending on proportions of homo/hetorozygous -mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,heteroX=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) @@ -30,6 +33,10 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ stop("Arguments homo and hetero or argument pval must be specified") } + #warn if chrX not used + if(is.na(homo1X)==TRUE & is.na(homo2X)==TRUE & is.na(heteroX)==TRUE){ + warning("Arguments homo1X, homo1X and heteroX are not specified: proportions for markers on X chromosomes will not be tested") + } #stop with prop of na @@ -50,17 +57,22 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) tab <- tab %>% mutate(p_HT = n_HT/n_geno) - tab <- tab %>% - mutate(exclude_prop=case_when(!chr %in% c("X","Y","M","PAR") & p_NA > na ~ 1, - !chr %in% c("X","Y","M","PAR") & cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, - !chr %in% c("X","Y","M","PAR") & cross=="N2" & ((p_HM2 == 0 & p_HM1 < homo) | - (p_HM1 == 0 & p_HM2 < homo) | - (p_HT < hetero) | - (p_HM1 !=0 & p_HM2 != 0)) ~ 1, + mutate(exclude_prop=case_when(p_NA > na ~ 1, + !chr %in% c("X","Y","M") & cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, + !chr %in% c("X","Y","M") & cross=="N2" & ((p_HM2 == 0 & p_HM1 < homo) | + (p_HM1 == 0 & p_HM2 < homo) | + (p_HT < hetero) | + (p_HM1 !=0 & p_HM2 != 0)) ~ 1, + chr == "X" & p_HM1 >= p_HM2 & (!between(p_HM1,homo1X[1],homo1X[2]) | + !between(p_HM2,homo2X[1],homo2X[2])) ~ 1, + chr == "X" & p_HM2 > p_HM1 & (!between(p_HM2,homo1X[1],homo1X[2]) | + !between(p_HM1,homo2X[1],homo2X[2])) ~ 1, + chr == "X" & !between(p_HT,heteroX[1],heteroX[2]) ~ 1, T ~ exclude_prop )) + print(tab) #exclude with pval chisq.test } else if(is.na(pval)==FALSE){ @@ -73,7 +85,7 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ full_join(.,tab,by=all_of(names)) #convert in score - tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, + tab <- tab %>% mutate(exclude_prop=case_when(!chr %in% c("X","Y","M") & chi_pval < pval ~ 1, T ~ exclude_prop)) #if cross N2 @@ -89,8 +101,8 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ full_join(.,tab,by=c(all_of(names),"n_hm")) #convert in score - tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, - (n_HM1 !=0 & n_HM2 != 0) ~ 1, + tab <- tab %>% mutate(exclude_prop=case_when(!chr %in% c("X","Y","M") & chi_pval < pval ~ 1, + !chr %in% c("X","Y","M") & (n_HM1 !=0 & n_HM2 != 0) ~ 1, T ~ exclude_prop)) } diff --git a/stuart_1.0.3.9000.tar.gz b/stuart_1.0.3.9000.tar.gz index 6b51ad18f4d8ed3d4f64407119f628a10edeaa8c..cf32f0a98445ba8093781f6ce11323f517ef3882 100644 Binary files a/stuart_1.0.3.9000.tar.gz and b/stuart_1.0.3.9000.tar.gz differ diff --git a/vignettes/stuart.Rmd b/vignettes/stuart.Rmd index adfd8d94a1666ff9a36f56dc4d09d1952ee1171a..bfb48b06b6b72a59a0af291d91c5244c817ae02b 100755 --- a/vignettes/stuart.Rmd +++ b/vignettes/stuart.Rmd @@ -131,6 +131,9 @@ The `mark_prop()` function can be used to filter markers depending on the propor ```{r mark_prop_ex_homo} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) %>% print.data.frame() + + + ``` We could also use the `pval` argument which allows to exclude markers by performing a Chi2 test comparing observed distribution with Mendelian proportions. By using `pval=0.5` we would exclude all markers with a Chi2 p-value inferior to 0.05. However, for some markers, Chi2 approximation may be incorrect.