Commit 45e067cd authored by Marie Bourdon's avatar Marie Bourdon
Browse files

modify data and vignette with last modifs

parent bfce92d9
File added
...@@ -13,5 +13,3 @@ ...@@ -13,5 +13,3 @@
/home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220" /home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220"
/home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1" /home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1"
/home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2" /home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2"
/mnt/zeus/zeus_mouselab/marie/map_before_us/map_before_us.R="2F43B141"
/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="000583A9"
#' @title Exclude markers depending on proportions of homo/hetorozygous #' @title Exclude markers depending on Mendelian proportions
#'
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype.
#' You can define these proportions thanks to the arguments of the function.
#' Specific function arguments can be used to handle genotype proportions for markers on X chromosome.
#'
#' For autosomes, use the homo and hetero arguments or the pval argument. With homo and hetero, mark_prop() excludes markers if the proportion of homozygous or heterozygous individuals is inferior to the value assigned to each argument.
#' With pval, mark_prop() sorts markers with a Chi2 test. If the p-value of the test is significative and inferior to the value assigned to pval argument, the marker is excluded.
#' For X chromosomes, the arguments homo1X, homo2X and heteroX are vectors with two values that correspond to the lower and the upper limit of the proportion of each genotype.
#' homo1X corresponds to the homozygous genotype with the highest proportion while homo2X corresponds to the homozygous genotype with the lowest genotype. If a genotype must not be found, use c(0,0) as limits.
#' #'
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. The filter on genotype proportions applies to autosomes only (and not the chromosomes encoded as "X", "Y" and "M")
#' @param tab data frame obtained with tab_mark function. #' @param tab data frame obtained with tab_mark function.
#' @param cross F2 or N2. #' @param cross F2 or N2.
#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2. #' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2.
...@@ -18,7 +26,7 @@ ...@@ -18,7 +26,7 @@
#### mark_prop #### #### mark_prop ####
## excludes markers depending on proportions of homo/hetorozygous ## excludes markers depending on proportions of homo/hetorozygous
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,heteroX=NA,na=0.5){ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NULL,homo2X=NULL,heteroX=NULL,na=0.5){
#calculate total number of individuals genotyped for each marker #calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
...@@ -34,8 +42,15 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he ...@@ -34,8 +42,15 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he
} }
#warn if chrX not used #warn if chrX not used
if(is.na(homo1X)==TRUE & is.na(homo2X)==TRUE & is.na(heteroX)==TRUE){ if(is.null(homo1X)==TRUE & is.null(homo2X)==TRUE & is.null(heteroX)==TRUE){
warning("Arguments homo1X, homo1X and heteroX are not specified: proportions for markers on X chromosomes will not be tested") message("Arguments homo1X, homo2X and heteroX are not specified: proportions for markers on X chromosomes will not be tested")
}
#stop is homo1X | homo2X | heteroX is not a vector with two values
if((is.null(homo1X)==FALSE & length(homo1X)!=2) |
(is.null(homo2X)==FALSE & length(homo2X)!=2) |
(is.null(heteroX)==FALSE & length(heteroX)!=2)){
stop("Arguments homo1X, homo2X and heteroX must be vectors with two values")
} }
...@@ -64,15 +79,14 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he ...@@ -64,15 +79,14 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,he
(p_HM1 == 0 & p_HM2 < homo) | (p_HM1 == 0 & p_HM2 < homo) |
(p_HT < hetero) | (p_HT < hetero) |
(p_HM1 !=0 & p_HM2 != 0)) ~ 1, (p_HM1 !=0 & p_HM2 != 0)) ~ 1,
chr == "X" & p_HM1 >= p_HM2 & (!between(p_HM1,homo1X[1],homo1X[2]) | chr == "X" & p_HM1 >= p_HM2 & ((is.null(homo1X)==FALSE & !between(p_HM1,homo1X[1],homo1X[2])) |
!between(p_HM2,homo2X[1],homo2X[2])) ~ 1, (is.null(homo2X)==FALSE & !between(p_HM2,homo2X[1],homo2X[2]))) ~ 1,
chr == "X" & p_HM2 > p_HM1 & (!between(p_HM2,homo1X[1],homo1X[2]) | chr == "X" & p_HM2 > p_HM1 & ((is.null(homo1X)==FALSE & !between(p_HM2,homo1X[1],homo1X[2])) |
!between(p_HM1,homo2X[1],homo2X[2])) ~ 1, (is.null(homo2X)==FALSE & !between(p_HM1,homo2X[1],homo2X[2]))) ~ 1,
chr == "X" & !between(p_HT,heteroX[1],heteroX[2]) ~ 1, chr == "X" & is.null(heteroX)==FALSE & !between(p_HT,heteroX[1],heteroX[2]) ~ 1,
T ~ exclude_prop T ~ exclude_prop
)) ))
print(tab)
#exclude with pval chisq.test #exclude with pval chisq.test
} else if(is.na(pval)==FALSE){ } else if(is.na(pval)==FALSE){
......
No preview for this file type
...@@ -2,9 +2,19 @@ ...@@ -2,9 +2,19 @@
% Please edit documentation in R/mark_prop.R % Please edit documentation in R/mark_prop.R
\name{mark_prop} \name{mark_prop}
\alias{mark_prop} \alias{mark_prop}
\title{Exclude markers depending on proportions of homo/hetorozygous} \title{Exclude markers depending on Mendelian proportions}
\usage{ \usage{
mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5) mark_prop(
tab,
cross,
homo = NA,
hetero = NA,
pval = NA,
homo1X = NULL,
homo2X = NULL,
heteroX = NULL,
na = 0.5
)
} }
\arguments{ \arguments{
\item{tab}{data frame obtained with tab_mark function.} \item{tab}{data frame obtained with tab_mark function.}
...@@ -15,8 +25,21 @@ mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5) ...@@ -15,8 +25,21 @@ mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5)
\item{hetero}{proportion of heterozygous individuals under which the marker is excluded.} \item{hetero}{proportion of heterozygous individuals under which the marker is excluded.}
\item{homo1X}{a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the highest expected proportion.}
\item{homo2X}{a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the lowest expected proportion.}
\item{heteroX}{a vector of two numbers. The lower and upper limits for the proportion of heterozygous individuals for markers on X chromosome.}
\item{na}{proportion of non-genotyped individuals above which the marker is excluded.} \item{na}{proportion of non-genotyped individuals above which the marker is excluded.}
} }
\description{ \description{
This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. The filter on genotype proportions applies to autosomes only (and not the chromosomes encoded as "X", "Y" and "M") This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype.
You can define these proportions thanks to the arguments of the function.
Specific function arguments can be used to handle genotype proportions for markers on X chromosome.
For autosomes, use the homo and hetero arguments or the pval argument. With homo and hetero, mark_prop() excludes markers if the proportion of homozygous or heterozygous individuals is inferior to the value assigned to each argument.
With pval, mark_prop() sorts markers with a Chi2 test. If the p-value of the test is significative and inferior to the value assigned to pval argument, the marker is excluded.
For X chromosomes, the arguments homo1X, homo2X and heteroX are vectors with two values that correspond to the lower and the upper limit of the proportion of each genotype.
homo1X corresponds to the homozygous genotype with the highest proportion while homo2X corresponds to the homozygous genotype with the lowest genotype. If a genotype must not be found, use c(0,0) as limits.
} }
No preview for this file type
No preview for this file type
...@@ -126,14 +126,11 @@ head(tab2) %>% print.data.frame() ...@@ -126,14 +126,11 @@ head(tab2) %>% print.data.frame()
### mark_prop ### mark_prop
The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 and we use `homo=0.1, hetero=0.1` so the function will exclude all markers with less than 10% of each genotype. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 and we use `homo=0.1, hetero=0.1` so the function will exclude all markers with less than 10% of each genotype. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. For chromosome X, we use the `homo=0.1, hetero=0.1`
```{r mark_prop_ex_homo} ```{r mark_prop_ex_homo}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
head(tab2) %>% print.data.frame() head(tab2) %>% print.data.frame()
``` ```
We could also use the `pval` argument which allows to exclude markers by performing a Chi2 test comparing observed distribution with Mendelian proportions. By using `pval=0.5` we would exclude all markers with a Chi2 p-value inferior to 0.05. However, for some markers, Chi2 approximation may be incorrect. We could also use the `pval` argument which allows to exclude markers by performing a Chi2 test comparing observed distribution with Mendelian proportions. By using `pval=0.5` we would exclude all markers with a Chi2 p-value inferior to 0.05. However, for some markers, Chi2 approximation may be incorrect.
...@@ -178,6 +175,10 @@ The cross object was saved in stuart. Here we can load it as well as the newmap ...@@ -178,6 +175,10 @@ The cross object was saved in stuart. Here we can load it as well as the newmap
```{r load_cross} ```{r load_cross}
library(qtl) library(qtl)
# stuart_cross <- read.cross(format="csv",file="../stuart_cross.csv",
# genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# save(stuart_cross,file="../stuart_cross.rda")
data(stuart_cross) data(stuart_cross)
summary(stuart_cross) summary(stuart_cross)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment