diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index c1fb968de0b842a57eec7c888ae94bf101d95354..ae1bda6f20d3cfc1b30c7d64046922ffac4813bb 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -13,3 +13,5 @@ /home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220" /home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1" /home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2" +/mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="6CD0FCD2" +/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="E9861925" diff --git a/R/mark_estmap.R b/R/mark_estmap.R index cac3eee8cdd12b1988b07b1e3269a3f77171e3aa..cc159876b5bb44709a32d69544736ce3d25a6480 100644 --- a/R/mark_estmap.R +++ b/R/mark_estmap.R @@ -1,6 +1,7 @@ #' @title Exclude markers depending on estimated genetic map #' #' @description This function uses a "map" object producted by qtl::est.map() and returns markers which have a high recombination proportion with adjacent markers. +#' @param tab data frame obtained with tab_mark function. #' @param map object of class map #' @param dist a value to identify markers with odd recombination fractions. Default is 100. #' @@ -8,9 +9,8 @@ #' #' @export #' - #### mark_estmap #### -mark_estmap <- function(map,dist=100){ +mark_estmap <- function(tab,map,dist=100){ #initialize variables marker <- c() chr <- c() @@ -44,5 +44,8 @@ mark_estmap <- function(map,dist=100){ T ~ 0)) bad_markers <- tab_map %>% filter(exclude == 1) %>% pull(marker) - return(bad_markers) + + tab <- tab %>% mutate(exclude_estmap = case_when(marker %in% bad_markers ~ 1, + T ~ 0)) + return(tab) } diff --git a/R/write_rqtl.R b/R/write_rqtl.R index d74a85b38ad95d200fa0cb44e2916e07b6c58d0d..314e591bc2a1d8c9c041ddcddcbb847e7168c727 100755 --- a/R/write_rqtl.R +++ b/R/write_rqtl.R @@ -44,6 +44,10 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,par_N=TRUE,prefix,pos,path=N tab <- tab %>% filter(exclude_allele==0) } + if("exclude_estmap" %in% colnames(tab)){ + tab <- tab %>% filter(exclude_estmap==0) + } + #filter genotypes for non excluded markers in geno file geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) diff --git a/man/mark_estmap.Rd b/man/mark_estmap.Rd index 9accd1c32b4c0dc50820ca37d98c88c20e095ef6..00f99654285829a122a73fd1435b0c49d0ad676a 100644 --- a/man/mark_estmap.Rd +++ b/man/mark_estmap.Rd @@ -4,9 +4,11 @@ \alias{mark_estmap} \title{Exclude markers depending on estimated genetic map} \usage{ -mark_estmap(map, dist = 100) +mark_estmap(tab, map, dist = 100) } \arguments{ +\item{tab}{data frame obtained with tab_mark function.} + \item{map}{object of class map} \item{dist}{a value to identify markers with odd recombination fractions. Default is 100.} diff --git a/stuart_1.0.3.9000.pdf b/stuart_1.0.3.9000.pdf index d7150624bb958dc837d557326ef1722caf6c4d8b..ed8e7cd5a185118c9a8360831830f3fdd6ed7372 100644 Binary files a/stuart_1.0.3.9000.pdf and b/stuart_1.0.3.9000.pdf differ diff --git a/stuart_1.0.3.9000.tar.gz b/stuart_1.0.3.9000.tar.gz index 15f523c76277f3633e67df71fdaf3a48cd6fabc1..45c65505370fbc7e2718c0197aad6d68dbfa9149 100644 Binary files a/stuart_1.0.3.9000.tar.gz and b/stuart_1.0.3.9000.tar.gz differ diff --git a/vignettes/stuart.Rmd b/vignettes/stuart.Rmd index 1c1288bc081f5993f04503f76280244a8dc396b4..ede4f6af31fd5a30ad77f0008bedf85e819e5104 100755 --- a/vignettes/stuart.Rmd +++ b/vignettes/stuart.Rmd @@ -197,39 +197,31 @@ plotMap(stuart_cross,stuart_newmap,shift=TRUE) It seems to be still 2 problematic markers. We can identify them with the `mark_estmap()`function. ```{r mark_estmap} -bad_markers <- mark_estmap(map=stuart_newmap,dist=100) -print(bad_markers) +tab2 <- mark_estmap(tab=tab2,map=stuart_newmap,dist=100) +tab2 %>% filter(exclude_estmap == 1) %>% print.data.frame() ``` - -### Exclusion of last problematic markers - -Let's focus on these markers. - -```{r bad_markers_tab} -tab2 %>% filter(marker %in% bad_markers) -``` - We can see that these two markers have correct proportions of each genotypes so they were not excluded. Let's see the alleles of the parental strains for these markers. + ```{r bad_markers_annot} -strains %>% filter(marker %in% bad_markers) +strains %>% filter(marker %in% c("S6J010381992","S6J205609960")) ``` We can see that for these markers, one of the two parents has a missing genotype. In the cases where one parent has a missing of a heterozygous genotype, the `mark_allele()` function does not by default exclude the marker if the observed allele of the genotyped parent correspond to one of the alleles observed in the crossed individuals. +### Exclusion of last problematic markers + To avoid the issues with these situations, you can use the `parNH=FALSE` argument in the `mark_allele()` function. This will exclude all the markers with one parent with a missing or a heterozygous genotype. However, this will exclude a very large number of markers. More than 1000 are in this situation in this cross, but about 700 were excluded with other filters. If we exclude all these markers with `parNH=FALSE` we would lose about 300 markers for only 2 problematic ones. We rather advise you to keep these markers and identify possible problematic markers with `mark_estmap()` and exlude them. You have two possibilities to do so. First, you can drop these markers in the cross object with `qtl::drop.markers()`: ```{r drop_markers} -newcross <- drop.markers(cross=stuart_cross,markers=bad_markers) +newcross <- drop.markers(cross=stuart_cross,markers=c("S6J010381992","S6J205609960")) ``` Or if you prefere to have a correct CSV object that can always be used for QTL analysis, you can delete these markers in the marker tab and create a new cross file with `write_rqtl()`: ```{r} -tab2 <- tab2 %>% filter(!marker %in% bad_markers) - good_rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") ```