diff --git a/.Rproj.user/9DAE6990/pcs/source-pane.pper b/.Rproj.user/9DAE6990/pcs/source-pane.pper index 70829f67631c05a34ae267bf4377c69a6b3ef1c1..d3d70fa0c6a708df53d33b21eeb319f4067fd72c 100644 --- a/.Rproj.user/9DAE6990/pcs/source-pane.pper +++ b/.Rproj.user/9DAE6990/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : 1 + "activeTab" : 6 } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/persistent-state b/.Rproj.user/9DAE6990/persistent-state index cb9ff843b5c8465a7cdfef95a8b03af4ac69a49f..598ea69db312e503afab921bf885574c2cacf39b 100644 --- a/.Rproj.user/9DAE6990/persistent-state +++ b/.Rproj.user/9DAE6990/persistent-state @@ -1,6 +1,6 @@ build-last-errors="[]" build-last-errors-base-dir="~/Documents/PhD/stuart_package/stuart/" -build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary 
location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" +build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" 
compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" files.monitored-path="" find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX index 17c8889967998ca16e4014ec6166f3cd72d4cf66..c8854a683fb80042b68a0a40757c095808f8c78e 100644 --- a/.Rproj.user/9DAE6990/sources/prop/INDEX +++ b/.Rproj.user/9DAE6990/sources/prop/INDEX @@ -14,11 +14,14 @@ ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_allele.R="94A0A47C" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_estmap.R="122CE4C2" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_match.R="C03D9873" +~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_na.R="BC725065" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_poly.R="E392A021" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fmark_prop.R="65449E3B" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="5D74CC67" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="38BAAAF9" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="9A1DD653" +~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fman%2Fmark_na.Rd="E059577D" +~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fman%2Fmark_prop.Rd="A5177778" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="5BDF5DBA" ~%2FDocuments%2FPhD%2Fstuart_package%2Fstuart%2Fvignettes%2Fstuart.Rmd="4069DBDF" ~%2Fstuart_package%2Fstuart%2FDESCRIPTION="BEB7232" diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index f22ff9ee32511c218b85f952a5eeeb616fda2ed9..9a8495e543e3ce6c3f75c2c7691e3d8a244cee85 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -6,5 +6,7 @@ 
/Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/mark_prop.R="30FA9E8C" /Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/tab_mark.R="F0B2417" /Users/mariebourdon/Documents/PhD/stuart_package/stuart/R/write_rqtl.R="F07CE16C" +/Users/mariebourdon/Documents/PhD/stuart_package/stuart/man/mark_na.Rd="C8CA0D4F" +/Users/mariebourdon/Documents/PhD/stuart_package/stuart/man/mark_prop.Rd="893B273D" /Users/mariebourdon/Documents/PhD/stuart_package/stuart/vignettes/stuaRt.Rmd="DA6206CB" /Users/mariebourdon/Documents/PhD/stuart_package/stuart/vignettes/stuart.Rmd="54D793B9" diff --git a/NAMESPACE b/NAMESPACE index 31a3ca8a2c0f6a26bf60e6882269def288bec955..5670e8d4856b5206d1efe864125aaeaf90c26a11 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(geno_strains) export(mark_allele) export(mark_estmap) export(mark_match) +export(mark_na) export(mark_poly) export(mark_prop) export(tab_mark) diff --git a/R/mark_na.R b/R/mark_na.R new file mode 100755 index 0000000000000000000000000000000000000000..dab4744df497a05d0192da2e21294c2026f3c574 --- /dev/null +++ b/R/mark_na.R @@ -0,0 +1,31 @@ +#' @title Exclude markers depending on proportion of missing genotypes +#' +#' @description This function uses the data frame produced by the tab_mark function and fills the "exclude_na" column (1 = excluded, 0 = kept) for all the markers that have too many missing genotypes. +#' +#' @param tab data frame obtained with tab_mark function. +#' @param na proportion of non-genotyped individuals at or above which the marker is excluded. Default is 0.5. 
+#' +#' @import dplyr +#' @import tidyselect +#' +#' @export +#' + +#### mark_na #### +## excludes markers depending on proportion of missing genotypes +mark_na <- function(tab,na=0.5){ + #calculate total number of individuals genotyped for each marker + tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) + + #proportion of missing genotypes among all individuals (genotyped + missing) + #calculate proportion + tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) + + #exclusion: 1 if the proportion of missing genotypes is >= na, 0 otherwise + tab <- tab %>% + mutate(exclude_na=case_when(p_NA >= na ~ 1, + T ~ 0)) + + tab <- tab %>% select(-c(n_geno,p_NA)) + return(tab) +} diff --git a/man/mark_estmap.Rd b/man/mark_estmap.Rd index dc46ae8c47238b0fe567b264b5142451edfa7b67..bb64a2f9fa4e38c57a1b5c13c15ff0b7108c6b7f 100644 --- a/man/mark_estmap.Rd +++ b/man/mark_estmap.Rd @@ -4,7 +4,7 @@ \alias{mark_estmap} \title{Exclude markers depending on estimated genetic map} \usage{ -mark_estmap(tab, map, annot, d = 20, r = 10, n = 5) +mark_estmap(tab, map, annot, d = 20, r = 5, n = 5) } \arguments{ \item{tab}{data frame obtained with tab_mark function.} @@ -15,7 +15,7 @@ mark_estmap(tab, map, annot, d = 20, r = 10, n = 5) \item{d}{a value to identify groups of markers: d is the difference between calculated and known distance with the previous marker, used to identify a new group of markers. Default is 20.} -\item{r}{a value to identify groups of markers: r is the ratio between calculated and known distance with the previous marker, used to identify a new group of markers. Default is 10.} +\item{r}{a value to identify groups of markers: r is the ratio between calculated and known distance with the previous marker, used to identify a new group of markers. Default is 5.} \item{n}{a value to identify which group of markers must be removed: n is the maximum size of a group of markers: markers with incorrect recombination fraction must be isolated or in very small groups. 
Default is 5.} } diff --git a/man/mark_na.Rd b/man/mark_na.Rd new file mode 100644 index 0000000000000000000000000000000000000000..fa3b255610e31448dd2fb1dcf8f7d79515658f8c --- /dev/null +++ b/man/mark_na.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mark_na.R +\name{mark_na} +\alias{mark_na} +\title{Exclude markers depending on proportion of missing genotypes} +\usage{ +mark_na(tab, na = 0.5) +} +\arguments{ +\item{tab}{data frame obtained with tab_mark function.} + +\item{na}{proportion of non-genotyped individuals at or above which the marker is excluded. Default is 0.5.} +} +\description{ +This function uses the data frame produced by the tab_mark function and fills the "exclude_na" column (1 = excluded, 0 = kept) for all the markers that have too many missing genotypes. +} diff --git a/stuart_1.0.5.tar.gz b/stuart_1.0.5.tar.gz index a68b2bc0e4921a95683397dd1984122546f0c303..ffe005a89d01aa88b963831fc718ece038c1e233 100644 Binary files a/stuart_1.0.5.tar.gz and b/stuart_1.0.5.tar.gz differ diff --git a/vignettes/stuart.Rmd b/vignettes/stuart.Rmd index 2590190f62466abaca87ddf81c9f2e110815dcb3..13fb74af52072740a5b27233cafa45ddfb9d85be 100755 --- a/vignettes/stuart.Rmd +++ b/vignettes/stuart.Rmd @@ -103,10 +103,19 @@ summary(stuart_tab) Then we will use the different mark_* functions in order to filter the markers. +### mark_na + +First, we can use the `mark_na()` function to remove markers with a high proportion of missing genotypes in our F2 individuals. + +```{r mark_na} +tab2 <- mark_na(stuart_tab) +``` + + ### mark_match -First, we can use `mark_match()` function. Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. +Then, we can use the `mark_match()` function. 
Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. The `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change. ```{r mark_match} tab2 <- mark_match(stuart_tab,ref=strains)