diff --git a/.Rproj.user/9DAE6990/sources/prop/1416C0B5 b/.Rproj.user/9DAE6990/sources/prop/1416C0B5 index a6d7a30c22455e70e2ae615bad99a076580ac071..a3b2e9f1972ab5f437c3782dab851521fe0d6f46 100644 --- a/.Rproj.user/9DAE6990/sources/prop/1416C0B5 +++ b/.Rproj.user/9DAE6990/sources/prop/1416C0B5 @@ -1,4 +1,4 @@ { - "cursorPosition" : "24,47", - "scrollLine" : "2" + "cursorPosition" : "23,55", + "scrollLine" : "0" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/A609F054 b/.Rproj.user/9DAE6990/sources/prop/A609F054 index 7a73a41bfdf76d6f793007240d80983a52f15f97..5f0afe0648b6cd7813c3d9cd265fd84d876fa506 100644 --- a/.Rproj.user/9DAE6990/sources/prop/A609F054 +++ b/.Rproj.user/9DAE6990/sources/prop/A609F054 @@ -1,2 +1,4 @@ { + "cursorPosition" : "2,104", + "scrollLine" : "0" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 index e5eb4991d110bdccead8e28e67be8913da57431f..082b02e9e6bf90cc948fb06aced67605250d702c 100644 --- a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 +++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 @@ -1,5 +1,5 @@ { - "cursorPosition" : "106,18", + "cursorPosition" : "127,25", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "90" + "scrollLine" : "119" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 index 1182daeb619e55806e8083e2f024997371e73f0c..583c1f70b07438033c80b8e1b996e4db2d8d5b17 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 @@ -5,13 +5,15 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "387034705", + "hash" : "0", "id" : "42D37312", "lastKnownWriteTime" : 1622465534, "last_content_update" : 1622465534, "path" : "~/stuart_package/stuart/R/mark_prop.R", "project_path" : "R/mark_prop.R", "properties" : { + "cursorPosition" : "2,104", + "scrollLine" : "0" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 index ad317c9a6404f708108765fc1086dcf5b8cb6ac1..bf870f4dc334ee3de6a448d1ad982d8d44936526 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 @@ -5,16 +5,16 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "0", + "hash" : "3134659970", "id" : "96AB3736", - "lastKnownWriteTime" : 1622645218, - "last_content_update" : 1622645218034, + "lastKnownWriteTime" : 1622647884, + "last_content_update" : 1622647884538, "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", "project_path" : "vignettes/stuaRt.Rmd", "properties" : { - "cursorPosition" : "106,18", + "cursorPosition" : "127,25", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "90" + "scrollLine" : "119" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents index e80cec19543afb20b7ca13c7553a96563d2a8641..2dfb70edac53e76fd1e666150ea7dd15d9442f65 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents @@ -111,21 +111,21 @@ head(tab2) The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. -```{r mark_prop ex,eval=F} +```{r mark_prop ex} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) ``` Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. -```{r mark_allele,eval=F} +```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() ``` Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains. -```{r mark_allele-strains,eval=F} +```{r mark_allele-strains} strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) ``` diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 index ab6c927aeb68ad6a84ea9ff826873d7ecbe2d39d..66bd8e587e07c451e45a10606ba2cf11b8bd1fa6 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 @@ -5,15 +5,15 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "2531250629", + "hash" : "0", "id" : "B86C81D8", - "lastKnownWriteTime" : 1622462353, - "last_content_update" : 1622462353, + "lastKnownWriteTime" : 1622646597, + "last_content_update" : 1622646597951, "path" : "~/stuart_package/stuart/R/mark_allele.R", "project_path" : "R/mark_allele.R", "properties" : { - "cursorPosition" : "24,47", - "scrollLine" : "2" + "cursorPosition" : "23,55", + "scrollLine" : "0" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents index b679916993328de1dfd7b34b3325bf8066424b8e..f83d3ac4ea31d973b58101efc2b7f0862fb8714a 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents @@ -22,21 +22,21 @@ mark_allele <- function(tab,ref,par1,par2){ #join tab and ref genotypes ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) - tab <- full_join(tab,ref,by=c("SNP.Name"="marker")) + tab <- full_join(tab,ref,by=c("marker"="marker")) #function core - tab <- tab %>% mutate(exclude_allele = case_when(is.na(Allele_2)==FALSE & + tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==FALSE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==FALSE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==TRUE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) & (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==TRUE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - (Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1, - is.na(Allele_2)==TRUE & + (allele_1!=!!sym(par1) | allele_1!=!!sym(par2)) ~ 1, + is.na(allele_2)==TRUE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1, + allele_1!=!!sym(par1) & allele_1!=!!sym(par2) ~ 1, T ~ 0) ) diff --git a/R/mark_allele.R b/R/mark_allele.R index b679916993328de1dfd7b34b3325bf8066424b8e..f83d3ac4ea31d973b58101efc2b7f0862fb8714a 100755 --- a/R/mark_allele.R +++ b/R/mark_allele.R @@ -22,21 +22,21 @@ mark_allele <- function(tab,ref,par1,par2){ #join tab and ref genotypes ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) - tab <- full_join(tab,ref,by=c("SNP.Name"="marker")) + tab <- full_join(tab,ref,by=c("marker"="marker")) #function core - tab <- tab %>% mutate(exclude_allele = case_when(is.na(Allele_2)==FALSE & + tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==FALSE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==FALSE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==TRUE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) & (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==TRUE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - (Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1, - is.na(Allele_2)==TRUE & + (allele_1!=!!sym(par1) | allele_1!=!!sym(par2)) ~ 1, + is.na(allele_2)==TRUE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1, + allele_1!=!!sym(par1) & allele_1!=!!sym(par2) ~ 1, T ~ 0) ) diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz index 37a19187856fd1245069768322f1ef5defff879f..c0f464039673569fe02a33fa8028029a57a5ce7b 100644 Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index e80cec19543afb20b7ca13c7553a96563d2a8641..2dfb70edac53e76fd1e666150ea7dd15d9442f65 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -111,21 +111,21 @@ head(tab2) The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. -```{r mark_prop ex,eval=F} +```{r mark_prop ex} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) ``` Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. -```{r mark_allele,eval=F} +```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() ``` Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains. -```{r mark_allele-strains,eval=F} +```{r mark_allele-strains} strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) ```