Commit 546d2317 authored by Marie Bourdon's avatar Marie Bourdon
Browse files

mark_allele option parNH

parent 2389d668
\documentclass[letterpaper]{book}
\usepackage[times,inconsolata,hyper]{Rd}
\usepackage{makeidx}
\usepackage[latin1]{inputenc} % @SET ENCODING@
% \usepackage{graphicx} % @USE GRAPHICX@
\makeindex{}
\begin{document}
\chapter*{}
\begin{center}
{\textbf{\huge \R{} documentation}} \par\bigskip{{\Large of \file{stuart}}}
\par\bigskip{\large \today}
\end{center}
\documentclass[letterpaper]{book}
\usepackage[times,inconsolata,hyper]{Rd}
\usepackage{makeidx}
\usepackage[latin1]{inputenc} % @SET ENCODING@
% \usepackage{graphicx} % @USE GRAPHICX@
\makeindex{}
\begin{document}
\chapter*{}
\begin{center}
{\textbf{\huge \R{} documentation}} \par\bigskip{{\Large of \file{stuaRt}}}
\par\bigskip{\large \today}
\end{center}
# #stop if no value for "homo" for F2 cross
# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){
# stop("Arguments homo and hetero must be specified for F2 crosses")
# }
#
# #stop if no value for "homo" and "hetero" for N2 cross
# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){
# stop("Arguments homo and hetero must be specified for N2 crosses")
# }
# #exclude markers according to proportion of na
# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na
# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross
# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross
# T ~ 0))
# tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno))
# return(tab)
}
test_tab <- tab2[1:20,]
tab3 <- mark_prop(test_tab,cross="F2",pval=0.05)
......@@ -510,3 +494,19 @@ devtools::build_vignettes()
devtools::build_vignettes()
devtools::build_vignettes()
devtools::build_vignettes()
demo()
install.packages("devtools")
install.packages("usethat")
install.packages("raportools")
install.packages("rapportools")
install.packages("roxygen2")
install.packages("testthat")
library(stuart)
library(dplyr)
library(stuart)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(dplyr)
library(stuart)
{"chunk_definitions":[{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvrfzlkmsnd5m","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"c0s26bf1pu5ys","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cxuyx1brxodqv","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"ccbr8kr5rmfiz","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"ceekbogybpgbx","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"cgfik6q17v45t","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4gkr1xb8o1a9","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cp87d27x07m61","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":186,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1623065923,"working_dir":null}
\ No newline at end of file
{"chunk_definitions":[{"chunk_id":"culdjhv7njxmo","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true}],"doc_write_time":1623691782}
\ No newline at end of file
{"chunk_definitions":[{"chunk_id":"culdjhv7njxmo","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-3"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvrfzlkmsnd5m","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"c0s26bf1pu5ys","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cxuyx1brxodqv","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"ccbr8kr5rmfiz","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"ceekbogybpgbx","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"cgfik6q17v45t","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4gkr1xb8o1a9","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cp87d27x07m61","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1623085296,"working_dir":null}
\ No newline at end of file
{"chunk_definitions":[{"chunk_id":"culdjhv7njxmo","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true}],"doc_write_time":1623691782}
\ No newline at end of file
"0","library(dplyr)"
"2","
Attachement du package : ‘dplyr’
"
"2","The following objects are masked from ‘package:stats’:
filter, lag
"
"2","The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
"
"0","library(stuart)"
"0","knitr::opts_chunk$set("
"0"," collapse = TRUE,"
"0"," comment = ""#>"""
"0",")"
/Users/mariebourdon/Documents/PhD/stuart_R/stuart/vignettes/stuaRt.Rmd="C3675061"
/Users/mariebourdon/stuart_package/stuart_old/R/mark_prop.R="5087875"
/home/marie/Documents/stuart_package/stuart/R/mark_prop.R="19C6446D"
/home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="D25FAC55"
/home/marie/Documents/stuart_package/stuart/README.Rmd="C395B1B3"
/home/marie/Documents/stuart_package/stuart/README.md="8BBA9900"
/home/marie/Documents/stuart_package/stuart/vignettes/stuaRt.Rmd="007031F6"
doc
Meta
.Rproj.user
/doc/
/Meta/
Package: stuart
Title: stuart
Version: 1.0.0
Version: 1.0.2
Authors@R:
person(given = "Marie",
family = "Bourdon",
......
#' Data frame with miniMUGA genotyping of F2 individuals and parental strains
#'
#' A dataset containing the genotypes of 176 F2 individuals
#' A dataset containing the genotypes of 176 F2 individuals and 4 individuals of 2 laboratory strains
#'
#' @format A data frame with 2002493 observations of 11 variables
#' @format A data frame with 1957993 observations of 11 variables
"genos"
......@@ -10,7 +10,7 @@
#'
#' @export
#'
mark_allele <- function(tab,ref,par1,par2){
mark_allele <- function(tab,ref,par1,par2,parNH=TRUE){
#markers of ref df as characters
ref$marker <- as.character(ref$marker)
......@@ -24,8 +24,11 @@ mark_allele <- function(tab,ref,par1,par2){
ref <- ref %>% select(marker,!!sym(par1),!!sym(par2))
tab <- full_join(tab,ref,by=c("marker"="marker"))
print(parNH)
#function core
tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE &
tab <- tab %>% mutate(exclude_allele = case_when(parNH==FALSE &
(!!sym(par1) == "N" | !!sym(par2) == "N" | !!sym(par1) == "H" | !!sym(par2) == "H") ~ 1,
is.na(allele_2)==FALSE &
!!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" &
((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1,
is.na(allele_2)==FALSE &
......@@ -40,6 +43,8 @@ mark_allele <- function(tab,ref,par1,par2){
T ~ 0)
)
print(tab)
tab <- tab %>% select(-c(!!sym(par1),!!sym(par2)))
return(tab)
......
#' @title Exclude markers depending on proportions of homo/hetorozygous
#'
#' @description uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function.
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function.
#' @param tab data frame obtained with tab_mark function.
#' @param cross F2 or N2.
#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2.
......@@ -15,17 +15,12 @@
#### mark_prop ####
## excludes markers depending on proportions of homo/hetorozygous
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
#stock colnames to join
names <- colnames(tab)
print(names)
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#stop if cross != "F2" or "N2"
......@@ -41,12 +36,21 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#stop with prop of na
#calculate proportion
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#stop with prop of homo/hetero
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
......@@ -55,19 +59,19 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
))
}
#stop with pval chisq.test
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=names)
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
tab <- tab %>% select(names)
return(tab)
print(tab)
tab <- tab %>% select(all_of(names),exclude_prop)
print(tab)
}
......@@ -20,7 +20,7 @@
#'
#### write_rqtl ####
## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,par_N=TRUE,prefix,pos,path=NA){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
......@@ -95,7 +95,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
!!sym(par1)%in%c("H","N") ~ "0",
###homozygous 2
(!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) &
!!sym(par2)%in%c("H","N") ~ "2",
!!sym(par2)%in%c("H","N") ~ "2"
)
)
......
......@@ -22,11 +22,19 @@ stuart is a R package used to analyze whole genome genotyping results of animals
## Installation
You can install the released version of stuart from GitLab
You can install the released version of stuart from GitLab, by dowloading the latest tar.gz file and installing it with:
``` r
```{r}
install.packages(path_to/stuart_X.Y.Z.tar.gz, repos = NULL, type="source")
```
Or directly in R with:
```{r}
devtools::install_gitlab(repo="mouselab/stuart",host="gitlab.pasteur.fr")
```
## Example
To see an example of the use of stuart package with miniMUGA array, consult the package vignette.
......@@ -5,12 +5,12 @@
\alias{genos}
\title{Data frame with miniMUGA genotyping of F2 individuals and parental strains}
\format{
A data frame with 2002493 observations of 11 variables
A data frame with 1957993 observations of 11 variables
}
\usage{
genos
}
\description{
A dataset containing the genotypes of 176 F2 individuals
A dataset containing the genotypes of 176 F2 individuals and 4 individuals of 2 laboratory strains
}
\keyword{datasets}
......@@ -4,7 +4,7 @@
\alias{mark_allele}
\title{Exclude markers that have different alleles in the individuals of the cross and in parental strains}
\usage{
mark_allele(tab, ref, par1, par2)
mark_allele(tab, ref, par1, par2, parNH = TRUE)
}
\arguments{
\item{tab}{data frame obtained with tab_mark function}
......
......@@ -4,7 +4,7 @@
\alias{mark_prop}
\title{Exclude markers depending on proportions of homo/hetorozygous}
\usage{
mark_prop(tab, cross, homo = NA, hetero = NA, na = 0.5)
mark_prop(tab, cross, homo = NA, hetero = NA, pval = NA, na = 0.5)
}
\arguments{
\item{tab}{data frame obtained with tab_mark function.}
......@@ -18,5 +18,5 @@ mark_prop(tab, cross, homo = NA, hetero = NA, na = 0.5)
\item{na}{proportion of non-genotyped individuals above which the marker is excluded.}
}
\description{
uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function.
This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function.
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment