Commit 69e64268 authored by Marie Bourdon's avatar Marie Bourdon
Browse files

add modifs before release

parent 0fdd0170
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
#stock colnames to join
names <- colnames(tab)
#stop if cross != "F2" or "N2"
if(!cross %in% c("F2","N2")){
stop("Cross must be F2 or N2")
}
#stop of homo&hetero or pval not specified
if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
stop("Arguments homo and hetero or argument pval must be specified")
}
#stop with prop of na
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1,
T ~ exclude_prop
))
}
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
print(tab)
tab <- tab %>% select(all_of(names))
print(tab)
}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
head(tab2)
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
#stock colnames to join
names <- colnames(tab)
#stop if cross != "F2" or "N2"
if(!cross %in% c("F2","N2")){
stop("Cross must be F2 or N2")
}
#stop of homo&hetero or pval not specified
if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
stop("Arguments homo and hetero or argument pval must be specified")
}
#stop with prop of na
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1,
T ~ exclude_prop
))
}
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
print(tab)
tab <- tab %>% select(all_of(names),exclude_prop)
print(tab)
}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#stock colnames to join
names <- colnames(tab)
#stop if cross != "F2" or "N2"
if(!cross %in% c("F2","N2")){
stop("Cross must be F2 or N2")
}
#stop of homo&hetero or pval not specified
if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
stop("Arguments homo and hetero or argument pval must be specified")
}
#stop with prop of na
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1,
T ~ exclude_prop
))
}
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
print(tab)
tab <- tab %>% select(all_of(names),exclude_prop)
print(tab)
}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#stock colnames to join
names <- colnames(tab)
print(names)
#stop if cross != "F2" or "N2"
if(!cross %in% c("F2","N2")){
stop("Cross must be F2 or N2")
}
#stop of homo&hetero or pval not specified
if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
stop("Arguments homo and hetero or argument pval must be specified")
}
#stop with prop of na
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1,
T ~ exclude_prop
))
}
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
print(tab)
tab <- tab %>% select(all_of(names),exclude_prop)
print(tab)
}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 %>% filter(exclude_match==1)
tab2 <- mark_poly(tab2)
head(tab2)
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){
#calculate total number of individuals genotyped for each marker
tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
#stock colnames to join
names <- colnames(tab)
print(names)
#stop if cross != "F2" or "N2"
if(!cross %in% c("F2","N2")){
stop("Cross must be F2 or N2")
}
#stop of homo&hetero or pval not specified
if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
stop("Arguments homo and hetero or argument pval must be specified")
}
#stop with prop of na
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
T ~ 0))
#exclude with prop of homo/hetero
if(is.na(pval)==TRUE){
#calculate proportion of each genotype
tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
tab <- tab %>% mutate(p_HT = n_HT/n_geno)
tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))
tab <- tab %>%
mutate(exclude_prop=case_when(p_NA > na ~ 1,
cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1,
T ~ exclude_prop
))
}
#exclude with pval chisq.test
## NEED TO ADD THIS FILTER IF CROSS = N2
if(is.na(pval)==FALSE){
tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
full_join(.,tab,by=all_of(names))
tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1,
T ~ exclude_prop))
}
print(tab)
tab <- tab %>% select(all_of(names),exclude_prop)
print(tab)
}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
......@@ -510,3 +278,235 @@ View(genos)
View(genos)
View(genos)
View(test)
library(dplyr)
library(stuart)
test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","A"))
View(test)
test <- test %>% mutate(geno=x)
View(test)
test %>% mutate(geno = ifelse(geno==y,geno,"N"))
test <- test %>% mutate(geno = ifelse(geno==y,geno,"N"))
test <- test %>% mutate(geno = ifelse(geno==z,geno,"N"))
View(test)
test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","N"))
test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","N"))
test <- test %>% mutate(geno=x)
print(test)
test <- test %>% mutate(geno = ifelse(geno==y,geno,"N"))
print(test)
test <- test %>% mutate(geno = ifelse(geno==z,geno,"N"))
print(test)
geno_strains <- function(ref,geno,par1,par2,name1,name2){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#recode genotypes from 2 alleles to 1
geno <- geno %>% mutate_all(as.character)
geno <- geno %>% filter(id %in% c(par1,par2))
geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N",
allele_1 == allele_2 ~ allele_1,
allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H"))
geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno)
#create consensus
geno <- geno %>% mutate(parent1 = !!sym(par1[1]))
if(length(par1)!=1){
for(i in 2:length(par1)){
geno <- geno %>% mutate(parent1 = ifelse(parent1==!!sym(par1[i]),parent1,"N"))
}
}
geno <- geno %>% mutate(parent2 = !!sym(par2[1]))
if(length(par2)!=1){
for(i in 2:length(par2)){
geno <- geno %>% mutate(parent2 = ifelse(parent2==!!sym(par2[i]),parent2,"N"))
}
}
geno <- geno %>% select(marker,parent1,parent2)
colnames(geno) <- c("marker",name1,name2)
#merge with ref file
ref <- full_join(ref,geno,by=c("marker"="marker"))
return(ref)
}
data(genos)
summary(genos)
data(phenos)
summary(phenos)
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
data(stuart_tab)
summary(stuart_tab)
View(annot_mini)
ref <- tibble(marker = c("mark1","mark2","mark3","mark4","mark5","mark6"),
chr = c("1","1","2","2","3","X")
)
View(genos)
geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6"),
Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a",
"par1b","par1b","par1b","par1b","par1b","par1b",
"par1c","par1c","par1c","par1c","par1c","par1c",
"par2a","par2a","par2a","par2a","par2a","par2a",
"par2b","par2b","par2b","par2b","par2b","par2b",
"par2c","par2c","par2c","par2c","par2c","par2c"))
View(geno)
geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6"),
Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a",
"par1b","par1b","par1b","par1b","par1b","par1b",
"par1c","par1c","par1c","par1c","par1c","par1c",
"par2a","par2a","par2a","par2a","par2a","par2a",
"par2b","par2b","par2b","par2b","par2b","par2b",
"par2c","par2c","par2c","par2c","par2c","par2c"),
Allele1...Forward = c("A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","N","T","-",
"A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","N","T","-"),
Allele2...Forward = c("A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","N","T","-",
"A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","N","T","-"))
geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2")
library(dplyr)
library(tidyr)
geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2")
geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6",
"mark1","mark2","mark3","mark4","mark5","mark6"),
Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a",
"par1b","par1b","par1b","par1b","par1b","par1b",
"par1c","par1c","par1c","par1c","par1c","par1c",
"par2a","par2a","par2a","par2a","par2a","par2a",
"par2b","par2b","par2b","par2b","par2b","par2b",
"par2c","par2c","par2c","par2c","par2c","par2c"),
Allele1...Forward = c("A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","-","T","-",
"A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","-","T","-"),
Allele2...Forward = c("A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","-","T","-",
"A","A","A","A","A","A",
"A","A","A","A","T","T",
"A","A","A","-","T","-"))
geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2")
devtools::build(path=".",vignettes=FAKSE)
rlang::last_error()
devtools::build(path=".",vignettes=FAKSE)
devtools::build(path=".",vignettes=FALSE)
View(genos)
genos %>% filter(SNP.Name %in% c("S3J010123784","SAH010136363","S2H010137098")) %>% filter(Sample.ID %in% c("StrainsA_1","StrainsA_2","StrainsB_1","StrainsB_2"))
genos %>% filter(Sample.ID %in% c("StrainsA_1","StrainsA_2","StrainsB_1","StrainsB_2")) %>% filter(SNP.Name %in% c("S3J010123784","SAH010136363","S2H010137098"))
geno_strains <- function(ref,geno,par1,par2,name1,name2){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#recode genotypes from 2 alleles to 1
geno <- geno %>% mutate_all(as.character)
geno <- geno %>% filter(id %in% c(par1,par2))
geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N",
allele_1 == allele_2 ~ allele_1,
allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H"))
geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno)
#create consensus
geno <- geno %>% mutate(parent1 = !!sym(par1[1]))
if(length(par1)!=1){
for(i in 2:length(par1)){
geno <- geno %>% mutate(parent1 = ifelse(parent1==!!sym(par1[i]),parent1,"N"))
}
}
geno <- geno %>% mutate(parent2 = !!sym(par2[1]))
if(length(par2)!=1){
for(i in 2:length(par2)){
geno <- geno %>% mutate(parent2 = ifelse(parent2==!!sym(par2[i]),parent2,"N"))
}
}
print(geno)
geno <- geno %>% select(marker,parent1,parent2)
colnames(geno) <- c("marker",name1,name2)
#merge with ref file
ref <- full_join(ref,geno,by=c("marker"="marker"))
return(ref)
}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(dplyr)
library(stuart)
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
data(genos)
summary(genos)
data(phenos)
summary(phenos)
strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2")
devtools::build(path=".",vignettes=FALSE)
devtools::build_vignettes()
devtools::build_manual(path=".")
devtools::build(path=".",vignettes=FALSE)
devtools::build_vignettes()
devtools::build_manual(path=".")
devtools::build(path=".",vignettes=FALSE)
install.packages("KableExtra")
install.packages("kableExtra")
install.packages("kableExtra")
library(dplyr)
library(stuart)
print.data.frame(tab2 %>% filter(exclude_match==1))
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(dplyr)
library(stuart)
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
data(genos)
summary(genos)
data(phenos)
summary(phenos)
strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2")
print.data.frame(head(strains))
genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2"))
data(stuart_tab)
summary(stuart_tab)
tab2 <- mark_match(stuart_tab,ref=strains)
print.data.frame(tab2 %>% filter(exclude_match==1))
tab2 <- mark_poly(tab2)
head(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
print.data.frame(head(tab2))
tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame()
tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame()
head(tab2) %>% print.data.frame()
head(tab2) %>% print.data.frame()
rqtl_file[1:10,1:9] %>% print.data.frame()
devtools::build(path=".",vignettes=FALSE)
devtools::build_vignettes()
devtools::build_manual(path=".")
remove.packages("stuart")
devtools::install_gitlab(repo="mouselab/stuart",host="gitlab.pasteur.fr")
library(stuart)
View(annot_mini)
annot_mini %>% filter(is.na(chr)==FALSE) %>% filter(is.na(cM_cox)==TRUE)
devtools::build(path=".",vignettes=FALSE)
library(stuart)
write_rqtl
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment