diff --git a/.Rhistory b/.Rhistory index 5ebed94a3b6e53e1e51b639c2845eaa8fc1d3c3a..b96cf5cd25c4bed19ee7198d1c47811f54ca1931 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,12 +1,18 @@ -print(tab) -} -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +devtools::build_manual(path=".") +remove.packages("stuart") +devtools::install_gitlab(repo="mouselab/stuart",host="gitlab.pasteur.fr") +library(stuart) +View(annot_mini) +annot_mini %>% filter(is.na(chr)==FALSE) %>% filter(is.na(cM_cox)==TRUE) +devtools::build(path=".",vignettes=FALSE) +library(stuart) +write_rqtl +chisq.test(c(20,23),p=c(0.5,0.5)) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ -#calculate total number of individuals genotyped for each marker -tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) #stock colnames to join names <- colnames(tab) -print(names) +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -37,22 +43,37 @@ T ~ exclude_prop #exclude with pval chisq.test ## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) +} else if(cross=="N2"){ +ab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} } -print(tab) tab <- tab %>% select(all_of(names),exclude_prop) -print(tab) +return(tab) } -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +test <- tibble(marker=c("mark1","mark2","mark3","mark4"),) +library(tidyverse) +test <- tibble(marker=c("mark1","mark2","mark3","mark4"),allele_1=c(A,G,T,C),allele_2=c(T,T,A,G),n_HM1=c(48,45,1,26),n_HM2=c(0,0,0,0),n_HT=c(52,55,49,74)) +test <- tibble(marker=c("mark1","mark2","mark3","mark4"),allele_1=c("A","G","T","C"),allele_2=c("T","T","A","G"),n_HM1=c(48,45,1,26),n_HM2=c(0,0,0,0),n_HT=c(52,55,49,74)) +View(test) +mark_prop(test,cross="N2",pval=0.05) +test <- tibble(marker=c("mark1","mark2","mark3","mark4"),allele_1=c("A","G","T","C"),allele_2=c("T","T","A","G"),n_HM1=c(48,45,1,26),n_HM2=c(0,0,0,0),n_HT=c(52,55,49,74),n_NA=c(0,0,0,0)) +mark_prop(test,cross="N2",pval=0.05) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #stock colnames to join names <- colnames(tab) -print(names) #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) #stop if cross != "F2" or "N2" @@ -81,25 +102,205 @@ cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) +#exclude with pval chisq.test +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% +chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} +} +tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) +} +mark_prop(test,cross="N2",pval=0.05) +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#stock colnames to join +names <- colnames(tab) +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) +#stop if cross != "F2" or "N2" +if(!cross %in% c("F2","N2")){ +stop("Cross must be F2 or N2") +} +#stop of homo&hetero or pval not specified +if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ +stop("Arguments homo and hetero or argument pval must be specified") } +#stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +T ~ 0)) +print(tab) +#exclude with prop of homo/hetero +if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, +cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, +T ~ exclude_prop +)) #exclude with pval chisq.test -## NEED TO ADD THIS FILTER IF CROSS = N2 -if(is.na(pval)==FALSE){ +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} } +tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) +} +mark_prop(test,cross="N2",pval=0.05) +mark_prop(test,cross="N2",homo=0.1,hetero=0.1) +mark_prop(test,cross="F2",homo=0.1,hetero=0.1) +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#stock colnames to join +names <- colnames(tab) +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) +#stop if cross != "F2" or "N2" +if(!cross %in% c("F2","N2")){ +stop("Cross must be F2 or N2") +} +#stop of homo&hetero or pval not specified +if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ +stop("Arguments homo and hetero or argument pval must be specified") +} +#stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +T ~ 0)) print(tab) +#exclude with prop of homo/hetero +if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, +cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, +T ~ exclude_prop +)) +#exclude with pval chisq.test +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% +chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +print(tab) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} +} tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) +} +mark_prop(test,cross="F2",pval=0.05) +mark_prop(test,cross="N2",pval=0.05) +View(test) +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#stock colnames to join +names <- colnames(tab) +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) +#stop if cross != "F2" or "N2" +if(!cross %in% c("F2","N2")){ +stop("Cross must be F2 or N2") +} +#stop of homo&hetero or pval not specified +if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ +stop("Arguments homo and hetero or argument pval must be specified") +} +#stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +T ~ 0)) print(tab) +#exclude with prop of homo/hetero +if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, +cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, +T ~ exclude_prop +)) +print(names) +#exclude with pval chisq.test +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% +chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +print(tab) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) } -tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) -tab2 <- mark_poly(tab2) -head(tab2) +} +tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) +} +mark_prop(test,cross="N2",pval=0.05) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #stock colnames to join names <- colnames(tab) @@ -120,6 +321,7 @@ tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) +print(tab) #exclude with prop of homo/hetero if(is.na(pval)==TRUE){ #calculate proportion of each genotype @@ -132,320 +334,147 @@ cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) -} #exclude with pval chisq.test -## NEED TO ADD THIS FILTER IF CROSS = N2 -if(is.na(pval)==FALSE){ +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) -} +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) print(tab) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} +} tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) +} +mark_prop(test,cross="N2",pval=0.05) +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) +#stop if cross != "F2" or "N2" +if(!cross %in% c("F2","N2")){ +stop("Cross must be F2 or N2") +} +#stop of homo&hetero or pval not specified +if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ +stop("Arguments homo and hetero or argument pval must be specified") +} +#stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +T ~ 0)) +print(tab) +#stock colnames to join +names <- colnames(tab) +print(names) +#exclude with prop of homo/hetero +if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, +cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, +T ~ exclude_prop +)) +#exclude with pval chisq.test +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% +chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) print(tab) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) } -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -devtools::build(path=".",vignettes=FALSE) -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) -library(dplyr) -library(stuart) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(genos) -summary(genos) -data(phenos) -summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -data(stuart_tab) -summary(stuart_tab) -tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) -tab2 <- mark_poly(tab2) -head(tab2) -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) -library(dplyr) -library(stuart) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(genos) -summary(genos) -data(phenos) -summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -data(stuart_tab) -summary(stuart_tab) -View(stuart_tab) -tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) -View(tab2) -tab2 <- mark_poly(tab2) -head(tab2) -View(tab2) -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -View(tab2) -library(stuart) -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) -library(dplyr) -library(stuart) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(genos) -summary(genos) -data(phenos) -summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -data(stuart_tab) -summary(stuart_tab) -tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) -tab2 <- mark_poly(tab2) -head(tab2) -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) -View(tab2) -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) -View(tab2) -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) -rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") -rqtl_file[1:10,1:7] -devtools::build_vignettes() -devtools::build_manual() -build_manual -devtools::build_manual -Rd2pdf -tools::texi2pdf -devtools::build_manual() -devtools::build(path=".",vignettes=FALSE,manual=TRUE) -devtools::build_manual() -Rd2pdf() -Rd2pdf -install.packages("RdUtils") -RdUtils -RdUtils() -system("R CMD Rd2pdf stuart") -system("R CMD Rd2pdf stuaRt") -getwd() -cd .. -setwd("~/Documents/stuart_package/stuart") -setwd("~/Documents/stuart_package/stuart") -setwd("~/Documents/stuart_package") -system("R CMD Rd2pdf stuaRt") -system("R CMD Rd2pdf stuart") -system("R CMD Rd2pdf stuart") -setwd("~/Documents/stuart_package/stuart") -devtools::build_manual() -getwd() -devtools::build_manual(path=".") -devtools::build_manual(path=".") -View(genos) -devtools::build(path=".",vignettes=FALSE) -devtools::build_vignettes() -devtools::build_manual(path=".") -devtools::build_manual(path=".") -devtools::build(path=".",vignettes=FALSE) -devtools::build_manual(path=".") -roxygen2::roxygenise() -devtools::build_manual(path=".") -library(dplyr) -library(stuart) -test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","A")) -View(test) -View(test) -View(test) -View(test) -View(rqtl_file) -View(annot_mini) -View(genos) -View(genos) -View(genos) -View(test) -library(dplyr) -library(stuart) -test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","A")) -View(test) -test <- test %>% mutate(geno=x) -View(test) -test %>% mutate(geno = ifelse(geno==y,geno,"N")) -test <- test %>% mutate(geno = ifelse(geno==y,geno,"N")) -test <- test %>% mutate(geno = ifelse(geno==z,geno,"N")) -View(test) -test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","N")) -test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","N")) -test <- test %>% mutate(geno=x) -print(test) -test <- test %>% mutate(geno = ifelse(geno==y,geno,"N")) -print(test) -test <- test %>% mutate(geno = ifelse(geno==z,geno,"N")) -print(test) -geno_strains <- function(ref,geno,par1,par2,name1,name2){ -#rename df columns -geno <- geno %>% rename("marker"=1, -"id"=2, -"allele_1"=3, -"allele_2"=4) -#recode genotypes from 2 alleles to 1 -geno <- geno %>% mutate_all(as.character) -geno <- geno %>% filter(id %in% c(par1,par2)) -geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", -allele_1 == allele_2 ~ allele_1, -allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) -geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) -#create consensus -geno <- geno %>% mutate(parent1 = !!sym(par1[1])) -if(length(par1)!=1){ -for(i in 2:length(par1)){ -geno <- geno %>% mutate(parent1 = ifelse(parent1==!!sym(par1[i]),parent1,"N")) -} -} -geno <- geno %>% mutate(parent2 = !!sym(par2[1])) -if(length(par2)!=1){ -for(i in 2:length(par2)){ -geno <- geno %>% mutate(parent2 = ifelse(parent2==!!sym(par2[i]),parent2,"N")) -} -} -geno <- geno %>% select(marker,parent1,parent2) -colnames(geno) <- c("marker",name1,name2) -#merge with ref file -ref <- full_join(ref,geno,by=c("marker"="marker")) -return(ref) } -data(genos) -summary(genos) -data(phenos) -summary(phenos) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(stuart_tab) -summary(stuart_tab) -View(annot_mini) -ref <- tibble(marker = c("mark1","mark2","mark3","mark4","mark5","mark6"), -chr = c("1","1","2","2","3","X") -) -View(genos) -geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6"), -Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a", -"par1b","par1b","par1b","par1b","par1b","par1b", -"par1c","par1c","par1c","par1c","par1c","par1c", -"par2a","par2a","par2a","par2a","par2a","par2a", -"par2b","par2b","par2b","par2b","par2b","par2b", -"par2c","par2c","par2c","par2c","par2c","par2c")) -View(geno) -geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6"), -Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a", -"par1b","par1b","par1b","par1b","par1b","par1b", -"par1c","par1c","par1c","par1c","par1c","par1c", -"par2a","par2a","par2a","par2a","par2a","par2a", -"par2b","par2b","par2b","par2b","par2b","par2b", -"par2c","par2c","par2c","par2c","par2c","par2c"), -Allele1...Forward = c("A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","N","T","-", -"A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","N","T","-"), -Allele2...Forward = c("A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","N","T","-", -"A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","N","T","-")) -geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2") -library(dplyr) -library(tidyr) -geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2") -geno <- tibble(SNP.Name = c("mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6", -"mark1","mark2","mark3","mark4","mark5","mark6"), -Sample.ID = c("par1a","par1a","par1a","par1a","par1a","par1a", -"par1b","par1b","par1b","par1b","par1b","par1b", -"par1c","par1c","par1c","par1c","par1c","par1c", -"par2a","par2a","par2a","par2a","par2a","par2a", -"par2b","par2b","par2b","par2b","par2b","par2b", -"par2c","par2c","par2c","par2c","par2c","par2c"), -Allele1...Forward = c("A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","-","T","-", -"A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","-","T","-"), -Allele2...Forward = c("A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","-","T","-", -"A","A","A","A","A","A", -"A","A","A","A","T","T", -"A","A","A","-","T","-")) -geno_strains(ref=ref,geno=geno,par1=c("par1a","par1b","par1c"),par2=c("par2a","par2b","par2c"),name1="parent1",name2="parent2") -devtools::build(path=".",vignettes=FAKSE) -rlang::last_error() -devtools::build(path=".",vignettes=FAKSE) -devtools::build(path=".",vignettes=FALSE) -View(genos) -genos %>% filter(SNP.Name %in% c("S3J010123784","SAH010136363","S2H010137098")) %>% filter(Sample.ID %in% c("StrainsA_1","StrainsA_2","StrainsB_1","StrainsB_2")) -genos %>% filter(Sample.ID %in% c("StrainsA_1","StrainsA_2","StrainsB_1","StrainsB_2")) %>% filter(SNP.Name %in% c("S3J010123784","SAH010136363","S2H010137098")) -geno_strains <- function(ref,geno,par1,par2,name1,name2){ -#rename df columns -geno <- geno %>% rename("marker"=1, -"id"=2, -"allele_1"=3, -"allele_2"=4) -#recode genotypes from 2 alleles to 1 -geno <- geno %>% mutate_all(as.character) -geno <- geno %>% filter(id %in% c(par1,par2)) -geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", -allele_1 == allele_2 ~ allele_1, -allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) -geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) -#create consensus -geno <- geno %>% mutate(parent1 = !!sym(par1[1])) -if(length(par1)!=1){ -for(i in 2:length(par1)){ -geno <- geno %>% mutate(parent1 = ifelse(parent1==!!sym(par1[i]),parent1,"N")) -} -} -geno <- geno %>% mutate(parent2 = !!sym(par2[1])) -if(length(par2)!=1){ -for(i in 2:length(par2)){ -geno <- geno %>% mutate(parent2 = ifelse(parent2==!!sym(par2[i]),parent2,"N")) -} -} -print(geno) -geno <- geno %>% select(marker,parent1,parent2) -colnames(geno) <- c("marker",name1,name2) -#merge with ref file -ref <- full_join(ref,geno,by=c("marker"="marker")) -return(ref) +tab <- tab %>% select(all_of(names),exclude_prop) +return(tab) } +mark_prop(test,cross="N2",pval=0.05) +mark_prop(test,cross="N2",pval=0.05) +mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#calculate total number of individuals genotyped for each marker +tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) +#stop if cross != "F2" or "N2" +if(!cross %in% c("F2","N2")){ +stop("Cross must be F2 or N2") +} +#stop of homo&hetero or pval not specified +if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ +stop("Arguments homo and hetero or argument pval must be specified") +} +#stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +T ~ 0)) +#stock colnames to join +names <- colnames(tab) +#exclude with prop of homo/hetero +if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% +mutate(exclude_prop=case_when(p_NA > na ~ 1, +cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, +cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, +T ~ exclude_prop +)) +#exclude with pval chisq.test +} else if(is.na(pval)==FALSE){ +#if cross F2 +if(cross=="F2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% +chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +#if cross N2 +} else if(cross=="N2"){ +tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% +mutate(.,chi_pval = tibble(n_HM1,n_HT) %>% +chisq.test(p=c(0.5,0.5)) %>% .$p.value) %>% +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} +} +tab <- tab %>% select(all_of(names)) %>% select(-c(n_geno,p_NA)) +return(tab) +} +mark_prop(test,cross="N2",pval=0.05) +test <- tibble(marker=c("mark1","mark2","mark3","mark4"),allele_1=c("A","G","T","C"),allele_2=c("T","T","A","G"),n_HM1=c(46,43,1,26),n_HM2=c(0,0,0,0),n_HT=c(50,55,48,74),n_NA=c(4,2,1,0)) +mark_prop(test,cross="N2",pval=0.05) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" @@ -457,56 +486,27 @@ data(genos) summary(genos) data(phenos) summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -devtools::build(path=".",vignettes=FALSE) -devtools::build_vignettes() -devtools::build_manual(path=".") -devtools::build(path=".",vignettes=FALSE) -devtools::build_vignettes() -devtools::build_manual(path=".") -devtools::build(path=".",vignettes=FALSE) -install.packages("KableExtra") -install.packages("kableExtra") -install.packages("kableExtra") -library(dplyr) -library(stuart) -print.data.frame(tab2 %>% filter(exclude_match==1)) -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) -library(dplyr) -library(stuart) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(genos) -summary(genos) -data(phenos) -summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -print.data.frame(head(strains)) -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), +par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) %>% print.data.frame() +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", +"StrainsB_1","StrainsB_2")) data(stuart_tab) summary(stuart_tab) tab2 <- mark_match(stuart_tab,ref=strains) -print.data.frame(tab2 %>% filter(exclude_match==1)) +tab2 %>% filter(exclude_match==1) %>% print.data.frame() tab2 <- mark_poly(tab2) -head(tab2) +head(tab2) %>% print.data.frame() tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -print.data.frame(head(tab2)) -tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() +head(tab2) %>% print.data.frame() tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() -head(tab2) %>% print.data.frame() -head(tab2) %>% print.data.frame() -rqtl_file[1:10,1:9] %>% print.data.frame() +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", +"gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% +select(marker,parent1,parent2) %>% print.data.frame() +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +rqtl_file[1:10,1:7] %>% print.data.frame() devtools::build(path=".",vignettes=FALSE) devtools::build_vignettes() devtools::build_manual(path=".") -remove.packages("stuart") -devtools::install_gitlab(repo="mouselab/stuart",host="gitlab.pasteur.fr") -library(stuart) -View(annot_mini) -annot_mini %>% filter(is.na(chr)==FALSE) %>% filter(is.na(cM_cox)==TRUE) -devtools::build(path=".",vignettes=FALSE) -library(stuart) -write_rqtl diff --git a/DESCRIPTION b/DESCRIPTION index 357dd3f7e8873d30c903a0729bea40fd38767329..e06c81a5fd1af10cbbee799bb8017425c6854a1e 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stuart Title: stuart -Version: 1.0.2 +Version: 1.0.3 Authors@R: person(given = "Marie", family = "Bourdon", @@ -18,7 +18,8 @@ Imports: tidyr, utils, stringr, - rapportools + rapportools, + tidyselect Suggests: knitr, rmarkdown, diff --git a/R/mark_prop.R b/R/mark_prop.R index a7b25081702197260eae3f8d3716411e42721528..a632fe9ac853ee95d5cff21faf5c8b5e0ac2e2c7 100755 --- a/R/mark_prop.R +++ b/R/mark_prop.R @@ -8,6 +8,7 @@ #' @param na proportion of non-genotyped individuals above which the marker is excluded. #' #' @import dplyr +#' @import tidyselect #' #' @export #' diff --git a/stuart_1.0.2.pdf b/stuart_1.0.3.pdf similarity index 87% rename from stuart_1.0.2.pdf rename to stuart_1.0.3.pdf index c562000e015a9cf6a5899dc22fe8c2cf84b0f264..99e4846aa744f2f136e60092bbac128796724e7d 100644 Binary files a/stuart_1.0.2.pdf and b/stuart_1.0.3.pdf differ diff --git a/stuart_1.0.2.tar.gz b/stuart_1.0.3.tar.gz similarity index 85% rename from stuart_1.0.2.tar.gz rename to stuart_1.0.3.tar.gz index add13c08183fe149b5af608164bf7a1c3a198c90..c3a83f5cd6fea2e9cf92ae06f08ea3cd74cc785a 100644 Binary files a/stuart_1.0.2.tar.gz and b/stuart_1.0.3.tar.gz differ