diff --git a/.Rhistory b/.Rhistory index 5bb5b6edfd0467a2548107e48d9335f20e7bf051..d0bc336f7bb93c2ded73cb353d9573672bdd2351 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,14 +1,11 @@ -} -test_tab <- tab2[1:20,] -tab3 <- mark_prop(test_tab,cross="F2",pval=0.05) -mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ -#calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) #calculate proportion of each genotype tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) tab <- tab %>% mutate(p_HT = n_HT/n_geno) tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -21,7 +18,7 @@ stop("Arguments homo and hetero or argument pval must be specified") tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, @@ -30,34 +27,22 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab) +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) } print(tab) -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -# tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) -# return(tab) -} -test_tab <- tab2[1:20,] -tab3 <- mark_prop(test_tab,cross="F2",pval=0.05) -View(tab3) +tab <- tab %>% select(all_of(names)) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) @@ -66,6 +51,8 @@ tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) tab <- tab %>% mutate(p_HT = n_HT/n_geno) tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -78,7 +65,7 @@ stop("Arguments homo and hetero or argument pval must be specified") tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, @@ -87,43 +74,26 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) } print(tab) -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -# tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) -# return(tab) -} -test_tab <- tab2[1:20,] -tab3 <- mark_prop(test_tab,cross="F2",pval=0.05) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -136,8 +106,13 @@ stop("Arguments homo and hetero or argument pval must be specified") tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -145,43 +120,27 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) } print(tab) -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) -return(tab) -} -test_tab <- tab2[1:20,] -tab3 <- mark_prop(test_tab,cross="F2",pval=0.05) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) +print(names) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -194,8 +153,13 @@ stop("Arguments homo and hetero or argument pval must be specified") tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -203,43 +167,31 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) -} -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) -return(tab) -} -test_tab <- tab2[1:20,] -tab3 <- mark_prop(test_tab,cross="F2",pval=0.05) -tab3 <- mark_prop(tab2,cross="F2",pval=0.05) +full_join(.,tab,by=all_of(names)) +tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, +T ~ exclude_prop)) +} +print(tab) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) +tab2 <- mark_poly(tab2) +head(tab2) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) +print(names) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -252,8 +204,13 @@ stop("Arguments homo and hetero or argument pval must be specified") tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -261,43 +218,27 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) } -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) -return(tab) -} -tab3 <- mark_prop(tab2,cross="F2",pval=0.05) +print(tab) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) +#stock colnames to join +names <- colnames(tab) +print(names) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -307,11 +248,17 @@ if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ stop("Arguments homo and hetero or argument pval must be specified") } #stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -319,43 +266,27 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) } -# #stop if no value for "homo" for F2 cross -# if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for F2 crosses") -# } -# -# #stop if no value for "homo" and "hetero" for N2 cross -# if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ -# stop("Arguments homo and hetero must be specified for N2 crosses") -# } -# #exclude markers according to proportion of na -# tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na -# cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross -# cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross -# T ~ 0)) -tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno,chi_pval)) -return(tab) -} -tab3 <- mark_prop(tab2,cross="F2",pval=0.05) +print(tab) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#stock colnames to join +names <- colnames(tab) +print(names) #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -365,11 +296,17 @@ if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ stop("Arguments homo and hetero or argument pval must be specified") } #stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -377,26 +314,30 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) } -T ~ 0)) +print(tab) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) +} +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) +tab2 <- mark_poly(tab2) +head(tab2) mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ +#stock colnames to join +names <- colnames(tab) +print(names) #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) -#calculate proportion of each genotype -tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) -tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) -tab <- tab %>% mutate(p_HT = n_HT/n_geno) -tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) #stop if cross != "F2" or "N2" if(!cross %in% c("F2","N2")){ stop("Cross must be F2 or N2") @@ -406,11 +347,17 @@ if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){ stop("Arguments homo and hetero or argument pval must be specified") } #stop with prop of na +#calculate proportion +tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, T ~ 0)) -#stop with prop of homo/hetero +#exclude with prop of homo/hetero if(is.na(pval)==TRUE){ +#calculate proportion of each genotype +tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) +tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) +tab <- tab %>% mutate(p_HT = n_HT/n_geno) tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, @@ -418,21 +365,22 @@ cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, T ~ exclude_prop )) } -#stop with pval chisq.test +#exclude with pval chisq.test +## NEED TO ADD THIS FILTER IF CROSS = N2 if(is.na(pval)==FALSE){ -#stock colnames to join -names <- colnames(tab) tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>% mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>% chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>% -full_join(.,tab,by=names) +full_join(.,tab,by=all_of(names)) tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) } -tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno,chi_pval)) -return(tab) +print(tab) +tab <- tab %>% select(all_of(names),exclude_prop) +print(tab) } -tab2 <- mark_prop(tab2,cross="F2",pval=0.05) +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +devtools::build(path=".",vignettes=FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" @@ -453,20 +401,40 @@ tab2 <- mark_match(stuart_tab,ref=strains) tab2 %>% filter(exclude_match==1) tab2 <- mark_poly(tab2) head(tab2) -tab2 <- mark_prop(tab2,cross="F2",pval=0.05) -devtools::build(path=".",vignettes = FALSE) -devtools::build_vignettes() -devtools::build_vignettes() -devtools::build(path=".",vignettes = FALSE) -devtools::build_vignettes() -devtools::build(path=".",vignettes = FALSE) -devtools::build_vignettes() +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) +knitr::opts_chunk$set( +collapse = TRUE, +comment = "#>" +) library(dplyr) library(stuart) +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) +data(genos) +summary(genos) +data(phenos) +summary(phenos) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) +data(stuart_tab) +summary(stuart_tab) +View(stuart_tab) +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) +View(tab2) +tab2 <- mark_poly(tab2) +head(tab2) +View(tab2) +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +View(tab2) +library(stuart) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) +library(dplyr) +library(stuart) annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) data(genos) summary(genos) @@ -483,30 +451,62 @@ tab2 <- mark_poly(tab2) head(tab2) tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) +View(tab2) tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) +View(tab2) +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file[1:10,1:7] devtools::build_vignettes() +devtools::build_manual() +build_manual +devtools::build_manual +Rd2pdf +tools::texi2pdf +devtools::build_manual() +devtools::build(path=".",vignettes=FALSE,manual=TRUE) +devtools::build_manual() +Rd2pdf() +Rd2pdf +install.packages("RdUtils") +RdUtils +RdUtils() +system("R CMD Rd2pdf stuart") +system("R CMD Rd2pdf stuaRt") +getwd() +cd .. +setwd("~/Documents/stuart_package/stuart") +setwd("~/Documents/stuart_package/stuart") +setwd("~/Documents/stuart_package") +system("R CMD Rd2pdf stuaRt") +system("R CMD Rd2pdf stuart") +system("R CMD Rd2pdf stuart") +setwd("~/Documents/stuart_package/stuart") +devtools::build_manual() +getwd() +devtools::build_manual(path=".") +devtools::build_manual(path=".") +View(genos) +devtools::build(path=".",vignettes=FALSE) devtools::build_vignettes() -devtools::build_vignettes() -devtools::build_vignettes() -devtools::build_vignettes() -devtools::build_vignettes() -demo() -install.packages("devtools") -install.packages("usethat") -install.packages("raportools") -install.packages("rapportools") -install.packages("roxygen2") -install.packages("testthat") -library(stuart) -library(dplyr) -library(stuart) -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) +devtools::build_manual(path=".") +devtools::build_manual(path=".") +devtools::build(path=".",vignettes=FALSE) +devtools::build_manual(path=".") +roxygen2::roxygenise() +devtools::build_manual(path=".") library(dplyr) library(stuart) +test <- tibble(x=c("A","T"),y=c("A","A"),z=c("A","A")) +View(test) +View(test) +View(test) +View(test) +View(rqtl_file) +View(annot_mini) +View(genos) +View(genos) +View(genos) +View(test) diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 8274802bed17cf345bd9ae5194f830b2d0379fe6..f7981f85816e8bfeabadadc7943a9d617abaeca3 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,5 +1,18 @@ +/home/marie/Documents/stuart_package/stuart/.Rbuildignore="739A4511" +/home/marie/Documents/stuart_package/stuart/.Rhistory="77548AEE" +/home/marie/Documents/stuart_package/stuart/.gitignore="05FA27A5" +/home/marie/Documents/stuart_package/stuart/DESCRIPTION="E286725B" +/home/marie/Documents/stuart_package/stuart/R/geno_strains.R="DCEFD103" +/home/marie/Documents/stuart_package/stuart/R/genos-data.R="33820B87" +/home/marie/Documents/stuart_package/stuart/R/mark_allele.R="64630E28" +/home/marie/Documents/stuart_package/stuart/R/mark_match.R="CBA3F514" +/home/marie/Documents/stuart_package/stuart/R/mark_poly.R="63E868BF" /home/marie/Documents/stuart_package/stuart/R/mark_prop.R="19C6446D" +/home/marie/Documents/stuart_package/stuart/R/phenos-data.R="75BCF577" +/home/marie/Documents/stuart_package/stuart/R/stuart_tab-data.R="9C18AF59" +/home/marie/Documents/stuart_package/stuart/R/tab_mark.R="60E85DC0" /home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="D25FAC55" /home/marie/Documents/stuart_package/stuart/README.Rmd="C395B1B3" /home/marie/Documents/stuart_package/stuart/README.md="8BBA9900" +/home/marie/Documents/stuart_package/stuart/man/genos.Rd="383A8DC0" /home/marie/Documents/stuart_package/stuart/vignettes/stuaRt.Rmd="007031F6" diff --git a/R/geno_strains.R b/R/geno_strains.R index d4104d0f3af4780745e4939ce649d7e90e8c995e..3ebdc122afa068df6278a01e843b77cf66df0fa5 100755 --- a/R/geno_strains.R +++ b/R/geno_strains.R @@ -34,16 +34,18 @@ geno_strains <- function(ref,geno,par1,par2,name1,name2){ #create consensus + geno <- geno %>% mutate(parent1 = !!sym(par1[1])) if(length(par1)!=1){ - geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N")) - } else { - geno <- geno %>% rename(parent1=!!sym(par1[1])) + for(i in 2:length(par1)){ + geno <- geno %>% mutate(parent1 = ifelse(parent1==!!sym(par1[i]),parent1,"N")) + } } + geno <- geno %>% mutate(parent2 = !!sym(par2[1])) if(length(par2)!=1){ - geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N")) - } else { - geno <- geno %>% rename(parent2=!!sym(par2[1])) + for(i in 2:length(par2)){ + geno <- geno %>% mutate(parent2 = ifelse(parent2==!!sym(par2[i]),parent2,"N")) + } } geno <- geno %>% select(marker,parent1,parent2) diff --git a/R/mark_allele.R b/R/mark_allele.R index 1646ed5b30ec06af760e4cc03ae537448fb46770..83f4a70e835e097ce9c54f0e256860c043a4fabe 100755 --- a/R/mark_allele.R +++ b/R/mark_allele.R @@ -24,7 +24,6 @@ mark_allele <- function(tab,ref,par1,par2,parNH=TRUE){ ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) tab <- full_join(tab,ref,by=c("marker"="marker")) - print(parNH) #function core tab <- tab %>% mutate(exclude_allele = case_when(parNH==FALSE & (!!sym(par1) == "N" | !!sym(par2) == "N" | !!sym(par1) == "H" | !!sym(par2) == "H") ~ 1, @@ -43,8 +42,6 @@ mark_allele <- function(tab,ref,par1,par2,parNH=TRUE){ T ~ 0) ) - print(tab) - tab <- tab %>% select(-c(!!sym(par1),!!sym(par2))) return(tab) diff --git a/R/mark_prop.R b/R/mark_prop.R index 499ed92b1ca36c27647d2e77a5ec14b0195be7ec..3b0ff5d96bf7ef30104d589b40786ff2825f24ab 100755 --- a/R/mark_prop.R +++ b/R/mark_prop.R @@ -17,7 +17,6 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ #stock colnames to join names <- colnames(tab) - print(names) #calculate total number of individuals genotyped for each marker tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT)) @@ -71,7 +70,5 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ tab <- tab %>% mutate(exclude_prop=case_when(chi_pval < pval ~ 1, T ~ exclude_prop)) } - print(tab) tab <- tab %>% select(all_of(names),exclude_prop) - print(tab) } diff --git a/stuart.Rcheck/00_pkg_src/stuart/DESCRIPTION b/stuart.Rcheck/00_pkg_src/stuart/DESCRIPTION deleted file mode 100755 index 2b3476283fbba4a5b44e24460ae2db69c04a27f5..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/DESCRIPTION +++ /dev/null @@ -1,23 +0,0 @@ -Package: stuart -Title: stuaRt -Version: 0.1.0 -Authors@R: - person(given = "Marie", - family = "Bourdon", - role = c("aut", "cre"), - email = "mariefbourdon@gmail.com", - comment = c(ORCID = "YOUR-ORCID-ID")) -Description: Sorts markers of miniMUGA genotyping for F2 or N2 individuals, for Rqtl analysis. -License: GPL-3 -Encoding: UTF-8 -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 -Imports: dplyr, tidyr, utils, stringr, rapportools -Suggests: knitr, rmarkdown -VignetteBuilder: knitr -Depends: R (>= 3.5.0) -NeedsCompilation: no -Packaged: 2021-06-01 08:06:30 UTC; mariebourdon -Author: Marie Bourdon [aut, cre] (YOUR-ORCID-ID) -Maintainer: Marie Bourdon <mariefbourdon@gmail.com> diff --git a/stuart.Rcheck/00_pkg_src/stuart/NAMESPACE b/stuart.Rcheck/00_pkg_src/stuart/NAMESPACE deleted file mode 100755 index 902e04361e27306cd36fac0de688e5a6ab80da2c..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/NAMESPACE +++ /dev/null @@ -1,13 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -export(geno_strains) -export(mark_allele) -export(mark_match) -export(mark_poly) -export(mark_prop) -export(tab_mark) -export(write_rqtl) -import(dplyr) -import(stringr) -import(tidyr) -import(utils) diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/geno_strains.R b/stuart.Rcheck/00_pkg_src/stuart/R/geno_strains.R deleted file mode 100755 index a0ac113461536b3410adb9bc4e176c06600024a6..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/geno_strains.R +++ /dev/null @@ -1,48 +0,0 @@ -#' @title Create haplotype for a new mouse strain into a reference dataframe -#' -#' @description This functions adds columns for parental strains used in the cross in the annotation data frame, from the genotype data frame in which one or several animal of the parental strains were genotyped. -#' If several animals of one strain were genotyped, a consensus is created from these animals. -#' The consensus is created as follow : if the indivuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H". -#' @param ref data frame with the reference genotypes of mouse lines -#' @param geno data frame with the genotyping results for your cross from miniMUGA array -#' @param par1 first parental strain used in the cross, the name must be written as in the geno data frame -#' @param par2 second parental strain used in the cross, the name must be written as in the geno data frame -#' @param name1 name of the first parental strain to use as the column name in the ref data frame -#' @param name2 name of the second parental strain to use as the column name in the ref data frame -#' -#' @import dplyr -#' @import tidyr -#' -#' @export -#' -geno_strains <- function(ref,geno,par1,par2,name1,name2){ - #recode genotypes from 2 alleles to 1 - geno <- geno %>% mutate_all(as.character) - geno <- geno %>% filter(Sample.ID %in% c(par1,par2)) - geno <- geno %>% mutate(Geno=case_when(Allele1...Forward == "-" | Allele2...Forward == "-" ~ "N", - Allele1...Forward == Allele2...Forward ~ Allele1...Forward, - Allele1...Forward %in% c("A","T","G","C") & Allele2...Forward %in% c("A","T","G","C") ~ "H")) - - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) %>% pivot_wider(names_from = Sample.ID, values_from = Geno) - - - #create consensus - if(length(par1)!=1){ - geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N")) - } else { - geno <- geno %>% rename(parent1=!!sym(par1[1])) - } - - if(length(par2)!=1){ - geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N")) - } else { - geno <- geno %>% rename(parent2=!!sym(par2[1])) - } - - geno <- geno %>% select(SNP.Name,parent1,parent2) - colnames(geno) <- c("SNP.Name",name1,name2) - - #merge with ref file - ref <- full_join(ref,geno,by=c("marker"="SNP.Name")) - return(ref) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/genos-data.R b/stuart.Rcheck/00_pkg_src/stuart/R/genos-data.R deleted file mode 100755 index fd50eb0ea6a1188c1f619aa7510b6b276c9c2895..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/genos-data.R +++ /dev/null @@ -1,8 +0,0 @@ -#' Data frame with miniMUGA genotyping of F2 individuals and parental strains -#' -#' A dataset containing the genotypes of 176 F2 individuals -#' -#' @format A data frame with 2002493 observations of 11 variables - - -"genos" diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/mark_allele.R b/stuart.Rcheck/00_pkg_src/stuart/R/mark_allele.R deleted file mode 100755 index b679916993328de1dfd7b34b3325bf8066424b8e..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/mark_allele.R +++ /dev/null @@ -1,46 +0,0 @@ -#' @title Exclude markers that have different alleles in the individuals of the cross and in parental strains -#' -#' @description This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers which have alleles observed in the individuals of the cross that do not correspond to the alleles observed in the parental strains. For example, a marker which is not polymorphic between the two parental strains but which has two alleles in the cross individuals will be excluded. -#' @param tab data frame obtained with tab_mark function -#' @param ref data frame with the reference genotypes of mouse lines -#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame -#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame -#' -#' @import dplyr -#' -#' @export -#' -mark_allele <- function(tab,ref,par1,par2){ - - #markers of ref df as characters - ref$marker <- as.character(ref$marker) - colnames(ref) <- make.names(colnames(ref)) - - #recode parents' names to match column names nomenclature - par1 <- make.names(par1) - par2 <- make.names(par2) - - #join tab and ref genotypes - ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) - tab <- full_join(tab,ref,by=c("SNP.Name"="marker")) - - #function core - tab <- tab %>% mutate(exclude_allele = case_when(is.na(Allele_2)==FALSE & - !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==FALSE & - (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==TRUE & - !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - (Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1, - is.na(Allele_2)==TRUE & - (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1, - T ~ 0) - ) - - tab <- tab %>% select(-c(!!sym(par1),!!sym(par2))) - - return(tab) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/mark_match.R b/stuart.Rcheck/00_pkg_src/stuart/R/mark_match.R deleted file mode 100755 index 8781242a7568eb4081bcd0170f5ff17a049f2358..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/mark_match.R +++ /dev/null @@ -1,25 +0,0 @@ -#' @title Exclude markers that were not genotyped in the reference strains -#' -#' @description This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that were genotyped in the individuals of the cross but not in the reference strains. This is useful if the parental strains of the cross were not genotyped with the individuals and a previous genotyping result is used. Indeed, changes in the markers of the array may have happened. We recommend always using this function in order to avoid errors. -#' @param tab data frame obtained with tab_mark function -#' @param ref data frame with the reference genotypes of mouse lines -#' -#' @import dplyr -#' -#' @export -#' -mark_match <- function(tab, #tab_mark df - ref){ #strain ref geno file - - #finds SNPs that are in both files: - snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file - snp_genfile <- as.character(tab$SNP.Name) #extracts SNPs in cross geno file - snp <- intersect(snp_strains,snp_genfile) #take intercept - - - #add results in exclude column - return(tab %>% mutate(exclude_match=ifelse(!SNP.Name %in% snp, - 1, - 0))) - -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/mark_poly.R b/stuart.Rcheck/00_pkg_src/stuart/R/mark_poly.R deleted file mode 100755 index 168ccf3c35f28348477d6700238dc487666aae6a..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/mark_poly.R +++ /dev/null @@ -1,13 +0,0 @@ -#' @title Exclude markers that are not polymorphic -#' -#' @description This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that are not polymorphic. -#' @param tab data frame obtained with tab_mark function -#' -#' @import dplyr -#' -#' @export -mark_poly <- function(tab){ - return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE, - 1, - 0))) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/mark_prop.R b/stuart.Rcheck/00_pkg_src/stuart/R/mark_prop.R deleted file mode 100755 index a0827e8c27b02c9d65a6cc3af34cf595f50a5840..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/mark_prop.R +++ /dev/null @@ -1,50 +0,0 @@ -#' @title Exclude markers depending on proportions of homo/hetorozygous -#' -#' @description uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. -#' @param tab data frame obtained with tab_mark function. -#' @param cross F2 or N2. -#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2. -#' @param hetero proportion of heterozygous individuals under which the marker is excluded. -#' @param na proportion of non-genotyped individuals above which the marker is excluded. -#' -#' @import dplyr -#' -#' @export -#' - -#### mark_prop #### -## excludes markers depending on proportions of homo/hetorozygous -mark_prop <- function(tab,cross,homo=NA,hetero=NA,na=0.5){ - #calculate total number of individuals genotyped for each marker - tab <- tab %>% mutate(n_geno = tab %>% select(n_HM1,n_HM2,n_HT) %>% rowSums(na.rm=TRUE)) - - #calculate proportion of each genotype - tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) - tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) - tab <- tab %>% mutate(p_HT = n_HT/n_geno) - tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) - - #stop if cross != "F2" or "N2" - if(!cross %in% c("F2","N2")){ - stop("Cross must be F2 or N2") - } - - #stop if no value for "homo" for F2 cross - if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ - stop("Arguments homo and hetero must be specified for F2 crosses") - } - - #stop if no value for "homo" and "hetero" for N2 cross - if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ - stop("Arguments homo and hetero must be specified for N2 crosses") - } - - #exclude markers according to proportion of na - tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na - cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross - cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross - T ~ 0)) - - tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) - return(tab) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/phenos-data.R b/stuart.Rcheck/00_pkg_src/stuart/R/phenos-data.R deleted file mode 100755 index ae4ba0006a4c4eda85b75c15c796816894a06f79..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/phenos-data.R +++ /dev/null @@ -1,8 +0,0 @@ -#' Data frame with phenotype of F2 individuals -#' -#' A dataset containing the phenotype of 176 F2 individuals -#' -#' @format A data frame with 176 observations of 4 variables - - -"phenos" diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/ref_strains_mini-data.R b/stuart.Rcheck/00_pkg_src/stuart/R/ref_strains_mini-data.R deleted file mode 100755 index 3fdffa9ee1a5665666e7c7b9c25aa92d3d144cbf..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/ref_strains_mini-data.R +++ /dev/null @@ -1,29 +0,0 @@ -#' Data frame with miniMUGA genotyping of classical lab strains. -#' -#' A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur. -#' -#' @format A data frame with 11299 rows and 18 variables -#' \describe{ -#' \item{CC001}{CC001 mouse strain} -#' \item{CC005}{CC005 mouse strain} -#' \item{CC042}{CC042 mouse strain} -#' \item{CC071}{CC071 mouse strain} -#' \item{Ifnar.KO.129}{Ifnar KO 129 mouse strain} -#' \item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain} -#' \item{Rvfs2.1}{Rvfs2-1 mouse strain} -#' \item{Rvfs2.2}{Rvfs2-2 mouse strain} -#' \item{Rvfs2.6}{Rvfs2-6 mouse strain} -#' \item{Rvfs2.7}{Rvfs2-7 mouse strain} -#' \item{marker}{name of the marker} -#' \item{chr}{chromosome} -#' \item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)} -#' \item{cM_cox}{localisation on chromosome in cM (from Cox et al.)} -#' \item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)} -#' \item{snp}{marker alleles} -#' \item{unique}{indicates if the marker maps uniquely on mm10} -#' \item{multi}{indicates if the marker maps more than one time on mm10} -#' \item{unmapped}{indicates if the marker does not map perfectly on mm10} -#' } - - -"ref_strains_mini" diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/stuart_tab-data.R b/stuart.Rcheck/00_pkg_src/stuart/R/stuart_tab-data.R deleted file mode 100644 index a5e0d4fb13d058c8b8147662389812fa7c300f28..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/stuart_tab-data.R +++ /dev/null @@ -1,17 +0,0 @@ -#' Output of tab_mark function -#' -#' A dataset with the output of tab_mark() function. -#' -#' @format A data frame with 11125 rows and 7 variables -#' \describe{ -#' \item{SNP.Name}{name of the marker} -#' \item{Allele_1}{first allele of the marker} -#' \item{Allele_2}{second allele of the marker} -#' \item{n_HM1}{number of homozygous individuals for the first allele} -#' \item{n_HM2}{number of homozygous individuals for the second allele} -#' \item{n_HT}{number of heterozygous individuals} -#' \item{n_NA}{number of non genotyped individuals} -#' } - - -"stuart_tab" diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/tab_mark.R b/stuart.Rcheck/00_pkg_src/stuart/R/tab_mark.R deleted file mode 100755 index 612c27db930cb54d481e09a9a7ab5efd4912420f..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/tab_mark.R +++ /dev/null @@ -1,137 +0,0 @@ -#' @title Create of the summary table for all markers from the genotype data frame -#' -#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals. -#' @param geno data frame with the genotyping results for your cross -#' -#' @import dplyr -#' @import tidyr -#' -#' @export -#' - -#### tab_mark function #### -## create table with markers and counts -tab_mark <- function(geno){ - #create geno column in geno df - geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE) - - #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") - geno <- geno %>% mutate(Geno=recode(Geno, - "TA" = "AT", - "GA" = "AG", - "CA" = "AC", - "GT" = "TG", - "CT" = "TC", - "GC" = "CG")) - - - #create df with counts for each genotype - df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)), - Allele_1 = NA, - Allele_2 = NA, - n_HM1 = NA, - n_HM2 = NA, - n_HT = NA, - n_NA = NA) - - - ## loop to count genotype - for(i in df_count$SNP.Name){ - #extract alleles for each marker - Alleles <- geno %>% filter(SNP.Name==i) %>% - select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>% - pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>% - distinct(Allele) %>% filter(Allele != "-") - Alleles <- as.factor(paste(Alleles$Allele)) - - #sort alleles - Alleles <- factor(Alleles,levels=c("A","T","C","G")) - Alleles <- sort(Alleles) - - #add alleles and counts, only for markers with alleles (not markers with no genotyped ind) - if(all(rapportools::is.empty(Alleles))==FALSE){ - - #add alleles to df_count - df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i, - paste(Alleles[1]), Allele_1)) - - - - #count for homozygous for allele 1 - n1 <- geno %>% filter(SNP.Name==i) %>% - filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% - summarise(n=n()) - - - #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i, - n1$n, n_HM1)) - - - } - - #if marker not polymorphic - if(is.na(Alleles[2])==TRUE){ - #NA as Allele_2 - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - NA, Allele_2)) - - #NA as n_HM2 - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, - NA, n_HM2)) - - #NA as n_HT - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, - NA, n_HT)) - } else { - #add alleles to df_count - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - paste(Alleles[2]), Allele_2)) - - - #count for homozygous for allele 2 - n2 <- geno %>% filter(SNP.Name==i) %>% - filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% - summarise(n=n()) - - #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, - n2$n, n_HM2)) - - - #count for heterozygous - n3 <- geno %>% filter(SNP.Name==i) %>% - filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% - summarise(n=n()) - - - #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, - n3$n, n_HT)) - - - } - - #count for NA - n4 <- geno %>% filter(SNP.Name==i) %>% - filter(Geno == "--" | - Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | - Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% - summarise(n=n()) - - #add count for NA to df_count - df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i, - n4$n, n_NA)) - } - #change class of counts as numeric : - df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric() - df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric() - df_count$n_HT <- df_count$n_HT %>% as.numeric() - df_count$n_NA <- df_count$n_NA %>% as.numeric() - - #add 0 for null counts - df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0)) - - #return - return(df_count) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/R/write_rqtl.R b/stuart.Rcheck/00_pkg_src/stuart/R/write_rqtl.R deleted file mode 100755 index b02eb333df6c67886b4a8245fe181dab8e923155..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/R/write_rqtl.R +++ /dev/null @@ -1,137 +0,0 @@ -#' @title Create data frame in Rqtl CSV format -#' -#' @description This function uses the table produced by tab_mark function filled by all the mark_* functions in order to create a data frame in the right format for Rqtl read.cross function. Only the non-excluded markers will be kept and genotypeds will be encoded in "0", "1" and "2", "0" being homozygous for the first parental strain, "1" heterozygous and "2" homozygous for the second parental strain. Caution, this file create a data frame and a CSV file in the path of your choice if indicated by the "path" argument. This function does not create a "cross" object in your environment that can be directly used for QTL mapping. You will need to load the CSV file with qtl::read.cross. -#' @param geno data frame with the genotyping results for your cross -#' @param pheno data frame with phenotypes of the individuals (individuals must have the same ID in the geno data frame and in the pheno data frame) -#' @param prefix potential prefix present in the names of the individuals in the geno data frame to be removed in ordere to have the same names as in the pheno file -#' @param tab data frame obtained with tab_mark function -#' @param ref data frame with the reference genotypes of mouse lines -#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame -#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame -#' @param method method of calculation of cM position, can be "cM_cox" of "cM_g2f1" -#' @param path if indicated, the data frame will be exported in this path -#' -#' @import dplyr -#' @import tidyr -#' @import utils -#' @import stringr -#' -#' @export -#' -#### write_rqtl #### -## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated -write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,method="cM_cox",path=NA){ - #extract snps non excluded - if("exclude_match" %in% colnames(tab)){ - tab <- tab %>% filter(exclude_match==0) - } - - if("exclude_poly" %in% colnames(tab)){ - tab <- tab %>% filter(exclude_poly==0) - } - - if("exclude_prop" %in% colnames(tab)){ - tab <- tab %>% filter(exclude_prop==0) - } - - if("exclude_allele" %in% colnames(tab)){ - tab <- tab %>% filter(exclude_allele==0) - } - - - #filter genotypes for non excluded markers in geno file - geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) - - #recode parents' names to match column names nomenclature - par1 <- make.names(par1) - par2 <- make.names(par2) - - #keep parental lines genotypes - colnames(ref) <- make.names(colnames(ref)) - ref <- ref %>% select(marker,chr,bp_mm10,cM_cox,cM_g2f1,!!sym(par1),!!sym(par2)) - - #merge genotypes with parents - geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) - - #recode "-" in "N" in geno file - geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, - "-" = "N")) - - geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, - "-" = "N")) - - #recode geno in factors with same levels - geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) - geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) - - - - #recode genotypes depending on parents' genotypes - geno <- geno %>% mutate(Geno = case_when( - #if one allele not genotyped: - Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", - - #if both alleles genotyped - ##homozygous 0 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", - ##homozygous 2 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", - ##heterozygous - Allele1...Forward!=Allele2...Forward ~ "1", - - #if parental strains are N/H - ##homozygous for parent that is N/H - ###homozygous 0 - (!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) & - !!sym(par1)%in%c("H","N") ~ "0", - ###homozygous 2 - (!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) & - !!sym(par2)%in%c("H","N") ~ "2", - ) - ) - - - #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,bp_mm10,cM_cox,cM_g2f1) %>% distinct() - markers <- markers %>% arrange(chr,bp_mm10) - - - #keep only interesting columns in geno file - geno <- geno %>% arrange(chr,bp_mm10) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) - - #remove prefix - geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) - - #keep only non excluded markers and merge with positions - markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) - markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(method)) %>% full_join(.,geno,by="SNP.Name") - - - #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(method)),values_from = Geno,names_sep=",") - geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) - geno <- geno %>% rename("Sample.ID,,"=Sample.ID) - - - #merge with phenotype file - pheno <- pheno %>% mutate_all(as.character) - colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) - - #prepare file - qtl_file <- rbind(colnames(qtl_file),qtl_file) - qtl_file <- separate_rows(qtl_file,everything(),sep=",") - colnames(qtl_file) <- qtl_file[1,] - qtl_file <- qtl_file %>% slice(-1) - - if(is.na(path)==FALSE){ - write.csv(qtl_file,file=path,quote=FALSE,row.names = FALSE) - } - - - - - return(qtl_file) -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/README.md b/stuart.Rcheck/00_pkg_src/stuart/README.md deleted file mode 100644 index 2497f6e0824b3a3c72b4a108b6adfe55cce49a03..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/README.md +++ /dev/null @@ -1,28 +0,0 @@ - -<!-- README.md is generated from README.Rmd. Please edit that file --> - -# stuart - -<!-- badges: start --> - -<!-- badges: end --> - -stuart is a R package used to analyze whole genome genotyping results of -animals used by crossing laboratory strains. It is particularly useful -for F2 or N2 individuals as it allows to filter the markers in the -arrays that can or cannot be used for further analysis from a genetic -point of view. Mrkers will be selected depending on their proportion of -each genotype, correspondance between F2 or N2 individuals alleles and -parental strains alleles, etc. - -## Installation - -You can install the released version of stuart from GitLab - -``` r -``` - -## Example - -To see an example of the use of stuart package with miniMUGA array, -consult the package vignette. diff --git a/stuart.Rcheck/00_pkg_src/stuart/build/vignette.rds b/stuart.Rcheck/00_pkg_src/stuart/build/vignette.rds deleted file mode 100644 index 87a9d6290223156fd3752ff37a7c170f33892ee0..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/build/vignette.rds and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/data/genos.rda b/stuart.Rcheck/00_pkg_src/stuart/data/genos.rda deleted file mode 100755 index 3b5ceecc40384e0c262904a26294f3568d06a691..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/data/genos.rda and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/data/phenos.rda b/stuart.Rcheck/00_pkg_src/stuart/data/phenos.rda deleted file mode 100755 index bf2f162aa773c8bcac47585a99b6d86a5ed33f92..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/data/phenos.rda and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/data/ref_strains_mini.rda b/stuart.Rcheck/00_pkg_src/stuart/data/ref_strains_mini.rda deleted file mode 100755 index e5673c4faec5cb1bc70625c468ebeba059709c02..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/data/ref_strains_mini.rda and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/data/stuart_tab.rda b/stuart.Rcheck/00_pkg_src/stuart/data/stuart_tab.rda deleted file mode 100644 index b8b8e8f4ec7299f686eb136dc11793732cee6dd5..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/data/stuart_tab.rda and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.R b/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.R deleted file mode 100644 index 71e57691d5f643cb3edb7e751d2c7641ee3b5f96..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.R +++ /dev/null @@ -1,56 +0,0 @@ -## ---- include = FALSE--------------------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -## ----setup-------------------------------------------------------------------- -library(dplyr) -library(stuart) - -## ----annot-------------------------------------------------------------------- -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) - -## ----load--------------------------------------------------------------------- -data(genos) -summary(genos) -data(phenos) -summary(phenos) - -## ----strains------------------------------------------------------------------ -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) - -## ----no_parent---------------------------------------------------------------- -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) - -## ----tab_mark----------------------------------------------------------------- -data(stuart_tab) -summary(stuart_tab) - -## ----mark_match--------------------------------------------------------------- -tab2 <- mark_match(stuart_tab,ref=strains) - - -tab2 %>% filter(exclude_match==1) - -## ----mark_poly ex------------------------------------------------------------- -tab2 <- mark_poly(tab2) -head(tab2) - -## ----mark_prop ex------------------------------------------------------------- -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) - -## ----mark_allele-------------------------------------------------------------- -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() - -## ----mark_allele-strains------------------------------------------------------ -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) - -## ----write_qtl---------------------------------------------------------------- -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) - -rqtl_file[1:10,1:7] - diff --git a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.Rmd b/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.Rmd deleted file mode 100755 index 4e294ab0cc3558b6b84acf6a660e790a6c6522f4..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.Rmd +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: "stuaRt" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{stuaRt} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -Marie Bourdon - -April 2021 - -## Goal - -stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. - -The examples shown here require the use of dplyr package. - - -```{r setup} -library(dplyr) -library(stuart) -``` - - -## Annotation files - -The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations). - -We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. - -Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuaRt package. - -```{r annot} -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -``` - - - -```{r load} -data(genos) -summary(genos) -data(phenos) -summary(phenos) -``` - -### Genotyping of parental strains - -To use genotyping result for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2). - -We recommend to always genotype the parental strains of the cross. Here, their genotypes are in the `genos` file and correspond to the Sample.ID "StrainsA_1", "StrainsA_2", "StrainsB_1" and "StrainsB_2". Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes or the F2 individuals. - -This is done with the `geno_strains` function. - -```{r strains} -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -``` - -After this step, we need to remove the genotyping result for these individuals from the `genos` dataset. -```{r no_parent} -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -``` - - -## Markers sorting - -### Marker tab - -The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. - - -```{r tab_mark} -data(stuart_tab) -summary(stuart_tab) -``` - -Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. - -```{r mark_match} -tab2 <- mark_match(stuart_tab,ref=strains) - - -tab2 %>% filter(exclude_match==1) -``` - -Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. - -Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. - -```{r mark_poly ex} -tab2 <- mark_poly(tab2) -head(tab2) -``` - -The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. - -```{r mark_prop ex} -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) -``` - -Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. - -```{r mark_allele} -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() -``` - -Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. - -```{r mark_allele-strains} -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) -``` - -# Creation of the R/qtl file - -After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. - -```{r write_qtl} -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) - -rqtl_file[1:10,1:7] -``` - diff --git a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.html b/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.html deleted file mode 100644 index 4c9410ed11fa0ee4af7da835ea7e5fc2d57ce791..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/inst/doc/stuaRt.html +++ /dev/null @@ -1,558 +0,0 @@ -<!DOCTYPE html> - -<html> - -<head> - -<meta charset="utf-8" /> -<meta name="generator" content="pandoc" /> -<meta http-equiv="X-UA-Compatible" content="IE=EDGE" /> - -<meta name="viewport" content="width=device-width, initial-scale=1" /> - - - -<title>stuaRt</title> - -<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); -</script> -<script>// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> -// v0.0.1 -// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. - -document.addEventListener('DOMContentLoaded', function() { - const codeList = document.getElementsByClassName("sourceCode"); - for (var i = 0; i < codeList.length; i++) { - var linkList = codeList[i].getElementsByTagName('a'); - for (var j = 0; j < linkList.length; j++) { - if (linkList[j].innerHTML === "") { - linkList[j].setAttribute('aria-hidden', 'true'); - } - } - } -}); -</script> - -<style type="text/css"> - code{white-space: pre-wrap;} - span.smallcaps{font-variant: small-caps;} - span.underline{text-decoration: underline;} - div.column{display: inline-block; vertical-align: top; width: 50%;} - div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} - ul.task-list{list-style: none;} - </style> - - -<style type="text/css"> - code { - white-space: pre; - } - .sourceCode { - overflow: visible; - } -</style> -<style type="text/css" data-origin="pandoc"> -pre > code.sourceCode { white-space: pre; position: relative; } -pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } -pre > code.sourceCode > span:empty { height: 1.2em; } -code.sourceCode > span { color: inherit; text-decoration: inherit; } -div.sourceCode { margin: 1em 0; } -pre.sourceCode { margin: 0; } -@media screen { -div.sourceCode { overflow: auto; } -} -@media print { -pre > code.sourceCode { white-space: pre-wrap; } -pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } -} -pre.numberSource code - { counter-reset: source-line 0; } -pre.numberSource code > span - { position: relative; left: -4em; counter-increment: source-line; } -pre.numberSource code > span > a:first-child::before - { content: counter(source-line); - position: relative; left: -1em; text-align: right; vertical-align: baseline; - border: none; display: inline-block; - -webkit-touch-callout: none; -webkit-user-select: none; - -khtml-user-select: none; -moz-user-select: none; - -ms-user-select: none; user-select: none; - padding: 0 4px; width: 4em; - color: #aaaaaa; - } -pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } -div.sourceCode - { } -@media screen { -pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } -} -code span.al { color: #ff0000; font-weight: bold; } /* Alert */ -code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ -code span.at { color: #7d9029; } /* Attribute */ -code span.bn { color: #40a070; } /* BaseN */ -code span.bu { } /* BuiltIn */ -code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ -code span.ch { color: #4070a0; } /* Char */ -code span.cn { color: #880000; } /* Constant */ -code span.co { color: #60a0b0; font-style: italic; } /* Comment */ -code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ -code span.do { color: #ba2121; font-style: italic; } /* Documentation */ -code span.dt { color: #902000; } /* DataType */ -code span.dv { color: #40a070; } /* DecVal */ -code span.er { color: #ff0000; font-weight: bold; } /* Error */ -code span.ex { } /* Extension */ -code span.fl { color: #40a070; } /* Float */ -code span.fu { color: #06287e; } /* Function */ -code span.im { } /* Import */ -code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ -code span.kw { color: #007020; font-weight: bold; } /* Keyword */ -code span.op { color: #666666; } /* Operator */ -code span.ot { color: #007020; } /* Other */ -code span.pp { color: #bc7a00; } /* Preprocessor */ -code span.sc { color: #4070a0; } /* SpecialChar */ -code span.ss { color: #bb6688; } /* SpecialString */ -code span.st { color: #4070a0; } /* String */ -code span.va { color: #19177c; } /* Variable */ -code span.vs { color: #4070a0; } /* VerbatimString */ -code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ - -</style> -<script> -// apply pandoc div.sourceCode style to pre.sourceCode instead -(function() { - var sheets = document.styleSheets; - for (var i = 0; i < sheets.length; i++) { - if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue; - try { var rules = sheets[i].cssRules; } catch (e) { continue; } - for (var j = 0; j < rules.length; j++) { - var rule = rules[j]; - // check if there is a div.sourceCode rule - if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue; - var style = rule.style.cssText; - // check if color or background-color is set - if (rule.style.color === '' && rule.style.backgroundColor === '') continue; - // replace div.sourceCode by a pre.sourceCode rule - sheets[i].deleteRule(j); - sheets[i].insertRule('pre.sourceCode{' + style + '}', j); - } - } -})(); -</script> - - - - -<style type="text/css">body { -background-color: #fff; -margin: 1em auto; -max-width: 700px; -overflow: visible; -padding-left: 2em; -padding-right: 2em; -font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; -font-size: 14px; -line-height: 1.35; -} -#TOC { -clear: both; -margin: 0 0 10px 10px; -padding: 4px; -width: 400px; -border: 1px solid #CCCCCC; -border-radius: 5px; -background-color: #f6f6f6; -font-size: 13px; -line-height: 1.3; -} -#TOC .toctitle { -font-weight: bold; -font-size: 15px; -margin-left: 5px; -} -#TOC ul { -padding-left: 40px; -margin-left: -1.5em; -margin-top: 5px; -margin-bottom: 5px; -} -#TOC ul ul { -margin-left: -2em; -} -#TOC li { -line-height: 16px; -} -table { -margin: 1em auto; -border-width: 1px; -border-color: #DDDDDD; -border-style: outset; -border-collapse: collapse; -} -table th { -border-width: 2px; -padding: 5px; -border-style: inset; -} -table td { -border-width: 1px; -border-style: inset; -line-height: 18px; -padding: 5px 5px; -} -table, table th, table td { -border-left-style: none; -border-right-style: none; -} -table thead, table tr.even { -background-color: #f7f7f7; -} -p { -margin: 0.5em 0; -} -blockquote { -background-color: #f6f6f6; -padding: 0.25em 0.75em; -} -hr { -border-style: solid; -border: none; -border-top: 1px solid #777; -margin: 28px 0; -} -dl { -margin-left: 0; -} -dl dd { -margin-bottom: 13px; -margin-left: 13px; -} -dl dt { -font-weight: bold; -} -ul { -margin-top: 0; -} -ul li { -list-style: circle outside; -} -ul ul { -margin-bottom: 0; -} -pre, code { -background-color: #f7f7f7; -border-radius: 3px; -color: #333; -white-space: pre-wrap; -} -pre { -border-radius: 3px; -margin: 5px 0px 10px 0px; -padding: 10px; -} -pre:not([class]) { -background-color: #f7f7f7; -} -code { -font-family: Consolas, Monaco, 'Courier New', monospace; -font-size: 85%; -} -p > code, li > code { -padding: 2px 0px; -} -div.figure { -text-align: center; -} -img { -background-color: #FFFFFF; -padding: 2px; -border: 1px solid #DDDDDD; -border-radius: 3px; -border: 1px solid #CCCCCC; -margin: 0 5px; -} -h1 { -margin-top: 0; -font-size: 35px; -line-height: 40px; -} -h2 { -border-bottom: 4px solid #f7f7f7; -padding-top: 10px; -padding-bottom: 2px; -font-size: 145%; -} -h3 { -border-bottom: 2px solid #f7f7f7; -padding-top: 10px; -font-size: 120%; -} -h4 { -border-bottom: 1px solid #f7f7f7; -margin-left: 8px; -font-size: 105%; -} -h5, h6 { -border-bottom: 1px solid #ccc; -font-size: 105%; -} -a { -color: #0033dd; -text-decoration: none; -} -a:hover { -color: #6666ff; } -a:visited { -color: #800080; } -a:visited:hover { -color: #BB00BB; } -a[href^="http:"] { -text-decoration: underline; } -a[href^="https:"] { -text-decoration: underline; } - -code > span.kw { color: #555; font-weight: bold; } -code > span.dt { color: #902000; } -code > span.dv { color: #40a070; } -code > span.bn { color: #d14; } -code > span.fl { color: #d14; } -code > span.ch { color: #d14; } -code > span.st { color: #d14; } -code > span.co { color: #888888; font-style: italic; } -code > span.ot { color: #007020; } -code > span.al { color: #ff0000; font-weight: bold; } -code > span.fu { color: #900; font-weight: bold; } -code > span.er { color: #a61717; background-color: #e3d2d2; } -</style> - - - - -</head> - -<body> - - - - -<h1 class="title toc-ignore">stuaRt</h1> - - - -<p>Marie Bourdon</p> -<p>April 2021</p> -<div id="goal" class="section level2"> -<h2>Goal</h2> -<p>stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc.</p> -<p>The examples shown here require the use of dplyr package.</p> -<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(dplyr)</span> -<span id="cb1-2"><a href="#cb1-2"></a><span class="co">#> </span></span> -<span id="cb1-3"><a href="#cb1-3"></a><span class="co">#> Attaching package: 'dplyr'</span></span> -<span id="cb1-4"><a href="#cb1-4"></a><span class="co">#> The following objects are masked from 'package:stats':</span></span> -<span id="cb1-5"><a href="#cb1-5"></a><span class="co">#> </span></span> -<span id="cb1-6"><a href="#cb1-6"></a><span class="co">#> filter, lag</span></span> -<span id="cb1-7"><a href="#cb1-7"></a><span class="co">#> The following objects are masked from 'package:base':</span></span> -<span id="cb1-8"><a href="#cb1-8"></a><span class="co">#> </span></span> -<span id="cb1-9"><a href="#cb1-9"></a><span class="co">#> intersect, setdiff, setequal, union</span></span> -<span id="cb1-10"><a href="#cb1-10"></a><span class="kw">library</span>(stuart)</span></code></pre></div> -</div> -<div id="annotation-files" class="section level2"> -<h2>Annotation files</h2> -<p>The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : “cM_cox†and “cM_g2f1†(see <a href="https://kbroman.org/MUGAarrays/mini_revisited.html" class="uri">https://kbroman.org/MUGAarrays/mini_revisited.html</a> for more informations).</p> -<p>We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (<a href="https://github.com/kbroman/MUGAarrays/tree/master/UWisc" class="uri">https://github.com/kbroman/MUGAarrays/tree/master/UWisc</a>). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R.</p> -<p>Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: <code>genos</code> and thephenotype dataset produced by the lab: <code>phenos</code>. All these datasets are available for example in stuaRt package.</p> -<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>annot_mini <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="kw">url</span>(<span class="st">"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"</span>))</span></code></pre></div> -<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a><span class="kw">data</span>(genos)</span> -<span id="cb3-2"><a href="#cb3-2"></a><span class="kw">summary</span>(genos)</span> -<span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> SNP.Name Sample.ID Allele1...Forward Allele2...Forward </span></span> -<span id="cb3-4"><a href="#cb3-4"></a><span class="co">#> Length:2002493 Length:2002493 Length:2002493 Length:2002493 </span></span> -<span id="cb3-5"><a href="#cb3-5"></a><span class="co">#> Class :character Class :character Class :character Class :character </span></span> -<span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> Mode :character Mode :character Mode :character Mode :character </span></span> -<span id="cb3-7"><a href="#cb3-7"></a><span class="co">#> </span></span> -<span id="cb3-8"><a href="#cb3-8"></a><span class="co">#> </span></span> -<span id="cb3-9"><a href="#cb3-9"></a><span class="co">#> </span></span> -<span id="cb3-10"><a href="#cb3-10"></a><span class="co">#> </span></span> -<span id="cb3-11"><a href="#cb3-11"></a><span class="co">#> X Y GC.Score Theta </span></span> -<span id="cb3-12"><a href="#cb3-12"></a><span class="co">#> Min. : 0.0000 Min. : 0.0000 Min. :0.0000 Min. :0.0000 </span></span> -<span id="cb3-13"><a href="#cb3-13"></a><span class="co">#> 1st Qu.: 0.0260 1st Qu.: 0.0480 1st Qu.:0.6747 1st Qu.:0.0420 </span></span> -<span id="cb3-14"><a href="#cb3-14"></a><span class="co">#> Median : 0.1750 Median : 0.5010 Median :0.8550 Median :0.6620 </span></span> -<span id="cb3-15"><a href="#cb3-15"></a><span class="co">#> Mean : 0.5716 Mean : 0.6564 Mean :0.7276 Mean :0.5606 </span></span> -<span id="cb3-16"><a href="#cb3-16"></a><span class="co">#> 3rd Qu.: 0.8560 3rd Qu.: 1.0470 3rd Qu.:0.9245 3rd Qu.:0.9800 </span></span> -<span id="cb3-17"><a href="#cb3-17"></a><span class="co">#> Max. :25.1610 Max. :20.7940 Max. :0.9889 Max. :1.0000 </span></span> -<span id="cb3-18"><a href="#cb3-18"></a><span class="co">#> NA's :87 NA's :87 NA's :326 NA's :413 </span></span> -<span id="cb3-19"><a href="#cb3-19"></a><span class="co">#> X.Raw Y.Raw R </span></span> -<span id="cb3-20"><a href="#cb3-20"></a><span class="co">#> Min. : 0 Min. : 0 Min. : 0.000 </span></span> -<span id="cb3-21"><a href="#cb3-21"></a><span class="co">#> 1st Qu.: 546 1st Qu.: 678 1st Qu.: 0.605 </span></span> -<span id="cb3-22"><a href="#cb3-22"></a><span class="co">#> Median : 1611 Median : 3908 Median : 1.027 </span></span> -<span id="cb3-23"><a href="#cb3-23"></a><span class="co">#> Mean : 5265 Mean : 5578 Mean : 1.228 </span></span> -<span id="cb3-24"><a href="#cb3-24"></a><span class="co">#> 3rd Qu.: 7924 3rd Qu.: 9232 3rd Qu.: 1.553 </span></span> -<span id="cb3-25"><a href="#cb3-25"></a><span class="co">#> Max. :49906 Max. :43935 Max. :26.041 </span></span> -<span id="cb3-26"><a href="#cb3-26"></a><span class="co">#> NA's :413</span></span> -<span id="cb3-27"><a href="#cb3-27"></a><span class="kw">data</span>(phenos)</span> -<span id="cb3-28"><a href="#cb3-28"></a><span class="kw">summary</span>(phenos)</span> -<span id="cb3-29"><a href="#cb3-29"></a><span class="co">#> Ind Sex Age Pheno </span></span> -<span id="cb3-30"><a href="#cb3-30"></a><span class="co">#> 201 : 1 Length:176 Min. :5.000 Min. : 8.609 </span></span> -<span id="cb3-31"><a href="#cb3-31"></a><span class="co">#> 202 : 1 Class :character 1st Qu.:7.000 1st Qu.:10.420 </span></span> -<span id="cb3-32"><a href="#cb3-32"></a><span class="co">#> 203 : 1 Mode :character Median :7.000 Median :10.628 </span></span> -<span id="cb3-33"><a href="#cb3-33"></a><span class="co">#> 204 : 1 Mean :7.176 Mean :10.526 </span></span> -<span id="cb3-34"><a href="#cb3-34"></a><span class="co">#> 205 : 1 3rd Qu.:8.000 3rd Qu.:10.793 </span></span> -<span id="cb3-35"><a href="#cb3-35"></a><span class="co">#> 206 : 1 Max. :9.000 Max. :11.147 </span></span> -<span id="cb3-36"><a href="#cb3-36"></a><span class="co">#> (Other):170</span></span></code></pre></div> -<div id="genotyping-of-parental-strains" class="section level3"> -<h3>Genotyping of parental strains</h3> -<p>To use genotyping result for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2).</p> -<p>We recommend to always genotype the parental strains of the cross. Here, their genotypes are in the <code>genos</code> file and correspond to the Sample.ID “StrainsA_1â€, “StrainsA_2â€, “StrainsB_1†and “StrainsB_2â€. Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes or the F2 individuals.</p> -<p>This is done with the <code>geno_strains</code> function.</p> -<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>strains <-<span class="st"> </span><span class="kw">geno_strains</span>(<span class="dt">ref=</span>annot_mini,<span class="dt">geno=</span>genos,<span class="dt">par1=</span><span class="kw">c</span>(<span class="st">"StrainsA_1"</span>,<span class="st">"StrainsA_2"</span>),<span class="dt">par2=</span><span class="kw">c</span>(<span class="st">"StrainsB_1"</span>,<span class="st">"StrainsB_2"</span>),<span class="dt">name1=</span><span class="st">"parent1"</span>,<span class="dt">name2=</span><span class="st">"parent2"</span>)</span> -<span id="cb4-2"><a href="#cb4-2"></a><span class="kw">head</span>(strains)</span> -<span id="cb4-3"><a href="#cb4-3"></a><span class="co">#> marker chr bp_mm10 cM_cox cM_g2f1 strand snp unique unmapped</span></span> -<span id="cb4-4"><a href="#cb4-4"></a><span class="co">#> 1 S3J010123784 1 3094603 1.542 0.046 plus CA TRUE FALSE</span></span> -<span id="cb4-5"><a href="#cb4-5"></a><span class="co">#> 2 SAH010136363 1 3409090 1.608 0.175 minus CA TRUE FALSE</span></span> -<span id="cb4-6"><a href="#cb4-6"></a><span class="co">#> 3 S2H010137098 1 3427467 1.609 0.177 minus AG TRUE FALSE</span></span> -<span id="cb4-7"><a href="#cb4-7"></a><span class="co">#> 4 UNCHS000006 1 3439034 1.610 0.178 plus CT TRUE FALSE</span></span> -<span id="cb4-8"><a href="#cb4-8"></a><span class="co">#> 5 mUNC010515443 1 3668628 1.627 0.202 minus GA TRUE FALSE</span></span> -<span id="cb4-9"><a href="#cb4-9"></a><span class="co">#> 6 S2J010178507 1 4462692 1.733 0.234 minus GA TRUE FALSE</span></span> -<span id="cb4-10"><a href="#cb4-10"></a><span class="co">#> probe chr_unc bp_unc parent1</span></span> -<span id="cb4-11"><a href="#cb4-11"></a><span class="co">#> 1 ATAAATTCTACAGCCACAGAAGTCACATTTTAGCACTGCTGTGTTTCCAG 1 3094603 C</span></span> -<span id="cb4-12"><a href="#cb4-12"></a><span class="co">#> 2 CCACCTTTGTACTCTGTATGCTACACAGAAGCTATGAGTATTCTTTTCCC 1 3409090 C</span></span> -<span id="cb4-13"><a href="#cb4-13"></a><span class="co">#> 3 ATTGAAAATGATCTAAGGGAGTCATGAGTACAAGGAGAAATGGGCATATT 1 3427467 A</span></span> -<span id="cb4-14"><a href="#cb4-14"></a><span class="co">#> 4 AATTTCTACCAGATCTCTTTGTCCTCCTAGAAGCATTGTGATACTCAGGA 1 3439034 C</span></span> -<span id="cb4-15"><a href="#cb4-15"></a><span class="co">#> 5 CAGGAAATGATGCTGAGAAAGTGAGAAGTAGGAAAACGTGGAGAAAAATA 1 3668628 G</span></span> -<span id="cb4-16"><a href="#cb4-16"></a><span class="co">#> 6 GACCTATGGTTAAAAGTCAGGCATTTCTTGTGTCTTCTTGTATTATTGGT 1 4462692 G</span></span> -<span id="cb4-17"><a href="#cb4-17"></a><span class="co">#> parent2</span></span> -<span id="cb4-18"><a href="#cb4-18"></a><span class="co">#> 1 C</span></span> -<span id="cb4-19"><a href="#cb4-19"></a><span class="co">#> 2 C</span></span> -<span id="cb4-20"><a href="#cb4-20"></a><span class="co">#> 3 A</span></span> -<span id="cb4-21"><a href="#cb4-21"></a><span class="co">#> 4 C</span></span> -<span id="cb4-22"><a href="#cb4-22"></a><span class="co">#> 5 G</span></span> -<span id="cb4-23"><a href="#cb4-23"></a><span class="co">#> 6 G</span></span></code></pre></div> -<p>After this step, we need to remove the genotyping result for these individuals from the <code>genos</code> dataset.</p> -<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>genos <-<span class="st"> </span>genos <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(<span class="op">!</span>Sample.ID <span class="op">%in%</span><span class="st"> </span><span class="kw">c</span>(<span class="st">"StrainsA_1"</span>, <span class="st">"StrainsA_2"</span>, <span class="st">"StrainsB_1"</span>,<span class="st">"StrainsB_2"</span>))</span></code></pre></div> -</div> -</div> -<div id="markers-sorting" class="section level2"> -<h2>Markers sorting</h2> -<div id="marker-tab" class="section level3"> -<h3>Marker tab</h3> -<p>The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (<code>Allele_1</code> and <code>Allele_2</code>), the number of individuals for each genotype (homozygous for each allele (<code>n_HM1</code> and <code>n_HM2</code>) and heterozygous (<code>n_HT</code>)), and the number of non genotyped individuals (<code>n_NA</code>) This step can take several minutes. You can also load the output of this function.</p> -<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a><span class="kw">data</span>(stuart_tab)</span> -<span id="cb6-2"><a href="#cb6-2"></a><span class="kw">summary</span>(stuart_tab)</span> -<span id="cb6-3"><a href="#cb6-3"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 </span></span> -<span id="cb6-4"><a href="#cb6-4"></a><span class="co">#> Length:11125 Length:11125 Length:11125 Min. : 0.0 </span></span> -<span id="cb6-5"><a href="#cb6-5"></a><span class="co">#> Class :character Class :character Class :character 1st Qu.: 44.0 </span></span> -<span id="cb6-6"><a href="#cb6-6"></a><span class="co">#> Mode :character Mode :character Mode :character Median :174.0 </span></span> -<span id="cb6-7"><a href="#cb6-7"></a><span class="co">#> Mean :123.9 </span></span> -<span id="cb6-8"><a href="#cb6-8"></a><span class="co">#> 3rd Qu.:176.0 </span></span> -<span id="cb6-9"><a href="#cb6-9"></a><span class="co">#> Max. :176.0 </span></span> -<span id="cb6-10"><a href="#cb6-10"></a><span class="co">#> n_HM2 n_HT n_NA </span></span> -<span id="cb6-11"><a href="#cb6-11"></a><span class="co">#> Min. : 0.00 Min. : 0.00 Min. : 0.00 </span></span> -<span id="cb6-12"><a href="#cb6-12"></a><span class="co">#> 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 </span></span> -<span id="cb6-13"><a href="#cb6-13"></a><span class="co">#> Median : 0.00 Median : 0.00 Median : 1.00 </span></span> -<span id="cb6-14"><a href="#cb6-14"></a><span class="co">#> Mean : 19.92 Mean : 19.24 Mean : 12.91 </span></span> -<span id="cb6-15"><a href="#cb6-15"></a><span class="co">#> 3rd Qu.: 34.00 3rd Qu.: 5.00 3rd Qu.: 5.00 </span></span> -<span id="cb6-16"><a href="#cb6-16"></a><span class="co">#> Max. :175.00 Max. :176.00 Max. :176.00</span></span></code></pre></div> -<p>Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.</p> -<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_match</span>(stuart_tab,<span class="dt">ref=</span>strains)</span> -<span id="cb7-2"><a href="#cb7-2"></a></span> -<span id="cb7-3"><a href="#cb7-3"></a></span> -<span id="cb7-4"><a href="#cb7-4"></a>tab2 <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(exclude_match<span class="op">==</span><span class="dv">1</span>)</span> -<span id="cb7-5"><a href="#cb7-5"></a><span class="co">#> # A tibble: 0 x 8</span></span> -<span id="cb7-6"><a href="#cb7-6"></a><span class="co">#> # … with 8 variables: SNP.Name <chr>, Allele_1 <chr>, Allele_2 <chr>,</span></span> -<span id="cb7-7"><a href="#cb7-7"></a><span class="co">#> # n_HM1 <dbl>, n_HM2 <dbl>, n_HT <dbl>, n_NA <dbl>, exclude_match <dbl></span></span></code></pre></div> -<p>Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded.</p> -<p>Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.</p> -<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_poly</span>(tab2)</span> -<span id="cb8-2"><a href="#cb8-2"></a><span class="kw">head</span>(tab2)</span> -<span id="cb8-3"><a href="#cb8-3"></a><span class="co">#> # A tibble: 6 x 9</span></span> -<span id="cb8-4"><a href="#cb8-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb8-5"><a href="#cb8-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb8-6"><a href="#cb8-6"></a><span class="co">#> 1 B1001000… C <NA> 175 0 0 1 0 1</span></span> -<span id="cb8-7"><a href="#cb8-7"></a><span class="co">#> 2 B1001001… <NA> <NA> 0 0 0 176 0 1</span></span> -<span id="cb8-8"><a href="#cb8-8"></a><span class="co">#> 3 B1001001… A <NA> 175 0 0 1 0 1</span></span> -<span id="cb8-9"><a href="#cb8-9"></a><span class="co">#> 4 B1001002… A G 171 0 2 3 0 0</span></span> -<span id="cb8-10"><a href="#cb8-10"></a><span class="co">#> 5 B1001002… C <NA> 176 0 0 0 0 1</span></span> -<span id="cb8-11"><a href="#cb8-11"></a><span class="co">#> 6 B1001003… A <NA> 176 0 0 0 0 1</span></span></code></pre></div> -<p>The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the “homo†argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the “homo†argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.</p> -<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_prop</span>(tab2,<span class="dt">cross=</span><span class="st">"F2"</span>,<span class="dt">homo=</span><span class="fl">0.1</span>,<span class="dt">hetero=</span><span class="fl">0.1</span>)</span> -<span id="cb9-2"><a href="#cb9-2"></a><span class="kw">head</span>(tab2)</span> -<span id="cb9-3"><a href="#cb9-3"></a><span class="co">#> # A tibble: 6 x 10</span></span> -<span id="cb9-4"><a href="#cb9-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb9-5"><a href="#cb9-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb9-6"><a href="#cb9-6"></a><span class="co">#> 1 B1001000… C <NA> 175 0 0 1 0 1</span></span> -<span id="cb9-7"><a href="#cb9-7"></a><span class="co">#> 2 B1001001… <NA> <NA> 0 0 0 176 0 1</span></span> -<span id="cb9-8"><a href="#cb9-8"></a><span class="co">#> 3 B1001001… A <NA> 175 0 0 1 0 1</span></span> -<span id="cb9-9"><a href="#cb9-9"></a><span class="co">#> 4 B1001002… A G 171 0 2 3 0 0</span></span> -<span id="cb9-10"><a href="#cb9-10"></a><span class="co">#> 5 B1001002… C <NA> 176 0 0 0 0 1</span></span> -<span id="cb9-11"><a href="#cb9-11"></a><span class="co">#> 6 B1001003… A <NA> 176 0 0 0 0 1</span></span> -<span id="cb9-12"><a href="#cb9-12"></a><span class="co">#> # … with 1 more variable: exclude_prop <dbl></span></span></code></pre></div> -<p>Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.</p> -<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_allele</span>(<span class="dt">tab=</span>tab2,<span class="dt">ref=</span>strains,<span class="dt">par1=</span><span class="st">"parent1"</span>,<span class="dt">par2=</span><span class="st">"parent2"</span>)</span> -<span id="cb10-2"><a href="#cb10-2"></a>tab2 <span class="op">%>%</span><span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(exclude_allele)) <span class="op">%>%</span><span class="st"> </span><span class="kw">head</span>()</span> -<span id="cb10-3"><a href="#cb10-3"></a><span class="co">#> # A tibble: 6 x 11</span></span> -<span id="cb10-4"><a href="#cb10-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb10-5"><a href="#cb10-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> 1 B1002001… T C 0 172 1 3 0 0</span></span> -<span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> 2 B1002009… A G 0 170 2 4 0 0</span></span> -<span id="cb10-8"><a href="#cb10-8"></a><span class="co">#> 3 B1003003… T C 0 162 2 12 0 0</span></span> -<span id="cb10-9"><a href="#cb10-9"></a><span class="co">#> 4 B1004002… T C 0 172 2 2 0 0</span></span> -<span id="cb10-10"><a href="#cb10-10"></a><span class="co">#> 5 B1004002… A C 0 168 1 7 0 0</span></span> -<span id="cb10-11"><a href="#cb10-11"></a><span class="co">#> 6 B1004005… T C 0 95 46 35 0 0</span></span> -<span id="cb10-12"><a href="#cb10-12"></a><span class="co">#> # … with 2 more variables: exclude_prop <dbl>, exclude_allele <dbl></span></span></code></pre></div> -<p>Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.</p> -<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>strains <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(marker <span class="op">%in%</span><span class="st"> </span><span class="kw">c</span>(<span class="st">"gJAX00038569"</span>,<span class="st">"gJAX00425031"</span>,<span class="st">"gUNC12245354"</span>,<span class="st">"gUNC15530876"</span>,<span class="st">"gUNC21555204"</span>,<span class="st">"gUNC21596600"</span>)) <span class="op">%>%</span><span class="st"> </span><span class="kw">arrange</span>(marker) <span class="op">%>%</span><span class="st"> </span><span class="kw">select</span>(marker,parent1,parent2)</span> -<span id="cb11-2"><a href="#cb11-2"></a><span class="co">#> marker parent1 parent2</span></span> -<span id="cb11-3"><a href="#cb11-3"></a><span class="co">#> 1 gJAX00038569 C C</span></span> -<span id="cb11-4"><a href="#cb11-4"></a><span class="co">#> 2 gJAX00425031 C C</span></span> -<span id="cb11-5"><a href="#cb11-5"></a><span class="co">#> 3 gUNC12245354 N N</span></span> -<span id="cb11-6"><a href="#cb11-6"></a><span class="co">#> 4 gUNC15530876 N N</span></span> -<span id="cb11-7"><a href="#cb11-7"></a><span class="co">#> 5 gUNC21555204 T T</span></span> -<span id="cb11-8"><a href="#cb11-8"></a><span class="co">#> 6 gUNC21596600 A A</span></span></code></pre></div> -</div> -</div> -<div id="creation-of-the-rqtl-file" class="section level1"> -<h1>Creation of the R/qtl file</h1> -<p>After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the “prefix†argument. The “path†argument can be used in order to create a CSV file that you can laod with qtl::read.cross.</p> -<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>rqtl_file <-<span class="st"> </span>(<span class="kw">write_rqtl</span>(<span class="dt">geno=</span>genos,<span class="dt">pheno=</span>phenos,<span class="dt">tab=</span>tab2,<span class="dt">ref=</span>strains,<span class="dt">par1=</span><span class="st">"parent1"</span>,<span class="dt">par2=</span><span class="st">"parent2"</span>,<span class="dt">prefix=</span><span class="st">"ind_"</span>))</span> -<span id="cb12-2"><a href="#cb12-2"></a></span> -<span id="cb12-3"><a href="#cb12-3"></a>rqtl_file[<span class="dv">1</span><span class="op">:</span><span class="dv">10</span>,<span class="dv">1</span><span class="op">:</span><span class="dv">7</span>]</span> -<span id="cb12-4"><a href="#cb12-4"></a><span class="co">#> # A tibble: 10 x 7</span></span> -<span id="cb12-5"><a href="#cb12-5"></a><span class="co">#> Ind Sex Age Pheno S6J010381992 S6J011498219 S6J011558924</span></span> -<span id="cb12-6"><a href="#cb12-6"></a><span class="co">#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> </span></span> -<span id="cb12-7"><a href="#cb12-7"></a><span class="co">#> 1 "" "" "" "" 1 1 1 </span></span> -<span id="cb12-8"><a href="#cb12-8"></a><span class="co">#> 2 "" "" "" "" 2.188 15.746 17.347 </span></span> -<span id="cb12-9"><a href="#cb12-9"></a><span class="co">#> 3 "201" "M" "7" "10.5296676084955" 1 1 1 </span></span> -<span id="cb12-10"><a href="#cb12-10"></a><span class="co">#> 4 "210" "M" "7" "10.4877403515456" 2 0 0 </span></span> -<span id="cb12-11"><a href="#cb12-11"></a><span class="co">#> 5 "308" "F" "7" "10.9714806769608" 1 0 0 </span></span> -<span id="cb12-12"><a href="#cb12-12"></a><span class="co">#> 6 "309" "M" "7" "10.8463976841841" 0 1 1 </span></span> -<span id="cb12-13"><a href="#cb12-13"></a><span class="co">#> 7 "310" "M" "7" "11.0680508131391" NA 1 1 </span></span> -<span id="cb12-14"><a href="#cb12-14"></a><span class="co">#> 8 "311" "F" "9" "10.5753482053348" 0 2 2 </span></span> -<span id="cb12-15"><a href="#cb12-15"></a><span class="co">#> 9 "312" "M" "7" "10.7137055438312" 0 2 2 </span></span> -<span id="cb12-16"><a href="#cb12-16"></a><span class="co">#> 10 "313" "M" "8" "10.7375743556446" 0 1 1</span></span></code></pre></div> -</div> - - - -<!-- code folding --> - - -<!-- dynamically load mathjax for compatibility with self-contained --> -<script> - (function () { - var script = document.createElement("script"); - script.type = "text/javascript"; - script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; - document.getElementsByTagName("head")[0].appendChild(script); - })(); -</script> - -</body> -</html> diff --git a/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/genos.txt b/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/genos.txt deleted file mode 100755 index 49a9e33f41b395a716aaf3df972513b9058f5bf4..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/genos.txt and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/phenos.txt b/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/phenos.txt deleted file mode 100755 index 1d25e9ab2a19f593f1c7809e71eeb16337aa3820..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/inst/extdata/phenos.txt +++ /dev/null @@ -1,177 +0,0 @@ -Ind Sex Exp Age Death Day.death Pheno -201 M 36 7 1 14 10.5296676084955 -210 M 38 7 0 14 10.4877403515456 -308 F 53 7 1 8 10.9714806769608 -309 M 53 7 1 7 10.8463976841841 -310 M 53 7 1 8 11.0680508131391 -311 F 53 9 1 7 10.5753482053348 -312 M 56 7 1 7 10.7137055438312 -313 M 56 8 1 7 10.7375743556446 -315 M 56 7 0 14 10.8719419391157 -318 F 56 6 1 6 10.8396273596649 -319 F 56 6 0 14 10.7668793836238 -320 M 58 8 0 14 10.7325547648656 -211 M 38 7 0 14 10.8695302496376 -321 M 58 6 1 7 10.8283736794678 -322 M 58 6 1 7 10.8915635045684 -323 M 58 6 1 7 10.5356077157252 -324 F 58 6 1 7 10.6280255935391 -325 F 58 8 0 14 10.7480587696075 -326 F 58 6 1 7 10.723232575067 -327 F 58 6 1 7 10.561378054744 -328 M 58 8 1 8 10.678328733254 -329 M 58 7 1 8 10.5553266906605 -330 M 58 7 1 8 9.80206867803321 -212 M 38 7 0 14 10.6357450501074 -331 M 58 7 1 7 10.5982605721032 -332 M 58 7 1 7 10.8473250099944 -333 F 58 6 1 7 10.3764348734637 -334 F 58 6 1 7 10.5460358865664 -336 F 58 5 1 7 10.5222855493513 -337 F 67 8 1 9 10.5327174182859 -338 F 67 6 0 14 10.5473443040275 -339 F 67 8 1 8 9.87194179754362 -340 F 67 6 1 8 10.7090598750034 -341 M 67 6 1 8 10.2209997596409 -213 M 38 7 1 8 11.0270167306899 -342 M 67 8 1 8 10.5431290182179 -343 M 68 8 0 14 10.4981076961986 -344 M 68 8 0 14 10.3735386730113 -345 F 68 8 1 14 10.4320215351478 -346 F 68 8 0 14 8.60947058208406 -347 F 73 7 0 14 10.4168301170408 -348 F 73 7 1 6 11.0628524075666 -349 F 73 7 1 7 10.4330268533001 -350 M 73 7 1 7 10.4800637563932 -351 M 73 6 1 7 10.6324192103035 -214 M 38 7 1 8 10.8218250314825 -352 M 73 6 1 8 10.4725870266356 -353 M 73 6 1 8 10.463431310619 -354 F 73 8 1 9 10.2353468625069 -355 F 73 8 1 8 10.4784947722463 -356 M 73 8 0 14 10.2302554084309 -357 M 73 7 1 7 10.3331446827217 -358 M 75 7 1 8 10.4392178227394 -359 M 75 7 1 9 10.2749993364651 -360 M 75 7 1 9 10.5896956543931 -361 M 75 7 1 9 10.4102656461775 -215 M 38 7 1 7 10.98405240378 -362 M 75 7 0 14 9.25536232298768 -363 M 75 9 1 9 10.3492941307168 -364 M 75 9 1 9 10.3620959742092 -365 M 75 9 1 12 10.4744976511434 -366 M 75 9 1 12 11.030867369515 -369 F 75 9 0 14 10.628477999175 -370 F 75 9 0 14 10.4670786491756 -373 F 75 7 0 14 10.9179711868264 -374 F 75 9 1 8 10.4953620125703 -375 F 75 7 1 7 10.1874531724974 -216 M 38 7 0 14 10.5655205985375 -316 M 78 9 0 14 10.766442313453 -371 M 78 9 1 12 10.4195168879626 -372 M 78 9 1 9 9.95272129347567 -376 M 78 8 0 14 10.4195168879626 -377 F 78 9 0 14 9.95272129347567 -379 F 78 8 0 14 10.0823979142355 -380 F 78 8 1 8 10.9095058001147 -381 M 78 8 1 12 10.8801492251438 -383 F 78 7 1 8 10.9965190456509 -384 F 78 7 0 14 11.0509121188558 -217 M 38 7 1 7 10.4994444268072 -385 F 78 7 1 8 11.0429066559961 -386 F 78 7 0 14 10.8109357224842 -387 M 78 8 0 14 10.6074095323141 -388 M 78 7 0 14 10.7086232219316 -389 M 78 7 1 8 10.8244554978178 -390 M 78 7 1 8 10.5135622767937 -391 M 78 7 1 8 10.6549253863367 -219 F 38 7 1 9 10.9538292220917 -220 M 40 7 1 7 10.7769347175043 -202 M 36 7 1 7 10.6939691207128 -221 M 40 7 0 14 10.6933765093072 -222 F 43 6 1 7 10.6767026645835 -223 F 43 6 0 14 10.5003906942331 -224 F 43 6 0 14 10.5149649858964 -225 F 43 6 0 14 10.8903239014372 -226 M 43 6 0 14 10.505656838062 -227 M 43 6 1 7 10.9661802289593 -228 M 43 6 1 7 10.7323373517824 -229 M 45 8 1 7 10.7312300131548 -230 M 45 8 0 14 10.5748054422932 -203 M 36 7 0 14 10.6753018614348 -231 M 45 8 0 14 10.6367921020912 -232 M 45 8 1 7 11.0739366525724 -233 F 45 8 0 14 10.1491227173623 -234 F 45 8 1 8 10.6667311696339 -235 F 45 8 0 14 8.7941942405191 -236 F 45 8 0 14 10.9093248127322 -237 M 47 7 0 14 10.9324949209267 -238 M 47 7 0 14 10.6947153326779 -239 M 47 7 0 14 10.6120985065664 -240 M 47 7 1 8 10.8327443879201 -204 M 36 7 1 7 10.7791570102755 -241 M 47 7 1 9 10.5125518260128 -242 M 47 7 1 7 10.6894120122994 -243 M 47 7 1 7 11.0691241089673 -245 M 47 7 0 14 10.4819031965733 -246 F 47 7 1 8 9.96762433427887 -247 F 47 7 1 8 10.7245450774586 -248 F 47 7 0 14 10.4912425829631 -249 F 47 7 0 14 10.7474083506145 -250 M 48 8 0 14 10.2720876656636 -251 M 48 8 0 14 9.31399254303701 -205 M 38 7 0 14 10.4428500375673 -252 M 48 8 1 7 10.2862132021395 -253 M 48 7 1 8 10.5048761731899 -254 F 48 8 0 14 10.228672834882 -255 F 48 7 1 7 9.62470062290951 -256 F 48 7 0 14 10.6406621644657 -257 M 48 7 0 14 9.94948396721433 -258 M 48 7 1 7 9.30736764204129 -259 M 48 7 0 14 9.88913119725402 -260 M 48 7 0 14 10.2380596666633 -261 M 48 7 1 7 10.2540669844702 -206 M 38 7 0 14 10.7606379038488 -262 M 48 7 0 14 9.76042991401441 -263 M 48 7 0 14 10.6091919117065 -264 F 50 8 0 14 10.6851731967701 -266 F 50 7 1 8 10.7673408387505 -267 M 50 8 1 7 10.0482542502632 -269 M 50 8 1 8 11.0733835864953 -270 M 50 8 0 14 10.4484942670828 -271 M 50 7 0 14 10.1718549349538 -272 M 50 6 1 7 10.3653629747907 -274 M 50 8 1 7 10.7987817154853 -207 M 38 7 0 14 9.28000922051108 -265 F 51 8 1 8 10.6396558357786 -268 F 51 8 1 8 10.38209131129 -273 F 51 7 0 14 10.668205277928 -277 F 51 7 0 14 10.3487626209106 -278 M 51 7 0 14 8.79404815627125 -279 M 51 6 0 14 10.983879782484 -284 M 52 7 1 7 10.9273290008382 -285 M 52 7 0 14 10.9386568661041 -286 M 52 7 0 14 10.8807485243696 -287 M 52 7 1 8 10.9037194625542 -208 M 38 7 0 14 10.7753050361387 -288 M 52 7 0 14 10.7677108901204 -289 F 52 7 1 7 10.9101761509059 -290 F 52 7 1 7 10.9405884955895 -291 F 52 7 0 14 8.61409649857943 -292 F 52 7 0 14 10.7908109750272 -293 F 52 7 1 7 10.9585494660728 -283 M 52 7 0 14 10.7347416773051 -294 M 52 7 0 14 10.5163444828732 -295 M 52 7 0 14 10.7527016128882 -296 M 52 6 0 14 10.7427219571178 -209 M 38 7 1 7 10.6341153687419 -297 M 52 6 1 8 10.6603294779049 -298 M 52 6 1 7 10.952768506441 -299 F 52 6 1 7 11.1474990509139 -300 F 52 6 0 14 9.35603884473677 -301 F 52 6 1 8 11.1056293961659 -303 M 53 9 0 14 10.1069851602543 -304 F 53 7 1 8 10.7275476145336 -305 F 53 7 0 14 10.9381137134461 -306 F 53 7 0 14 10.5449698000374 -307 F 53 7 0 14 10.6311447764499 diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/geno_strains.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/geno_strains.Rd deleted file mode 100755 index c7be59d453890a7cfa5929321dbbb2b8d99f9418..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/geno_strains.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/geno_strains.R -\name{geno_strains} -\alias{geno_strains} -\title{Create haplotype for a new mouse strain into a reference dataframe} -\usage{ -geno_strains(ref, geno, par1, par2, name1, name2) -} -\arguments{ -\item{ref}{data frame with the reference genotypes of mouse lines} - -\item{geno}{data frame with the genotyping results for your cross from miniMUGA array} - -\item{par1}{first parental strain used in the cross, the name must be written as in the geno data frame} - -\item{par2}{second parental strain used in the cross, the name must be written as in the geno data frame} - -\item{name1}{name of the first parental strain to use as the column name in the ref data frame} - -\item{name2}{name of the second parental strain to use as the column name in the ref data frame} -} -\description{ -This functions adds columns for parental strains used in the cross in the annotation data frame, from the genotype data frame in which one or several animal of the parental strains were genotyped. -If several animals of one strain were genotyped, a consensus is created from these animals. -The consensus is created as follow : if the indivuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H". -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/genos.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/genos.Rd deleted file mode 100755 index 1aad5938b7e0d7fd09e31b1ca35a07c887f09449..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/genos.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/genos-data.R -\docType{data} -\name{genos} -\alias{genos} -\title{Data frame with miniMUGA genotyping of F2 individuals and parental strains} -\format{ -A data frame with 2002493 observations of 11 variables -} -\usage{ -genos -} -\description{ -A dataset containing the genotypes of 176 F2 individuals -} -\keyword{datasets} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/mark_allele.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/mark_allele.Rd deleted file mode 100755 index 6bb2a189321bcc7d4990373d4ff75b65d3950546..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/mark_allele.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mark_allele.R -\name{mark_allele} -\alias{mark_allele} -\title{Exclude markers that have different alleles in the individuals of the cross and in parental strains} -\usage{ -mark_allele(tab, ref, par1, par2) -} -\arguments{ -\item{tab}{data frame obtained with tab_mark function} - -\item{ref}{data frame with the reference genotypes of mouse lines} - -\item{par1}{first parental strain used in the cross, the name must be written as in the "ref" data frame} - -\item{par2}{second parental strain used in the cross, the name must be written as in the "ref" data frame} -} -\description{ -This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers which have alleles observed in the individuals of the cross that do not correspond to the alleles observed in the parental strains. For example, a marker which is not polymorphic between the two parental strains but which has two alleles in the cross individuals will be excluded. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/mark_match.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/mark_match.Rd deleted file mode 100755 index 15791a97b75b92476cf5a33d9a3720a849a4d1a0..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/mark_match.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mark_match.R -\name{mark_match} -\alias{mark_match} -\title{Exclude markers that were not genotyped in the reference strains} -\usage{ -mark_match(tab, ref) -} -\arguments{ -\item{tab}{data frame obtained with tab_mark function} - -\item{ref}{data frame with the reference genotypes of mouse lines} -} -\description{ -This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that were genotyped in the individuals of the cross but not in the reference strains. This is useful if the parental strains of the cross were not genotyped with the individuals and a previous genotyping result is used. Indeed, changes in the markers of the array may have happened. We recommend always using this function in order to avoid errors. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/mark_poly.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/mark_poly.Rd deleted file mode 100755 index d68157afc68e62187a31bf8862ea3fdaf16585eb..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/mark_poly.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mark_poly.R -\name{mark_poly} -\alias{mark_poly} -\title{Exclude markers that are not polymorphic} -\usage{ -mark_poly(tab) -} -\arguments{ -\item{tab}{data frame obtained with tab_mark function} -} -\description{ -This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that are not polymorphic. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/mark_prop.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/mark_prop.Rd deleted file mode 100755 index fa0163af9fb00460925f56115d4688185f175841..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/mark_prop.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mark_prop.R -\name{mark_prop} -\alias{mark_prop} -\title{Exclude markers depending on proportions of homo/hetorozygous} -\usage{ -mark_prop(tab, cross, homo = NA, hetero = NA, na = 0.5) -} -\arguments{ -\item{tab}{data frame obtained with tab_mark function.} - -\item{cross}{F2 or N2.} - -\item{homo}{proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2.} - -\item{hetero}{proportion of heterozygous individuals under which the marker is excluded.} - -\item{na}{proportion of non-genotyped individuals above which the marker is excluded.} -} -\description{ -uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/phenos.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/phenos.Rd deleted file mode 100755 index bd7b403cc69888e78b28149e4a30c0ded16367a2..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/phenos.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phenos-data.R -\docType{data} -\name{phenos} -\alias{phenos} -\title{Data frame with phenotype of F2 individuals} -\format{ -A data frame with 176 observations of 4 variables -} -\usage{ -phenos -} -\description{ -A dataset containing the phenotype of 176 F2 individuals -} -\keyword{datasets} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/ref_strains_mini.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/ref_strains_mini.Rd deleted file mode 100755 index e9117d1075cc3823b7511e4b219a5656a5cf348a..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/ref_strains_mini.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ref_strains_mini-data.R -\docType{data} -\name{ref_strains_mini} -\alias{ref_strains_mini} -\title{Data frame with miniMUGA genotyping of classical lab strains.} -\format{ -A data frame with 11299 rows and 18 variables -\describe{ -\item{CC001}{CC001 mouse strain} -\item{CC005}{CC005 mouse strain} -\item{CC042}{CC042 mouse strain} -\item{CC071}{CC071 mouse strain} -\item{Ifnar.KO.129}{Ifnar KO 129 mouse strain} -\item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain} -\item{Rvfs2.1}{Rvfs2-1 mouse strain} -\item{Rvfs2.2}{Rvfs2-2 mouse strain} -\item{Rvfs2.6}{Rvfs2-6 mouse strain} -\item{Rvfs2.7}{Rvfs2-7 mouse strain} -\item{marker}{name of the marker} -\item{chr}{chromosome} -\item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)} -\item{cM_cox}{localisation on chromosome in cM (from Cox et al.)} -\item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)} -\item{snp}{marker alleles} -\item{unique}{indicates if the marker maps uniquely on mm10} -\item{multi}{indicates if the marker maps more than one time on mm10} -\item{unmapped}{indicates if the marker does not map perfectly on mm10} -} -} -\usage{ -ref_strains_mini -} -\description{ -A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur. -} -\keyword{datasets} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/stuart_tab.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/stuart_tab.Rd deleted file mode 100644 index ebe357ac22fecfe7fca149db36df30412e4d55ea..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/stuart_tab.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stuart_tab-data.R -\docType{data} -\name{stuart_tab} -\alias{stuart_tab} -\title{Output of tab_mark function} -\format{ -A data frame with 11125 rows and 7 variables -\describe{ -\item{SNP.Name}{name of the marker} -\item{Allele_1}{first allele of the marker} -\item{Allele_2}{second allele of the marker} -\item{n_HM1}{number of homozygous individuals for the first allele} -\item{n_HM2}{number of homozygous individuals for the second allele} -\item{n_HT}{number of heterozygous individuals} -\item{n_NA}{number of non genotyped individuals} -} -} -\usage{ -stuart_tab -} -\description{ -A dataset with the output of tab_mark() function. -} -\keyword{datasets} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/tab_mark.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/tab_mark.Rd deleted file mode 100755 index fb6ef3952d5a3b8147940e428aaba0dfb728e905..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/tab_mark.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tab_mark.R -\name{tab_mark} -\alias{tab_mark} -\title{Create of the summary table for all markers from the genotype data frame} -\usage{ -tab_mark(geno) -} -\arguments{ -\item{geno}{data frame with the genotyping results for your cross} -} -\description{ -This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/man/write_rqtl.Rd b/stuart.Rcheck/00_pkg_src/stuart/man/write_rqtl.Rd deleted file mode 100755 index 77fb9302cf40dd2bff55569500ef452f85e9214b..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/man/write_rqtl.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_rqtl.R -\name{write_rqtl} -\alias{write_rqtl} -\title{Create data frame in Rqtl CSV format} -\usage{ -write_rqtl( - geno, - pheno, - tab, - ref, - par1, - par2, - prefix, - method = "cM_cox", - path = NA -) -} -\arguments{ -\item{geno}{data frame with the genotyping results for your cross} - -\item{pheno}{data frame with phenotypes of the individuals (individuals must have the same ID in the geno data frame and in the pheno data frame)} - -\item{tab}{data frame obtained with tab_mark function} - -\item{ref}{data frame with the reference genotypes of mouse lines} - -\item{par1}{first parental strain used in the cross, the name must be written as in the "ref" data frame} - -\item{par2}{second parental strain used in the cross, the name must be written as in the "ref" data frame} - -\item{prefix}{potential prefix present in the names of the individuals in the geno data frame to be removed in ordere to have the same names as in the pheno file} - -\item{method}{method of calculation of cM position, can be "cM_cox" of "cM_g2f1"} - -\item{path}{if indicated, the data frame will be exported in this path} -} -\description{ -This function uses the table produced by tab_mark function filled by all the mark_* functions in order to create a data frame in the right format for Rqtl read.cross function. Only the non-excluded markers will be kept and genotypeds will be encoded in "0", "1" and "2", "0" being homozygous for the first parental strain, "1" heterozygous and "2" homozygous for the second parental strain. Caution, this file create a data frame and a CSV file in the path of your choice if indicated by the "path" argument. This function does not create a "cross" object in your environment that can be directly used for QTL mapping. You will need to load the CSV file with qtl::read.cross. -} diff --git a/stuart.Rcheck/00_pkg_src/stuart/stuart_0.1.0.tar.gz b/stuart.Rcheck/00_pkg_src/stuart/stuart_0.1.0.tar.gz deleted file mode 100644 index ffbb4d84106e8cdf3040091f9393e381e2dc6d90..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/00_pkg_src/stuart/stuart_0.1.0.tar.gz and /dev/null differ diff --git a/stuart.Rcheck/00_pkg_src/stuart/vignettes/stuaRt.Rmd b/stuart.Rcheck/00_pkg_src/stuart/vignettes/stuaRt.Rmd deleted file mode 100755 index 4e294ab0cc3558b6b84acf6a660e790a6c6522f4..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00_pkg_src/stuart/vignettes/stuaRt.Rmd +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: "stuaRt" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{stuaRt} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -Marie Bourdon - -April 2021 - -## Goal - -stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. - -The examples shown here require the use of dplyr package. - - -```{r setup} -library(dplyr) -library(stuart) -``` - - -## Annotation files - -The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations). - -We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. - -Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuaRt package. - -```{r annot} -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -``` - - - -```{r load} -data(genos) -summary(genos) -data(phenos) -summary(phenos) -``` - -### Genotyping of parental strains - -To use genotyping result for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2). - -We recommend to always genotype the parental strains of the cross. Here, their genotypes are in the `genos` file and correspond to the Sample.ID "StrainsA_1", "StrainsA_2", "StrainsB_1" and "StrainsB_2". Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes or the F2 individuals. - -This is done with the `geno_strains` function. - -```{r strains} -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -``` - -After this step, we need to remove the genotyping result for these individuals from the `genos` dataset. -```{r no_parent} -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -``` - - -## Markers sorting - -### Marker tab - -The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. - - -```{r tab_mark} -data(stuart_tab) -summary(stuart_tab) -``` - -Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. - -```{r mark_match} -tab2 <- mark_match(stuart_tab,ref=strains) - - -tab2 %>% filter(exclude_match==1) -``` - -Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. - -Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. - -```{r mark_poly ex} -tab2 <- mark_poly(tab2) -head(tab2) -``` - -The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. - -```{r mark_prop ex} -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) -``` - -Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. - -```{r mark_allele} -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() -``` - -Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. - -```{r mark_allele-strains} -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) -``` - -# Creation of the R/qtl file - -After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. - -```{r write_qtl} -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) - -rqtl_file[1:10,1:7] -``` - diff --git a/stuart.Rcheck/00check.log b/stuart.Rcheck/00check.log deleted file mode 100644 index ce5827b5840a1d1881cc62e551b30cbc0bb42e59..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00check.log +++ /dev/null @@ -1,124 +0,0 @@ -* using log directory ‘/Users/mariebourdon/stuart_package/stuart/stuart/stuart.Rcheck’ -* using R version 4.0.4 (2021-02-15) -* using platform: x86_64-apple-darwin17.0 (64-bit) -* using session charset: UTF-8 -* using options ‘--no-manual --as-cran’ -* checking for file ‘stuart/DESCRIPTION’ ... OK -* this is package ‘stuart’ version ‘0.1.0’ -* package encoding: UTF-8 -* checking package namespace information ... OK -* checking package dependencies ... OK -* checking if this is a source package ... OK -* checking if there is a namespace ... OK -* checking for executable files ... OK -* checking for hidden files and directories ... OK -* checking for portable file names ... OK -* checking for sufficient/correct file permissions ... OK -* checking whether package ‘stuart’ can be installed ... OK -* checking installed package size ... NOTE - installed size is 166.5Mb - sub-directories of 1Mb or more: - data 41.4Mb - extdata 124.9Mb -* checking package directory ... OK -* checking for future file timestamps ... OK -* checking ‘build’ directory ... OK -* checking DESCRIPTION meta-information ... OK -* checking top-level files ... NOTE -Non-standard file/directory found at top level: - ‘stuart_0.1.0.tar.gz’ -* checking for left-over files ... OK -* checking index information ... OK -* checking package subdirectories ... OK -* checking R files for non-ASCII characters ... OK -* checking R files for syntax errors ... OK -* checking whether the package can be loaded ... OK -* checking whether the package can be loaded with stated dependencies ... OK -* checking whether the package can be unloaded cleanly ... OK -* checking whether the namespace can be loaded with stated dependencies ... OK -* checking whether the namespace can be unloaded cleanly ... OK -* checking dependencies in R code ... OK -* checking S3 generic/method consistency ... OK -* checking replacement functions ... OK -* checking foreign function calls ... OK -* checking R code for possible problems ... NOTE -geno_strains: no visible binding for global variable ‘Sample.ID’ -geno_strains: no visible binding for global variable ‘SNP.Name’ -geno_strains: no visible binding for global variable ‘Geno’ -geno_strains: no visible binding for global variable ‘parent1’ -geno_strains: no visible binding for global variable ‘parent2’ -mark_allele: no visible binding for global variable ‘marker’ -mark_match: no visible binding for global variable ‘SNP.Name’ -mark_poly: no visible binding for global variable ‘Allele_2’ -mark_prop: no visible binding for global variable ‘n_HM1’ -mark_prop: no visible binding for global variable ‘n_HM2’ -mark_prop: no visible binding for global variable ‘n_HT’ -mark_prop: no visible binding for global variable ‘n_geno’ -mark_prop: no visible binding for global variable ‘n_NA’ -mark_prop: no visible binding for global variable ‘p_HM1’ -mark_prop: no visible binding for global variable ‘p_HM2’ -mark_prop: no visible binding for global variable ‘p_HT’ -mark_prop: no visible binding for global variable ‘p_NA’ -tab_mark: no visible binding for global variable ‘Geno’ -tab_mark: no visible binding for global variable ‘SNP.Name’ -tab_mark: no visible binding for global variable ‘Sample.ID’ -tab_mark: no visible binding for global variable ‘Allele1...Forward’ -tab_mark: no visible binding for global variable ‘Allele2...Forward’ -tab_mark: no visible binding for global variable ‘Allele’ -tab_mark: no visible binding for global variable ‘Allele_1’ -tab_mark: no visible binding for global variable ‘n_HM1’ -tab_mark: no visible binding for global variable ‘Allele_2’ -tab_mark: no visible binding for global variable ‘n_HM2’ -tab_mark: no visible binding for global variable ‘n_HT’ -tab_mark: no visible binding for global variable ‘n_NA’ -write_rqtl: no visible binding for global variable ‘exclude_match’ -write_rqtl: no visible binding for global variable ‘exclude_poly’ -write_rqtl: no visible binding for global variable ‘exclude_prop’ -write_rqtl: no visible binding for global variable ‘exclude_allele’ -write_rqtl: no visible binding for global variable ‘SNP.Name’ -write_rqtl: no visible binding for global variable ‘Sample.ID’ -write_rqtl: no visible binding for global variable ‘Allele1...Forward’ -write_rqtl: no visible binding for global variable ‘Allele2...Forward’ -write_rqtl: no visible binding for global variable ‘marker’ -write_rqtl: no visible binding for global variable ‘chr’ -write_rqtl: no visible binding for global variable ‘bp_mm10’ -write_rqtl: no visible binding for global variable ‘cM_cox’ -write_rqtl: no visible binding for global variable ‘cM_g2f1’ -write_rqtl: no visible binding for global variable ‘Geno’ -write_rqtl: no visible binding for global variable ‘.’ -Undefined global functions or variables: - . Allele Allele1...Forward Allele2...Forward Allele_1 Allele_2 Geno - SNP.Name Sample.ID bp_mm10 cM_cox cM_g2f1 chr exclude_allele - exclude_match exclude_poly exclude_prop marker n_HM1 n_HM2 n_HT n_NA - n_geno p_HM1 p_HM2 p_HT p_NA parent1 parent2 -* checking Rd files ... OK -* checking Rd metadata ... OK -* checking Rd line widths ... OK -* checking Rd cross-references ... OK -* checking for missing documentation entries ... OK -* checking for code/documentation mismatches ... WARNING -Data codoc mismatches from documentation object 'ref_strains_mini': -Variables in data frame 'ref_strains_mini' - Code: CC001 CC005 CC042 CC071 Ifnar.KO.129 Ifnar.KO.B6 Rvfs2.1 - Rvfs2.2 Rvfs2.6 Rvfs2.7 bp_mm10 cM_cox cM_g2f1 chr marker snp - unique unmapped - Docs: CC001 CC005 CC042 CC071 Ifnar.KO.129 Ifnar.KO.B6 Rvfs2.1 - Rvfs2.2 Rvfs2.6 Rvfs2.7 bp_mm10 cM_cox cM_g2f1 chr marker multi - snp unique unmapped - -* checking Rd \usage sections ... OK -* checking Rd contents ... OK -* checking for unstated dependencies in examples ... OK -* checking contents of ‘data’ directory ... OK -* checking data for non-ASCII characters ... OK -* checking data for ASCII and uncompressed saves ... OK -* checking installed files from ‘inst/doc’ ... OK -* checking files in ‘vignettes’ ... OK -* checking examples ... NONE -* checking for unstated dependencies in vignettes ... OK -* checking package vignettes in ‘inst/doc’ ... OK -* checking re-building of vignette outputs ... OK -* checking for non-standard things in the check directory ... OK -* checking for detritus in the temp directory ... OK -* DONE -Status: 1 WARNING, 3 NOTEs diff --git a/stuart.Rcheck/00install.out b/stuart.Rcheck/00install.out deleted file mode 100644 index 85123f6e14dff9c876627ded1d7669e3af45c77c..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/00install.out +++ /dev/null @@ -1,15 +0,0 @@ -* installing *source* package ‘stuart’ ... -** using staged installation -** R -** data -*** moving datasets to lazyload DB -** inst -** byte-compile and prepare package for lazy loading -** help -*** installing help indices -** building package indices -** installing vignettes -** testing if installed package can be loaded from temporary location -** testing if installed package can be loaded from final location -** testing if installed package keeps a record of temporary installation path -* DONE (stuart) diff --git a/stuart.Rcheck/R_check_bin/R b/stuart.Rcheck/R_check_bin/R deleted file mode 100755 index 3e289bc21d3f8a3f979596483c640411ca6e8b2f..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/R_check_bin/R +++ /dev/null @@ -1,2 +0,0 @@ -echo "'R' should not be used without a path -- see par. 1.6 of the manual" -exit 1 diff --git a/stuart.Rcheck/R_check_bin/Rscript b/stuart.Rcheck/R_check_bin/Rscript deleted file mode 100755 index 6fead74173b543aa25ff26146b3d59659136a0c1..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/R_check_bin/Rscript +++ /dev/null @@ -1,2 +0,0 @@ -echo "'Rscript' should not be used without a path -- see par. 1.6 of the manual" -exit 1 diff --git a/stuart.Rcheck/stuart/DESCRIPTION b/stuart.Rcheck/stuart/DESCRIPTION deleted file mode 100644 index 821f2e19414e18c62f4b35fa51e754bd13dee7cd..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/DESCRIPTION +++ /dev/null @@ -1,24 +0,0 @@ -Package: stuart -Title: stuaRt -Version: 0.1.0 -Authors@R: - person(given = "Marie", - family = "Bourdon", - role = c("aut", "cre"), - email = "mariefbourdon@gmail.com", - comment = c(ORCID = "YOUR-ORCID-ID")) -Description: Sorts markers of miniMUGA genotyping for F2 or N2 individuals, for Rqtl analysis. -License: GPL-3 -Encoding: UTF-8 -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 -Imports: dplyr, tidyr, utils, stringr, rapportools -Suggests: knitr, rmarkdown -VignetteBuilder: knitr -Depends: R (>= 3.5.0) -NeedsCompilation: no -Packaged: 2021-06-01 08:06:30 UTC; mariebourdon -Author: Marie Bourdon [aut, cre] (YOUR-ORCID-ID) -Maintainer: Marie Bourdon <mariefbourdon@gmail.com> -Built: R 4.0.4; ; 2021-06-01 08:07:48 UTC; unix diff --git a/stuart.Rcheck/stuart/INDEX b/stuart.Rcheck/stuart/INDEX deleted file mode 100644 index 224235f8b427f4a688db3615f2a05028b216892a..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/INDEX +++ /dev/null @@ -1,19 +0,0 @@ -geno_strains Create haplotype for a new mouse strain into a - reference dataframe -genos Data frame with miniMUGA genotyping of F2 - individuals and parental strains -mark_allele Exclude markers that have different alleles in - the individuals of the cross and in parental - strains -mark_match Exclude markers that were not genotyped in the - reference strains -mark_poly Exclude markers that are not polymorphic -mark_prop Exclude markers depending on proportions of - homo/hetorozygous -phenos Data frame with phenotype of F2 individuals -ref_strains_mini Data frame with miniMUGA genotyping of - classical lab strains. -stuart_tab Output of tab_mark function -tab_mark Create of the summary table for all markers - from the genotype data frame -write_rqtl Create data frame in Rqtl CSV format diff --git a/stuart.Rcheck/stuart/Meta/Rd.rds b/stuart.Rcheck/stuart/Meta/Rd.rds deleted file mode 100644 index c4ea79b4670e103d10b06c8c9fc26b62f168ee32..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/Rd.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/data.rds b/stuart.Rcheck/stuart/Meta/data.rds deleted file mode 100644 index 24ba997e0432534388ba081f0da7778ba0d8bb8a..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/data.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/features.rds b/stuart.Rcheck/stuart/Meta/features.rds deleted file mode 100644 index 1c5201d9a160c38ddc83d2f212e6464610414d9b..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/features.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/hsearch.rds b/stuart.Rcheck/stuart/Meta/hsearch.rds deleted file mode 100644 index 61f3ee133c875ffb2def1cb4fba2c28c27b2b6c4..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/hsearch.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/links.rds b/stuart.Rcheck/stuart/Meta/links.rds deleted file mode 100644 index 13c8247d5d77be63bd8e0d9592f61caedb941685..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/links.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/nsInfo.rds b/stuart.Rcheck/stuart/Meta/nsInfo.rds deleted file mode 100644 index c765a31e6cf90d51ddb9b037036f2b8a37731e3f..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/nsInfo.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/package.rds b/stuart.Rcheck/stuart/Meta/package.rds deleted file mode 100644 index 70396b31d9b11138a401dc5f27e8493812272c44..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/package.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/Meta/vignette.rds b/stuart.Rcheck/stuart/Meta/vignette.rds deleted file mode 100644 index 87a9d6290223156fd3752ff37a7c170f33892ee0..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/Meta/vignette.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/NAMESPACE b/stuart.Rcheck/stuart/NAMESPACE deleted file mode 100644 index 902e04361e27306cd36fac0de688e5a6ab80da2c..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/NAMESPACE +++ /dev/null @@ -1,13 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -export(geno_strains) -export(mark_allele) -export(mark_match) -export(mark_poly) -export(mark_prop) -export(tab_mark) -export(write_rqtl) -import(dplyr) -import(stringr) -import(tidyr) -import(utils) diff --git a/stuart.Rcheck/stuart/R/stuart b/stuart.Rcheck/stuart/R/stuart deleted file mode 100644 index 6686156321de4dbdb52db7e32cd99961007f56e2..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/R/stuart +++ /dev/null @@ -1,27 +0,0 @@ -# File share/R/nspackloader.R -# Part of the R package, https://www.R-project.org -# -# Copyright (C) 1995-2012 The R Core Team -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# A copy of the GNU General Public License is available at -# https://www.r-project.org/Licenses/ - -local({ - info <- loadingNamespaceInfo() - pkg <- info$pkgname - ns <- .getNamespace(as.name(pkg)) - if (is.null(ns)) - stop("cannot find namespace environment for ", pkg, domain = NA); - dbbase <- file.path(info$libname, pkg, "R", pkg) - lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.") -}) diff --git a/stuart.Rcheck/stuart/R/stuart.rdb b/stuart.Rcheck/stuart/R/stuart.rdb deleted file mode 100644 index a4ea4dd1a0d843f313da96af76a0dc5382957ba4..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/R/stuart.rdb and /dev/null differ diff --git a/stuart.Rcheck/stuart/R/stuart.rdx b/stuart.Rcheck/stuart/R/stuart.rdx deleted file mode 100644 index fa4db41f31d22a83d342d724a4aa8700995d4f84..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/R/stuart.rdx and /dev/null differ diff --git a/stuart.Rcheck/stuart/data/Rdata.rdb b/stuart.Rcheck/stuart/data/Rdata.rdb deleted file mode 100644 index 5fd98c9b3739082c1c1ca079427013f1b3c1370e..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/data/Rdata.rdb and /dev/null differ diff --git a/stuart.Rcheck/stuart/data/Rdata.rds b/stuart.Rcheck/stuart/data/Rdata.rds deleted file mode 100644 index 8c9ce116d6ae5bee09b992ce1a42430315ce7c41..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/data/Rdata.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/data/Rdata.rdx b/stuart.Rcheck/stuart/data/Rdata.rdx deleted file mode 100644 index c9902b606dbba79a2c47b7b4a17b9abff116416f..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/data/Rdata.rdx and /dev/null differ diff --git a/stuart.Rcheck/stuart/doc/index.html b/stuart.Rcheck/stuart/doc/index.html deleted file mode 100644 index 1946e82821cd67a4f6c75ec171142c3113c81912..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/doc/index.html +++ /dev/null @@ -1,28 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head><title>R: Vignettes and other documentation</title> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<link rel="stylesheet" type="text/css" href="/doc/html/R.css" /> -</head><body> -<h1> Vignettes and other documentation -<img class="toplogo" src="/doc/html/Rlogo.svg" alt="[R logo]" /> -</h1> -<hr/> -<div style="text-align: center;"> -<a href="/doc/html/index.html"><img class="arrow" src="/doc/html/up.jpg" alt="[Top]" /></a> -</div> -<h2>Vignettes from package 'stuart'</h2> -<table width="100%"> -<col style="width: 22%;" /> -<col style="width: 2%;" /> -<col style="width: 50%;" /> -<col style="width: 8%;" /> -<col style="width: 8%;" /> -<col style="width: 8%;" /> -<tr><td style="text-align: right; vertical-align: top;"><a href="../../../library/stuart/doc/stuaRt.html">stuart::stuaRt</a></td> -<td></td><td valign="top">stuaRt</td> -<td valign="top"><a href="../../../library/stuart/doc/stuaRt.html">HTML</a></td> -<td valign="top"><a href="../../../library/stuart/doc/stuaRt.Rmd">source</a></td> -<td valign="top" style="white-space: nowrap"><a href="../../../library/stuart/doc/stuaRt.R">R code</a></td></tr> -</table> -</body></html> diff --git a/stuart.Rcheck/stuart/doc/stuaRt.R b/stuart.Rcheck/stuart/doc/stuaRt.R deleted file mode 100644 index 71e57691d5f643cb3edb7e751d2c7641ee3b5f96..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/doc/stuaRt.R +++ /dev/null @@ -1,56 +0,0 @@ -## ---- include = FALSE--------------------------------------------------------- -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -## ----setup-------------------------------------------------------------------- -library(dplyr) -library(stuart) - -## ----annot-------------------------------------------------------------------- -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) - -## ----load--------------------------------------------------------------------- -data(genos) -summary(genos) -data(phenos) -summary(phenos) - -## ----strains------------------------------------------------------------------ -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) - -## ----no_parent---------------------------------------------------------------- -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) - -## ----tab_mark----------------------------------------------------------------- -data(stuart_tab) -summary(stuart_tab) - -## ----mark_match--------------------------------------------------------------- -tab2 <- mark_match(stuart_tab,ref=strains) - - -tab2 %>% filter(exclude_match==1) - -## ----mark_poly ex------------------------------------------------------------- -tab2 <- mark_poly(tab2) -head(tab2) - -## ----mark_prop ex------------------------------------------------------------- -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) - -## ----mark_allele-------------------------------------------------------------- -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() - -## ----mark_allele-strains------------------------------------------------------ -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) - -## ----write_qtl---------------------------------------------------------------- -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) - -rqtl_file[1:10,1:7] - diff --git a/stuart.Rcheck/stuart/doc/stuaRt.Rmd b/stuart.Rcheck/stuart/doc/stuaRt.Rmd deleted file mode 100755 index 4e294ab0cc3558b6b84acf6a660e790a6c6522f4..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/doc/stuaRt.Rmd +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: "stuaRt" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{stuaRt} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -Marie Bourdon - -April 2021 - -## Goal - -stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. - -The examples shown here require the use of dplyr package. - - -```{r setup} -library(dplyr) -library(stuart) -``` - - -## Annotation files - -The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations). - -We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. - -Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuaRt package. - -```{r annot} -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -``` - - - -```{r load} -data(genos) -summary(genos) -data(phenos) -summary(phenos) -``` - -### Genotyping of parental strains - -To use genotyping result for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2). - -We recommend to always genotype the parental strains of the cross. Here, their genotypes are in the `genos` file and correspond to the Sample.ID "StrainsA_1", "StrainsA_2", "StrainsB_1" and "StrainsB_2". Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes or the F2 individuals. - -This is done with the `geno_strains` function. - -```{r strains} -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) -``` - -After this step, we need to remove the genotyping result for these individuals from the `genos` dataset. -```{r no_parent} -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) -``` - - -## Markers sorting - -### Marker tab - -The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. - - -```{r tab_mark} -data(stuart_tab) -summary(stuart_tab) -``` - -Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. - -```{r mark_match} -tab2 <- mark_match(stuart_tab,ref=strains) - - -tab2 %>% filter(exclude_match==1) -``` - -Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. - -Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. - -```{r mark_poly ex} -tab2 <- mark_poly(tab2) -head(tab2) -``` - -The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. - -```{r mark_prop ex} -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) -``` - -Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. - -```{r mark_allele} -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() -``` - -Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. - -```{r mark_allele-strains} -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) -``` - -# Creation of the R/qtl file - -After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. - -```{r write_qtl} -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) - -rqtl_file[1:10,1:7] -``` - diff --git a/stuart.Rcheck/stuart/doc/stuaRt.html b/stuart.Rcheck/stuart/doc/stuaRt.html deleted file mode 100644 index 4c9410ed11fa0ee4af7da835ea7e5fc2d57ce791..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/doc/stuaRt.html +++ /dev/null @@ -1,558 +0,0 @@ -<!DOCTYPE html> - -<html> - -<head> - -<meta charset="utf-8" /> -<meta name="generator" content="pandoc" /> -<meta http-equiv="X-UA-Compatible" content="IE=EDGE" /> - -<meta name="viewport" content="width=device-width, initial-scale=1" /> - - - -<title>stuaRt</title> - -<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); -</script> -<script>// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> -// v0.0.1 -// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. - -document.addEventListener('DOMContentLoaded', function() { - const codeList = document.getElementsByClassName("sourceCode"); - for (var i = 0; i < codeList.length; i++) { - var linkList = codeList[i].getElementsByTagName('a'); - for (var j = 0; j < linkList.length; j++) { - if (linkList[j].innerHTML === "") { - linkList[j].setAttribute('aria-hidden', 'true'); - } - } - } -}); -</script> - -<style type="text/css"> - code{white-space: pre-wrap;} - span.smallcaps{font-variant: small-caps;} - span.underline{text-decoration: underline;} - div.column{display: inline-block; vertical-align: top; width: 50%;} - div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} - ul.task-list{list-style: none;} - </style> - - -<style type="text/css"> - code { - white-space: pre; - } - .sourceCode { - overflow: visible; - } -</style> -<style type="text/css" data-origin="pandoc"> -pre > code.sourceCode { white-space: pre; position: relative; } -pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } -pre > code.sourceCode > span:empty { height: 1.2em; } -code.sourceCode > span { color: inherit; text-decoration: inherit; } -div.sourceCode { margin: 1em 0; } -pre.sourceCode { margin: 0; } -@media screen { -div.sourceCode { overflow: auto; } -} -@media print { -pre > code.sourceCode { white-space: pre-wrap; } -pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } -} -pre.numberSource code - { counter-reset: source-line 0; } -pre.numberSource code > span - { position: relative; left: -4em; counter-increment: source-line; } -pre.numberSource code > span > a:first-child::before - { content: counter(source-line); - position: relative; left: -1em; text-align: right; vertical-align: baseline; - border: none; display: inline-block; - -webkit-touch-callout: none; -webkit-user-select: none; - -khtml-user-select: none; -moz-user-select: none; - -ms-user-select: none; user-select: none; - padding: 0 4px; width: 4em; - color: #aaaaaa; - } -pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } -div.sourceCode - { } -@media screen { -pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } -} -code span.al { color: #ff0000; font-weight: bold; } /* Alert */ -code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */ -code span.at { color: #7d9029; } /* Attribute */ -code span.bn { color: #40a070; } /* BaseN */ -code span.bu { } /* BuiltIn */ -code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */ -code span.ch { color: #4070a0; } /* Char */ -code span.cn { color: #880000; } /* Constant */ -code span.co { color: #60a0b0; font-style: italic; } /* Comment */ -code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */ -code span.do { color: #ba2121; font-style: italic; } /* Documentation */ -code span.dt { color: #902000; } /* DataType */ -code span.dv { color: #40a070; } /* DecVal */ -code span.er { color: #ff0000; font-weight: bold; } /* Error */ -code span.ex { } /* Extension */ -code span.fl { color: #40a070; } /* Float */ -code span.fu { color: #06287e; } /* Function */ -code span.im { } /* Import */ -code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */ -code span.kw { color: #007020; font-weight: bold; } /* Keyword */ -code span.op { color: #666666; } /* Operator */ -code span.ot { color: #007020; } /* Other */ -code span.pp { color: #bc7a00; } /* Preprocessor */ -code span.sc { color: #4070a0; } /* SpecialChar */ -code span.ss { color: #bb6688; } /* SpecialString */ -code span.st { color: #4070a0; } /* String */ -code span.va { color: #19177c; } /* Variable */ -code span.vs { color: #4070a0; } /* VerbatimString */ -code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ - -</style> -<script> -// apply pandoc div.sourceCode style to pre.sourceCode instead -(function() { - var sheets = document.styleSheets; - for (var i = 0; i < sheets.length; i++) { - if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue; - try { var rules = sheets[i].cssRules; } catch (e) { continue; } - for (var j = 0; j < rules.length; j++) { - var rule = rules[j]; - // check if there is a div.sourceCode rule - if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue; - var style = rule.style.cssText; - // check if color or background-color is set - if (rule.style.color === '' && rule.style.backgroundColor === '') continue; - // replace div.sourceCode by a pre.sourceCode rule - sheets[i].deleteRule(j); - sheets[i].insertRule('pre.sourceCode{' + style + '}', j); - } - } -})(); -</script> - - - - -<style type="text/css">body { -background-color: #fff; -margin: 1em auto; -max-width: 700px; -overflow: visible; -padding-left: 2em; -padding-right: 2em; -font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; -font-size: 14px; -line-height: 1.35; -} -#TOC { -clear: both; -margin: 0 0 10px 10px; -padding: 4px; -width: 400px; -border: 1px solid #CCCCCC; -border-radius: 5px; -background-color: #f6f6f6; -font-size: 13px; -line-height: 1.3; -} -#TOC .toctitle { -font-weight: bold; -font-size: 15px; -margin-left: 5px; -} -#TOC ul { -padding-left: 40px; -margin-left: -1.5em; -margin-top: 5px; -margin-bottom: 5px; -} -#TOC ul ul { -margin-left: -2em; -} -#TOC li { -line-height: 16px; -} -table { -margin: 1em auto; -border-width: 1px; -border-color: #DDDDDD; -border-style: outset; -border-collapse: collapse; -} -table th { -border-width: 2px; -padding: 5px; -border-style: inset; -} -table td { -border-width: 1px; -border-style: inset; -line-height: 18px; -padding: 5px 5px; -} -table, table th, table td { -border-left-style: none; -border-right-style: none; -} -table thead, table tr.even { -background-color: #f7f7f7; -} -p { -margin: 0.5em 0; -} -blockquote { -background-color: #f6f6f6; -padding: 0.25em 0.75em; -} -hr { -border-style: solid; -border: none; -border-top: 1px solid #777; -margin: 28px 0; -} -dl { -margin-left: 0; -} -dl dd { -margin-bottom: 13px; -margin-left: 13px; -} -dl dt { -font-weight: bold; -} -ul { -margin-top: 0; -} -ul li { -list-style: circle outside; -} -ul ul { -margin-bottom: 0; -} -pre, code { -background-color: #f7f7f7; -border-radius: 3px; -color: #333; -white-space: pre-wrap; -} -pre { -border-radius: 3px; -margin: 5px 0px 10px 0px; -padding: 10px; -} -pre:not([class]) { -background-color: #f7f7f7; -} -code { -font-family: Consolas, Monaco, 'Courier New', monospace; -font-size: 85%; -} -p > code, li > code { -padding: 2px 0px; -} -div.figure { -text-align: center; -} -img { -background-color: #FFFFFF; -padding: 2px; -border: 1px solid #DDDDDD; -border-radius: 3px; -border: 1px solid #CCCCCC; -margin: 0 5px; -} -h1 { -margin-top: 0; -font-size: 35px; -line-height: 40px; -} -h2 { -border-bottom: 4px solid #f7f7f7; -padding-top: 10px; -padding-bottom: 2px; -font-size: 145%; -} -h3 { -border-bottom: 2px solid #f7f7f7; -padding-top: 10px; -font-size: 120%; -} -h4 { -border-bottom: 1px solid #f7f7f7; -margin-left: 8px; -font-size: 105%; -} -h5, h6 { -border-bottom: 1px solid #ccc; -font-size: 105%; -} -a { -color: #0033dd; -text-decoration: none; -} -a:hover { -color: #6666ff; } -a:visited { -color: #800080; } -a:visited:hover { -color: #BB00BB; } -a[href^="http:"] { -text-decoration: underline; } -a[href^="https:"] { -text-decoration: underline; } - -code > span.kw { color: #555; font-weight: bold; } -code > span.dt { color: #902000; } -code > span.dv { color: #40a070; } -code > span.bn { color: #d14; } -code > span.fl { color: #d14; } -code > span.ch { color: #d14; } -code > span.st { color: #d14; } -code > span.co { color: #888888; font-style: italic; } -code > span.ot { color: #007020; } -code > span.al { color: #ff0000; font-weight: bold; } -code > span.fu { color: #900; font-weight: bold; } -code > span.er { color: #a61717; background-color: #e3d2d2; } -</style> - - - - -</head> - -<body> - - - - -<h1 class="title toc-ignore">stuaRt</h1> - - - -<p>Marie Bourdon</p> -<p>April 2021</p> -<div id="goal" class="section level2"> -<h2>Goal</h2> -<p>stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc.</p> -<p>The examples shown here require the use of dplyr package.</p> -<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1"></a><span class="kw">library</span>(dplyr)</span> -<span id="cb1-2"><a href="#cb1-2"></a><span class="co">#> </span></span> -<span id="cb1-3"><a href="#cb1-3"></a><span class="co">#> Attaching package: 'dplyr'</span></span> -<span id="cb1-4"><a href="#cb1-4"></a><span class="co">#> The following objects are masked from 'package:stats':</span></span> -<span id="cb1-5"><a href="#cb1-5"></a><span class="co">#> </span></span> -<span id="cb1-6"><a href="#cb1-6"></a><span class="co">#> filter, lag</span></span> -<span id="cb1-7"><a href="#cb1-7"></a><span class="co">#> The following objects are masked from 'package:base':</span></span> -<span id="cb1-8"><a href="#cb1-8"></a><span class="co">#> </span></span> -<span id="cb1-9"><a href="#cb1-9"></a><span class="co">#> intersect, setdiff, setequal, union</span></span> -<span id="cb1-10"><a href="#cb1-10"></a><span class="kw">library</span>(stuart)</span></code></pre></div> -</div> -<div id="annotation-files" class="section level2"> -<h2>Annotation files</h2> -<p>The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : “cM_cox†and “cM_g2f1†(see <a href="https://kbroman.org/MUGAarrays/mini_revisited.html" class="uri">https://kbroman.org/MUGAarrays/mini_revisited.html</a> for more informations).</p> -<p>We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (<a href="https://github.com/kbroman/MUGAarrays/tree/master/UWisc" class="uri">https://github.com/kbroman/MUGAarrays/tree/master/UWisc</a>). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R.</p> -<p>Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: <code>genos</code> and thephenotype dataset produced by the lab: <code>phenos</code>. All these datasets are available for example in stuaRt package.</p> -<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1"></a>annot_mini <-<span class="st"> </span><span class="kw">read.csv</span>(<span class="kw">url</span>(<span class="st">"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"</span>))</span></code></pre></div> -<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1"></a><span class="kw">data</span>(genos)</span> -<span id="cb3-2"><a href="#cb3-2"></a><span class="kw">summary</span>(genos)</span> -<span id="cb3-3"><a href="#cb3-3"></a><span class="co">#> SNP.Name Sample.ID Allele1...Forward Allele2...Forward </span></span> -<span id="cb3-4"><a href="#cb3-4"></a><span class="co">#> Length:2002493 Length:2002493 Length:2002493 Length:2002493 </span></span> -<span id="cb3-5"><a href="#cb3-5"></a><span class="co">#> Class :character Class :character Class :character Class :character </span></span> -<span id="cb3-6"><a href="#cb3-6"></a><span class="co">#> Mode :character Mode :character Mode :character Mode :character </span></span> -<span id="cb3-7"><a href="#cb3-7"></a><span class="co">#> </span></span> -<span id="cb3-8"><a href="#cb3-8"></a><span class="co">#> </span></span> -<span id="cb3-9"><a href="#cb3-9"></a><span class="co">#> </span></span> -<span id="cb3-10"><a href="#cb3-10"></a><span class="co">#> </span></span> -<span id="cb3-11"><a href="#cb3-11"></a><span class="co">#> X Y GC.Score Theta </span></span> -<span id="cb3-12"><a href="#cb3-12"></a><span class="co">#> Min. : 0.0000 Min. : 0.0000 Min. :0.0000 Min. :0.0000 </span></span> -<span id="cb3-13"><a href="#cb3-13"></a><span class="co">#> 1st Qu.: 0.0260 1st Qu.: 0.0480 1st Qu.:0.6747 1st Qu.:0.0420 </span></span> -<span id="cb3-14"><a href="#cb3-14"></a><span class="co">#> Median : 0.1750 Median : 0.5010 Median :0.8550 Median :0.6620 </span></span> -<span id="cb3-15"><a href="#cb3-15"></a><span class="co">#> Mean : 0.5716 Mean : 0.6564 Mean :0.7276 Mean :0.5606 </span></span> -<span id="cb3-16"><a href="#cb3-16"></a><span class="co">#> 3rd Qu.: 0.8560 3rd Qu.: 1.0470 3rd Qu.:0.9245 3rd Qu.:0.9800 </span></span> -<span id="cb3-17"><a href="#cb3-17"></a><span class="co">#> Max. :25.1610 Max. :20.7940 Max. :0.9889 Max. :1.0000 </span></span> -<span id="cb3-18"><a href="#cb3-18"></a><span class="co">#> NA's :87 NA's :87 NA's :326 NA's :413 </span></span> -<span id="cb3-19"><a href="#cb3-19"></a><span class="co">#> X.Raw Y.Raw R </span></span> -<span id="cb3-20"><a href="#cb3-20"></a><span class="co">#> Min. : 0 Min. : 0 Min. : 0.000 </span></span> -<span id="cb3-21"><a href="#cb3-21"></a><span class="co">#> 1st Qu.: 546 1st Qu.: 678 1st Qu.: 0.605 </span></span> -<span id="cb3-22"><a href="#cb3-22"></a><span class="co">#> Median : 1611 Median : 3908 Median : 1.027 </span></span> -<span id="cb3-23"><a href="#cb3-23"></a><span class="co">#> Mean : 5265 Mean : 5578 Mean : 1.228 </span></span> -<span id="cb3-24"><a href="#cb3-24"></a><span class="co">#> 3rd Qu.: 7924 3rd Qu.: 9232 3rd Qu.: 1.553 </span></span> -<span id="cb3-25"><a href="#cb3-25"></a><span class="co">#> Max. :49906 Max. :43935 Max. :26.041 </span></span> -<span id="cb3-26"><a href="#cb3-26"></a><span class="co">#> NA's :413</span></span> -<span id="cb3-27"><a href="#cb3-27"></a><span class="kw">data</span>(phenos)</span> -<span id="cb3-28"><a href="#cb3-28"></a><span class="kw">summary</span>(phenos)</span> -<span id="cb3-29"><a href="#cb3-29"></a><span class="co">#> Ind Sex Age Pheno </span></span> -<span id="cb3-30"><a href="#cb3-30"></a><span class="co">#> 201 : 1 Length:176 Min. :5.000 Min. : 8.609 </span></span> -<span id="cb3-31"><a href="#cb3-31"></a><span class="co">#> 202 : 1 Class :character 1st Qu.:7.000 1st Qu.:10.420 </span></span> -<span id="cb3-32"><a href="#cb3-32"></a><span class="co">#> 203 : 1 Mode :character Median :7.000 Median :10.628 </span></span> -<span id="cb3-33"><a href="#cb3-33"></a><span class="co">#> 204 : 1 Mean :7.176 Mean :10.526 </span></span> -<span id="cb3-34"><a href="#cb3-34"></a><span class="co">#> 205 : 1 3rd Qu.:8.000 3rd Qu.:10.793 </span></span> -<span id="cb3-35"><a href="#cb3-35"></a><span class="co">#> 206 : 1 Max. :9.000 Max. :11.147 </span></span> -<span id="cb3-36"><a href="#cb3-36"></a><span class="co">#> (Other):170</span></span></code></pre></div> -<div id="genotyping-of-parental-strains" class="section level3"> -<h3>Genotyping of parental strains</h3> -<p>To use genotyping result for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2).</p> -<p>We recommend to always genotype the parental strains of the cross. Here, their genotypes are in the <code>genos</code> file and correspond to the Sample.ID “StrainsA_1â€, “StrainsA_2â€, “StrainsB_1†and “StrainsB_2â€. Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes or the F2 individuals.</p> -<p>This is done with the <code>geno_strains</code> function.</p> -<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1"></a>strains <-<span class="st"> </span><span class="kw">geno_strains</span>(<span class="dt">ref=</span>annot_mini,<span class="dt">geno=</span>genos,<span class="dt">par1=</span><span class="kw">c</span>(<span class="st">"StrainsA_1"</span>,<span class="st">"StrainsA_2"</span>),<span class="dt">par2=</span><span class="kw">c</span>(<span class="st">"StrainsB_1"</span>,<span class="st">"StrainsB_2"</span>),<span class="dt">name1=</span><span class="st">"parent1"</span>,<span class="dt">name2=</span><span class="st">"parent2"</span>)</span> -<span id="cb4-2"><a href="#cb4-2"></a><span class="kw">head</span>(strains)</span> -<span id="cb4-3"><a href="#cb4-3"></a><span class="co">#> marker chr bp_mm10 cM_cox cM_g2f1 strand snp unique unmapped</span></span> -<span id="cb4-4"><a href="#cb4-4"></a><span class="co">#> 1 S3J010123784 1 3094603 1.542 0.046 plus CA TRUE FALSE</span></span> -<span id="cb4-5"><a href="#cb4-5"></a><span class="co">#> 2 SAH010136363 1 3409090 1.608 0.175 minus CA TRUE FALSE</span></span> -<span id="cb4-6"><a href="#cb4-6"></a><span class="co">#> 3 S2H010137098 1 3427467 1.609 0.177 minus AG TRUE FALSE</span></span> -<span id="cb4-7"><a href="#cb4-7"></a><span class="co">#> 4 UNCHS000006 1 3439034 1.610 0.178 plus CT TRUE FALSE</span></span> -<span id="cb4-8"><a href="#cb4-8"></a><span class="co">#> 5 mUNC010515443 1 3668628 1.627 0.202 minus GA TRUE FALSE</span></span> -<span id="cb4-9"><a href="#cb4-9"></a><span class="co">#> 6 S2J010178507 1 4462692 1.733 0.234 minus GA TRUE FALSE</span></span> -<span id="cb4-10"><a href="#cb4-10"></a><span class="co">#> probe chr_unc bp_unc parent1</span></span> -<span id="cb4-11"><a href="#cb4-11"></a><span class="co">#> 1 ATAAATTCTACAGCCACAGAAGTCACATTTTAGCACTGCTGTGTTTCCAG 1 3094603 C</span></span> -<span id="cb4-12"><a href="#cb4-12"></a><span class="co">#> 2 CCACCTTTGTACTCTGTATGCTACACAGAAGCTATGAGTATTCTTTTCCC 1 3409090 C</span></span> -<span id="cb4-13"><a href="#cb4-13"></a><span class="co">#> 3 ATTGAAAATGATCTAAGGGAGTCATGAGTACAAGGAGAAATGGGCATATT 1 3427467 A</span></span> -<span id="cb4-14"><a href="#cb4-14"></a><span class="co">#> 4 AATTTCTACCAGATCTCTTTGTCCTCCTAGAAGCATTGTGATACTCAGGA 1 3439034 C</span></span> -<span id="cb4-15"><a href="#cb4-15"></a><span class="co">#> 5 CAGGAAATGATGCTGAGAAAGTGAGAAGTAGGAAAACGTGGAGAAAAATA 1 3668628 G</span></span> -<span id="cb4-16"><a href="#cb4-16"></a><span class="co">#> 6 GACCTATGGTTAAAAGTCAGGCATTTCTTGTGTCTTCTTGTATTATTGGT 1 4462692 G</span></span> -<span id="cb4-17"><a href="#cb4-17"></a><span class="co">#> parent2</span></span> -<span id="cb4-18"><a href="#cb4-18"></a><span class="co">#> 1 C</span></span> -<span id="cb4-19"><a href="#cb4-19"></a><span class="co">#> 2 C</span></span> -<span id="cb4-20"><a href="#cb4-20"></a><span class="co">#> 3 A</span></span> -<span id="cb4-21"><a href="#cb4-21"></a><span class="co">#> 4 C</span></span> -<span id="cb4-22"><a href="#cb4-22"></a><span class="co">#> 5 G</span></span> -<span id="cb4-23"><a href="#cb4-23"></a><span class="co">#> 6 G</span></span></code></pre></div> -<p>After this step, we need to remove the genotyping result for these individuals from the <code>genos</code> dataset.</p> -<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1"></a>genos <-<span class="st"> </span>genos <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(<span class="op">!</span>Sample.ID <span class="op">%in%</span><span class="st"> </span><span class="kw">c</span>(<span class="st">"StrainsA_1"</span>, <span class="st">"StrainsA_2"</span>, <span class="st">"StrainsB_1"</span>,<span class="st">"StrainsB_2"</span>))</span></code></pre></div> -</div> -</div> -<div id="markers-sorting" class="section level2"> -<h2>Markers sorting</h2> -<div id="marker-tab" class="section level3"> -<h3>Marker tab</h3> -<p>The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (<code>Allele_1</code> and <code>Allele_2</code>), the number of individuals for each genotype (homozygous for each allele (<code>n_HM1</code> and <code>n_HM2</code>) and heterozygous (<code>n_HT</code>)), and the number of non genotyped individuals (<code>n_NA</code>) This step can take several minutes. You can also load the output of this function.</p> -<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1"></a><span class="kw">data</span>(stuart_tab)</span> -<span id="cb6-2"><a href="#cb6-2"></a><span class="kw">summary</span>(stuart_tab)</span> -<span id="cb6-3"><a href="#cb6-3"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 </span></span> -<span id="cb6-4"><a href="#cb6-4"></a><span class="co">#> Length:11125 Length:11125 Length:11125 Min. : 0.0 </span></span> -<span id="cb6-5"><a href="#cb6-5"></a><span class="co">#> Class :character Class :character Class :character 1st Qu.: 44.0 </span></span> -<span id="cb6-6"><a href="#cb6-6"></a><span class="co">#> Mode :character Mode :character Mode :character Median :174.0 </span></span> -<span id="cb6-7"><a href="#cb6-7"></a><span class="co">#> Mean :123.9 </span></span> -<span id="cb6-8"><a href="#cb6-8"></a><span class="co">#> 3rd Qu.:176.0 </span></span> -<span id="cb6-9"><a href="#cb6-9"></a><span class="co">#> Max. :176.0 </span></span> -<span id="cb6-10"><a href="#cb6-10"></a><span class="co">#> n_HM2 n_HT n_NA </span></span> -<span id="cb6-11"><a href="#cb6-11"></a><span class="co">#> Min. : 0.00 Min. : 0.00 Min. : 0.00 </span></span> -<span id="cb6-12"><a href="#cb6-12"></a><span class="co">#> 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 </span></span> -<span id="cb6-13"><a href="#cb6-13"></a><span class="co">#> Median : 0.00 Median : 0.00 Median : 1.00 </span></span> -<span id="cb6-14"><a href="#cb6-14"></a><span class="co">#> Mean : 19.92 Mean : 19.24 Mean : 12.91 </span></span> -<span id="cb6-15"><a href="#cb6-15"></a><span class="co">#> 3rd Qu.: 34.00 3rd Qu.: 5.00 3rd Qu.: 5.00 </span></span> -<span id="cb6-16"><a href="#cb6-16"></a><span class="co">#> Max. :175.00 Max. :176.00 Max. :176.00</span></span></code></pre></div> -<p>Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.</p> -<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_match</span>(stuart_tab,<span class="dt">ref=</span>strains)</span> -<span id="cb7-2"><a href="#cb7-2"></a></span> -<span id="cb7-3"><a href="#cb7-3"></a></span> -<span id="cb7-4"><a href="#cb7-4"></a>tab2 <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(exclude_match<span class="op">==</span><span class="dv">1</span>)</span> -<span id="cb7-5"><a href="#cb7-5"></a><span class="co">#> # A tibble: 0 x 8</span></span> -<span id="cb7-6"><a href="#cb7-6"></a><span class="co">#> # … with 8 variables: SNP.Name <chr>, Allele_1 <chr>, Allele_2 <chr>,</span></span> -<span id="cb7-7"><a href="#cb7-7"></a><span class="co">#> # n_HM1 <dbl>, n_HM2 <dbl>, n_HT <dbl>, n_NA <dbl>, exclude_match <dbl></span></span></code></pre></div> -<p>Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded.</p> -<p>Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.</p> -<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_poly</span>(tab2)</span> -<span id="cb8-2"><a href="#cb8-2"></a><span class="kw">head</span>(tab2)</span> -<span id="cb8-3"><a href="#cb8-3"></a><span class="co">#> # A tibble: 6 x 9</span></span> -<span id="cb8-4"><a href="#cb8-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb8-5"><a href="#cb8-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb8-6"><a href="#cb8-6"></a><span class="co">#> 1 B1001000… C <NA> 175 0 0 1 0 1</span></span> -<span id="cb8-7"><a href="#cb8-7"></a><span class="co">#> 2 B1001001… <NA> <NA> 0 0 0 176 0 1</span></span> -<span id="cb8-8"><a href="#cb8-8"></a><span class="co">#> 3 B1001001… A <NA> 175 0 0 1 0 1</span></span> -<span id="cb8-9"><a href="#cb8-9"></a><span class="co">#> 4 B1001002… A G 171 0 2 3 0 0</span></span> -<span id="cb8-10"><a href="#cb8-10"></a><span class="co">#> 5 B1001002… C <NA> 176 0 0 0 0 1</span></span> -<span id="cb8-11"><a href="#cb8-11"></a><span class="co">#> 6 B1001003… A <NA> 176 0 0 0 0 1</span></span></code></pre></div> -<p>The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the “homo†argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the “homo†argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.</p> -<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_prop</span>(tab2,<span class="dt">cross=</span><span class="st">"F2"</span>,<span class="dt">homo=</span><span class="fl">0.1</span>,<span class="dt">hetero=</span><span class="fl">0.1</span>)</span> -<span id="cb9-2"><a href="#cb9-2"></a><span class="kw">head</span>(tab2)</span> -<span id="cb9-3"><a href="#cb9-3"></a><span class="co">#> # A tibble: 6 x 10</span></span> -<span id="cb9-4"><a href="#cb9-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb9-5"><a href="#cb9-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb9-6"><a href="#cb9-6"></a><span class="co">#> 1 B1001000… C <NA> 175 0 0 1 0 1</span></span> -<span id="cb9-7"><a href="#cb9-7"></a><span class="co">#> 2 B1001001… <NA> <NA> 0 0 0 176 0 1</span></span> -<span id="cb9-8"><a href="#cb9-8"></a><span class="co">#> 3 B1001001… A <NA> 175 0 0 1 0 1</span></span> -<span id="cb9-9"><a href="#cb9-9"></a><span class="co">#> 4 B1001002… A G 171 0 2 3 0 0</span></span> -<span id="cb9-10"><a href="#cb9-10"></a><span class="co">#> 5 B1001002… C <NA> 176 0 0 0 0 1</span></span> -<span id="cb9-11"><a href="#cb9-11"></a><span class="co">#> 6 B1001003… A <NA> 176 0 0 0 0 1</span></span> -<span id="cb9-12"><a href="#cb9-12"></a><span class="co">#> # … with 1 more variable: exclude_prop <dbl></span></span></code></pre></div> -<p>Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.</p> -<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1"></a>tab2 <-<span class="st"> </span><span class="kw">mark_allele</span>(<span class="dt">tab=</span>tab2,<span class="dt">ref=</span>strains,<span class="dt">par1=</span><span class="st">"parent1"</span>,<span class="dt">par2=</span><span class="st">"parent2"</span>)</span> -<span id="cb10-2"><a href="#cb10-2"></a>tab2 <span class="op">%>%</span><span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(exclude_allele)) <span class="op">%>%</span><span class="st"> </span><span class="kw">head</span>()</span> -<span id="cb10-3"><a href="#cb10-3"></a><span class="co">#> # A tibble: 6 x 11</span></span> -<span id="cb10-4"><a href="#cb10-4"></a><span class="co">#> SNP.Name Allele_1 Allele_2 n_HM1 n_HM2 n_HT n_NA exclude_match exclude_poly</span></span> -<span id="cb10-5"><a href="#cb10-5"></a><span class="co">#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl></span></span> -<span id="cb10-6"><a href="#cb10-6"></a><span class="co">#> 1 B1002001… T C 0 172 1 3 0 0</span></span> -<span id="cb10-7"><a href="#cb10-7"></a><span class="co">#> 2 B1002009… A G 0 170 2 4 0 0</span></span> -<span id="cb10-8"><a href="#cb10-8"></a><span class="co">#> 3 B1003003… T C 0 162 2 12 0 0</span></span> -<span id="cb10-9"><a href="#cb10-9"></a><span class="co">#> 4 B1004002… T C 0 172 2 2 0 0</span></span> -<span id="cb10-10"><a href="#cb10-10"></a><span class="co">#> 5 B1004002… A C 0 168 1 7 0 0</span></span> -<span id="cb10-11"><a href="#cb10-11"></a><span class="co">#> 6 B1004005… T C 0 95 46 35 0 0</span></span> -<span id="cb10-12"><a href="#cb10-12"></a><span class="co">#> # … with 2 more variables: exclude_prop <dbl>, exclude_allele <dbl></span></span></code></pre></div> -<p>Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.</p> -<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1"></a>strains <span class="op">%>%</span><span class="st"> </span><span class="kw">filter</span>(marker <span class="op">%in%</span><span class="st"> </span><span class="kw">c</span>(<span class="st">"gJAX00038569"</span>,<span class="st">"gJAX00425031"</span>,<span class="st">"gUNC12245354"</span>,<span class="st">"gUNC15530876"</span>,<span class="st">"gUNC21555204"</span>,<span class="st">"gUNC21596600"</span>)) <span class="op">%>%</span><span class="st"> </span><span class="kw">arrange</span>(marker) <span class="op">%>%</span><span class="st"> </span><span class="kw">select</span>(marker,parent1,parent2)</span> -<span id="cb11-2"><a href="#cb11-2"></a><span class="co">#> marker parent1 parent2</span></span> -<span id="cb11-3"><a href="#cb11-3"></a><span class="co">#> 1 gJAX00038569 C C</span></span> -<span id="cb11-4"><a href="#cb11-4"></a><span class="co">#> 2 gJAX00425031 C C</span></span> -<span id="cb11-5"><a href="#cb11-5"></a><span class="co">#> 3 gUNC12245354 N N</span></span> -<span id="cb11-6"><a href="#cb11-6"></a><span class="co">#> 4 gUNC15530876 N N</span></span> -<span id="cb11-7"><a href="#cb11-7"></a><span class="co">#> 5 gUNC21555204 T T</span></span> -<span id="cb11-8"><a href="#cb11-8"></a><span class="co">#> 6 gUNC21596600 A A</span></span></code></pre></div> -</div> -</div> -<div id="creation-of-the-rqtl-file" class="section level1"> -<h1>Creation of the R/qtl file</h1> -<p>After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the “prefix†argument. The “path†argument can be used in order to create a CSV file that you can laod with qtl::read.cross.</p> -<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1"></a>rqtl_file <-<span class="st"> </span>(<span class="kw">write_rqtl</span>(<span class="dt">geno=</span>genos,<span class="dt">pheno=</span>phenos,<span class="dt">tab=</span>tab2,<span class="dt">ref=</span>strains,<span class="dt">par1=</span><span class="st">"parent1"</span>,<span class="dt">par2=</span><span class="st">"parent2"</span>,<span class="dt">prefix=</span><span class="st">"ind_"</span>))</span> -<span id="cb12-2"><a href="#cb12-2"></a></span> -<span id="cb12-3"><a href="#cb12-3"></a>rqtl_file[<span class="dv">1</span><span class="op">:</span><span class="dv">10</span>,<span class="dv">1</span><span class="op">:</span><span class="dv">7</span>]</span> -<span id="cb12-4"><a href="#cb12-4"></a><span class="co">#> # A tibble: 10 x 7</span></span> -<span id="cb12-5"><a href="#cb12-5"></a><span class="co">#> Ind Sex Age Pheno S6J010381992 S6J011498219 S6J011558924</span></span> -<span id="cb12-6"><a href="#cb12-6"></a><span class="co">#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> </span></span> -<span id="cb12-7"><a href="#cb12-7"></a><span class="co">#> 1 "" "" "" "" 1 1 1 </span></span> -<span id="cb12-8"><a href="#cb12-8"></a><span class="co">#> 2 "" "" "" "" 2.188 15.746 17.347 </span></span> -<span id="cb12-9"><a href="#cb12-9"></a><span class="co">#> 3 "201" "M" "7" "10.5296676084955" 1 1 1 </span></span> -<span id="cb12-10"><a href="#cb12-10"></a><span class="co">#> 4 "210" "M" "7" "10.4877403515456" 2 0 0 </span></span> -<span id="cb12-11"><a href="#cb12-11"></a><span class="co">#> 5 "308" "F" "7" "10.9714806769608" 1 0 0 </span></span> -<span id="cb12-12"><a href="#cb12-12"></a><span class="co">#> 6 "309" "M" "7" "10.8463976841841" 0 1 1 </span></span> -<span id="cb12-13"><a href="#cb12-13"></a><span class="co">#> 7 "310" "M" "7" "11.0680508131391" NA 1 1 </span></span> -<span id="cb12-14"><a href="#cb12-14"></a><span class="co">#> 8 "311" "F" "9" "10.5753482053348" 0 2 2 </span></span> -<span id="cb12-15"><a href="#cb12-15"></a><span class="co">#> 9 "312" "M" "7" "10.7137055438312" 0 2 2 </span></span> -<span id="cb12-16"><a href="#cb12-16"></a><span class="co">#> 10 "313" "M" "8" "10.7375743556446" 0 1 1</span></span></code></pre></div> -</div> - - - -<!-- code folding --> - - -<!-- dynamically load mathjax for compatibility with self-contained --> -<script> - (function () { - var script = document.createElement("script"); - script.type = "text/javascript"; - script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"; - document.getElementsByTagName("head")[0].appendChild(script); - })(); -</script> - -</body> -</html> diff --git a/stuart.Rcheck/stuart/extdata/genos.txt b/stuart.Rcheck/stuart/extdata/genos.txt deleted file mode 100755 index 49a9e33f41b395a716aaf3df972513b9058f5bf4..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/extdata/genos.txt and /dev/null differ diff --git a/stuart.Rcheck/stuart/extdata/phenos.txt b/stuart.Rcheck/stuart/extdata/phenos.txt deleted file mode 100755 index 1d25e9ab2a19f593f1c7809e71eeb16337aa3820..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/extdata/phenos.txt +++ /dev/null @@ -1,177 +0,0 @@ -Ind Sex Exp Age Death Day.death Pheno -201 M 36 7 1 14 10.5296676084955 -210 M 38 7 0 14 10.4877403515456 -308 F 53 7 1 8 10.9714806769608 -309 M 53 7 1 7 10.8463976841841 -310 M 53 7 1 8 11.0680508131391 -311 F 53 9 1 7 10.5753482053348 -312 M 56 7 1 7 10.7137055438312 -313 M 56 8 1 7 10.7375743556446 -315 M 56 7 0 14 10.8719419391157 -318 F 56 6 1 6 10.8396273596649 -319 F 56 6 0 14 10.7668793836238 -320 M 58 8 0 14 10.7325547648656 -211 M 38 7 0 14 10.8695302496376 -321 M 58 6 1 7 10.8283736794678 -322 M 58 6 1 7 10.8915635045684 -323 M 58 6 1 7 10.5356077157252 -324 F 58 6 1 7 10.6280255935391 -325 F 58 8 0 14 10.7480587696075 -326 F 58 6 1 7 10.723232575067 -327 F 58 6 1 7 10.561378054744 -328 M 58 8 1 8 10.678328733254 -329 M 58 7 1 8 10.5553266906605 -330 M 58 7 1 8 9.80206867803321 -212 M 38 7 0 14 10.6357450501074 -331 M 58 7 1 7 10.5982605721032 -332 M 58 7 1 7 10.8473250099944 -333 F 58 6 1 7 10.3764348734637 -334 F 58 6 1 7 10.5460358865664 -336 F 58 5 1 7 10.5222855493513 -337 F 67 8 1 9 10.5327174182859 -338 F 67 6 0 14 10.5473443040275 -339 F 67 8 1 8 9.87194179754362 -340 F 67 6 1 8 10.7090598750034 -341 M 67 6 1 8 10.2209997596409 -213 M 38 7 1 8 11.0270167306899 -342 M 67 8 1 8 10.5431290182179 -343 M 68 8 0 14 10.4981076961986 -344 M 68 8 0 14 10.3735386730113 -345 F 68 8 1 14 10.4320215351478 -346 F 68 8 0 14 8.60947058208406 -347 F 73 7 0 14 10.4168301170408 -348 F 73 7 1 6 11.0628524075666 -349 F 73 7 1 7 10.4330268533001 -350 M 73 7 1 7 10.4800637563932 -351 M 73 6 1 7 10.6324192103035 -214 M 38 7 1 8 10.8218250314825 -352 M 73 6 1 8 10.4725870266356 -353 M 73 6 1 8 10.463431310619 -354 F 73 8 1 9 10.2353468625069 -355 F 73 8 1 8 10.4784947722463 -356 M 73 8 0 14 10.2302554084309 -357 M 73 7 1 7 10.3331446827217 -358 M 75 7 1 8 10.4392178227394 -359 M 75 7 1 9 10.2749993364651 -360 M 75 7 1 9 10.5896956543931 -361 M 75 7 1 9 10.4102656461775 -215 M 38 7 1 7 10.98405240378 -362 M 75 7 0 14 9.25536232298768 -363 M 75 9 1 9 10.3492941307168 -364 M 75 9 1 9 10.3620959742092 -365 M 75 9 1 12 10.4744976511434 -366 M 75 9 1 12 11.030867369515 -369 F 75 9 0 14 10.628477999175 -370 F 75 9 0 14 10.4670786491756 -373 F 75 7 0 14 10.9179711868264 -374 F 75 9 1 8 10.4953620125703 -375 F 75 7 1 7 10.1874531724974 -216 M 38 7 0 14 10.5655205985375 -316 M 78 9 0 14 10.766442313453 -371 M 78 9 1 12 10.4195168879626 -372 M 78 9 1 9 9.95272129347567 -376 M 78 8 0 14 10.4195168879626 -377 F 78 9 0 14 9.95272129347567 -379 F 78 8 0 14 10.0823979142355 -380 F 78 8 1 8 10.9095058001147 -381 M 78 8 1 12 10.8801492251438 -383 F 78 7 1 8 10.9965190456509 -384 F 78 7 0 14 11.0509121188558 -217 M 38 7 1 7 10.4994444268072 -385 F 78 7 1 8 11.0429066559961 -386 F 78 7 0 14 10.8109357224842 -387 M 78 8 0 14 10.6074095323141 -388 M 78 7 0 14 10.7086232219316 -389 M 78 7 1 8 10.8244554978178 -390 M 78 7 1 8 10.5135622767937 -391 M 78 7 1 8 10.6549253863367 -219 F 38 7 1 9 10.9538292220917 -220 M 40 7 1 7 10.7769347175043 -202 M 36 7 1 7 10.6939691207128 -221 M 40 7 0 14 10.6933765093072 -222 F 43 6 1 7 10.6767026645835 -223 F 43 6 0 14 10.5003906942331 -224 F 43 6 0 14 10.5149649858964 -225 F 43 6 0 14 10.8903239014372 -226 M 43 6 0 14 10.505656838062 -227 M 43 6 1 7 10.9661802289593 -228 M 43 6 1 7 10.7323373517824 -229 M 45 8 1 7 10.7312300131548 -230 M 45 8 0 14 10.5748054422932 -203 M 36 7 0 14 10.6753018614348 -231 M 45 8 0 14 10.6367921020912 -232 M 45 8 1 7 11.0739366525724 -233 F 45 8 0 14 10.1491227173623 -234 F 45 8 1 8 10.6667311696339 -235 F 45 8 0 14 8.7941942405191 -236 F 45 8 0 14 10.9093248127322 -237 M 47 7 0 14 10.9324949209267 -238 M 47 7 0 14 10.6947153326779 -239 M 47 7 0 14 10.6120985065664 -240 M 47 7 1 8 10.8327443879201 -204 M 36 7 1 7 10.7791570102755 -241 M 47 7 1 9 10.5125518260128 -242 M 47 7 1 7 10.6894120122994 -243 M 47 7 1 7 11.0691241089673 -245 M 47 7 0 14 10.4819031965733 -246 F 47 7 1 8 9.96762433427887 -247 F 47 7 1 8 10.7245450774586 -248 F 47 7 0 14 10.4912425829631 -249 F 47 7 0 14 10.7474083506145 -250 M 48 8 0 14 10.2720876656636 -251 M 48 8 0 14 9.31399254303701 -205 M 38 7 0 14 10.4428500375673 -252 M 48 8 1 7 10.2862132021395 -253 M 48 7 1 8 10.5048761731899 -254 F 48 8 0 14 10.228672834882 -255 F 48 7 1 7 9.62470062290951 -256 F 48 7 0 14 10.6406621644657 -257 M 48 7 0 14 9.94948396721433 -258 M 48 7 1 7 9.30736764204129 -259 M 48 7 0 14 9.88913119725402 -260 M 48 7 0 14 10.2380596666633 -261 M 48 7 1 7 10.2540669844702 -206 M 38 7 0 14 10.7606379038488 -262 M 48 7 0 14 9.76042991401441 -263 M 48 7 0 14 10.6091919117065 -264 F 50 8 0 14 10.6851731967701 -266 F 50 7 1 8 10.7673408387505 -267 M 50 8 1 7 10.0482542502632 -269 M 50 8 1 8 11.0733835864953 -270 M 50 8 0 14 10.4484942670828 -271 M 50 7 0 14 10.1718549349538 -272 M 50 6 1 7 10.3653629747907 -274 M 50 8 1 7 10.7987817154853 -207 M 38 7 0 14 9.28000922051108 -265 F 51 8 1 8 10.6396558357786 -268 F 51 8 1 8 10.38209131129 -273 F 51 7 0 14 10.668205277928 -277 F 51 7 0 14 10.3487626209106 -278 M 51 7 0 14 8.79404815627125 -279 M 51 6 0 14 10.983879782484 -284 M 52 7 1 7 10.9273290008382 -285 M 52 7 0 14 10.9386568661041 -286 M 52 7 0 14 10.8807485243696 -287 M 52 7 1 8 10.9037194625542 -208 M 38 7 0 14 10.7753050361387 -288 M 52 7 0 14 10.7677108901204 -289 F 52 7 1 7 10.9101761509059 -290 F 52 7 1 7 10.9405884955895 -291 F 52 7 0 14 8.61409649857943 -292 F 52 7 0 14 10.7908109750272 -293 F 52 7 1 7 10.9585494660728 -283 M 52 7 0 14 10.7347416773051 -294 M 52 7 0 14 10.5163444828732 -295 M 52 7 0 14 10.7527016128882 -296 M 52 6 0 14 10.7427219571178 -209 M 38 7 1 7 10.6341153687419 -297 M 52 6 1 8 10.6603294779049 -298 M 52 6 1 7 10.952768506441 -299 F 52 6 1 7 11.1474990509139 -300 F 52 6 0 14 9.35603884473677 -301 F 52 6 1 8 11.1056293961659 -303 M 53 9 0 14 10.1069851602543 -304 F 53 7 1 8 10.7275476145336 -305 F 53 7 0 14 10.9381137134461 -306 F 53 7 0 14 10.5449698000374 -307 F 53 7 0 14 10.6311447764499 diff --git a/stuart.Rcheck/stuart/help/AnIndex b/stuart.Rcheck/stuart/help/AnIndex deleted file mode 100644 index 2d740c1843ddacadc6225e3c243ceb9b8cf1bcf5..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/help/AnIndex +++ /dev/null @@ -1,11 +0,0 @@ -genos genos -geno_strains geno_strains -mark_allele mark_allele -mark_match mark_match -mark_poly mark_poly -mark_prop mark_prop -phenos phenos -ref_strains_mini ref_strains_mini -stuart_tab stuart_tab -tab_mark tab_mark -write_rqtl write_rqtl diff --git a/stuart.Rcheck/stuart/help/aliases.rds b/stuart.Rcheck/stuart/help/aliases.rds deleted file mode 100644 index 2c04860a701e7e3648f1276c9ed9d2c909b171da..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/help/aliases.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/help/paths.rds b/stuart.Rcheck/stuart/help/paths.rds deleted file mode 100644 index 14d6d753e477957fb86a26afb0814509df8689d2..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/help/paths.rds and /dev/null differ diff --git a/stuart.Rcheck/stuart/help/stuart.rdb b/stuart.Rcheck/stuart/help/stuart.rdb deleted file mode 100644 index 15aef2078e4456fd5954fe9bc32b070ba22f2231..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/help/stuart.rdb and /dev/null differ diff --git a/stuart.Rcheck/stuart/help/stuart.rdx b/stuart.Rcheck/stuart/help/stuart.rdx deleted file mode 100644 index 49846afe532de47db1cae4e31b0aef05d22bf75b..0000000000000000000000000000000000000000 Binary files a/stuart.Rcheck/stuart/help/stuart.rdx and /dev/null differ diff --git a/stuart.Rcheck/stuart/html/00Index.html b/stuart.Rcheck/stuart/html/00Index.html deleted file mode 100644 index 149450c7160fa952a79ae992cb9bfaf6e1b6459c..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/html/00Index.html +++ /dev/null @@ -1,47 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head><title>R: stuaRt</title> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<link rel="stylesheet" type="text/css" href="R.css" /> -</head><body> -<h1> stuaRt -<img class="toplogo" src="../../../doc/html/Rlogo.svg" alt="[R logo]" /> -</h1> -<hr/> -<div style="text-align: center;"> -<a href="../../../doc/html/packages.html"><img class="arrow" src="../../../doc/html/left.jpg" alt="[Up]" /></a> -<a href="../../../doc/html/index.html"><img class="arrow" src="../../../doc/html/up.jpg" alt="[Top]" /></a> -</div><h2>Documentation for package ‘stuart’ version 0.1.0</h2> - -<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li> -<li><a href="../doc/index.html">User guides, package vignettes and other documentation.</a></li> -</ul> - -<h2>Help Pages</h2> - - -<table width="100%"> -<tr><td style="width: 25%;"><a href="genos.html">genos</a></td> -<td>Data frame with miniMUGA genotyping of F2 individuals and parental strains</td></tr> -<tr><td style="width: 25%;"><a href="geno_strains.html">geno_strains</a></td> -<td>Create haplotype for a new mouse strain into a reference dataframe</td></tr> -<tr><td style="width: 25%;"><a href="mark_allele.html">mark_allele</a></td> -<td>Exclude markers that have different alleles in the individuals of the cross and in parental strains</td></tr> -<tr><td style="width: 25%;"><a href="mark_match.html">mark_match</a></td> -<td>Exclude markers that were not genotyped in the reference strains</td></tr> -<tr><td style="width: 25%;"><a href="mark_poly.html">mark_poly</a></td> -<td>Exclude markers that are not polymorphic</td></tr> -<tr><td style="width: 25%;"><a href="mark_prop.html">mark_prop</a></td> -<td>Exclude markers depending on proportions of homo/hetorozygous</td></tr> -<tr><td style="width: 25%;"><a href="phenos.html">phenos</a></td> -<td>Data frame with phenotype of F2 individuals</td></tr> -<tr><td style="width: 25%;"><a href="ref_strains_mini.html">ref_strains_mini</a></td> -<td>Data frame with miniMUGA genotyping of classical lab strains.</td></tr> -<tr><td style="width: 25%;"><a href="stuart_tab.html">stuart_tab</a></td> -<td>Output of tab_mark function</td></tr> -<tr><td style="width: 25%;"><a href="tab_mark.html">tab_mark</a></td> -<td>Create of the summary table for all markers from the genotype data frame</td></tr> -<tr><td style="width: 25%;"><a href="write_rqtl.html">write_rqtl</a></td> -<td>Create data frame in Rqtl CSV format</td></tr> -</table> -</body></html> diff --git a/stuart.Rcheck/stuart/html/R.css b/stuart.Rcheck/stuart/html/R.css deleted file mode 100644 index f10f5ea669d3de9e84a2ce5bf5752546b99cd02d..0000000000000000000000000000000000000000 --- a/stuart.Rcheck/stuart/html/R.css +++ /dev/null @@ -1,97 +0,0 @@ -body { - background: white; - color: black; -} - -a:link { - background: white; - color: blue; -} - -a:visited { - background: white; - color: rgb(50%, 0%, 50%); -} - -h1 { - background: white; - color: rgb(55%, 55%, 55%); - font-family: monospace; - font-size: x-large; - text-align: center; -} - -h2 { - background: white; - color: rgb(40%, 40%, 40%); - font-family: monospace; - font-size: large; - text-align: center; -} - -h3 { - background: white; - color: rgb(40%, 40%, 40%); - font-family: monospace; - font-size: large; -} - -h4 { - background: white; - color: rgb(40%, 40%, 40%); - font-family: monospace; - font-style: italic; - font-size: large; -} - -h5 { - background: white; - color: rgb(40%, 40%, 40%); - font-family: monospace; -} - -h6 { - background: white; - color: rgb(40%, 40%, 40%); - font-family: monospace; - font-style: italic; -} - -img.toplogo { - width: 4em; - vertical-align: middle; -} - -img.arrow { - width: 30px; - height: 30px; - border: 0; -} - -span.acronym { - font-size: small; -} - -span.env { - font-family: monospace; -} - -span.file { - font-family: monospace; -} - -span.option{ - font-family: monospace; -} - -span.pkg { - font-weight: bold; -} - -span.samp{ - font-family: monospace; -} - -div.vignettes a:hover { - background: rgb(85%, 85%, 85%); -} diff --git a/stuart_1.0.2.pdf b/stuart_1.0.2.pdf index ba8a6d728314b0933827e229787f7b8d912ac958..c562000e015a9cf6a5899dc22fe8c2cf84b0f264 100644 Binary files a/stuart_1.0.2.pdf and b/stuart_1.0.2.pdf differ diff --git a/stuart_1.0.2.tar.gz b/stuart_1.0.2.tar.gz index e19e5f3982702b23d735577bbea8eda15ecceaee..c901000a6c4dee6480f2ed3953a009bb59438f3c 100644 Binary files a/stuart_1.0.2.tar.gz and b/stuart_1.0.2.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index b4af84f39711829855378cf4d6021838f3cc450f..48568f25bde86d655292635f2cf02bd343f03ae5 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -69,13 +69,15 @@ We recommend to always genotype the parental strains of the cross. Here, their g This is done with the `geno_strains` function. ```{r strains} -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), + par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) %>% print.data.frame() ``` After this step, we need to remove the genotyping result for these individuals from the `genos` dataset. ```{r no_parent} -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", + "StrainsB_1","StrainsB_2")) ``` @@ -95,7 +97,7 @@ Then we will use the different mark_* functions in order to filter the markers. ```{r mark_match} tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) +tab2 %>% filter(exclude_match==1) %>% print.data.frame() ``` Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. @@ -104,27 +106,29 @@ Then, we can use the `mark_poly()` function, which will exclude the markers that ```{r mark_poly ex} tab2 <- mark_poly(tab2) -head(tab2) +head(tab2) %>% print.data.frame() ``` The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. ```{r mark_prop ex} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) +head(tab2) %>% print.data.frame() ``` Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. ```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() +tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() ``` Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains. ```{r mark_allele-strains} -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", + "gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% + select(marker,parent1,parent2) %>% print.data.frame() ``` # Creation of the R/qtl file @@ -132,8 +136,11 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. ```{r write_qtl} -rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, + ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") -rqtl_file[1:10,1:7] +rqtl_file[1:10,1:7] %>% print.data.frame() ``` + +