diff --git a/.Rproj.user/9DAE6990/sources/prop/5B8691C7 b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 index 1f3aca9005e6c2bfeb0a9b999c80078a7d57fb23..dd976e56d512c526a7b5d85f156ef0835b1871df 100644 --- a/.Rproj.user/9DAE6990/sources/prop/5B8691C7 +++ b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 @@ -1,4 +1,4 @@ { - "cursorPosition" : "48,32", - "scrollLine" : "33" + "cursorPosition" : "84,18", + "scrollLine" : "94" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/65312719 b/.Rproj.user/9DAE6990/sources/prop/65312719 new file mode 100644 index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/65312719 @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 index 082b02e9e6bf90cc948fb06aced67605250d702c..59518e3e3c0da54f662f7dd699fd976f910ca7c7 100644 --- a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 +++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 @@ -1,5 +1,5 @@ { - "cursorPosition" : "127,25", + "cursorPosition" : "135,15", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "119" + "scrollLine" : "130" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX index b5e12590bd67d9b3c876796a41d7a702c54e6a41..16f98885be4bba0b41bed600c49af1738abd995d 100644 --- a/.Rproj.user/9DAE6990/sources/prop/INDEX +++ b/.Rproj.user/9DAE6990/sources/prop/INDEX @@ -9,5 +9,6 @@ ~%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="7411866" ~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215" ~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7" +~%2Fstuart_package%2Fstuart%2Fdoc%2FstuaRt.R="65312719" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 index 7b1630989ded63aeb84eee2ab57468fa86e768a2..f68fb4c1e8cf5d5be740afb5ed88aef0a4f0d0a3 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 @@ -5,15 +5,15 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "3120719904", + "hash" : "1139135974", "id" : "45D91D58", - "lastKnownWriteTime" : 1622621980, - "last_content_update" : 1622621980790, + "lastKnownWriteTime" : 1622648301, + "last_content_update" : 1622648301329, "path" : "~/stuart_package/stuart/R/write_rqtl.R", "project_path" : "R/write_rqtl.R", "properties" : { - "cursorPosition" : "48,32", - "scrollLine" : "33" + "cursorPosition" : "84,18", + "scrollLine" : "94" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents index cbfc05505af72a38887f69cdef97707b5d8e8bf1..0926c5c3a44d009a46244ee5d52835afb1309978 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents @@ -21,6 +21,12 @@ #### write_rqtl #### ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #extract snps non excluded if("exclude_match" %in% colnames(tab)){ tab <- tab %>% filter(exclude_match==0) @@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #filter genotypes for non excluded markers in geno file - geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) + geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) #recode parents' names to match column names nomenclature par1 <- make.names(par1) @@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) #merge genotypes with parents - geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) + geno <- left_join(geno,ref,by=c("marker"="marker")) #recode "-" in "N" in geno file - geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, + geno <- geno %>% mutate(allele_1 = recode(allele_1, "-" = "N")) - geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, + geno <- geno %>% mutate(allele_2 = recode(allele_2, "-" = "N")) #recode geno in factors with same levels - geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) - geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T"))) #recode genotypes depending on parents' genotypes geno <- geno %>% mutate(Geno = case_when( #if one allele not genotyped: - Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", + allele_1=="N" | allele_2=="N" ~ "NA", #if both alleles genotyped ##homozygous 0 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", + allele_1==allele_2 & allele_1==!!sym(par1) ~ "0", ##homozygous 2 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", + allele_1==allele_2 & allele_1==!!sym(par2) ~ "2", ##heterozygous - Allele1...Forward!=Allele2...Forward ~ "1", + allele_1!=allele_2 ~ "1", #if parental strains are N/H ##homozygous for parent that is N/H @@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() + markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct() markers <- markers %>% arrange(chr,!!sym(pos)) #keep only interesting columns in geno file geno <- geno %>% arrange(chr,!!sym(pos)) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) + geno <- geno %>% select(marker,id,Geno) #remove prefix - geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) + geno <- geno %>% mutate(id=str_remove(id,prefix)) #keep only non excluded markers and merge with positions - markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) + markers <- markers %>% mutate(marker=as.character(marker)) markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") + geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker") #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") - geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) - geno <- geno %>% rename("Sample.ID,,"=Sample.ID) + geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",") + geno <- geno %>% mutate(id=as.character(id)) + geno <- geno %>% rename("id,,"=id) #merge with phenotype file pheno <- pheno %>% mutate_all(as.character) colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) + qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,")) #prepare file qtl_file <- rbind(colnames(qtl_file),qtl_file) diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 index bf870f4dc334ee3de6a448d1ad982d8d44936526..ff216ba631ada3582dc5f0c3069563828d2cae4a 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 @@ -5,16 +5,16 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "3134659970", + "hash" : "2014255563", "id" : "96AB3736", - "lastKnownWriteTime" : 1622647884, - "last_content_update" : 1622647884538, + "lastKnownWriteTime" : 1622648726, + "last_content_update" : 1622648726992, "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", "project_path" : "vignettes/stuaRt.Rmd", "properties" : { - "cursorPosition" : "127,25", + "cursorPosition" : "135,15", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "119" + "scrollLine" : "130" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents index 2dfb70edac53e76fd1e666150ea7dd15d9442f65..e73872a8f734ba8177b36fc84e013586614d4848 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents @@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. -```{r write_qtl,eval=F} +```{r write_qtl} rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file[1:10,1:7] diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents new file mode 100644 index 0000000000000000000000000000000000000000..0035346d6c1f1b76d77d1af4868a3329f2545064 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents @@ -0,0 +1,56 @@ +## ---- include = FALSE--------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----setup-------------------------------------------------------------------- +library(dplyr) +library(stuart) + +## ----annot-------------------------------------------------------------------- +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) + +## ----load--------------------------------------------------------------------- +data(genos) +summary(genos) +data(phenos) +summary(phenos) + +## ----strains------------------------------------------------------------------ +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) + +## ----no_parent---------------------------------------------------------------- +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) + +## ----tab_mark----------------------------------------------------------------- +data(stuart_tab) +summary(stuart_tab) + +## ----mark_match--------------------------------------------------------------- +tab2 <- mark_match(stuart_tab,ref=strains) + + +tab2 %>% filter(exclude_match==1) + +## ----mark_poly ex------------------------------------------------------------- +tab2 <- mark_poly(tab2) +head(tab2) + +## ----mark_prop ex------------------------------------------------------------- +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) + +## ----mark_allele-------------------------------------------------------------- +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() + +## ----mark_allele-strains------------------------------------------------------ +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) + +## ----write_qtl---------------------------------------------------------------- +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") + +rqtl_file[1:10,1:7] + diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 987226a929aa70c98c5954e2e9f8ce2716ea30d6..7779f200af63a93c07e730a2f2b07fa1992b2f1b 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -2,4 +2,5 @@ /Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5" /Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B" /Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F" +/Users/mariebourdon/stuart_package/stuart/doc/stuaRt.R="E6241391" /Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD" diff --git a/R/write_rqtl.R b/R/write_rqtl.R index cbfc05505af72a38887f69cdef97707b5d8e8bf1..0926c5c3a44d009a46244ee5d52835afb1309978 100755 --- a/R/write_rqtl.R +++ b/R/write_rqtl.R @@ -21,6 +21,12 @@ #### write_rqtl #### ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #extract snps non excluded if("exclude_match" %in% colnames(tab)){ tab <- tab %>% filter(exclude_match==0) @@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #filter genotypes for non excluded markers in geno file - geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) + geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) #recode parents' names to match column names nomenclature par1 <- make.names(par1) @@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) #merge genotypes with parents - geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) + geno <- left_join(geno,ref,by=c("marker"="marker")) #recode "-" in "N" in geno file - geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, + geno <- geno %>% mutate(allele_1 = recode(allele_1, "-" = "N")) - geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, + geno <- geno %>% mutate(allele_2 = recode(allele_2, "-" = "N")) #recode geno in factors with same levels - geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) - geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T"))) #recode genotypes depending on parents' genotypes geno <- geno %>% mutate(Geno = case_when( #if one allele not genotyped: - Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", + allele_1=="N" | allele_2=="N" ~ "NA", #if both alleles genotyped ##homozygous 0 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", + allele_1==allele_2 & allele_1==!!sym(par1) ~ "0", ##homozygous 2 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", + allele_1==allele_2 & allele_1==!!sym(par2) ~ "2", ##heterozygous - Allele1...Forward!=Allele2...Forward ~ "1", + allele_1!=allele_2 ~ "1", #if parental strains are N/H ##homozygous for parent that is N/H @@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() + markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct() markers <- markers %>% arrange(chr,!!sym(pos)) #keep only interesting columns in geno file geno <- geno %>% arrange(chr,!!sym(pos)) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) + geno <- geno %>% select(marker,id,Geno) #remove prefix - geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) + geno <- geno %>% mutate(id=str_remove(id,prefix)) #keep only non excluded markers and merge with positions - markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) + markers <- markers %>% mutate(marker=as.character(marker)) markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") + geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker") #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") - geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) - geno <- geno %>% rename("Sample.ID,,"=Sample.ID) + geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",") + geno <- geno %>% mutate(id=as.character(id)) + geno <- geno %>% rename("id,,"=id) #merge with phenotype file pheno <- pheno %>% mutate_all(as.character) colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) + qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,")) #prepare file qtl_file <- rbind(colnames(qtl_file),qtl_file) diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz index c0f464039673569fe02a33fa8028029a57a5ce7b..0574719a0ca2ebd5501a2b4295da59a4b5a87746 100644 Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index 2dfb70edac53e76fd1e666150ea7dd15d9442f65..e73872a8f734ba8177b36fc84e013586614d4848 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. -```{r write_qtl,eval=F} +```{r write_qtl} rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file[1:10,1:7]