Commit 8629fd3f authored by Marie Bourdon's avatar Marie Bourdon
Browse files

modif names write_rqtl

parent 40425a5a
{ {
"cursorPosition" : "48,32", "cursorPosition" : "84,18",
"scrollLine" : "33" "scrollLine" : "94"
} }
\ No newline at end of file
{
}
\ No newline at end of file
{ {
"cursorPosition" : "127,25", "cursorPosition" : "135,15",
"last_setup_crc32" : "39B546A65bfca283", "last_setup_crc32" : "39B546A65bfca283",
"scrollLine" : "119" "scrollLine" : "130"
} }
\ No newline at end of file
...@@ -9,5 +9,6 @@ ...@@ -9,5 +9,6 @@
~%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="7411866" ~%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="7411866"
~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215" ~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215"
~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7" ~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7"
~%2Fstuart_package%2Fstuart%2Fdoc%2FstuaRt.R="65312719"
~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2"
~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4"
...@@ -5,15 +5,15 @@ ...@@ -5,15 +5,15 @@
"dirty" : false, "dirty" : false,
"encoding" : "UTF-8", "encoding" : "UTF-8",
"folds" : "", "folds" : "",
"hash" : "3120719904", "hash" : "1139135974",
"id" : "45D91D58", "id" : "45D91D58",
"lastKnownWriteTime" : 1622621980, "lastKnownWriteTime" : 1622648301,
"last_content_update" : 1622621980790, "last_content_update" : 1622648301329,
"path" : "~/stuart_package/stuart/R/write_rqtl.R", "path" : "~/stuart_package/stuart/R/write_rqtl.R",
"project_path" : "R/write_rqtl.R", "project_path" : "R/write_rqtl.R",
"properties" : { "properties" : {
"cursorPosition" : "48,32", "cursorPosition" : "84,18",
"scrollLine" : "33" "scrollLine" : "94"
}, },
"read_only" : false, "read_only" : false,
"read_only_alternatives" : [ "read_only_alternatives" : [
......
...@@ -21,6 +21,12 @@ ...@@ -21,6 +21,12 @@
#### write_rqtl #### #### write_rqtl ####
## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#extract snps non excluded #extract snps non excluded
if("exclude_match" %in% colnames(tab)){ if("exclude_match" %in% colnames(tab)){
tab <- tab %>% filter(exclude_match==0) tab <- tab %>% filter(exclude_match==0)
...@@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#filter genotypes for non excluded markers in geno file #filter genotypes for non excluded markers in geno file
geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker)
#recode parents' names to match column names nomenclature #recode parents' names to match column names nomenclature
par1 <- make.names(par1) par1 <- make.names(par1)
...@@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2))
#merge genotypes with parents #merge genotypes with parents
geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) geno <- left_join(geno,ref,by=c("marker"="marker"))
#recode "-" in "N" in geno file #recode "-" in "N" in geno file
geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, geno <- geno %>% mutate(allele_1 = recode(allele_1,
"-" = "N")) "-" = "N"))
geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, geno <- geno %>% mutate(allele_2 = recode(allele_2,
"-" = "N")) "-" = "N"))
#recode geno in factors with same levels #recode geno in factors with same levels
geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T")))
geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T")))
#recode genotypes depending on parents' genotypes #recode genotypes depending on parents' genotypes
geno <- geno %>% mutate(Geno = case_when( geno <- geno %>% mutate(Geno = case_when(
#if one allele not genotyped: #if one allele not genotyped:
Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", allele_1=="N" | allele_2=="N" ~ "NA",
#if both alleles genotyped #if both alleles genotyped
##homozygous 0 ##homozygous 0
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", allele_1==allele_2 & allele_1==!!sym(par1) ~ "0",
##homozygous 2 ##homozygous 2
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", allele_1==allele_2 & allele_1==!!sym(par2) ~ "2",
##heterozygous ##heterozygous
Allele1...Forward!=Allele2...Forward ~ "1", allele_1!=allele_2 ~ "1",
#if parental strains are N/H #if parental strains are N/H
##homozygous for parent that is N/H ##homozygous for parent that is N/H
...@@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#keep positions of markers #keep positions of markers
markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct()
markers <- markers %>% arrange(chr,!!sym(pos)) markers <- markers %>% arrange(chr,!!sym(pos))
#keep only interesting columns in geno file #keep only interesting columns in geno file
geno <- geno %>% arrange(chr,!!sym(pos)) geno <- geno %>% arrange(chr,!!sym(pos))
geno <- geno %>% select(SNP.Name,Sample.ID,Geno) geno <- geno %>% select(marker,id,Geno)
#remove prefix #remove prefix
geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) geno <- geno %>% mutate(id=str_remove(id,prefix))
#keep only non excluded markers and merge with positions #keep only non excluded markers and merge with positions
markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) markers <- markers %>% mutate(marker=as.character(marker))
markers <- markers %>% mutate(chr=as.character(chr)) markers <- markers %>% mutate(chr=as.character(chr))
geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker")
#pivoting #pivoting
geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",")
geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) geno <- geno %>% mutate(id=as.character(id))
geno <- geno %>% rename("Sample.ID,,"=Sample.ID) geno <- geno %>% rename("id,,"=id)
#merge with phenotype file #merge with phenotype file
pheno <- pheno %>% mutate_all(as.character) pheno <- pheno %>% mutate_all(as.character)
colnames(pheno) <- str_c(colnames(pheno),",,") colnames(pheno) <- str_c(colnames(pheno),",,")
qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,"))
#prepare file #prepare file
qtl_file <- rbind(colnames(qtl_file),qtl_file) qtl_file <- rbind(colnames(qtl_file),qtl_file)
......
...@@ -5,16 +5,16 @@ ...@@ -5,16 +5,16 @@
"dirty" : false, "dirty" : false,
"encoding" : "UTF-8", "encoding" : "UTF-8",
"folds" : "", "folds" : "",
"hash" : "3134659970", "hash" : "2014255563",
"id" : "96AB3736", "id" : "96AB3736",
"lastKnownWriteTime" : 1622647884, "lastKnownWriteTime" : 1622648726,
"last_content_update" : 1622647884538, "last_content_update" : 1622648726992,
"path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd",
"project_path" : "vignettes/stuaRt.Rmd", "project_path" : "vignettes/stuaRt.Rmd",
"properties" : { "properties" : {
"cursorPosition" : "127,25", "cursorPosition" : "135,15",
"last_setup_crc32" : "39B546A65bfca283", "last_setup_crc32" : "39B546A65bfca283",
"scrollLine" : "119" "scrollLine" : "130"
}, },
"read_only" : false, "read_only" : false,
"read_only_alternatives" : [ "read_only_alternatives" : [
......
...@@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g ...@@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`.
```{r write_qtl,eval=F} ```{r write_qtl}
rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
rqtl_file[1:10,1:7] rqtl_file[1:10,1:7]
......
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(dplyr)
library(stuart)
## ----annot--------------------------------------------------------------------
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
## ----load---------------------------------------------------------------------
data(genos)
summary(genos)
data(phenos)
summary(phenos)
## ----strains------------------------------------------------------------------
strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2")
head(strains)
## ----no_parent----------------------------------------------------------------
genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2"))
## ----tab_mark-----------------------------------------------------------------
data(stuart_tab)
summary(stuart_tab)
## ----mark_match---------------------------------------------------------------
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 %>% filter(exclude_match==1)
## ----mark_poly ex-------------------------------------------------------------
tab2 <- mark_poly(tab2)
head(tab2)
## ----mark_prop ex-------------------------------------------------------------
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
head(tab2)
## ----mark_allele--------------------------------------------------------------
tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
tab2 %>% arrange(desc(exclude_allele)) %>% head()
## ----mark_allele-strains------------------------------------------------------
strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2)
## ----write_qtl----------------------------------------------------------------
rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
rqtl_file[1:10,1:7]
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
/Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5" /Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5"
/Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B" /Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B"
/Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F" /Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F"
/Users/mariebourdon/stuart_package/stuart/doc/stuaRt.R="E6241391"
/Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD" /Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD"
...@@ -21,6 +21,12 @@ ...@@ -21,6 +21,12 @@
#### write_rqtl #### #### write_rqtl ####
## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#extract snps non excluded #extract snps non excluded
if("exclude_match" %in% colnames(tab)){ if("exclude_match" %in% colnames(tab)){
tab <- tab %>% filter(exclude_match==0) tab <- tab %>% filter(exclude_match==0)
...@@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#filter genotypes for non excluded markers in geno file #filter genotypes for non excluded markers in geno file
geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker)
#recode parents' names to match column names nomenclature #recode parents' names to match column names nomenclature
par1 <- make.names(par1) par1 <- make.names(par1)
...@@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2))
#merge genotypes with parents #merge genotypes with parents
geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) geno <- left_join(geno,ref,by=c("marker"="marker"))
#recode "-" in "N" in geno file #recode "-" in "N" in geno file
geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, geno <- geno %>% mutate(allele_1 = recode(allele_1,
"-" = "N")) "-" = "N"))
geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, geno <- geno %>% mutate(allele_2 = recode(allele_2,
"-" = "N")) "-" = "N"))
#recode geno in factors with same levels #recode geno in factors with same levels
geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T")))
geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T")))
#recode genotypes depending on parents' genotypes #recode genotypes depending on parents' genotypes
geno <- geno %>% mutate(Geno = case_when( geno <- geno %>% mutate(Geno = case_when(
#if one allele not genotyped: #if one allele not genotyped:
Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", allele_1=="N" | allele_2=="N" ~ "NA",
#if both alleles genotyped #if both alleles genotyped
##homozygous 0 ##homozygous 0
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", allele_1==allele_2 & allele_1==!!sym(par1) ~ "0",
##homozygous 2 ##homozygous 2
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", allele_1==allele_2 & allele_1==!!sym(par2) ~ "2",
##heterozygous ##heterozygous
Allele1...Forward!=Allele2...Forward ~ "1", allele_1!=allele_2 ~ "1",
#if parental strains are N/H #if parental strains are N/H
##homozygous for parent that is N/H ##homozygous for parent that is N/H
...@@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ...@@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#keep positions of markers #keep positions of markers
markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct()
markers <- markers %>% arrange(chr,!!sym(pos)) markers <- markers %>% arrange(chr,!!sym(pos))
#keep only interesting columns in geno file #keep only interesting columns in geno file
geno <- geno %>% arrange(chr,!!sym(pos)) geno <- geno %>% arrange(chr,!!sym(pos))
geno <- geno %>% select(SNP.Name,Sample.ID,Geno) geno <- geno %>% select(marker,id,Geno)
#remove prefix #remove prefix
geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) geno <- geno %>% mutate(id=str_remove(id,prefix))
#keep only non excluded markers and merge with positions #keep only non excluded markers and merge with positions
markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) markers <- markers %>% mutate(marker=as.character(marker))
markers <- markers %>% mutate(chr=as.character(chr)) markers <- markers %>% mutate(chr=as.character(chr))
geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker")
#pivoting #pivoting
geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",")
geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) geno <- geno %>% mutate(id=as.character(id))
geno <- geno %>% rename("Sample.ID,,"=Sample.ID) geno <- geno %>% rename("id,,"=id)
#merge with phenotype file #merge with phenotype file
pheno <- pheno %>% mutate_all(as.character) pheno <- pheno %>% mutate_all(as.character)
colnames(pheno) <- str_c(colnames(pheno),",,") colnames(pheno) <- str_c(colnames(pheno),",,")
qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,"))
#prepare file #prepare file
qtl_file <- rbind(colnames(qtl_file),qtl_file) qtl_file <- rbind(colnames(qtl_file),qtl_file)
......
No preview for this file type
...@@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g ...@@ -133,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`.
```{r write_qtl,eval=F} ```{r write_qtl}
rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
rqtl_file[1:10,1:7] rqtl_file[1:10,1:7]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment