Commit fc18feef authored by Marie Bourdon's avatar Marie Bourdon
Browse files

change pos argument write_rqtl

parent d4c44e64
File added
^.*\.Rproj$
^\.Rproj\.user$
^doc$
^Meta$
{
"path" : "~/stuart_package/stuart",
"sortOrder" : [
{
"ascending" : true,
"columnIndex" : 2
}
]
}
\ No newline at end of file
{
"activeTab" : 0
}
\ No newline at end of file
{
"left" : {
"panelheight" : 689,
"splitterpos" : 290,
"topwindowstate" : "NORMAL",
"windowheight" : 727
},
"right" : {
"panelheight" : 689,
"splitterpos" : 436,
"topwindowstate" : "NORMAL",
"windowheight" : 727
}
}
\ No newline at end of file
{
"TabSet1" : 3,
"TabSet2" : 3,
"TabZoom" : {
}
}
\ No newline at end of file
build-last-errors="[]"
build-last-errors-base-dir="~/stuart_package/stuart/"
build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
imageDirtyState="1"
saveActionState="0"
{"active_set":"","sets":[]}
\ No newline at end of file
{
"cursorPosition" : "100,50",
"scrollLine" : "96"
}
\ No newline at end of file
{
"cursorPosition" : "128,16",
"last_setup_crc32" : "31136BFE5bfca283",
"scrollLine" : "123"
}
\ No newline at end of file
{
}
\ No newline at end of file
~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7"
~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2"
~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4"
## ---- include = FALSE---------------------------------------------------------
# knitr chunk options used for every chunk of the vignette
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(dplyr)
library(stuart)

## ----annot--------------------------------------------------------------------
# marker annotation table, read from a CSV hosted on GitHub
annot_mini <- read.csv(
  url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")
)

## ----load---------------------------------------------------------------------
data(genos)
summary(genos)
data(phenos)
summary(phenos)

## ----strains------------------------------------------------------------------
# build the parental genotypes table from the listed parental samples
strains <- geno_strains(
  ref = annot_mini,
  geno = genos,
  par1 = c("StrainsA_1", "StrainsA_2"),
  par2 = c("StrainsB_1", "StrainsB_2"),
  name1 = "parent1",
  name2 = "parent2"
)
head(strains)

## ----no_parent----------------------------------------------------------------
# drop the parental samples from the genotype table
genos <- filter(
  genos,
  !Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1", "StrainsB_2")
)

## ----tab_mark-----------------------------------------------------------------
data(stuart_tab)
summary(stuart_tab)

## ----mark_match---------------------------------------------------------------
tab2 <- mark_match(stuart_tab, ref = strains)
filter(tab2, exclude_match == 1)

## ----mark_poly ex-------------------------------------------------------------
tab2 <- mark_poly(tab2)
head(tab2)

## ----mark_prop ex-------------------------------------------------------------
tab2 <- mark_prop(tab2, cross = "F2", homo = 0.1, hetero = 0.1)
head(tab2)

## ----mark_allele--------------------------------------------------------------
tab2 <- mark_allele(tab = tab2, ref = strains, par1 = "parent1", par2 = "parent2")
head(arrange(tab2, desc(exclude_allele)))

## ----mark_allele-strains------------------------------------------------------
# parental genotypes of a handful of markers, sorted by marker name
strains %>%
  filter(
    marker %in% c(
      "gJAX00038569", "gJAX00425031", "gUNC12245354",
      "gUNC15530876", "gUNC21555204", "gUNC21596600"
    )
  ) %>%
  arrange(marker) %>%
  select(marker, parent1, parent2)
{
"collab_server" : "",
"contents" : "",
"created" : 1622535818738.000,
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "0",
"id" : "45D91D58",
"lastKnownWriteTime" : 1622539449,
"last_content_update" : 1622539449402,
"path" : "~/stuart_package/stuart/R/write_rqtl.R",
"project_path" : "R/write_rqtl.R",
"properties" : {
"cursorPosition" : "100,50",
"scrollLine" : "96"
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 1,
"source_on_save" : false,
"source_window" : "",
"type" : "r_source"
}
\ No newline at end of file
#' @title Create data frame in Rqtl CSV format
#'
#' @description This function uses the table produced by the tab_mark function and filled by the mark_* functions in order to create a data frame in the right format for the Rqtl read.cross function. Only the non-excluded markers are kept and genotypes are encoded as "0", "1" and "2", "0" being homozygous for the first parental strain, "1" heterozygous and "2" homozygous for the second parental strain. Genotypes with at least one non-genotyped allele are written as "NA". Caution: this function creates a data frame and, if the "path" argument is given, a CSV file at that path. It does not create a "cross" object in your environment that can be directly used for QTL mapping. You will need to load the CSV file with qtl::read.cross.
#' @param geno data frame with the genotyping results for your cross; it must contain the columns SNP.Name, Sample.ID, Allele1...Forward and Allele2...Forward
#' @param pheno data frame with phenotypes of the individuals; it must contain a column named "Ind" holding the individuals' IDs (individuals must have the same ID in the geno data frame, once the prefix is removed, and in the pheno data frame)
#' @param tab data frame obtained with tab_mark function
#' @param ref data frame with the reference genotypes of mouse lines; it must contain the columns marker, chr and bp_mm10 as well as the columns named by "pos", "par1" and "par2"
#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame
#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame
#' @param prefix potential prefix present in the names of the individuals in the geno data frame, to be removed in order to have the same names as in the pheno file. It is used as a stringr pattern (regular expression) and its first match in each name is removed. Leave it empty (the default) when there is nothing to remove.
#' @param pos name of the column of "ref" with marker positions
#' @param path if indicated, the data frame will be exported in this path
#'
#' @return A data frame in Rqtl CSV layout: column names are the phenotype names followed by the marker names, the first two rows hold the chromosome and the position of each marker (empty for phenotype columns), and each following row is one individual.
#'
#' @import dplyr
#' @import tidyr
#' @import utils
#' @import stringr
#'
#' @export
#'
#### write_rqtl ####
## write data frame in rqtl format (csv), if path is given writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix="",pos,path=NA){
  #check inputs first (base R only) so that a wrong table fails with a clear message
  geno_cols <- c("SNP.Name","Sample.ID","Allele1...Forward","Allele2...Forward")
  missing_geno <- setdiff(geno_cols,colnames(geno))
  if(length(missing_geno)>0){
    stop("`geno` is missing required column(s): ",
         paste(missing_geno,collapse=", "),call.=FALSE)
  }
  #the phenotypes are joined to the genotypes on this hard-coded column
  if(!"Ind" %in% colnames(pheno)){
    stop("`pheno` must contain an `Ind` column with the IDs of the individuals",
         call.=FALSE)
  }

  #extract snps non excluded: apply every exclusion flag present in tab
  for(flag in c("exclude_match","exclude_poly","exclude_prop","exclude_allele")){
    if(flag %in% colnames(tab)){
      tab <- tab %>% filter((!!sym(flag))==0)
    }
  }

  #filter genotypes for non excluded markers in geno file
  geno <- geno %>%
    select(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward) %>%
    filter(SNP.Name %in% tab$SNP.Name)

  #recode parents' names AND the position column name to match column names
  #nomenclature: the columns of ref are renamed with make.names() just below
  par1 <- make.names(par1)
  par2 <- make.names(par2)
  pos <- make.names(pos)

  #keep parental lines genotypes
  colnames(ref) <- make.names(colnames(ref))
  ref <- ref %>% select(marker,chr,bp_mm10,!!sym(pos),!!sym(par1),!!sym(par2))
  #parental genotypes as character: comparing two factors with different
  #level sets is an error, comparing a factor with a character is not
  ref[[par1]] <- as.character(ref[[par1]])
  ref[[par2]] <- as.character(ref[[par2]])

  #merge genotypes with parents
  geno <- left_join(geno,ref,by=c("SNP.Name"="marker"))

  #recode "-" in "N" in geno file
  geno <- geno %>% mutate(
    Allele1...Forward = recode(Allele1...Forward,"-" = "N"),
    Allele2...Forward = recode(Allele2...Forward,"-" = "N")
  )

  #recode geno in factors with same levels
  allele_levels <- c("A","C","G","H","N","T")
  geno <- geno %>% mutate(
    Allele1...Forward = factor(Allele1...Forward,levels=allele_levels),
    Allele2...Forward = factor(Allele2...Forward,levels=allele_levels)
  )

  #recode genotypes depending on parents' genotypes
  #(conditions are tested in order, the first one that is TRUE wins)
  par1_sym <- sym(par1)
  par2_sym <- sym(par2)
  geno <- geno %>% mutate(Geno = case_when(
    #if one allele not genotyped:
    Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA",
    #if both alleles genotyped
    ##homozygous 0
    Allele1...Forward==Allele2...Forward & Allele1...Forward==(!!par1_sym) ~ "0",
    ##homozygous 2
    Allele1...Forward==Allele2...Forward & Allele1...Forward==(!!par2_sym) ~ "2",
    ##heterozygous
    Allele1...Forward!=Allele2...Forward ~ "1",
    #if parental strains are N/H
    ##homozygous for parent that is N/H
    ###homozygous 0
    (!!par1_sym) %in% c("H","N") ~ "0",
    ###homozygous 2
    (!!par2_sym) %in% c("H","N") ~ "2"
  ))

  #keep positions of markers
  markers <- geno %>%
    select(SNP.Name,chr,bp_mm10,!!sym(pos)) %>%
    distinct() %>%
    arrange(chr,bp_mm10)

  #keep only interesting columns in geno file
  geno <- geno %>%
    arrange(chr,bp_mm10) %>%
    select(SNP.Name,Sample.ID,Geno)

  #remove prefix (skipped when no usable prefix is given: an empty or missing
  #pattern cannot be handled by str_remove)
  if(!is.null(prefix) && length(prefix)==1 && !is.na(prefix) && nzchar(prefix)){
    geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix))
  }

  #keep only non excluded markers and merge with positions
  markers <- markers %>% mutate(
    SNP.Name=as.character(SNP.Name),
    chr=as.character(chr)
  )
  geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name")

  #pivoting: one column per marker, named "marker,chr,position"
  geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",")
  geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID))
  #non-marker columns get two empty fields so that every header has three fields
  geno <- geno %>% rename("Sample.ID,,"=Sample.ID)

  #merge with phenotype file
  pheno <- pheno %>% mutate_all(as.character)
  colnames(pheno) <- str_c(colnames(pheno),",,")
  qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,"))

  #prepare file: the header is added as a row and split on ",", which turns it
  #into three rows (names, chromosomes, positions); the first one becomes the
  #column names again
  qtl_file <- rbind(colnames(qtl_file),qtl_file)
  qtl_file <- separate_rows(qtl_file,everything(),sep=",")
  colnames(qtl_file) <- as.character(unlist(qtl_file[1,],use.names=FALSE))
  qtl_file <- qtl_file %>% slice(-1)

  #export only when a path is given (NULL and NA both mean "no export")
  if(!is.null(path) && !is.na(path)){
    write.csv(qtl_file,file=path,quote=FALSE,row.names = FALSE)
  }
  return(qtl_file)
}
{
"collab_server" : "",
"contents" : "",
"created" : 1622538246256.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "47AFB64",
"lastKnownWriteTime" : 140548509794308,
"last_content_update" : 1622538246256,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "6294E01A",
"caption" : "annot_mini",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=6294E01A",
"displayedObservations" : 11125,
"environment" : "",
"expression" : "annot_mini",
"object" : "annot_mini",
"preview" : 0,
"totalObservations" : 11125,
"variables" : 12
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 5,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
{
"collab_server" : "",
"contents" : "",
"created" : 1622538256440.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "4A9D04E",
"lastKnownWriteTime" : 140548509794304,
"last_content_update" : 1622538256440,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "2EEA0644",
"caption" : "strains",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=strains&cache_key=2EEA0644",
"displayedObservations" : 11125,
"environment" : "",
"expression" : "strains",
"object" : "strains",
"preview" : 0,
"totalObservations" : 11125,
"variables" : 14
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 6,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment