diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/.DS_Store differ diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000000000000000000000000000000000000..a32bacb46f8972e28df68f94556fe9596be2f74f --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,4 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^doc$ +^Meta$ diff --git a/.Rproj.user/9DAE6990/pcs/files-pane.pper b/.Rproj.user/9DAE6990/pcs/files-pane.pper new file mode 100644 index 0000000000000000000000000000000000000000..62dbe5e1d1674930d70678117198f25a2363ef11 --- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/files-pane.pper @@ -0,0 +1,9 @@ +{ + "path" : "~/stuart_package/stuart", + "sortOrder" : [ + { + "ascending" : true, + "columnIndex" : 2 + } + ] +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/source-pane.pper b/.Rproj.user/9DAE6990/pcs/source-pane.pper new file mode 100644 index 0000000000000000000000000000000000000000..1743e40fec30e357993d33f5cb053bf027524dc8 --- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/source-pane.pper @@ -0,0 +1,3 @@ +{ + "activeTab" : 0 +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper new file mode 100644 index 0000000000000000000000000000000000000000..51a8bddc2368de4b56205ec1bcaf88f0626afb6d --- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper @@ -0,0 +1,14 @@ +{ + "left" : { + "panelheight" : 689, + "splitterpos" : 290, + "topwindowstate" : "NORMAL", + "windowheight" : 727 + }, + "right" : { + "panelheight" : 689, + "splitterpos" : 436, + "topwindowstate" : "NORMAL", + "windowheight" : 727 + } +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/workbench-pane.pper b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper new file mode 100644 index 0000000000000000000000000000000000000000..3eb507b3b7673752c5fcdc0c6072023af93d1b7e 
--- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper @@ -0,0 +1,6 @@ +{ + "TabSet1" : 3, + "TabSet2" : 3, + "TabZoom" : { + } +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/persistent-state b/.Rproj.user/9DAE6990/persistent-state new file mode 100644 index 0000000000000000000000000000000000000000..5dca5416de0070e60c804242c2243fc7336ef63b --- /dev/null +++ b/.Rproj.user/9DAE6990/persistent-state @@ -0,0 +1,8 @@ +build-last-errors="[]" +build-last-errors-base-dir="~/stuart_package/stuart/" +build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation 
path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" +compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" +files.monitored-path="" +find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" +imageDirtyState="1" +saveActionState="0" diff --git a/.Rproj.user/9DAE6990/rmd-outputs b/.Rproj.user/9DAE6990/rmd-outputs new file mode 100644 index 0000000000000000000000000000000000000000..3f2ff2d6cc8f257ffcade7ead1ca4042c0e884b9 --- /dev/null +++ b/.Rproj.user/9DAE6990/rmd-outputs @@ -0,0 +1,5 @@ + + + + + diff --git a/.Rproj.user/9DAE6990/saved_source_markers b/.Rproj.user/9DAE6990/saved_source_markers new file mode 100644 index 0000000000000000000000000000000000000000..2b1bef112ac6921abda6162a65dbfcd8c6d55c80 --- /dev/null +++ b/.Rproj.user/9DAE6990/saved_source_markers @@ -0,0 +1 @@ +{"active_set":"","sets":[]} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/5B8691C7 b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 new file mode 100644 index 0000000000000000000000000000000000000000..a4a5143d39dc1582a0676eacc0c81004c72cfb4c --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "100,50", + "scrollLine" : "96" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 new file mode 100644 index 0000000000000000000000000000000000000000..f5350feff65e74dc29d2f6c9641ae80dc2edb47d --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 @@ -0,0 +1,5 @@ +{ + "cursorPosition" : "128,16", + "last_setup_crc32" : "31136BFE5bfca283", + "scrollLine" : "123" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/EBD625D2 b/.Rproj.user/9DAE6990/sources/prop/EBD625D2 new file mode 
100644 index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/EBD625D2 @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX new file mode 100644 index 0000000000000000000000000000000000000000..15912589cb2812bd7688aeec108d266eaee39ad4 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/INDEX @@ -0,0 +1,3 @@ +~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7" +~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2" +~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4" diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/32FAC663-contents b/.Rproj.user/9DAE6990/sources/s-31136BFE/32FAC663-contents new file mode 100644 index 0000000000000000000000000000000000000000..0de16312f453eccb1444ab3e78c69650103cee69 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/32FAC663-contents @@ -0,0 +1,51 @@ +## ---- include = FALSE--------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----setup-------------------------------------------------------------------- +library(dplyr) +library(stuart) + +## ----annot-------------------------------------------------------------------- +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) + +## ----load--------------------------------------------------------------------- +data(genos) +summary(genos) +data(phenos) +summary(phenos) + +## ----strains------------------------------------------------------------------ +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) + +## ----no_parent---------------------------------------------------------------- +genos <- genos %>% filter(!Sample.ID %in% 
c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) + +## ----tab_mark----------------------------------------------------------------- +data(stuart_tab) +summary(stuart_tab) + +## ----mark_match--------------------------------------------------------------- +tab2 <- mark_match(stuart_tab,ref=strains) + + +tab2 %>% filter(exclude_match==1) + +## ----mark_poly ex------------------------------------------------------------- +tab2 <- mark_poly(tab2) +head(tab2) + +## ----mark_prop ex------------------------------------------------------------- +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) + +## ----mark_allele-------------------------------------------------------------- +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() + +## ----mark_allele-strains------------------------------------------------------ +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) + diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58 b/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58 new file mode 100644 index 0000000000000000000000000000000000000000..cac563747051098e41cdf9420259dfd117db1de0 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58 @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622535818738.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "0", + "id" : "45D91D58", + "lastKnownWriteTime" : 1622539449, + "last_content_update" : 1622539449402, + "path" : "~/stuart_package/stuart/R/write_rqtl.R", + "project_path" : "R/write_rqtl.R", + "properties" : { + "cursorPosition" : "100,50", + "scrollLine" : "96" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 1, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline 
at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58-contents b/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58-contents new file mode 100644 index 0000000000000000000000000000000000000000..b6beb864d28ee5a4d533d5f9fb57a99499504522 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/45D91D58-contents @@ -0,0 +1,125 @@
+#' @title Create data frame in Rqtl CSV format
+#'
+#' @description This function uses the table produced by the tab_mark function and filled in by the mark_* functions in order to create a data frame in the right format for the Rqtl read.cross function. Only the non-excluded markers are kept and genotypes are encoded as "0", "1" and "2", "0" being homozygous for the first parental strain, "1" heterozygous and "2" homozygous for the second parental strain. Caution: this function creates a data frame and, if the "path" argument is given, a CSV file at that path. It does not create a "cross" object in your environment that can be directly used for QTL mapping. You will need to load the CSV file with qtl::read.cross.
+#' @param geno data frame with the genotyping results for your cross (the columns SNP.Name, Sample.ID, Allele1...Forward and Allele2...Forward are used)
+#' @param pheno data frame with phenotypes of the individuals. The IDs of the individuals must be in a column named "Ind" and must be the same as in the geno data frame (once the prefix is removed)
+#' @param prefix optional prefix present in the names of the individuals in the geno data frame, to be removed in order to have the same names as in the pheno file. It is passed as the pattern of stringr::str_remove, so it is read as a regular expression and its first match is removed. If NULL (default), the names are left unchanged
+#' @param tab data frame obtained with tab_mark function
+#' @param ref data frame with the reference genotypes of mouse lines (the columns marker, chr and bp_mm10 are used)
+#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame
+#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame
+#' @param pos name of the column of "ref" with marker positions
+#' @param path if indicated (neither NULL nor NA), the data frame will be exported as a CSV file in this path
+#'
+#' @return a data frame whose column names are the phenotype and marker names, whose first row holds the chromosome of each marker, whose second row holds its position, and whose following rows each hold one individual
+#'
+#' @import dplyr
+#' @import tidyr
+#' @import utils
+#' @import stringr
+#'
+#' @export
+#'
+#### write_rqtl ####
+## write data frame in rqtl format (csv), if a path is given writes the file in the path indicated
+write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix=NULL,pos,path=NA){
+  #phenotypes are merged on a column named "Ind": fail early with a clear message
+  if(!"Ind" %in% colnames(pheno)){
+    stop("'pheno' must contain a column named \"Ind\" holding the IDs of the individuals",call.=FALSE)
+  }
+
+  #extract snps non excluded: apply each exclusion flag that is present in tab
+  for(flag in c("exclude_match","exclude_poly","exclude_prop","exclude_allele")){
+    if(flag %in% colnames(tab)){
+      tab <- tab %>% filter(.data[[flag]]==0)
+    }
+  }
+
+  #filter genotypes for non excluded markers in geno file
+  geno <- geno %>%
+    select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>%
+    filter(SNP.Name %in% tab$SNP.Name)
+
+  #recode parents' and position names to match column names nomenclature
+  par1 <- make.names(par1)
+  par2 <- make.names(par2)
+  pos <- make.names(pos)
+
+  #keep parental lines genotypes
+  colnames(ref) <- make.names(colnames(ref))
+  ref <- ref %>% select(marker,chr,bp_mm10,!!sym(pos),!!sym(par1),!!sym(par2))
+
+  #merge genotypes with parents
+  geno <- left_join(geno,ref,by=c("SNP.Name"="marker"))
+
+  #recode "-" in "N" in geno file
+  geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward,"-" = "N"))
+  geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward,"-" = "N"))
+
+  #recode geno in factors with same levels (any other allele code becomes NA)
+  geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T")))
+  geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T")))
+
+  #recode genotypes depending on parents' genotypes (the first matching rule wins)
+  geno <- geno %>% mutate(Geno = case_when(
+    #if one allele not genotyped, or not a recognised allele code:
+    is.na(Allele1...Forward) | is.na(Allele2...Forward) |
+      Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA",
+
+    #if both alleles genotyped
+    ##homozygous 0
+    Allele1...Forward==Allele2...Forward & Allele1...Forward==.data[[par1]] ~ "0",
+    ##homozygous 2
+    Allele1...Forward==Allele2...Forward & Allele1...Forward==.data[[par2]] ~ "2",
+    ##heterozygous
+    Allele1...Forward!=Allele2...Forward ~ "1",
+
+    #only homozygous individuals matching neither parent reach this point:
+    #they are assigned to the parent whose reference genotype is N/H
+    ###homozygous 0
+    .data[[par1]] %in% c("H","N") ~ "0",
+    ###homozygous 2
+    .data[[par2]] %in% c("H","N") ~ "2"
+  ))
+
+  #keep positions of markers
+  markers <- geno %>% select(SNP.Name,chr,bp_mm10,!!sym(pos)) %>% distinct()
+  markers <- markers %>% arrange(chr,bp_mm10)
+
+  #keep only interesting columns in geno file
+  geno <- geno %>% arrange(chr,bp_mm10)
+  geno <- geno %>% select(SNP.Name,Sample.ID,Geno)
+
+  #remove prefix (skipped when no prefix is given)
+  if(!is.null(prefix)){
+    geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix))
+  }
+
+  #keep only non excluded markers and merge with positions
+  markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name))
+  markers <- markers %>% mutate(chr=as.character(chr))
+  geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name")
+
+  #pivoting: one column per marker, named "marker,chr,position"
+  geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",")
+  geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID))
+  geno <- geno %>% rename("Sample.ID,,"=Sample.ID)
+
+  #merge with phenotype file (",," pads phenotype names to the same three fields as markers)
+  pheno <- pheno %>% mutate_all(as.character)
+  colnames(pheno) <- str_c(colnames(pheno),",,")
+  qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,"))
+
+  #prepare file: splitting the header row on "," yields the name, chromosome and position rows
+  qtl_file <- rbind(colnames(qtl_file),qtl_file)
+  qtl_file <- separate_rows(qtl_file,everything(),sep=",")
+  colnames(qtl_file) <- qtl_file[1,]
+  qtl_file <- qtl_file %>% slice(-1)
+
+  #export only when a usable path is given
+  if(!is.null(path) && !is.na(path)){
+    write.csv(qtl_file,file=path,quote=FALSE,row.names = FALSE)
+  }
+
+  return(qtl_file)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/47AFB64 b/.Rproj.user/9DAE6990/sources/s-31136BFE/47AFB64 new file mode 100644 index 0000000000000000000000000000000000000000..c1f85bc43e30bd26f2e6eb6b25e45a632f2a5308 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/47AFB64 @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622538246256.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "47AFB64", + "lastKnownWriteTime" : 140548509794308, + "last_content_update" : 1622538246256, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "6294E01A", + "caption" : "annot_mini", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=6294E01A", + "displayedObservations" : 11125, + "environment" : "", + "expression" : "annot_mini", + "object" : "annot_mini", + "preview" : 0, + "totalObservations" : 11125, + "variables" : 12 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 5, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/47AFB64-contents 
b/.Rproj.user/9DAE6990/sources/s-31136BFE/47AFB64-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/4A9D04E b/.Rproj.user/9DAE6990/sources/s-31136BFE/4A9D04E new file mode 100644 index 0000000000000000000000000000000000000000..cdd7e567a3e6e87d269613c206e7819b910c0dff --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/4A9D04E @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622538256440.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "4A9D04E", + "lastKnownWriteTime" : 140548509794304, + "last_content_update" : 1622538256440, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "2EEA0644", + "caption" : "strains", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=strains&cache_key=2EEA0644", + "displayedObservations" : 11125, + "environment" : "", + "expression" : "strains", + "object" : "strains", + "preview" : 0, + "totalObservations" : 11125, + "variables" : 14 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 6, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/4A9D04E-contents b/.Rproj.user/9DAE6990/sources/s-31136BFE/4A9D04E-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/806AAC34 b/.Rproj.user/9DAE6990/sources/s-31136BFE/806AAC34 new file mode 100644 index 0000000000000000000000000000000000000000..031f6ed8272813e4623cf104db46ea9a751f25fd --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/806AAC34 @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622538162413.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + 
"id" : "806AAC34", + "lastKnownWriteTime" : 5, + "last_content_update" : 1622538162413, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "634A6953", + "caption" : "stuart_tab", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=stuart_tab&cache_key=634A6953", + "displayedObservations" : 11125, + "environment" : "", + "expression" : "stuart_tab", + "object" : "stuart_tab", + "preview" : 0, + "totalObservations" : 11125, + "variables" : 7 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 3, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/806AAC34-contents b/.Rproj.user/9DAE6990/sources/s-31136BFE/806AAC34-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736 b/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736 new file mode 100644 index 0000000000000000000000000000000000000000..55ffa9e750692d751a69e0cf3294b70e78ebca23 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736 @@ -0,0 +1,26 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622537642060.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "0", + "id" : "96AB3736", + "lastKnownWriteTime" : 1622538645, + "last_content_update" : 1622538645004, + "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", + "project_path" : "vignettes/stuaRt.Rmd", + "properties" : { + "cursorPosition" : "128,16", + "last_setup_crc32" : "31136BFE5bfca283", + "scrollLine" : "123" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 2, + "source_on_save" : false, + "source_window" : "", + "type" : "r_markdown" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736-contents 
b/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736-contents new file mode 100644 index 0000000000000000000000000000000000000000..92709f484cb21eb508c53c98fe3f975f8e90ee70 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/96AB3736-contents @@ -0,0 +1,134 @@ +--- +title: "stuart" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{stuart} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Marie Bourdon + +April 2021 + +## Goal + +stuart is an R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows filtering the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondence between the alleles of F2 or N2 individuals and the alleles of the parental strains, etc. + +The examples shown here require the use of the dplyr package. + + +```{r setup} +library(dplyr) +library(stuart) +``` + + +## Annotation files + +The developer of the Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some information about the markers including the chromosome and position where the probe of the marker matches on the genome, whether the marker maps uniquely or not, etc. These files also contain the genetic position of the markers calculated with two methods: "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more information). + +We recommend using these annotation files to reconstruct the file used for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). 
Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. + +Here, we will present an example of the use of stuart with results of an F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and the phenotype dataset produced by the lab: `phenos`. All these datasets are available as examples in the stuart package. + +```{r annot} +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) +``` + + + +```{r load} +data(genos) +summary(genos) +data(phenos) +summary(phenos) +``` + +### Genotyping of parental strains + +To use genotyping results for Rqtl analysis, we need to recode the genotypes of the individuals (originally encoded in A, T, G, C) depending on the genotype of the parental strains: homozygous for the first parental strain (0), heterozygous (1) or homozygous for the second parental strain (2). + +We recommend always genotyping the parental strains of the cross. Here, their genotypes are in the `genos` file and correspond to the Sample.ID "StrainsA_1", "StrainsA_2", "StrainsB_1" and "StrainsB_2". Two individuals were genotyped for each parental strain. The first step will be to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype will be added to the annotation dataset in order to obtain a dataset with both annotation and reference genotype of the parental strains that will be used for recoding the genotypes of the F2 individuals. + +This is done with the `geno_strains` function. + +```{r strains} +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) +``` + +After this step, we need to remove the genotyping results for these individuals from the `genos` dataset. 
+```{r no_parent} +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) +``` + + +## Markers sorting + +### Marker tab + +The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non-genotyped individuals (`n_NA`). This step can take several minutes. You can also load the output of this function. + + +```{r tab_mark} +data(stuart_tab) +summary(stuart_tab) +``` + +Then we will use the different mark_* functions in order to filter the markers. First, we can use the mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change. + +```{r mark_match} +tab2 <- mark_match(stuart_tab,ref=strains) + + +tab2 %>% filter(exclude_match==1) +``` + +Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. + +Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. + +```{r mark_poly ex} +tab2 <- mark_poly(tab2) +head(tab2) +``` + +The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have an F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotypes. If we have an N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows filtering markers depending on the proportion of non-genotyped animals. 
By default, markers for which more than 50% of individuals were not genotyped are excluded. + +```{r mark_prop ex} +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) +``` + +Last, we can use the mark_allele() function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. + +```{r mark_allele} +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() +``` + +Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. + +```{r mark_allele-strains} +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) +``` + +# Creation of the R/qtl file + +After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to achieve this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can load with qtl::read.cross. 
+ +```{r write_qtl} +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") + +rqtl_file[1:10,1:7] +``` + diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/D49EE59C b/.Rproj.user/9DAE6990/sources/s-31136BFE/D49EE59C new file mode 100644 index 0000000000000000000000000000000000000000..8aa2e5b089172b514a4948648ddfeb25355a0027 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-31136BFE/D49EE59C @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622538242843.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "D49EE59C", + "lastKnownWriteTime" : 140551864679664, + "last_content_update" : 1622538242843, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "4C070D0B", + "caption" : "genos", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=genos&cache_key=4C070D0B", + "displayedObservations" : 1957993, + "environment" : "", + "expression" : "genos", + "object" : "genos", + "preview" : 0, + "totalObservations" : 1957993, + "variables" : 11 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 4, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/D49EE59C-contents b/.Rproj.user/9DAE6990/sources/s-31136BFE/D49EE59C-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-31136BFE/lock_file b/.Rproj.user/9DAE6990/sources/s-31136BFE/lock_file new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/viewer-cache/2EEA0644.Rdata b/.Rproj.user/9DAE6990/viewer-cache/2EEA0644.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..07ae6d4fa26059437b3afec2ab8112037f0bf1f4 
Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/2EEA0644.Rdata differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata b/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..c7f59bfdcb70ea156f7c5abe0a7f47d4bdd858f8 Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata b/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..6d9cbeb3f64ff88d5fcca84cc5ff97afd9cbb75f Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata b/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..ad0ad30a49aa244ba94b04ed2893c0739e7226ae Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699031136BFE/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699031136BFE/chunks.json new file mode 100644 index 0000000000000000000000000000000000000000..9a361141cd10311f5d382cad0d9b9c0904bc75de --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699031136BFE/chunks.json @@ -0,0 +1 @@ 
+{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622538563,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000e.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000e.csv new file mode 100644 index 0000000000000000000000000000000000000000..e0b8942f0f6d2e406a000445d2de1d3b4e6653b0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000e.csv @@ -0,0 +1 @@ +"0","strains %>% filter(marker %in% c(""gJAX00038569"",""gJAX00425031"",""gUNC12245354"",""gUNC15530876"",""gUNC21555204"",""gUNC21596600"")) %>% arrange(marker) %>% select(marker,parent1,parent2)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.metadata new file mode 100644 index 0000000000000000000000000000000000000000..ec2c4d79915cb51b75bfe347815a707160949d0e --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.metadata @@ -0,0 +1 @@ +{"classes":["data.frame"],"ncol":3,"nrow":6} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.rdf new file mode 100644 index 
0000000000000000000000000000000000000000..19dc89bc8ba8f67f81cdb1ed0134cdcd7c2ce614 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c45rvmci4gaoy/00000f.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000e.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000e.csv new file mode 100644 index 0000000000000000000000000000000000000000..a472dbbdde0cc5e442424a891bd6c344e74bf76d --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000e.csv @@ -0,0 +1,2 @@ +"0","tab2 <- mark_poly(tab2)" +"0","head(tab2)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.metadata new file mode 100644 index 0000000000000000000000000000000000000000..0c1ed96e6119c430ceb61ff486f6009cdc3ed72b --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.metadata @@ -0,0 +1 @@ +{"classes":["tbl_df","tbl","data.frame"],"ncol":9,"nrow":6} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.rdf new file mode 100644 index 0000000000000000000000000000000000000000..1ee8e8c036a79b472092c20d315cf7b5c665b0b0 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/c4j6ei29p4187/00000f.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000010.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000010.csv new file mode 100644 index 0000000000000000000000000000000000000000..f9373193d6b3c12fbfd69c2af9e7982cc0b6f748 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000010.csv @@ -0,0 +1,2 @@ +"0","strains <- 
geno_strains(ref=annot_mini,geno=genos,par1=c(""StrainsA_1"",""StrainsA_2""),par2=c(""StrainsB_1"",""StrainsB_2""),name1=""parent1"",name2=""parent2"")" +"0","head(strains)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.metadata new file mode 100644 index 0000000000000000000000000000000000000000..785c904bbfba004ed6ad492f617f7efaefd05edd --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.metadata @@ -0,0 +1 @@ +{"classes":["data.frame"],"ncol":14,"nrow":6} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.rdf new file mode 100644 index 0000000000000000000000000000000000000000..3555bbc2181017323842a4e7d7a51c3bbfdfed00 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000011.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000e.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000e.csv new file mode 100644 index 0000000000000000000000000000000000000000..e74b7bdd18c9aca3c844c6fadf7e9c5f35f5e7cc --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000e.csv @@ -0,0 +1,2 @@ +"0","tab2 <- mark_allele(tab=tab2,ref=strains,par1=""parent1"",par2=""parent2"")" +"0","tab2 %>% arrange(desc(exclude_allele)) %>% head()" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.metadata new file mode 100644 index 0000000000000000000000000000000000000000..eeb3cda550272b2a5342cf135b54c94d24052a0f --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.metadata @@ -0,0 +1 @@ 
+{"classes":["tbl_df","tbl","data.frame"],"ncol":11,"nrow":6} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.rdf new file mode 100644 index 0000000000000000000000000000000000000000..9aed4888fa27145560e2699ba308d22765125768 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cgrscnwnyajvi/00000f.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json new file mode 100644 index 0000000000000000000000000000000000000000..9a361141cd10311f5d382cad0d9b9c0904bc75de --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json @@ -0,0 +1 @@ +{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark
","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622538563,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000011.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000011.csv new file mode 100644 index 0000000000000000000000000000000000000000..449e890eb8ddef10a3c44f22143460cfc327cd3a --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000011.csv @@ -0,0 +1,4 @@ +"0","tab2 <- mark_match(stuart_tab,ref=strains)" +"0","" +"0","" +"0","tab2 %>% filter(exclude_match==1)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.metadata 
b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.metadata new file mode 100644 index 0000000000000000000000000000000000000000..39d63b148f62d47607d952ba297b70c258ce94a5 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.metadata @@ -0,0 +1 @@ +{"classes":["tbl_df","tbl","data.frame"],"ncol":8,"nrow":0} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.rdf new file mode 100644 index 0000000000000000000000000000000000000000..29735d32b12b977a22f679a708962e08b9477a71 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ci1zg9xosgth8/000012.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000e.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000e.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe548bf35c02eb7aa036260ffc859252f738a25d --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000e.csv @@ -0,0 +1,2 @@ +"0","tab2 <- mark_prop(tab2,cross=""F2"",homo=0.1,hetero=0.1)" +"0","head(tab2)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.metadata new file mode 100644 index 0000000000000000000000000000000000000000..cb629601be14008f55c58ec2dc91c3672f7ac8cd --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.metadata @@ -0,0 +1 @@ +{"classes":["tbl_df","tbl","data.frame"],"ncol":10,"nrow":6} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.rdf new file mode 100644 index 0000000000000000000000000000000000000000..d1a49b5eedf5d57e934e3307b83fcc148b4d3552 
Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cndnl4vh4xyj8/00000f.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/coar8mvardv1z/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/coar8mvardv1z/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..ae32996095ddd4a5634e65cea490a3045c0e740b --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/coar8mvardv1z/000002.csv @@ -0,0 +1 @@ +"0","genos <- genos %>% filter(!Sample.ID %in% c(""StrainsA_1"", ""StrainsA_2"", ""StrainsB_1"",""StrainsB_2""))" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/csetup_chunk/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/csetup_chunk/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..1edd6be33ee4bd156324047e37e4fa25158aa4fd --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/csetup_chunk/000002.csv @@ -0,0 +1,2 @@ +"0","library(dplyr)" +"0","library(stuart)" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..d60d661c6e52cccece20691efd360d25d764c5b0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000002.csv @@ -0,0 +1 @@ +"0","annot_mini <- read.csv(url(""https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv""))" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cugiprfbptcaw/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cugiprfbptcaw/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..56c8fe6a2ad810a56b647940468c76816c70636f --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cugiprfbptcaw/000002.csv @@ -0,0 +1,4 @@ +"0","knitr::opts_chunk$set(" +"0"," collapse = TRUE," +"0"," comment = ""#>""" 
+"0",")" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/00001f.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/00001f.csv new file mode 100644 index 0000000000000000000000000000000000000000..7da078df8029ee5337e0f0fccd91fb39f8d96ca3 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/00001f.csv @@ -0,0 +1,3 @@ +"0","rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1=""parent1"",par2=""parent2"",prefix=""ind_"",pos=""cM_cox"")" +"0","" +"0","rqtl_file[1:10,1:7]" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.metadata new file mode 100644 index 0000000000000000000000000000000000000000..825da23674df0a3455b3082fc8f3c96aeb486c34 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.metadata @@ -0,0 +1 @@ +{"classes":["tbl_df","tbl","data.frame"],"ncol":7,"nrow":10} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.rdf new file mode 100644 index 0000000000000000000000000000000000000000..f92cc82ac1146811a9837c7238592d3063c24369 Binary files /dev/null and b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cv6d9nrsrzqfm/000020.rdf differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..7bcf3b6ee7edea558c8748560a60dbf1733987d7 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv @@ -0,0 +1,81 @@ +"0","data(stuart_tab)" +"0","summary(stuart_tab)" +"1","" +"1"," SNP.Name " +"1"," Allele_1 " +"1"," Allele_2 " +"1"," +" +"1"," Length:11125 " +"1"," Length:11125 " +"1"," 
Length:11125 " +"1"," +" +"1"," Class :character " +"1"," Class :character " +"1"," Class :character " +"1"," +" +"1"," Mode :character " +"1"," Mode :character " +"1"," Mode :character " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1","" +"1"," n_HM1 " +"1"," n_HM2 " +"1"," n_HT " +"1"," n_NA " +"1"," +" +"1"," Min. : 0.0 " +"1"," Min. : 0.00 " +"1"," Min. : 0.00 " +"1"," Min. : 0.00 " +"1"," +" +"1"," 1st Qu.: 44.0 " +"1"," 1st Qu.: 0.00 " +"1"," 1st Qu.: 0.00 " +"1"," 1st Qu.: 0.00 " +"1"," +" +"1"," Median :174.0 " +"1"," Median : 0.00 " +"1"," Median : 0.00 " +"1"," Median : 1.00 " +"1"," +" +"1"," Mean :123.9 " +"1"," Mean : 19.92 " +"1"," Mean : 19.24 " +"1"," Mean : 12.91 " +"1"," +" +"1"," 3rd Qu.:176.0 " +"1"," 3rd Qu.: 34.00 " +"1"," 3rd Qu.: 5.00 " +"1"," 3rd Qu.: 5.00 " +"1"," +" +"1"," Max. :176.0 " +"1"," Max. :175.00 " +"1"," Max. :176.00 " +"1"," Max. :176.00 " +"1"," +" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cystga685ux9r/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cystga685ux9r/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..b06568fb1cb9ef00e528b36682ee971d0f270364 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cystga685ux9r/000002.csv @@ -0,0 +1,209 @@ +"0","data(genos)" +"0","summary(genos)" +"1","" +"1"," SNP.Name " +"1"," Sample.ID " +"1"," Allele1...Forward " +"1"," +" +"1"," Length:2002493 " +"1"," Length:2002493 " +"1"," Length:2002493 " +"1"," +" +"1"," Class :character " +"1"," Class :character " +"1"," Class :character " +"1"," +" +"1"," Mode :character " +"1"," Mode :character " +"1"," Mode :character " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1"," " +"1"," " +"1"," " +"1"," +" +"1","" +"1"," Allele2...Forward " +"1"," X " +"1"," Y " +"1"," +" +"1"," Length:2002493 " +"1"," Min. 
: 0.0000 " +"1"," Min. : 0.0000 " +"1"," +" +"1"," Class :character " +"1"," 1st Qu.: 0.0260 " +"1"," 1st Qu.: 0.0480 " +"1"," +" +"1"," Mode :character " +"1"," Median : 0.1750 " +"1"," Median : 0.5010 " +"1"," +" +"1"," " +"1"," Mean : 0.5716 " +"1"," Mean : 0.6564 " +"1"," +" +"1"," " +"1"," 3rd Qu.: 0.8560 " +"1"," 3rd Qu.: 1.0470 " +"1"," +" +"1"," " +"1"," Max. :25.1610 " +"1"," Max. :20.7940 " +"1"," +" +"1"," " +"1"," NA's :87 " +"1"," NA's :87 " +"1"," +" +"1","" +"1"," GC.Score " +"1"," Theta " +"1"," X.Raw " +"1"," Y.Raw " +"1"," +" +"1"," Min. :0.0000 " +"1"," Min. :0.0000 " +"1"," Min. : 0 " +"1"," Min. : 0 " +"1"," +" +"1"," 1st Qu.:0.6747 " +"1"," 1st Qu.:0.0420 " +"1"," 1st Qu.: 546 " +"1"," 1st Qu.: 678 " +"1"," +" +"1"," Median :0.8550 " +"1"," Median :0.6620 " +"1"," Median : 1611 " +"1"," Median : 3908 " +"1"," +" +"1"," Mean :0.7276 " +"1"," Mean :0.5606 " +"1"," Mean : 5265 " +"1"," Mean : 5578 " +"1"," +" +"1"," 3rd Qu.:0.9245 " +"1"," 3rd Qu.:0.9800 " +"1"," 3rd Qu.: 7924 " +"1"," 3rd Qu.: 9232 " +"1"," +" +"1"," Max. :0.9889 " +"1"," Max. :1.0000 " +"1"," Max. :49906 " +"1"," Max. :43935 " +"1"," +" +"1"," NA's :326 " +"1"," NA's :413 " +"1"," " +"1"," " +"1"," +" +"1","" +"1"," R " +"1"," +" +"1"," Min. : 0.000 " +"1"," +" +"1"," 1st Qu.: 0.605 " +"1"," +" +"1"," Median : 1.027 " +"1"," +" +"1"," Mean : 1.228 " +"1"," +" +"1"," 3rd Qu.: 1.553 " +"1"," +" +"1"," Max. :26.041 " +"1"," +" +"1"," NA's :413 " +"1"," +" +"0","data(phenos)" +"0","summary(phenos)" +"1","" +"1"," Ind " +"1"," Sex " +"1"," Age " +"1"," Pheno " +"1"," +" +"1"," 201 : 1 " +"1"," Length:176 " +"1"," Min. :5.000 " +"1"," Min. 
: 8.609 " +"1"," +" +"1"," 202 : 1 " +"1"," Class :character " +"1"," 1st Qu.:7.000 " +"1"," 1st Qu.:10.420 " +"1"," +" +"1"," 203 : 1 " +"1"," Mode :character " +"1"," Median :7.000 " +"1"," Median :10.628 " +"1"," +" +"1"," 204 : 1 " +"1"," " +"1"," Mean :7.176 " +"1"," Mean :10.526 " +"1"," +" +"1"," 205 : 1 " +"1"," " +"1"," 3rd Qu.:8.000 " +"1"," 3rd Qu.:10.793 " +"1"," +" +"1"," 206 : 1 " +"1"," " +"1"," Max. :9.000 " +"1"," Max. :11.147 " +"1"," +" +"1"," (Other):170 " +"1"," " +"1"," " +"1"," " +"1"," +" diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths new file mode 100644 index 0000000000000000000000000000000000000000..087641a55ee28f04f32985d6fd367387560a8391 --- /dev/null +++ b/.Rproj.user/shared/notebooks/paths @@ -0,0 +1,2 @@ +/Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.R="6C8CAE13" +/Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..00b31dc8122796441fafc7815eb71f44411a5dd5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +doc +Meta diff --git a/R/write_rqtl.R b/R/write_rqtl.R index b02eb333df6c67886b4a8245fe181dab8e923155..b6beb864d28ee5a4d533d5f9fb57a99499504522 100755 --- a/R/write_rqtl.R +++ b/R/write_rqtl.R @@ -8,7 +8,7 @@ #' @param ref data frame with the reference genotypes of mouse lines #' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame #' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame -#' @param method method of calculation of cM position, can be "cM_cox" of "cM_g2f1" +#' @param pos column with marker positions #' @param path if indicated, 
the data frame will be exported in this path #' #' @import dplyr @@ -20,7 +20,7 @@ #' #### write_rqtl #### ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated -write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,method="cM_cox",path=NA){ +write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #extract snps non excluded if("exclude_match" %in% colnames(tab)){ tab <- tab %>% filter(exclude_match==0) @@ -48,7 +48,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,method="cM_cox",path= #keep parental lines genotypes colnames(ref) <- make.names(colnames(ref)) - ref <- ref %>% select(marker,chr,bp_mm10,cM_cox,cM_g2f1,!!sym(par1),!!sym(par2)) + ref <- ref %>% select(marker,chr,bp_mm10,!!sym(pos),!!sym(par1),!!sym(par2)) #merge genotypes with parents geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) @@ -92,7 +92,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,method="cM_cox",path= #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,bp_mm10,cM_cox,cM_g2f1) %>% distinct() + markers <- geno %>% select(SNP.Name,chr,bp_mm10,!!sym(pos)) %>% distinct() markers <- markers %>% arrange(chr,bp_mm10) @@ -106,11 +106,11 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,method="cM_cox",path= #keep only non excluded markers and merge with positions markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(method)) %>% full_join(.,geno,by="SNP.Name") + geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(method)),values_from = Geno,names_sep=",") + geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) geno <- geno %>% 
rename("Sample.ID,,"=Sample.ID) diff --git a/man/write_rqtl.Rd b/man/write_rqtl.Rd index 77fb9302cf40dd2bff55569500ef452f85e9214b..0a52b05dbafb614aa89bd3209a8d3f86d255b50e 100755 --- a/man/write_rqtl.Rd +++ b/man/write_rqtl.Rd @@ -4,17 +4,7 @@ \alias{write_rqtl} \title{Create data frame in Rqtl CSV format} \usage{ -write_rqtl( - geno, - pheno, - tab, - ref, - par1, - par2, - prefix, - method = "cM_cox", - path = NA -) +write_rqtl(geno, pheno, tab, ref, par1, par2, prefix, pos, path = NA) } \arguments{ \item{geno}{data frame with the genotyping results for your cross} @@ -31,7 +21,7 @@ write_rqtl( \item{prefix}{potential prefix present in the names of the individuals in the geno data frame to be removed in ordere to have the same names as in the pheno file} -\item{method}{method of calculation of cM position, can be "cM_cox" of "cM_g2f1"} +\item{pos}{column with marker positions} \item{path}{if indicated, the data frame will be exported in this path} } diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz index 140b8ed060e56fe4f8b1241a40f6514e0bceb3dc..b975259ba39e1b147de97d89b118dd258f90a024 100644 Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index 4e294ab0cc3558b6b84acf6a660e790a6c6522f4..92709f484cb21eb508c53c98fe3f975f8e90ee70 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -1,8 +1,8 @@ --- -title: "stuaRt" +title: "stuart" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{stuaRt} + %\VignetteIndexEntry{stuart} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -21,7 +21,7 @@ April 2021 ## Goal -stuaRt is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. 
Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. +stuart is an R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows filtering the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondence between F2 or N2 individuals alleles and parental strains alleles, etc. The examples shown here require the use of dplyr package. @@ -38,7 +38,7 @@ The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotat We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. -Here, we will present an example of the use of stuaRt with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuaRt package. +Here, we will present an example of the use of stuart with results of an F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and the phenotype dataset produced by the lab: `phenos`. All these datasets are available as examples in the stuart package. ```{r annot} annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) @@ -127,7 +127,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file.
If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. ```{r write_qtl} -rqtl_file <- (write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_")) +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") rqtl_file[1:10,1:7] ```