diff --git a/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper b/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper new file mode 100644 index 0000000000000000000000000000000000000000..5528aea2ba53c044f500e2c6c10b5679ec78cffc --- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper @@ -0,0 +1,6 @@ +{ + "debugBreakpointsState" : { + "breakpoints" : [ + ] + } +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/source-pane.pper b/.Rproj.user/9DAE6990/pcs/source-pane.pper index 3249574fb10b23a53547595769fe33dcbefc247e..1743e40fec30e357993d33f5cb053bf027524dc8 100644 --- a/.Rproj.user/9DAE6990/pcs/source-pane.pper +++ b/.Rproj.user/9DAE6990/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : 2 + "activeTab" : 0 } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper index 51a8bddc2368de4b56205ec1bcaf88f0626afb6d..bc18be10f83a2abcc1dc870217f08f663eef0adf 100644 --- a/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper +++ b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper @@ -1,14 +1,14 @@ { "left" : { - "panelheight" : 689, - "splitterpos" : 290, + "panelheight" : 1271, + "splitterpos" : 533, "topwindowstate" : "NORMAL", - "windowheight" : 727 + "windowheight" : 1309 }, "right" : { - "panelheight" : 689, - "splitterpos" : 436, + "panelheight" : 1271, + "splitterpos" : 804, "topwindowstate" : "NORMAL", - "windowheight" : 727 + "windowheight" : 1309 } } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/workbench-pane.pper b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper index 3eb507b3b7673752c5fcdc0c6072023af93d1b7e..dd00eb66589a93dc361ab2526cb1a958c2b41bcb 100644 --- a/.Rproj.user/9DAE6990/pcs/workbench-pane.pper +++ b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper @@ -1,6 +1,6 @@ { "TabSet1" : 3, - "TabSet2" : 3, + "TabSet2" : 4, "TabZoom" : { } } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/persistent-state 
b/.Rproj.user/9DAE6990/persistent-state index 5dca5416de0070e60c804242c2243fc7336ef63b..9d859cb1b1a467561a68e55e57eae088ad344f30 100644 --- a/.Rproj.user/9DAE6990/persistent-state +++ b/.Rproj.user/9DAE6990/persistent-state @@ -1,6 +1,6 @@ build-last-errors="[]" build-last-errors-base-dir="~/stuart_package/stuart/" -build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" +build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* 
installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" files.monitored-path="" find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" diff --git a/.Rproj.user/9DAE6990/rmd-outputs b/.Rproj.user/9DAE6990/rmd-outputs index 3f2ff2d6cc8f257ffcade7ead1ca4042c0e884b9..dd18eccd1a62da6163f019b613c98281f6147ebc 100644 --- a/.Rproj.user/9DAE6990/rmd-outputs +++ b/.Rproj.user/9DAE6990/rmd-outputs @@ -1,5 +1,7 @@ 
+/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/Rtmp3VMULh/preview-1ced414f7aab.dir/stuaRt.html +/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/Rtmp3VMULh/preview-1ced48fe920c.dir/stuaRt.html - +/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpNme5vw/preview-4c4321234d03.dir/stuaRt.html diff --git a/.Rproj.user/9DAE6990/sources/prop/1416C0B5 b/.Rproj.user/9DAE6990/sources/prop/1416C0B5 new file mode 100644 index 0000000000000000000000000000000000000000..a3b2e9f1972ab5f437c3782dab851521fe0d6f46 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/1416C0B5 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "23,55", + "scrollLine" : "0" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/3A328548 b/.Rproj.user/9DAE6990/sources/prop/3A328548 index 7a73a41bfdf76d6f793007240d80983a52f15f97..a5638714b0481f1423ea85365c8af64243a71277 100644 --- a/.Rproj.user/9DAE6990/sources/prop/3A328548 +++ b/.Rproj.user/9DAE6990/sources/prop/3A328548 @@ -1,2 +1,4 @@ { + "cursorPosition" : "9,51", + "scrollLine" : "0" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/5B8691C7 b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 index 1f3aca9005e6c2bfeb0a9b999c80078a7d57fb23..dd976e56d512c526a7b5d85f156ef0835b1871df 100644 --- a/.Rproj.user/9DAE6990/sources/prop/5B8691C7 +++ b/.Rproj.user/9DAE6990/sources/prop/5B8691C7 @@ -1,4 +1,4 @@ { - "cursorPosition" : "48,32", - "scrollLine" : "33" + "cursorPosition" : "84,18", + "scrollLine" : "94" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/65312719 b/.Rproj.user/9DAE6990/sources/prop/65312719 new file mode 100644 index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/65312719 @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/7411866 b/.Rproj.user/9DAE6990/sources/prop/7411866 new file mode 100644 index 
0000000000000000000000000000000000000000..5ba20d4665c8e92b885b11b7eb579f858494350a --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/7411866 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "8,12", + "scrollLine" : "0" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/75F49619 b/.Rproj.user/9DAE6990/sources/prop/75F49619 new file mode 100644 index 0000000000000000000000000000000000000000..4c067597f326f93a2178a77bfffe89df9d1ceed1 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/75F49619 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "20,46", + "scrollLine" : "0" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/7FA3B215 b/.Rproj.user/9DAE6990/sources/prop/7FA3B215 new file mode 100644 index 0000000000000000000000000000000000000000..94c7b77269eb489932ce52fa9bfafa4d9499431d --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/7FA3B215 @@ -0,0 +1,5 @@ +{ + "cursorPosition" : "43,27", + "scrollLine" : "29", + "source_window_id" : "" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/8F7B714A b/.Rproj.user/9DAE6990/sources/prop/8F7B714A new file mode 100644 index 0000000000000000000000000000000000000000..178047270b28ae5a314194a4d204cf820f8cc815 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/8F7B714A @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "22,39", + "scrollLine" : "7" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/A609F054 b/.Rproj.user/9DAE6990/sources/prop/A609F054 new file mode 100644 index 0000000000000000000000000000000000000000..5f0afe0648b6cd7813c3d9cd265fd84d876fa506 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/A609F054 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "2,104", + "scrollLine" : "0" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/C10FF5C8 b/.Rproj.user/9DAE6990/sources/prop/C10FF5C8 new file mode 100644 index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97 --- /dev/null 
+++ b/.Rproj.user/9DAE6990/sources/prop/C10FF5C8 @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 index af5f2e48960c4e1a4aac4a30a1c7f41c9a094e04..59518e3e3c0da54f662f7dd699fd976f910ca7c7 100644 --- a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 +++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 @@ -1,5 +1,5 @@ { - "cursorPosition" : "44,0", + "cursorPosition" : "135,15", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "44" + "scrollLine" : "130" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX index 2ae9108e07b4ff0d7c0f0fc9a79a4e96343a7e78..16f98885be4bba0b41bed600c49af1738abd995d 100644 --- a/.Rproj.user/9DAE6990/sources/prop/INDEX +++ b/.Rproj.user/9DAE6990/sources/prop/INDEX @@ -1,6 +1,14 @@ ~%2Fstuart_package%2Fstuart%2FDESCRIPTION="BEB7232" ~%2Fstuart_package%2Fstuart%2FNAMESPACE="AE613167" +~%2Fstuart_package%2Fstuart%2FR%2Fgeno_strains.R="8F7B714A" +~%2Fstuart_package%2Fstuart%2FR%2Fgenos-data.R="C10FF5C8" +~%2Fstuart_package%2Fstuart%2FR%2Fmark_allele.R="1416C0B5" +~%2Fstuart_package%2Fstuart%2FR%2Fmark_match.R="75F49619" ~%2Fstuart_package%2Fstuart%2FR%2Fmark_poly.R="3A328548" +~%2Fstuart_package%2Fstuart%2FR%2Fmark_prop.R="A609F054" +~%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="7411866" +~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215" ~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7" +~%2Fstuart_package%2Fstuart%2Fdoc%2FstuaRt.R="65312719" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 new file mode 100644 index 0000000000000000000000000000000000000000..7aaecc2f4049a5605d0b04da4901da8698b401ce --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 @@ 
-0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622641774018.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "1203844341", + "id" : "1656F55", + "lastKnownWriteTime" : 1622640147, + "last_content_update" : 1622640147, + "path" : "~/stuart_package/stuart/R/geno_strains.R", + "project_path" : "R/geno_strains.R", + "properties" : { + "cursorPosition" : "22,39", + "scrollLine" : "7" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 4, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents new file mode 100644 index 0000000000000000000000000000000000000000..d4104d0f3af4780745e4939ce649d7e90e8c995e --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents @@ -0,0 +1,55 @@ +#' @title Create haplotype for a new mouse strain into a reference dataframe +#' +#' @description This functions adds columns for parental strains used in the cross in the annotation data frame, from the genotype data frame in which one or several animal of the parental strains were genotyped. +#' If several animals of one strain were genotyped, a consensus is created from these animals. +#' The consensus is created as follow : if the indivuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H". 
+#' @param ref data frame with the reference genotypes of mouse lines +#' @param geno data frame with the genotyping results for your cross from miniMUGA array +#' @param par1 first parental strain used in the cross, the name must be written as in the geno data frame +#' @param par2 second parental strain used in the cross, the name must be written as in the geno data frame +#' @param name1 name of the first parental strain to use as the column name in the ref data frame +#' @param name2 name of the second parental strain to use as the column name in the ref data frame +#' +#' @import dplyr +#' @import tidyr +#' +#' @export +#' +geno_strains <- function(ref,geno,par1,par2,name1,name2){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + + + #recode genotypes from 2 alleles to 1 + geno <- geno %>% mutate_all(as.character) + geno <- geno %>% filter(id %in% c(par1,par2)) + geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", + allele_1 == allele_2 ~ allele_1, + allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) + + geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) + + + #create consensus + if(length(par1)!=1){ + geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N")) + } else { + geno <- geno %>% rename(parent1=!!sym(par1[1])) + } + + if(length(par2)!=1){ + geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N")) + } else { + geno <- geno %>% rename(parent2=!!sym(par2[1])) + } + + geno <- geno %>% select(marker,parent1,parent2) + colnames(geno) <- c("marker",name1,name2) + + #merge with ref file + ref <- full_join(ref,geno,by=c("marker"="marker")) + return(ref) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345-contents new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 new file mode 100644 index 0000000000000000000000000000000000000000..583c1f70b07438033c80b8e1b996e4db2d8d5b17 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622636142238.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "0", + "id" : "42D37312", + "lastKnownWriteTime" : 1622465534, + "last_content_update" : 1622465534, + "path" : "~/stuart_package/stuart/R/mark_prop.R", + "project_path" : "R/mark_prop.R", + "properties" : { + "cursorPosition" : "2,104", + "scrollLine" : "0" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 9, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents new file mode 100644 index 0000000000000000000000000000000000000000..a0827e8c27b02c9d65a6cc3af34cf595f50a5840 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents @@ -0,0 +1,50 @@ +#' @title Exclude markers depending on proportions of homo/hetorozygous +#' +#' @description uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. +#' @param tab data frame obtained with tab_mark function. +#' @param cross F2 or N2. +#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2. +#' @param hetero proportion of heterozygous individuals under which the marker is excluded. 
+#' @param na proportion of non-genotyped individuals above which the marker is excluded. +#' +#' @import dplyr +#' +#' @export +#' + +#### mark_prop #### +## excludes markers depending on proportions of homo/hetorozygous +mark_prop <- function(tab,cross,homo=NA,hetero=NA,na=0.5){ + #calculate total number of individuals genotyped for each marker + tab <- tab %>% mutate(n_geno = tab %>% select(n_HM1,n_HM2,n_HT) %>% rowSums(na.rm=TRUE)) + + #calculate proportion of each genotype + tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno) + tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno) + tab <- tab %>% mutate(p_HT = n_HT/n_geno) + tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA)) + + #stop if cross != "F2" or "N2" + if(!cross %in% c("F2","N2")){ + stop("Cross must be F2 or N2") + } + + #stop if no value for "homo" for F2 cross + if(cross=="F2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ + stop("Arguments homo and hetero must be specified for F2 crosses") + } + + #stop if no value for "homo" and "hetero" for N2 cross + if(cross=="N2" & (is.na(homo)==TRUE | is.na(hetero)==TRUE)){ + stop("Arguments homo and hetero must be specified for N2 crosses") + } + + #exclude markers according to proportion of na + tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, #exclude markers according to proportion of na + cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous individuals for F2 cross + cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #exclude markers according to proportion of homozygous and heterozygous individuals for N2 cross + T ~ 0)) + + tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno)) + return(tab) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 index 60c39ac66e63aef8037f21693027f0e9f18946db..f68fb4c1e8cf5d5be740afb5ed88aef0a4f0d0a3 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 +++ 
b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 @@ -5,20 +5,20 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "3120719904", + "hash" : "1139135974", "id" : "45D91D58", - "lastKnownWriteTime" : 1622621980, - "last_content_update" : 1622621980790, + "lastKnownWriteTime" : 1622648301, + "last_content_update" : 1622648301329, "path" : "~/stuart_package/stuart/R/write_rqtl.R", "project_path" : "R/write_rqtl.R", "properties" : { - "cursorPosition" : "48,32", - "scrollLine" : "33" + "cursorPosition" : "84,18", + "scrollLine" : "94" }, "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 5, + "relative_order" : 11, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents index cbfc05505af72a38887f69cdef97707b5d8e8bf1..0926c5c3a44d009a46244ee5d52835afb1309978 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58-contents @@ -21,6 +21,12 @@ #### write_rqtl #### ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #extract snps non excluded if("exclude_match" %in% colnames(tab)){ tab <- tab %>% filter(exclude_match==0) @@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #filter genotypes for non excluded markers in geno file - geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) + geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) #recode parents' names to match column names nomenclature par1 <- make.names(par1) @@ -51,33 +57,33 @@ write_rqtl <- 
function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) #merge genotypes with parents - geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) + geno <- left_join(geno,ref,by=c("marker"="marker")) #recode "-" in "N" in geno file - geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, + geno <- geno %>% mutate(allele_1 = recode(allele_1, "-" = "N")) - geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, + geno <- geno %>% mutate(allele_2 = recode(allele_2, "-" = "N")) #recode geno in factors with same levels - geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) - geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T"))) #recode genotypes depending on parents' genotypes geno <- geno %>% mutate(Geno = case_when( #if one allele not genotyped: - Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", + allele_1=="N" | allele_2=="N" ~ "NA", #if both alleles genotyped ##homozygous 0 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", + allele_1==allele_2 & allele_1==!!sym(par1) ~ "0", ##homozygous 2 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", + allele_1==allele_2 & allele_1==!!sym(par2) ~ "2", ##heterozygous - Allele1...Forward!=Allele2...Forward ~ "1", + allele_1!=allele_2 ~ "1", #if parental strains are N/H ##homozygous for parent that is N/H @@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() + markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct() markers <- markers %>% 
arrange(chr,!!sym(pos)) #keep only interesting columns in geno file geno <- geno %>% arrange(chr,!!sym(pos)) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) + geno <- geno %>% select(marker,id,Geno) #remove prefix - geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) + geno <- geno %>% mutate(id=str_remove(id,prefix)) #keep only non excluded markers and merge with positions - markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) + markers <- markers %>% mutate(marker=as.character(marker)) markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name") + geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker") #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") - geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) - geno <- geno %>% rename("Sample.ID,,"=Sample.ID) + geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",") + geno <- geno %>% mutate(id=as.character(id)) + geno <- geno %>% rename("id,,"=id) #merge with phenotype file pheno <- pheno %>% mutate_all(as.character) colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) + qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,")) #prepare file qtl_file <- rbind(colnames(qtl_file),qtl_file) diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64 b/.Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64 deleted file mode 100644 index 1d0bce58ef73f88ca7ce09fdac2b8dce157f7dd9..0000000000000000000000000000000000000000 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64 +++ /dev/null @@ -1,33 +0,0 @@ -{ - "collab_server" : "", - "contents" : "", - "created" : 1622538246256.000, - "dirty" : false, - "encoding" : "", - "folds" : "", - "hash" : "0", - "id" : "47AFB64", - "lastKnownWriteTime" : 140548509794308, - 
"last_content_update" : 1622538246256, - "path" : null, - "project_path" : null, - "properties" : { - "cacheKey" : "6294E01A", - "caption" : "annot_mini", - "contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=6294E01A", - "displayedObservations" : 11125, - "environment" : "", - "expression" : "annot_mini", - "object" : "annot_mini", - "preview" : 0, - "totalObservations" : 11125, - "variables" : 12 - }, - "read_only" : false, - "read_only_alternatives" : [ - ], - "relative_order" : 6, - "source_on_save" : false, - "source_window" : "", - "type" : "r_dataframe" -} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E b/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E deleted file mode 100644 index 2576ef9c04c31f42f598b3ab49d0632a0f8a8001..0000000000000000000000000000000000000000 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E +++ /dev/null @@ -1,33 +0,0 @@ -{ - "collab_server" : "", - "contents" : "", - "created" : 1622538256440.000, - "dirty" : false, - "encoding" : "", - "folds" : "", - "hash" : "0", - "id" : "4A9D04E", - "lastKnownWriteTime" : 140548509794304, - "last_content_update" : 1622538256440, - "path" : null, - "project_path" : null, - "properties" : { - "cacheKey" : "F7A5FD33", - "caption" : "strains", - "contentUrl" : "grid_resource/gridviewer.html?env=&obj=strains&cache_key=F7A5FD33", - "displayedObservations" : "11125", - "environment" : "", - "expression" : "strains", - "object" : "strains", - "preview" : "0", - "totalObservations" : "11125", - "variables" : "14" - }, - "read_only" : false, - "read_only_alternatives" : [ - ], - "relative_order" : 3, - "source_on_save" : false, - "source_window" : "", - "type" : "r_dataframe" -} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B new file mode 100644 index 0000000000000000000000000000000000000000..51dacef1135242c12e552e8bc12afea49f8f3133 --- /dev/null 
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B @@ -0,0 +1,26 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622637863181.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "1762622592", + "id" : "65C9B72B", + "lastKnownWriteTime" : 1622642120, + "last_content_update" : 1622642120783, + "path" : "~/stuart_package/stuart/R/tab_mark.R", + "project_path" : "R/tab_mark.R", + "properties" : { + "cursorPosition" : "43,27", + "scrollLine" : "29", + "source_window_id" : "" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 5, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents new file mode 100644 index 0000000000000000000000000000000000000000..95f4e184b34f860c2f5fee3e2cb8d4cf2fbc1fa8 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents @@ -0,0 +1,143 @@ +#' @title Create of the summary table for all markers from the genotype data frame +#' +#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals. 
+#' @param geno data frame with the genotyping results for your cross +#' +#' @import dplyr +#' @import tidyr +#' +#' @export +#' + +#### tab_mark function #### +## create table with markers and counts +tab_mark <- function(geno){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + + #create geno column in geno df + geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE) + + #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") + geno <- geno %>% mutate(Geno=recode(Geno, + "TA" = "AT", + "GA" = "AG", + "CA" = "AC", + "GT" = "TG", + "CT" = "TC", + "GC" = "CG")) + + + #create df with counts for each genotype + df_count <- tibble(marker = as.character(unique(geno$marker)), + allele_1 = NA, + allele_2 = NA, + n_HM1 = NA, + n_HM2 = NA, + n_HT = NA, + n_NA = NA) + + + ## loop to count genotype + for(i in df_count$marker){ + #extract alleles for each marker + Alleles <- geno %>% filter(marker==i) %>% + select(c(marker,id,Geno,allele_1,allele_2)) %>% + pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>% + distinct(Allele) %>% filter(Allele != "-") + Alleles <- as.factor(paste(Alleles$Allele)) + + #sort alleles + Alleles <- factor(Alleles,levels=c("A","T","C","G")) + Alleles <- sort(Alleles) + + #add alleles and counts, only for markers with alleles (not markers with no genotyped ind) + if(all(rapportools::is.empty(Alleles))==FALSE){ + + #add alleles to df_count + df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i, + paste(Alleles[1]), allele_1)) + + + + #count for homozygous for allele 1 + n1 <- geno %>% filter(marker==i) %>% + filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% + summarise(n=n()) + + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i, + n1$n, n_HM1)) + + + } + + #if marker not polymorphic + if(is.na(Alleles[2])==TRUE){ + #NA as allele_2 + df_count 
<- df_count %>% mutate(allele_2 = ifelse(marker == i, + NA, allele_2)) + + #NA as n_HM2 + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, + NA, n_HM2)) + + #NA as n_HT + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, + NA, n_HT)) + } else { + #add alleles to df_count + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + paste(Alleles[2]), allele_2)) + + + #count for homozygous for allele 2 + n2 <- geno %>% filter(marker==i) %>% + filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% + summarise(n=n()) + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, + n2$n, n_HM2)) + + + #count for heterozygous + n3 <- geno %>% filter(marker==i) %>% + filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% + summarise(n=n()) + + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, + n3$n, n_HT)) + + + } + + #count for NA + n4 <- geno %>% filter(marker==i) %>% + filter(Geno == "--" | + Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | + Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% + summarise(n=n()) + + #add count for NA to df_count + df_count <- df_count %>% mutate(n_NA = ifelse(marker == i, + n4$n, n_NA)) + } + #change class of counts as numeric : + df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric() + df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric() + df_count$n_HT <- df_count$n_HT %>% as.numeric() + df_count$n_NA <- df_count$n_NA %>% as.numeric() + + #add 0 for null counts + df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0)) + + #return + return(df_count) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/6B5DE0D4-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/6B5DE0D4-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC new file mode 100644 index 0000000000000000000000000000000000000000..19b279e0190cba636ccbe66aa8dcf40ccca650c5 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622636138213.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "0", + "id" : "76AC3EC", + "lastKnownWriteTime" : 1622644628, + "last_content_update" : 1622644628349, + "path" : "~/stuart_package/stuart/R/mark_match.R", + "project_path" : "R/mark_match.R", + "properties" : { + "cursorPosition" : "20,46", + "scrollLine" : "0" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 7, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents new file mode 100644 index 0000000000000000000000000000000000000000..e1c38342708d568c92282dd02bf7938859b2d1d0 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents @@ -0,0 +1,25 @@ +#' @title Exclude markers that were not genotyped in the reference strains +#' +#' @description This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that were genotyped in the individuals of the cross but not in the reference strains. This is useful if the parental strains of the cross were not genotyped with the individuals and a previous genotyping result is used. Indeed, changes in the markers of the array may have happened. We recommend always using this function in order to avoid errors. 
+#' @param tab data frame obtained with tab_mark function +#' @param ref data frame with the reference genotypes of mouse lines +#' +#' @import dplyr +#' +#' @export +#' +mark_match <- function(tab, #tab_mark df + ref){ #strain ref geno file + + #finds SNPs that are in both files: + snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file + snp_genfile <- as.character(tab$marker) #extracts SNPs in cross geno file + snp <- intersect(snp_strains,snp_genfile) #take intercept + + + #add results in exclude column + return(tab %>% mutate(exclude_match=ifelse(!marker %in% snp, + 1, + 0))) + +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents new file mode 100644 index 0000000000000000000000000000000000000000..fd50eb0ea6a1188c1f619aa7510b6b276c9c2895 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents @@ -0,0 +1,8 @@ +#' Data frame with miniMUGA genotyping of F2 individuals and parental strains +#' +#' A dataset containing the genotypes of 176 F2 individuals +#' +#' @format A data frame with 2002493 observations of 11 variables + + +"genos" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34 b/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34 deleted file mode 100644 index bb5c4d7f601ff6996edbba477057227ab1fcc07c..0000000000000000000000000000000000000000 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34 +++ /dev/null @@ -1,33 +0,0 @@ -{ - "collab_server" : "", - "contents" : "", - "created" : 1622538162413.000, - "dirty" : false, - "encoding" : "", - "folds" : "", - "hash" : "0", - "id" : "806AAC34", - "lastKnownWriteTime" : 5, - "last_content_update" : 1622538162413, - "path" : null, - "project_path" : null, - "properties" : { - "cacheKey" : "634A6953", - "caption" : "stuart_tab", - "contentUrl" : "grid_resource/gridviewer.html?env=&obj=stuart_tab&cache_key=634A6953", - "displayedObservations" : 11125, - "environment" : "", - 
"expression" : "stuart_tab", - "object" : "stuart_tab", - "preview" : 0, - "totalObservations" : 11125, - "variables" : 7 - }, - "read_only" : false, - "read_only_alternatives" : [ - ], - "relative_order" : 4, - "source_on_save" : false, - "source_window" : "", - "type" : "r_dataframe" -} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents new file mode 100644 index 0000000000000000000000000000000000000000..612c27db930cb54d481e09a9a7ab5efd4912420f --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents @@ -0,0 +1,137 @@ +#' @title Create of the summary table for all markers from the genotype data frame +#' +#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals. +#' @param geno data frame with the genotyping results for your cross +#' +#' @import dplyr +#' @import tidyr +#' +#' @export +#' + +#### tab_mark function #### +## create table with markers and counts +tab_mark <- function(geno){ + #create geno column in geno df + geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE) + + #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") + geno <- geno %>% mutate(Geno=recode(Geno, + "TA" = "AT", + "GA" = "AG", + "CA" = "AC", + "GT" = "TG", + "CT" = "TC", + "GC" = "CG")) + + + #create df with counts for each genotype + df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)), + Allele_1 = NA, + Allele_2 = NA, + n_HM1 = NA, + n_HM2 = NA, + n_HT = NA, + n_NA = NA) + + + ## loop to count genotype + for(i in df_count$SNP.Name){ + #extract alleles for each marker + Alleles <- geno %>% filter(SNP.Name==i) %>% + select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>% + 
pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>% + distinct(Allele) %>% filter(Allele != "-") + Alleles <- as.factor(paste(Alleles$Allele)) + + #sort alleles + Alleles <- factor(Alleles,levels=c("A","T","C","G")) + Alleles <- sort(Alleles) + + #add alleles and counts, only for markers with alleles (not markers with no genotyped ind) + if(all(rapportools::is.empty(Alleles))==FALSE){ + + #add alleles to df_count + df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i, + paste(Alleles[1]), Allele_1)) + + + + #count for homozygous for allele 1 + n1 <- geno %>% filter(SNP.Name==i) %>% + filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% + summarise(n=n()) + + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i, + n1$n, n_HM1)) + + + } + + #if marker not polymorphic + if(is.na(Alleles[2])==TRUE){ + #NA as Allele_2 + df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, + NA, Allele_2)) + + #NA as n_HM2 + df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + NA, n_HM2)) + + #NA as n_HT + df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + NA, n_HT)) + } else { + #add alleles to df_count + df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, + paste(Alleles[2]), Allele_2)) + + + #count for homozygous for allele 2 + n2 <- geno %>% filter(SNP.Name==i) %>% + filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% + summarise(n=n()) + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + n2$n, n_HM2)) + + + #count for heterozygous + n3 <- geno %>% filter(SNP.Name==i) %>% + filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% + summarise(n=n()) + + + #add count for homozygous for allele 1 to df_count + df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + n3$n, n_HT)) + + + } + + #count for NA + n4 <- geno %>% filter(SNP.Name==i) 
%>% + filter(Geno == "--" | + Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | + Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% + summarise(n=n()) + + #add count for NA to df_count + df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i, + n4$n, n_NA)) + } + #change class of counts as numeric : + df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric() + df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric() + df_count$n_HT <- df_count$n_HT %>% as.numeric() + df_count$n_NA <- df_count$n_NA %>% as.numeric() + + #add 0 for null counts + df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0)) + + #return + return(df_count) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents new file mode 100644 index 0000000000000000000000000000000000000000..d4104d0f3af4780745e4939ce649d7e90e8c995e --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents @@ -0,0 +1,55 @@ +#' @title Create haplotype for a new mouse strain into a reference dataframe +#' +#' @description This functions adds columns for parental strains used in the cross in the annotation data frame, from the genotype data frame in which one or several animal of the parental strains were genotyped. +#' If several animals of one strain were genotyped, a consensus is created from these animals. +#' The consensus is created as follow : if the indivuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H". 
+#' @param ref data frame with the reference genotypes of mouse lines +#' @param geno data frame with the genotyping results for your cross from miniMUGA array +#' @param par1 first parental strain used in the cross, the name must be written as in the geno data frame +#' @param par2 second parental strain used in the cross, the name must be written as in the geno data frame +#' @param name1 name of the first parental strain to use as the column name in the ref data frame +#' @param name2 name of the second parental strain to use as the column name in the ref data frame +#' +#' @import dplyr +#' @import tidyr +#' +#' @export +#' +geno_strains <- function(ref,geno,par1,par2,name1,name2){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + + + #recode genotypes from 2 alleles to 1 + geno <- geno %>% mutate_all(as.character) + geno <- geno %>% filter(id %in% c(par1,par2)) + geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", + allele_1 == allele_2 ~ allele_1, + allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) + + geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) + + + #create consensus + if(length(par1)!=1){ + geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N")) + } else { + geno <- geno %>% rename(parent1=!!sym(par1[1])) + } + + if(length(par2)!=1){ + geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N")) + } else { + geno <- geno %>% rename(parent2=!!sym(par2[1])) + } + + geno <- geno %>% select(marker,parent1,parent2) + colnames(geno) <- c("marker",name1,name2) + + #merge with ref file + ref <- full_join(ref,geno,by=c("marker"="marker")) + return(ref) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 index 
601d204391765df78d1b564a3375e7d83f05883c..ff216ba631ada3582dc5f0c3069563828d2cae4a 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 @@ -5,16 +5,16 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "0", + "hash" : "2014255563", "id" : "96AB3736", - "lastKnownWriteTime" : 1622624631, - "last_content_update" : 1622624631694, + "lastKnownWriteTime" : 1622648726, + "last_content_update" : 1622648726992, "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", "project_path" : "vignettes/stuaRt.Rmd", "properties" : { - "cursorPosition" : "44,0", + "cursorPosition" : "135,15", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "44" + "scrollLine" : "130" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents index 92709f484cb21eb508c53c98fe3f975f8e90ee70..e73872a8f734ba8177b36fc84e013586614d4848 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents @@ -17,11 +17,11 @@ knitr::opts_chunk$set( Marie Bourdon -April 2021 +June 2021 ## Goal -stuart is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. +stuart is a R package which formats results of genotyping. It was developed to analyse data from MUGA arrays (Neogen) for Rqtl analysis, for backcross or F2 crosses, but can be used to analyze data of other laboratory animal strains with other arrays. It allows to filter the markers in arrays, from a genetic point of view. 
Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. The examples shown here require the use of dplyr package. @@ -34,17 +34,24 @@ library(stuart) ## Annotation files +In order to map the markers on the genome of the individuals, you need to load a table with the position of all markers in the array. The data frame must contain the following columns: `marker` with the markers names, `chr` with the chromosome of each marker, and a column with the position of the marker on the chromosome. For Rqtl analysis, you need to provide positions in cM. The data frame can contain other columns that you judge helpful. + The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations). We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. -Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuart package. 
```{r annot} annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) ``` +Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. Examples of genotypes and phenotypes dataframe are available in stuart package. + +The genotype data frame must contain a first column with marker names, a second column with sample IDs, a third column with the first allele and a fourth column with the second allele. This format corresponds to the MUGA results. If your data differ, make sure to have these columns in this order. + +We load the result of Neogen genotyping: `genos` (only useful columns with marker name, sample ID and alleles were kept) and the phenotype dataset produced by the lab: `phenos`. + ```{r load} data(genos) @@ -84,7 +91,7 @@ data(stuart_tab) summary(stuart_tab) ``` -Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. +Then we will use the different mark_* functions in order to filter the markers. First, we can use `mark_match()`` function. Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. ```{r mark_match} tab2 <- mark_match(stuart_tab,ref=strains) @@ -95,28 +102,28 @@ tab2 %>% filter(exclude_match==1) Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. -Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. 
+Then, we can use the `mark_poly()` function, which will exclude the markers that are not polymorphic. ```{r mark_poly ex} tab2 <- mark_poly(tab2) head(tab2) ``` -The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. +The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. ```{r mark_prop ex} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) ``` -Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. +Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. 
For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. ```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() ``` -Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. +Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains. ```{r mark_allele-strains} strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) @@ -124,7 +131,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g # Creation of the R/qtl file -After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. +After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. 
```{r write_qtl} rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD b/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD deleted file mode 100644 index 19d6a0284a4f80e652f82039e649457b4cb04245..0000000000000000000000000000000000000000 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD +++ /dev/null @@ -1,23 +0,0 @@ -{ - "collab_server" : "", - "contents" : "", - "created" : 1622622953119.000, - "dirty" : false, - "encoding" : "UTF-8", - "folds" : "", - "hash" : "2767839831", - "id" : "9E3EC0FD", - "lastKnownWriteTime" : 1622539469, - "last_content_update" : 1622539469, - "path" : "~/stuart_package/stuart/NAMESPACE", - "project_path" : "NAMESPACE", - "properties" : { - }, - "read_only" : true, - "read_only_alternatives" : [ - ], - "relative_order" : 8, - "source_on_save" : false, - "source_window" : "", - "type" : "r_namespace" -} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A new file mode 100644 index 0000000000000000000000000000000000000000..f1dd731e6dca27456679b529e0b091df748a97ca --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622641730533.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "953998608", + "id" : "A6A5703A", + "lastKnownWriteTime" : 1622641746, + "last_content_update" : 1622641746302, + "path" : "~/stuart_package/stuart/R/stuart_tab-data.R", + "project_path" : "R/stuart_tab-data.R", + "properties" : { + "cursorPosition" : "8,12", + "scrollLine" : "0" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 3, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git 
a/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents new file mode 100644 index 0000000000000000000000000000000000000000..fcb71e4035488add00e88581add3dd5e39109914 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents @@ -0,0 +1,17 @@ +#' Output of tab_mark function +#' +#' A dataset with the output of tab_mark() function. +#' +#' @format A data frame with 11125 rows and 7 variables +#' \describe{ +#' \item{marker}{name of the marker} +#' \item{allele_1}{first allele of the marker} +#' \item{allele_2}{second allele of the marker} +#' \item{n_HM1}{number of homozygous individuals for the first allele} +#' \item{n_HM2}{number of homozygous individuals for the second allele} +#' \item{n_HT}{number of heterozygous individuals} +#' \item{n_NA}{number of non genotyped individuals} +#' } + + +"stuart_tab" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 new file mode 100644 index 0000000000000000000000000000000000000000..66bd8e587e07c451e45a10606ba2cf11b8bd1fa6 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622636135728.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "0", + "id" : "B86C81D8", + "lastKnownWriteTime" : 1622646597, + "last_content_update" : 1622646597951, + "path" : "~/stuart_package/stuart/R/mark_allele.R", + "project_path" : "R/mark_allele.R", + "properties" : { + "cursorPosition" : "23,55", + "scrollLine" : "0" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 10, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents new file mode 100644 index 
0000000000000000000000000000000000000000..f83d3ac4ea31d973b58101efc2b7f0862fb8714a --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents @@ -0,0 +1,46 @@ +#' @title Exclude markers that have different alleles in the individuals of the cross and in parental strains +#' +#' @description This functions uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers which have alleles observed in the individuals of the cross that do not correspond to the alleles observed in the parental strains. For example, a marker which is not polymorphic between the two parental strains but which has two alleles in the cross individuals will be excluded. +#' @param tab data frame obtained with tab_mark function +#' @param ref data frame with the reference genotypes of mouse lines +#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame +#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame +#' +#' @import dplyr +#' +#' @export +#' +mark_allele <- function(tab,ref,par1,par2){ + + #markers of ref df as characters + ref$marker <- as.character(ref$marker) + colnames(ref) <- make.names(colnames(ref)) + + #recode parents' names to match column names nomenclature + par1 <- make.names(par1) + par2 <- make.names(par2) + + #join tab and ref genotypes + ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) + tab <- full_join(tab,ref,by=c("marker"="marker")) + + #function core + tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE & + !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==FALSE & + (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) & (allele_2!=!!sym(par1) & 
allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==TRUE & + !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & + (allele_1!=!!sym(par1) | allele_1!=!!sym(par2)) ~ 1, + is.na(allele_2)==TRUE & + (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & + allele_1!=!!sym(par1) & allele_1!=!!sym(par2) ~ 1, + T ~ 0) + ) + + tab <- tab %>% select(-c(!!sym(par1),!!sym(par2))) + + return(tab) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 index 117fe9108f5fc30ed9cf2f206ee651fb7bbbdea7..57f6d66cd07d3b881fe3dcc18d07bd201c027b9d 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 @@ -5,18 +5,20 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "1539229739", + "hash" : "0", "id" : "C5228C18", - "lastKnownWriteTime" : 1622462353, - "last_content_update" : 1622462353, + "lastKnownWriteTime" : 1622645200, + "last_content_update" : 1622645200474, "path" : "~/stuart_package/stuart/R/mark_poly.R", "project_path" : "R/mark_poly.R", "properties" : { + "cursorPosition" : "9,51", + "scrollLine" : "0" }, "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 7, + "relative_order" : 8, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18-contents index 168ccf3c35f28348477d6700238dc487666aae6a..170f601676934dbc84ae76f5dca7aa9967547b78 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18-contents @@ -7,7 +7,7 @@ #' #' @export 
mark_poly <- function(tab){ - return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE, + return(tab %>% mutate(exclude_poly=ifelse(is.na(allele_2)==TRUE, 1, 0))) } diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents new file mode 100644 index 0000000000000000000000000000000000000000..0035346d6c1f1b76d77d1af4868a3329f2545064 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C6286151-contents @@ -0,0 +1,56 @@ +## ---- include = FALSE--------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----setup-------------------------------------------------------------------- +library(dplyr) +library(stuart) + +## ----annot-------------------------------------------------------------------- +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) + +## ----load--------------------------------------------------------------------- +data(genos) +summary(genos) +data(phenos) +summary(phenos) + +## ----strains------------------------------------------------------------------ +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"),par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) + +## ----no_parent---------------------------------------------------------------- +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "StrainsB_1","StrainsB_2")) + +## ----tab_mark----------------------------------------------------------------- +data(stuart_tab) +summary(stuart_tab) + +## ----mark_match--------------------------------------------------------------- +tab2 <- mark_match(stuart_tab,ref=strains) + + +tab2 %>% filter(exclude_match==1) + +## ----mark_poly ex------------------------------------------------------------- +tab2 <- mark_poly(tab2) +head(tab2) + +## ----mark_prop 
ex------------------------------------------------------------- +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) + +## ----mark_allele-------------------------------------------------------------- +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() + +## ----mark_allele-strains------------------------------------------------------ +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) + +## ----write_qtl---------------------------------------------------------------- +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") + +rqtl_file[1:10,1:7] + diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 index 1372ca65acf3f5a950452689a001c8527c71eefb..bd85e7ad7d4b50c442dac33ba1e74b7ccfc79637 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 @@ -5,7 +5,7 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "0", + "hash" : "1425873394", "id" : "C81C94E6", "lastKnownWriteTime" : 1622623114, "last_content_update" : 1622623114332, @@ -18,7 +18,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 9, + "relative_order" : 12, "source_on_save" : false, "source_window" : "", "type" : "dcf" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C b/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C deleted file mode 100644 index 1351a95a5c791e2a51e36a6271a6addd1c959c84..0000000000000000000000000000000000000000 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C +++ /dev/null @@ -1,33 +0,0 @@ -{ - "collab_server" : "", - "contents" : "", - "created" : 1622538242843.000, - "dirty" : false, - "encoding" : "", - "folds" : "", - 
"hash" : "0", - "id" : "D49EE59C", - "lastKnownWriteTime" : 140551864679664, - "last_content_update" : 1622538242843, - "path" : null, - "project_path" : null, - "properties" : { - "cacheKey" : "4C070D0B", - "caption" : "genos", - "contentUrl" : "grid_resource/gridviewer.html?env=&obj=genos&cache_key=4C070D0B", - "displayedObservations" : 1957993, - "environment" : "", - "expression" : "genos", - "object" : "genos", - "preview" : 0, - "totalObservations" : 1957993, - "variables" : 11 - }, - "read_only" : false, - "read_only_alternatives" : [ - ], - "relative_order" : 2, - "source_on_save" : false, - "source_window" : "", - "type" : "r_dataframe" -} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata b/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata deleted file mode 100644 index c7f59bfdcb70ea156f7c5abe0a7f47d4bdd858f8..0000000000000000000000000000000000000000 Binary files a/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata and /dev/null differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata b/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata deleted file mode 100644 index 6d9cbeb3f64ff88d5fcca84cc5ff97afd9cbb75f..0000000000000000000000000000000000000000 Binary files a/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata and /dev/null differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata b/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata deleted file mode 100644 index ad0ad30a49aa244ba94b04ed2893c0739e7226ae..0000000000000000000000000000000000000000 Binary files a/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata and /dev/null differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json 
b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json index 131e086e4826cd672b68c9364909576dad9ae87b..d8a6599efd36ca83763410009f57390ee063abbf 100644 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json @@ -1 +1 @@ -{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly 
ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622538645,"working_dir":null} \ No newline at end of file +{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvmghh26lmpwd","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no
_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622642681,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000012.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000014.csv similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000012.csv rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000014.csv diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.metadata 
b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.metadata similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.metadata rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.metadata diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.rdf similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.rdf rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.rdf diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json index e3364a47c8c6f23c449ad55269d52ead3facee93..d8a6599efd36ca83763410009f57390ee063abbf 100644 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json @@ -1 +1 @@ 
-{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"doc_write_time":1622538645} \ No newline at end of file +{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvmghh26lmpwd","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chun
k_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622642681,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv deleted file mode 100644 index e9239946eb49c9294b861127284a8f387ff55d1b..0000000000000000000000000000000000000000 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv +++ /dev/null @@ -1,22 +0,0 @@ -"0","annot_mini <- read.csv(url(""https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"")) -" -"2","Warning messages: -" -"2","1: " -"2","In doTryCatch(return(expr), name, parentenv, handler) :" -"2"," - " -"2"," fermeture de la connexion inutilisée 5 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv) 
-" -"2","2: " -"2","In doTryCatch(return(expr), name, parentenv, handler) :" -"2"," - " -"2"," fermeture de la connexion inutilisée 4 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv) -" -"2","3: " -"2","In doTryCatch(return(expr), name, parentenv, handler) :" -"2"," - " -"2"," fermeture de la connexion inutilisée 3 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv) -" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..d60d661c6e52cccece20691efd360d25d764c5b0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv @@ -0,0 +1 @@ +"0","annot_mini <- read.csv(url(""https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv""))" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv deleted file mode 100644 index 013157e7e1147f09fc88225a6e3fea3880398082..0000000000000000000000000000000000000000 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv +++ /dev/null @@ -1,66 +0,0 @@ -"0","data(stuart_tab)" -"0","summary(stuart_tab)" -"1","" -"1"," SNP.Name " -"1"," Allele_1 " -"1"," Allele_2 " -"1"," n_HM1 " -"1"," n_HM2 " -"1"," n_HT " -"1"," n_NA " -"1"," -" -"1"," Length:11125 " -"1"," Length:11125 " -"1"," Length:11125 " -"1"," Min. : 0.0 " -"1"," Min. : 0.00 " -"1"," Min. : 0.00 " -"1"," Min. 
: 0.00 " -"1"," -" -"1"," Class :character " -"1"," Class :character " -"1"," Class :character " -"1"," 1st Qu.: 44.0 " -"1"," 1st Qu.: 0.00 " -"1"," 1st Qu.: 0.00 " -"1"," 1st Qu.: 0.00 " -"1"," -" -"1"," Mode :character " -"1"," Mode :character " -"1"," Mode :character " -"1"," Median :174.0 " -"1"," Median : 0.00 " -"1"," Median : 0.00 " -"1"," Median : 1.00 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," Mean :123.9 " -"1"," Mean : 19.92 " -"1"," Mean : 19.24 " -"1"," Mean : 12.91 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," 3rd Qu.:176.0 " -"1"," 3rd Qu.: 34.00 " -"1"," 3rd Qu.: 5.00 " -"1"," 3rd Qu.: 5.00 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," Max. :176.0 " -"1"," Max. :175.00 " -"1"," Max. :176.00 " -"1"," Max. :176.00 " -"1"," -" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv new file mode 100644 index 0000000000000000000000000000000000000000..8f499aea3ff86e63d5beeafd589aa8cf86622e3a --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv @@ -0,0 +1,4 @@ +"0","tab <- mark_tab(genos) +" +"2","Error in mark_tab(genos) : impossible de trouver la fonction ""mark_tab"" +" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error new file mode 100644 index 0000000000000000000000000000000000000000..3aaf4b2b5b4ec1243e367bb27f235ba6ebf71da0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error @@ -0,0 +1 @@ +{"frames":[],"message":"Error in mark_tab(genos) : impossible de trouver la fonction \"mark_tab\"\n"} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index f9ea644cfa136d27e66b59656b2c94c427126af1..7779f200af63a93c07e730a2f2b07fa1992b2f1b 100644 --- a/.Rproj.user/shared/notebooks/paths +++ 
b/.Rproj.user/shared/notebooks/paths @@ -1 +1,6 @@ +/Users/mariebourdon/stuart_package/stuart/NAMESPACE="6A276B5" +/Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5" +/Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B" +/Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F" +/Users/mariebourdon/stuart_package/stuart/doc/stuaRt.R="E6241391" /Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD" diff --git a/R/geno_strains.R b/R/geno_strains.R index a0ac113461536b3410adb9bc4e176c06600024a6..d4104d0f3af4780745e4939ce649d7e90e8c995e 100755 --- a/R/geno_strains.R +++ b/R/geno_strains.R @@ -16,14 +16,21 @@ #' @export #' geno_strains <- function(ref,geno,par1,par2,name1,name2){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + + #recode genotypes from 2 alleles to 1 geno <- geno %>% mutate_all(as.character) - geno <- geno %>% filter(Sample.ID %in% c(par1,par2)) - geno <- geno %>% mutate(Geno=case_when(Allele1...Forward == "-" | Allele2...Forward == "-" ~ "N", - Allele1...Forward == Allele2...Forward ~ Allele1...Forward, - Allele1...Forward %in% c("A","T","G","C") & Allele2...Forward %in% c("A","T","G","C") ~ "H")) + geno <- geno %>% filter(id %in% c(par1,par2)) + geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", + allele_1 == allele_2 ~ allele_1, + allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) %>% pivot_wider(names_from = Sample.ID, values_from = Geno) + geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) #create consensus @@ -39,10 +46,10 @@ geno_strains <- function(ref,geno,par1,par2,name1,name2){ geno <- geno %>% rename(parent2=!!sym(par2[1])) } - geno <- geno %>% select(SNP.Name,parent1,parent2) - colnames(geno) <- c("SNP.Name",name1,name2) + geno <- geno %>% select(marker,parent1,parent2) + 
colnames(geno) <- c("marker",name1,name2) #merge with ref file - ref <- full_join(ref,geno,by=c("marker"="SNP.Name")) + ref <- full_join(ref,geno,by=c("marker"="marker")) return(ref) } diff --git a/R/mark_allele.R b/R/mark_allele.R index b679916993328de1dfd7b34b3325bf8066424b8e..f83d3ac4ea31d973b58101efc2b7f0862fb8714a 100755 --- a/R/mark_allele.R +++ b/R/mark_allele.R @@ -22,21 +22,21 @@ mark_allele <- function(tab,ref,par1,par2){ #join tab and ref genotypes ref <- ref %>% select(marker,!!sym(par1),!!sym(par2)) - tab <- full_join(tab,ref,by=c("SNP.Name"="marker")) + tab <- full_join(tab,ref,by=c("marker"="marker")) #function core - tab <- tab %>% mutate(exclude_allele = case_when(is.na(Allele_2)==FALSE & + tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==FALSE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==FALSE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1, - is.na(Allele_2)==TRUE & + ((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) & (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1, + is.na(allele_2)==TRUE & !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" & - (Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1, - is.na(Allele_2)==TRUE & + (allele_1!=!!sym(par1) | allele_1!=!!sym(par2)) ~ 1, + is.na(allele_2)==TRUE & (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") & - Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1, + allele_1!=!!sym(par1) & allele_1!=!!sym(par2) ~ 1, T ~ 0) ) diff --git a/R/mark_match.R b/R/mark_match.R index 
8781242a7568eb4081bcd0170f5ff17a049f2358..e1c38342708d568c92282dd02bf7938859b2d1d0 100755 --- a/R/mark_match.R +++ b/R/mark_match.R @@ -13,12 +13,12 @@ mark_match <- function(tab, #tab_mark df #finds SNPs that are in both files: snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file - snp_genfile <- as.character(tab$SNP.Name) #extracts SNPs in cross geno file + snp_genfile <- as.character(tab$marker) #extracts SNPs in cross geno file snp <- intersect(snp_strains,snp_genfile) #take intercept #add results in exclude column - return(tab %>% mutate(exclude_match=ifelse(!SNP.Name %in% snp, + return(tab %>% mutate(exclude_match=ifelse(!marker %in% snp, 1, 0))) diff --git a/R/mark_poly.R b/R/mark_poly.R index 168ccf3c35f28348477d6700238dc487666aae6a..170f601676934dbc84ae76f5dca7aa9967547b78 100755 --- a/R/mark_poly.R +++ b/R/mark_poly.R @@ -7,7 +7,7 @@ #' #' @export mark_poly <- function(tab){ - return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE, + return(tab %>% mutate(exclude_poly=ifelse(is.na(allele_2)==TRUE, 1, 0))) } diff --git a/R/ref_strains_mini-data.R b/R/ref_strains_mini-data.R deleted file mode 100755 index 3fdffa9ee1a5665666e7c7b9c25aa92d3d144cbf..0000000000000000000000000000000000000000 --- a/R/ref_strains_mini-data.R +++ /dev/null @@ -1,29 +0,0 @@ -#' Data frame with miniMUGA genotyping of classical lab strains. -#' -#' A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur. 
-#' -#' @format A data frame with 11299 rows and 18 variables -#' \describe{ -#' \item{CC001}{CC001 mouse strain} -#' \item{CC005}{CC005 mouse strain} -#' \item{CC042}{CC042 mouse strain} -#' \item{CC071}{CC071 mouse strain} -#' \item{Ifnar.KO.129}{Ifnar KO 129 mouse strain} -#' \item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain} -#' \item{Rvfs2.1}{Rvfs2-1 mouse strain} -#' \item{Rvfs2.2}{Rvfs2-2 mouse strain} -#' \item{Rvfs2.6}{Rvfs2-6 mouse strain} -#' \item{Rvfs2.7}{Rvfs2-7 mouse strain} -#' \item{marker}{name of the marker} -#' \item{chr}{chromosome} -#' \item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)} -#' \item{cM_cox}{localisation on chromosome in cM (from Cox et al.)} -#' \item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)} -#' \item{snp}{marker alleles} -#' \item{unique}{indicates if the marker maps uniquely on mm10} -#' \item{multi}{indicates if the marker maps more than one time on mm10} -#' \item{unmapped}{indicates if the marker does not map perfectly on mm10} -#' } - - -"ref_strains_mini" diff --git a/R/stuart_tab-data.R b/R/stuart_tab-data.R index a5e0d4fb13d058c8b8147662389812fa7c300f28..fcb71e4035488add00e88581add3dd5e39109914 100644 --- a/R/stuart_tab-data.R +++ b/R/stuart_tab-data.R @@ -4,9 +4,9 @@ #' #' @format A data frame with 11125 rows and 7 variables #' \describe{ -#' \item{SNP.Name}{name of the marker} -#' \item{Allele_1}{first allele of the marker} -#' \item{Allele_2}{second allele of the marker} +#' \item{marker}{name of the marker} +#' \item{allele_1}{first allele of the marker} +#' \item{allele_2}{second allele of the marker} #' \item{n_HM1}{number of homozygous individuals for the first allele} #' \item{n_HM2}{number of homozygous individuals for the second allele} #' \item{n_HT}{number of heterozygous individuals} diff --git a/R/tab_mark.R b/R/tab_mark.R index 612c27db930cb54d481e09a9a7ab5efd4912420f..95f4e184b34f860c2f5fee3e2cb8d4cf2fbc1fa8 100755 --- a/R/tab_mark.R +++ b/R/tab_mark.R @@ -12,8 +12,14 @@ #### 
tab_mark function #### ## create table with markers and counts tab_mark <- function(geno){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #create geno column in geno df - geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE) + geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE) #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") geno <- geno %>% mutate(Geno=recode(Geno, @@ -26,9 +32,9 @@ tab_mark <- function(geno){ #create df with counts for each genotype - df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)), - Allele_1 = NA, - Allele_2 = NA, + df_count <- tibble(marker = as.character(unique(geno$marker)), + allele_1 = NA, + allele_2 = NA, n_HM1 = NA, n_HM2 = NA, n_HT = NA, @@ -36,11 +42,11 @@ tab_mark <- function(geno){ ## loop to count genotype - for(i in df_count$SNP.Name){ + for(i in df_count$marker){ #extract alleles for each marker - Alleles <- geno %>% filter(SNP.Name==i) %>% - select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>% - pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>% + Alleles <- geno %>% filter(marker==i) %>% + select(c(marker,id,Geno,allele_1,allele_2)) %>% + pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>% distinct(Allele) %>% filter(Allele != "-") Alleles <- as.factor(paste(Alleles$Allele)) @@ -52,19 +58,19 @@ tab_mark <- function(geno){ if(all(rapportools::is.empty(Alleles))==FALSE){ #add alleles to df_count - df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i, - paste(Alleles[1]), Allele_1)) + df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i, + paste(Alleles[1]), allele_1)) #count for homozygous for allele 1 - n1 <- geno %>% filter(SNP.Name==i) %>% + n1 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% 
summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i, n1$n, n_HM1)) @@ -72,55 +78,55 @@ tab_mark <- function(geno){ #if marker not polymorphic if(is.na(Alleles[2])==TRUE){ - #NA as Allele_2 - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - NA, Allele_2)) + #NA as allele_2 + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + NA, allele_2)) #NA as n_HM2 - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, NA, n_HM2)) #NA as n_HT - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, NA, n_HT)) } else { #add alleles to df_count - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - paste(Alleles[2]), Allele_2)) + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + paste(Alleles[2]), allele_2)) #count for homozygous for allele 2 - n2 <- geno %>% filter(SNP.Name==i) %>% + n2 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, n2$n, n_HM2)) #count for heterozygous - n3 <- geno %>% filter(SNP.Name==i) %>% + n3 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, n3$n, n_HT)) } #count for NA - n4 <- geno %>% filter(SNP.Name==i) %>% + n4 <- geno %>% filter(marker==i) %>% filter(Geno == "--" | Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | Geno == 
paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% summarise(n=n()) #add count for NA to df_count - df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_NA = ifelse(marker == i, n4$n, n_NA)) } #change class of counts as numeric : diff --git a/R/write_rqtl.R b/R/write_rqtl.R index cbfc05505af72a38887f69cdef97707b5d8e8bf1..0926c5c3a44d009a46244ee5d52835afb1309978 100755 --- a/R/write_rqtl.R +++ b/R/write_rqtl.R @@ -21,6 +21,12 @@ #### write_rqtl #### ## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #extract snps non excluded if("exclude_match" %in% colnames(tab)){ tab <- tab %>% filter(exclude_match==0) @@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #filter genotypes for non excluded markers in geno file - geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name) + geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) #recode parents' names to match column names nomenclature par1 <- make.names(par1) @@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) #merge genotypes with parents - geno <- left_join(geno,ref,by=c("SNP.Name"="marker")) + geno <- left_join(geno,ref,by=c("marker"="marker")) #recode "-" in "N" in geno file - geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward, + geno <- geno %>% mutate(allele_1 = recode(allele_1, "-" = "N")) - geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward, + geno <- geno %>% mutate(allele_2 = recode(allele_2, "-" = "N")) #recode geno in factors with same levels - 
geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T"))) - geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) + geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T"))) #recode genotypes depending on parents' genotypes geno <- geno %>% mutate(Geno = case_when( #if one allele not genotyped: - Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA", + allele_1=="N" | allele_2=="N" ~ "NA", #if both alleles genotyped ##homozygous 0 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0", + allele_1==allele_2 & allele_1==!!sym(par1) ~ "0", ##homozygous 2 - Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2", + allele_1==allele_2 & allele_1==!!sym(par2) ~ "2", ##heterozygous - Allele1...Forward!=Allele2...Forward ~ "1", + allele_1!=allele_2 ~ "1", #if parental strains are N/H ##homozygous for parent that is N/H @@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){ #keep positions of markers - markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct() + markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct() markers <- markers %>% arrange(chr,!!sym(pos)) #keep only interesting columns in geno file geno <- geno %>% arrange(chr,!!sym(pos)) - geno <- geno %>% select(SNP.Name,Sample.ID,Geno) + geno <- geno %>% select(marker,id,Geno) #remove prefix - geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix)) + geno <- geno %>% mutate(id=str_remove(id,prefix)) #keep only non excluded markers and merge with positions - markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name)) + markers <- markers %>% mutate(marker=as.character(marker)) markers <- markers %>% mutate(chr=as.character(chr)) - geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% 
full_join(.,geno,by="SNP.Name") + geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker") #pivoting - geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",") - geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID)) - geno <- geno %>% rename("Sample.ID,,"=Sample.ID) + geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",") + geno <- geno %>% mutate(id=as.character(id)) + geno <- geno %>% rename("id,,"=id) #merge with phenotype file pheno <- pheno %>% mutate_all(as.character) colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,")) + qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,")) #prepare file qtl_file <- rbind(colnames(qtl_file),qtl_file) diff --git a/data/ref_strains_mini.rda b/data/ref_strains_mini.rda deleted file mode 100755 index e5673c4faec5cb1bc70625c468ebeba059709c02..0000000000000000000000000000000000000000 Binary files a/data/ref_strains_mini.rda and /dev/null differ diff --git a/data/stuart_tab.rda b/data/stuart_tab.rda index b8b8e8f4ec7299f686eb136dc11793732cee6dd5..2fc099ceef03b0a32a1b3929ed785f48bb2ee821 100644 Binary files a/data/stuart_tab.rda and b/data/stuart_tab.rda differ diff --git a/man/ref_strains_mini.Rd b/man/ref_strains_mini.Rd deleted file mode 100755 index e9117d1075cc3823b7511e4b219a5656a5cf348a..0000000000000000000000000000000000000000 --- a/man/ref_strains_mini.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ref_strains_mini-data.R -\docType{data} -\name{ref_strains_mini} -\alias{ref_strains_mini} -\title{Data frame with miniMUGA genotyping of classical lab strains.} -\format{ -A data frame with 11299 rows and 18 variables -\describe{ -\item{CC001}{CC001 mouse strain} -\item{CC005}{CC005 mouse strain} -\item{CC042}{CC042 mouse strain} -\item{CC071}{CC071 mouse strain} 
-\item{Ifnar.KO.129}{Ifnar KO 129 mouse strain} -\item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain} -\item{Rvfs2.1}{Rvfs2-1 mouse strain} -\item{Rvfs2.2}{Rvfs2-2 mouse strain} -\item{Rvfs2.6}{Rvfs2-6 mouse strain} -\item{Rvfs2.7}{Rvfs2-7 mouse strain} -\item{marker}{name of the marker} -\item{chr}{chromosome} -\item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)} -\item{cM_cox}{localisation on chromosome in cM (from Cox et al.)} -\item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)} -\item{snp}{marker alleles} -\item{unique}{indicates if the marker maps uniquely on mm10} -\item{multi}{indicates if the marker maps more than one time on mm10} -\item{unmapped}{indicates if the marker does not map perfectly on mm10} -} -} -\usage{ -ref_strains_mini -} -\description{ -A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur. -} -\keyword{datasets} diff --git a/stuart_0.1.0.pdf b/stuart_0.1.0.pdf index f9ea2268c3cc25004f91a18c5cf147e5cbdf9992..d491cf6b1adcae0e1263384dac12110650ad862c 100644 Binary files a/stuart_0.1.0.pdf and b/stuart_0.1.0.pdf differ diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz index 16a4e742ef6458742380c88032957e328b217275..0574719a0ca2ebd5501a2b4295da59a4b5a87746 100644 Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index 92709f484cb21eb508c53c98fe3f975f8e90ee70..e73872a8f734ba8177b36fc84e013586614d4848 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -17,11 +17,11 @@ knitr::opts_chunk$set( Marie Bourdon -April 2021 +June 2021 ## Goal -stuart is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. 
It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc. +stuart is an R package which formats results of genotyping. It was developed to analyse data from MUGA arrays (Neogen) for Rqtl analysis, for backcross or F2 crosses, but can be used to analyse data of other laboratory animal strains with other arrays. It allows filtering the markers in arrays, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondence between F2 or N2 individuals alleles and parental strains alleles, etc. The examples shown here require the use of dplyr package. @@ -34,17 +34,24 @@ library(stuart) ## Annotation files +In order to map the markers on the genome of the individuals, you need to load a table with the position of all markers in the array. The data frame must contain the following columns: `marker` with the marker names, `chr` with the chromosome of each marker, and a column with the position of the marker on the chromosome. For Rqtl analysis, you need to provide positions in cM. The data frame can contain other columns that you judge helpful. + The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations).
We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R. -Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuart package. ```{r annot} annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) ``` +Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. Example genotype and phenotype data frames are available in the stuart package. + +The genotype data frame must contain a first column with marker names, a second column with sample IDs, a third column with the first allele and a fourth column with the second allele. This format corresponds to the MUGA results. If your data differ, make sure to have these columns in this order. + +We load the result of Neogen genotyping: `genos` (only useful columns with marker name, sample ID and alleles were kept) and the phenotype dataset produced by the lab: `phenos`. + ```{r load} data(genos) @@ -84,7 +91,7 @@ data(stuart_tab) summary(stuart_tab) ``` -Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change. +Then we will use the different mark_* functions in order to filter the markers. First, we can use `mark_match()` function.
Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. The `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change. ```{r mark_match} tab2 <- mark_match(stuart_tab,ref=strains) @@ -95,28 +102,28 @@ tab2 %>% filter(exclude_match==1) Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded. -Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic. +Then, we can use the `mark_poly()` function, which will exclude the markers that are not polymorphic. ```{r mark_poly ex} tab2 <- mark_poly(tab2) head(tab2) ``` -The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped. +The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotypes. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows filtering markers depending on the proportion of non-genotyped animals. By default, markers for which more than 50% of individuals were not genotyped are excluded.
```{r mark_prop ex} tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) head(tab2) ``` -Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. +Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. ```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") tab2 %>% arrange(desc(exclude_allele)) %>% head() ``` -Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains. +Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains. ```{r mark_allele-strains} strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2) @@ -124,7 +131,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g # Creation of the R/qtl file -After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. +After excluding the problematic markers, we can create the R/qtl file. 
The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to achieve this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can load with `qtl::read.cross`. ```{r write_qtl} rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")