diff --git a/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper b/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper new file mode 100644 index 0000000000000000000000000000000000000000..5528aea2ba53c044f500e2c6c10b5679ec78cffc --- /dev/null +++ b/.Rproj.user/9DAE6990/pcs/debug-breakpoints.pper @@ -0,0 +1,6 @@ +{ + "debugBreakpointsState" : { + "breakpoints" : [ + ] + } +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/source-pane.pper b/.Rproj.user/9DAE6990/pcs/source-pane.pper index 3249574fb10b23a53547595769fe33dcbefc247e..1743e40fec30e357993d33f5cb053bf027524dc8 100644 --- a/.Rproj.user/9DAE6990/pcs/source-pane.pper +++ b/.Rproj.user/9DAE6990/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : 2 + "activeTab" : 0 } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper index 51a8bddc2368de4b56205ec1bcaf88f0626afb6d..bc18be10f83a2abcc1dc870217f08f663eef0adf 100644 --- a/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper +++ b/.Rproj.user/9DAE6990/pcs/windowlayoutstate.pper @@ -1,14 +1,14 @@ { "left" : { - "panelheight" : 689, - "splitterpos" : 290, + "panelheight" : 1271, + "splitterpos" : 533, "topwindowstate" : "NORMAL", - "windowheight" : 727 + "windowheight" : 1309 }, "right" : { - "panelheight" : 689, - "splitterpos" : 436, + "panelheight" : 1271, + "splitterpos" : 804, "topwindowstate" : "NORMAL", - "windowheight" : 727 + "windowheight" : 1309 } } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/pcs/workbench-pane.pper b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper index 3eb507b3b7673752c5fcdc0c6072023af93d1b7e..dd00eb66589a93dc361ab2526cb1a958c2b41bcb 100644 --- a/.Rproj.user/9DAE6990/pcs/workbench-pane.pper +++ b/.Rproj.user/9DAE6990/pcs/workbench-pane.pper @@ -1,6 +1,6 @@ { "TabSet1" : 3, - "TabSet2" : 3, + "TabSet2" : 4, "TabZoom" : { } } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/persistent-state b/.Rproj.user/9DAE6990/persistent-state index 5dca5416de0070e60c804242c2243fc7336ef63b..9d859cb1b1a467561a68e55e57eae088ad344f30 100644 --- a/.Rproj.user/9DAE6990/persistent-state +++ b/.Rproj.user/9DAE6990/persistent-state @@ -1,6 +1,6 @@ build-last-errors="[]" build-last-errors-base-dir="~/stuart_package/stuart/" -build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" +build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source stuart\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library’\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing *source* package ‘stuart’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** inst\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (stuart)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" files.monitored-path="" find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" diff --git a/.Rproj.user/9DAE6990/rmd-outputs b/.Rproj.user/9DAE6990/rmd-outputs index 3f2ff2d6cc8f257ffcade7ead1ca4042c0e884b9..dd18eccd1a62da6163f019b613c98281f6147ebc 100644 --- a/.Rproj.user/9DAE6990/rmd-outputs +++ b/.Rproj.user/9DAE6990/rmd-outputs @@ -1,5 +1,7 @@ +/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/Rtmp3VMULh/preview-1ced414f7aab.dir/stuaRt.html +/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/Rtmp3VMULh/preview-1ced48fe920c.dir/stuaRt.html - +/private/var/folders/dn/j71yz2tn5_gdffs8fqxhddrr0000gn/T/RtmpNme5vw/preview-4c4321234d03.dir/stuaRt.html diff --git a/.Rproj.user/9DAE6990/sources/prop/7411866 b/.Rproj.user/9DAE6990/sources/prop/7411866 new file mode 100644 index 0000000000000000000000000000000000000000..5ba20d4665c8e92b885b11b7eb579f858494350a --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/prop/7411866 @@ -0,0 +1,4 @@ +{ + "cursorPosition" : "8,12", + "scrollLine" : "0" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/7FA3B215 b/.Rproj.user/9DAE6990/sources/prop/7FA3B215 index c951d78668261554276d96efbe60d26cfb9de090..94c7b77269eb489932ce52fa9bfafa4d9499431d 100644 --- a/.Rproj.user/9DAE6990/sources/prop/7FA3B215 +++ b/.Rproj.user/9DAE6990/sources/prop/7FA3B215 @@ -1,5 +1,5 @@ { - "cursorPosition" : "38,30", - "scrollLine" : "25", + "cursorPosition" : "43,27", + "scrollLine" : "29", "source_window_id" : "" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/8F7B714A b/.Rproj.user/9DAE6990/sources/prop/8F7B714A index 9f60283e46214e9b88630e93870613833e66dcac..178047270b28ae5a314194a4d204cf820f8cc815 100644 --- a/.Rproj.user/9DAE6990/sources/prop/8F7B714A +++ b/.Rproj.user/9DAE6990/sources/prop/8F7B714A @@ -1,4 +1,4 @@ { - "cursorPosition" : "34,0", - "scrollLine" : "21" + "cursorPosition" : "22,39", + "scrollLine" : "7" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 index 600309fb192ff468987ccbe8766414d03b4905d8..1b4759ca6abaa64809f32a79600c59dd5bce738e 100644 --- a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 +++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4 @@ -1,5 +1,5 @@ { - "cursorPosition" : "78,3", + "cursorPosition" : "89,0", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "63" + "scrollLine" : "74" } \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX index c6af6e3d2d6a09efc4edfc266fc0ef9609fb5bc5..b5e12590bd67d9b3c876796a41d7a702c54e6a41 100644 --- a/.Rproj.user/9DAE6990/sources/prop/INDEX +++ b/.Rproj.user/9DAE6990/sources/prop/INDEX @@ -6,6 +6,7 @@ ~%2Fstuart_package%2Fstuart%2FR%2Fmark_match.R="75F49619" ~%2Fstuart_package%2Fstuart%2FR%2Fmark_poly.R="3A328548" ~%2Fstuart_package%2Fstuart%2FR%2Fmark_prop.R="A609F054" +~%2Fstuart_package%2Fstuart%2FR%2Fstuart_tab-data.R="7411866" ~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215" ~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7" ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3 b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 similarity index 66% rename from .Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3 rename to .Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 index dce483d417ea42db70cc869b57eed8ef24b40124..7aaecc2f4049a5605d0b04da4901da8698b401ce 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55 @@ -1,24 +1,24 @@ { "collab_server" : "", "contents" : "", - "created" : 1622636127574.000, + "created" : 1622641774018.000, "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "0", - "id" : "907DF1D3", + "hash" : "1203844341", + "id" : "1656F55", "lastKnownWriteTime" : 1622640147, - "last_content_update" : 1622640147854, + "last_content_update" : 1622640147, "path" : "~/stuart_package/stuart/R/geno_strains.R", "project_path" : "R/geno_strains.R", "properties" : { - "cursorPosition" : "34,0", - "scrollLine" : "21" + "cursorPosition" : "22,39", + "scrollLine" : "7" }, "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 2, + "relative_order" : 4, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents new file mode 100644 index 0000000000000000000000000000000000000000..d4104d0f3af4780745e4939ce649d7e90e8c995e --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/1656F55-contents @@ -0,0 +1,55 @@ +#' @title Create haplotype for a new mouse strain into a reference dataframe +#' +#' @description This functions adds columns for parental strains used in the cross in the annotation data frame, from the genotype data frame in which one or several animal of the parental strains were genotyped. +#' If several animals of one strain were genotyped, a consensus is created from these animals. +#' The consensus is created as follow : if the indivuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H". +#' @param ref data frame with the reference genotypes of mouse lines +#' @param geno data frame with the genotyping results for your cross from miniMUGA array +#' @param par1 first parental strain used in the cross, the name must be written as in the geno data frame +#' @param par2 second parental strain used in the cross, the name must be written as in the geno data frame +#' @param name1 name of the first parental strain to use as the column name in the ref data frame +#' @param name2 name of the second parental strain to use as the column name in the ref data frame +#' +#' @import dplyr +#' @import tidyr +#' +#' @export +#' +geno_strains <- function(ref,geno,par1,par2,name1,name2){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + + + #recode genotypes from 2 alleles to 1 + geno <- geno %>% mutate_all(as.character) + geno <- geno %>% filter(id %in% c(par1,par2)) + geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N", + allele_1 == allele_2 ~ allele_1, + allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H")) + + geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno) + + + #create consensus + if(length(par1)!=1){ + geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N")) + } else { + geno <- geno %>% rename(parent1=!!sym(par1[1])) + } + + if(length(par2)!=1){ + geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N")) + } else { + geno <- geno %>% rename(parent2=!!sym(par2[1])) + } + + geno <- geno %>% select(marker,parent1,parent2) + colnames(geno) <- c("marker",name1,name2) + + #merge with ref file + ref <- full_join(ref,geno,by=c("marker"="marker")) + return(ref) +} diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345 b/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345 new file mode 100644 index 0000000000000000000000000000000000000000..3d4ffec7d1a62ab16711b90287eab022e3216334 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345 @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622641680399.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "17AE345", + "lastKnownWriteTime" : 140481884136632, + "last_content_update" : 1622641680399, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "E58FACDB", + "caption" : "stuart_tab", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=stuart_tab&cache_key=E58FACDB", + "displayedObservations" : 11125, + "environment" : "", + "expression" : "stuart_tab", + "object" : "stuart_tab", + "preview" : 0, + "totalObservations" : 11125, + "variables" : 7 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 2, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/17AE345-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 index d1aa6bb250c6ae441fb0f74cda1c05bd75f9c6b7..1182daeb619e55806e8083e2f024997371e73f0c 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 @@ -16,7 +16,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 7, + "relative_order" : 9, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 index 48318c85d383a55f9cdaf128250d2a2e36637f50..7b1630989ded63aeb84eee2ab57468fa86e768a2 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 @@ -18,7 +18,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 9, + "relative_order" : 11, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B index 973ea910f5a3794d58e3d19c741e359421fe09b3..51dacef1135242c12e552e8bc12afea49f8f3133 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B @@ -5,21 +5,21 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "1306976036", + "hash" : "1762622592", "id" : "65C9B72B", - "lastKnownWriteTime" : 1622462353, - "last_content_update" : 1622462353, + "lastKnownWriteTime" : 1622642120, + "last_content_update" : 1622642120783, "path" : "~/stuart_package/stuart/R/tab_mark.R", "project_path" : "R/tab_mark.R", "properties" : { - "cursorPosition" : "38,30", - "scrollLine" : "25", + "cursorPosition" : "43,27", + "scrollLine" : "29", "source_window_id" : "" }, "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 3, + "relative_order" : 5, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents index 612c27db930cb54d481e09a9a7ab5efd4912420f..95f4e184b34f860c2f5fee3e2cb8d4cf2fbc1fa8 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents @@ -12,8 +12,14 @@ #### tab_mark function #### ## create table with markers and counts tab_mark <- function(geno){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #create geno column in geno df - geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE) + geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE) #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") geno <- geno %>% mutate(Geno=recode(Geno, @@ -26,9 +32,9 @@ tab_mark <- function(geno){ #create df with counts for each genotype - df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)), - Allele_1 = NA, - Allele_2 = NA, + df_count <- tibble(marker = as.character(unique(geno$marker)), + allele_1 = NA, + allele_2 = NA, n_HM1 = NA, n_HM2 = NA, n_HT = NA, @@ -36,11 +42,11 @@ tab_mark <- function(geno){ ## loop to count genotype - for(i in df_count$SNP.Name){ + for(i in df_count$marker){ #extract alleles for each marker - Alleles <- geno %>% filter(SNP.Name==i) %>% - select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>% - pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>% + Alleles <- geno %>% filter(marker==i) %>% + select(c(marker,id,Geno,allele_1,allele_2)) %>% + pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>% distinct(Allele) %>% filter(Allele != "-") Alleles <- as.factor(paste(Alleles$Allele)) @@ -52,19 +58,19 @@ tab_mark <- function(geno){ if(all(rapportools::is.empty(Alleles))==FALSE){ #add alleles to df_count - df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i, - paste(Alleles[1]), Allele_1)) + df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i, + paste(Alleles[1]), allele_1)) #count for homozygous for allele 1 - n1 <- geno %>% filter(SNP.Name==i) %>% + n1 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i, n1$n, n_HM1)) @@ -72,55 +78,55 @@ tab_mark <- function(geno){ #if marker not polymorphic if(is.na(Alleles[2])==TRUE){ - #NA as Allele_2 - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - NA, Allele_2)) + #NA as allele_2 + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + NA, allele_2)) #NA as n_HM2 - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, NA, n_HM2)) #NA as n_HT - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, NA, n_HT)) } else { #add alleles to df_count - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - paste(Alleles[2]), Allele_2)) + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + paste(Alleles[2]), allele_2)) #count for homozygous for allele 2 - n2 <- geno %>% filter(SNP.Name==i) %>% + n2 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, n2$n, n_HM2)) #count for heterozygous - n3 <- geno %>% filter(SNP.Name==i) %>% + n3 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, n3$n, n_HT)) } #count for NA - n4 <- geno %>% filter(SNP.Name==i) %>% + n4 <- geno %>% filter(marker==i) %>% filter(Geno == "--" | Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% summarise(n=n()) #add count for NA to df_count - df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_NA = ifelse(marker == i, n4$n, n_NA)) } #change class of counts as numeric : diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC index d073da46fdf7205ce479a88abf6977c7b119cce4..669afa63ba7559a7a854776221d12c0cd57d41e0 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC @@ -16,7 +16,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 5, + "relative_order" : 7, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E b/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E new file mode 100644 index 0000000000000000000000000000000000000000..ad4597a11476cab0ef8a820787d96988365976a3 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622642648310.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "8DB3123E", + "lastKnownWriteTime" : 140647294042113, + "last_content_update" : 1622642648310, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "B33E01E3", + "caption" : "tab", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=tab&cache_key=B33E01E3", + "displayedObservations" : 2000, + "environment" : "", + "expression" : "tab", + "object" : "tab", + "preview" : 0, + "totalObservations" : 2000, + "variables" : 7 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 14, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/8DB3123E-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 index 3f7a50343a4fe7a5bd0c3a2449f07f72de7f3f6f..1b68bc9ef4da3c55cc0937cf143165d551e66719 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 @@ -5,16 +5,16 @@ "dirty" : false, "encoding" : "UTF-8", "folds" : "", - "hash" : "0", + "hash" : "3133824271", "id" : "96AB3736", - "lastKnownWriteTime" : 1622640207, - "last_content_update" : 1622640207996, + "lastKnownWriteTime" : 1622642682, + "last_content_update" : 1622642682378, "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd", "project_path" : "vignettes/stuaRt.Rmd", "properties" : { - "cursorPosition" : "78,3", + "cursorPosition" : "89,0", "last_setup_crc32" : "39B546A65bfca283", - "scrollLine" : "63" + "scrollLine" : "74" }, "read_only" : false, "read_only_alternatives" : [ diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents index de0ce5aceffd208490965bc9efd0d4b4ec8e78fe..123928d6aded4403210eb7a59eeca6c059d7f7f0 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents @@ -86,7 +86,7 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "Strains The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. -```{r tab_mark,eval=F} +```{r tab_mark} data(stuart_tab) summary(stuart_tab) ``` diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A new file mode 100644 index 0000000000000000000000000000000000000000..f1dd731e6dca27456679b529e0b091df748a97ca --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A @@ -0,0 +1,25 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622641730533.000, + "dirty" : false, + "encoding" : "UTF-8", + "folds" : "", + "hash" : "953998608", + "id" : "A6A5703A", + "lastKnownWriteTime" : 1622641746, + "last_content_update" : 1622641746302, + "path" : "~/stuart_package/stuart/R/stuart_tab-data.R", + "project_path" : "R/stuart_tab-data.R", + "properties" : { + "cursorPosition" : "8,12", + "scrollLine" : "0" + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 3, + "source_on_save" : false, + "source_window" : "", + "type" : "r_source" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents new file mode 100644 index 0000000000000000000000000000000000000000..fcb71e4035488add00e88581add3dd5e39109914 --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/A6A5703A-contents @@ -0,0 +1,17 @@ +#' Output of tab_mark function +#' +#' A dataset with the output of tab_mark() function. +#' +#' @format A data frame with 11125 rows and 7 variables +#' \describe{ +#' \item{marker}{name of the marker} +#' \item{allele_1}{first allele of the marker} +#' \item{allele_2}{second allele of the marker} +#' \item{n_HM1}{number of homozygous individuals for the first allele} +#' \item{n_HM2}{number of homozygous individuals for the second allele} +#' \item{n_HT}{number of heterozygous individuals} +#' \item{n_NA}{number of non genotyped individuals} +#' } + + +"stuart_tab" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 index 0069b153fb956ccf9e32d32f2dbbaadf3ba90a0f..ab6c927aeb68ad6a84ea9ff826873d7ecbe2d39d 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 @@ -18,7 +18,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 8, + "relative_order" : 10, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA index defadb8ff5fcdf70afd49209fd05f87fc86b8aa0..5b72e3a7a36f8d69de0503f12879ea24bdec4739 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA @@ -26,7 +26,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 4, + "relative_order" : 6, "source_on_save" : false, "source_window" : "", "type" : "r_dataframe" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 index cd989e9e5e74c809b83e1a70f8fa676bb1689a28..a48b29a0245e5ccb1ab077b14a31fa1bc6efba63 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 @@ -16,7 +16,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 6, + "relative_order" : 8, "source_on_save" : false, "source_window" : "", "type" : "r_source" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 index 925c2cddb67cefd4110ca6a3c2cbdb821e30d3ab..bd85e7ad7d4b50c442dac33ba1e74b7ccfc79637 100644 --- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 @@ -18,7 +18,7 @@ "read_only" : false, "read_only_alternatives" : [ ], - "relative_order" : 10, + "relative_order" : 12, "source_on_save" : false, "source_window" : "", "type" : "dcf" diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F b/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F new file mode 100644 index 0000000000000000000000000000000000000000..c5aba762a2ab65e228efd618fd2292465c82cb7e --- /dev/null +++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F @@ -0,0 +1,33 @@ +{ + "collab_server" : "", + "contents" : "", + "created" : 1622642417308.000, + "dirty" : false, + "encoding" : "", + "folds" : "", + "hash" : "0", + "id" : "E7584E4F", + "lastKnownWriteTime" : -1152921504606846976, + "last_content_update" : 1622642417308, + "path" : null, + "project_path" : null, + "properties" : { + "cacheKey" : "6BE6FDA8", + "caption" : "genos", + "contentUrl" : "grid_resource/gridviewer.html?env=&obj=genos&cache_key=6BE6FDA8", + "displayedObservations" : 1957993, + "environment" : "", + "expression" : "genos", + "object" : "genos", + "preview" : 0, + "totalObservations" : 1957993, + "variables" : 11 + }, + "read_only" : false, + "read_only_alternatives" : [ + ], + "relative_order" : 13, + "source_on_save" : false, + "source_window" : "", + "type" : "r_dataframe" +} \ No newline at end of file diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/E7584E4F-contents new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.Rproj.user/9DAE6990/viewer-cache/862FB1D9.Rdata b/.Rproj.user/9DAE6990/viewer-cache/862FB1D9.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..192c879fc924932c6b6bda4448066e64fbf92a34 Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/862FB1D9.Rdata differ diff --git a/.Rproj.user/9DAE6990/viewer-cache/E58FACDB.Rdata b/.Rproj.user/9DAE6990/viewer-cache/E58FACDB.Rdata new file mode 100644 index 0000000000000000000000000000000000000000..305b55ce0cc1ca9c86f3470f5b973e64bc1c5a0c Binary files /dev/null and b/.Rproj.user/9DAE6990/viewer-cache/E58FACDB.Rdata differ diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json index b86d0151df31c4b6323160db108efc8e5041c995..d8a6599efd36ca83763410009f57390ee063abbf 100644 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json @@ -1 +1 @@ -{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622638573,"working_dir":null} \ No newline at end of file +{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvmghh26lmpwd","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622642681,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000012.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000014.csv similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000012.csv rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000014.csv diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.metadata b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.metadata similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.metadata rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.metadata diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.rdf b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.rdf similarity index 100% rename from .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000013.rdf rename to .Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cc71rfo54vvou/000015.rdf diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json index b86d0151df31c4b6323160db108efc8e5041c995..d8a6599efd36ca83763410009f57390ee063abbf 100644 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json @@ -1 +1 @@ -{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622638573,"working_dir":null} \ No newline at end of file +{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cvmghh26lmpwd","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":45,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622642681,"working_dir":null} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv new file mode 100644 index 0000000000000000000000000000000000000000..d60d661c6e52cccece20691efd360d25d764c5b0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cvmghh26lmpwd/000002.csv @@ -0,0 +1 @@ +"0","annot_mini <- read.csv(url(""https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv""))" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv deleted file mode 100644 index 013157e7e1147f09fc88225a6e3fea3880398082..0000000000000000000000000000000000000000 --- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000002.csv +++ /dev/null @@ -1,66 +0,0 @@ -"0","data(stuart_tab)" -"0","summary(stuart_tab)" -"1","" -"1"," SNP.Name " -"1"," Allele_1 " -"1"," Allele_2 " -"1"," n_HM1 " -"1"," n_HM2 " -"1"," n_HT " -"1"," n_NA " -"1"," -" -"1"," Length:11125 " -"1"," Length:11125 " -"1"," Length:11125 " -"1"," Min. : 0.0 " -"1"," Min. : 0.00 " -"1"," Min. : 0.00 " -"1"," Min. : 0.00 " -"1"," -" -"1"," Class :character " -"1"," Class :character " -"1"," Class :character " -"1"," 1st Qu.: 44.0 " -"1"," 1st Qu.: 0.00 " -"1"," 1st Qu.: 0.00 " -"1"," 1st Qu.: 0.00 " -"1"," -" -"1"," Mode :character " -"1"," Mode :character " -"1"," Mode :character " -"1"," Median :174.0 " -"1"," Median : 0.00 " -"1"," Median : 0.00 " -"1"," Median : 1.00 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," Mean :123.9 " -"1"," Mean : 19.92 " -"1"," Mean : 19.24 " -"1"," Mean : 12.91 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," 3rd Qu.:176.0 " -"1"," 3rd Qu.: 34.00 " -"1"," 3rd Qu.: 5.00 " -"1"," 3rd Qu.: 5.00 " -"1"," -" -"1"," " -"1"," " -"1"," " -"1"," Max. :176.0 " -"1"," Max. :175.00 " -"1"," Max. :176.00 " -"1"," Max. :176.00 " -"1"," -" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv new file mode 100644 index 0000000000000000000000000000000000000000..8f499aea3ff86e63d5beeafd589aa8cf86622e3a --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000005.csv @@ -0,0 +1,4 @@ +"0","tab <- mark_tab(genos) +" +"2","Error in mark_tab(genos) : impossible de trouver la fonction ""mark_tab"" +" diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error new file mode 100644 index 0000000000000000000000000000000000000000..3aaf4b2b5b4ec1243e367bb27f235ba6ebf71da0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/cyqo4jk1414tp/000006.error @@ -0,0 +1 @@ +{"frames":[],"message":"Error in mark_tab(genos) : impossible de trouver la fonction \"mark_tab\"\n"} \ No newline at end of file diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 3a875c8f655b8c166e29857ed66aa4638147112f..987226a929aa70c98c5954e2e9f8ce2716ea30d6 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,4 +1,5 @@ /Users/mariebourdon/stuart_package/stuart/NAMESPACE="6A276B5" +/Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5" /Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B" /Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F" /Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD" diff --git a/R/stuart_tab-data.R b/R/stuart_tab-data.R index a5e0d4fb13d058c8b8147662389812fa7c300f28..fcb71e4035488add00e88581add3dd5e39109914 100644 --- a/R/stuart_tab-data.R +++ b/R/stuart_tab-data.R @@ -4,9 +4,9 @@ #' #' @format A data frame with 11125 rows and 7 variables #' \describe{ -#' \item{SNP.Name}{name of the marker} -#' \item{Allele_1}{first allele of the marker} -#' \item{Allele_2}{second allele of the marker} +#' \item{marker}{name of the marker} +#' \item{allele_1}{first allele of the marker} +#' \item{allele_2}{second allele of the marker} #' \item{n_HM1}{number of homozygous individuals for the first allele} #' \item{n_HM2}{number of homozygous individuals for the second allele} #' \item{n_HT}{number of heterozygous individuals} diff --git a/R/tab_mark.R b/R/tab_mark.R index 612c27db930cb54d481e09a9a7ab5efd4912420f..95f4e184b34f860c2f5fee3e2cb8d4cf2fbc1fa8 100755 --- a/R/tab_mark.R +++ b/R/tab_mark.R @@ -12,8 +12,14 @@ #### tab_mark function #### ## create table with markers and counts tab_mark <- function(geno){ + #rename df columns + geno <- geno %>% rename("marker"=1, + "id"=2, + "allele_1"=3, + "allele_2"=4) + #create geno column in geno df - geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE) + geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE) #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA") geno <- geno %>% mutate(Geno=recode(Geno, @@ -26,9 +32,9 @@ tab_mark <- function(geno){ #create df with counts for each genotype - df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)), - Allele_1 = NA, - Allele_2 = NA, + df_count <- tibble(marker = as.character(unique(geno$marker)), + allele_1 = NA, + allele_2 = NA, n_HM1 = NA, n_HM2 = NA, n_HT = NA, @@ -36,11 +42,11 @@ tab_mark <- function(geno){ ## loop to count genotype - for(i in df_count$SNP.Name){ + for(i in df_count$marker){ #extract alleles for each marker - Alleles <- geno %>% filter(SNP.Name==i) %>% - select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>% - pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>% + Alleles <- geno %>% filter(marker==i) %>% + select(c(marker,id,Geno,allele_1,allele_2)) %>% + pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>% distinct(Allele) %>% filter(Allele != "-") Alleles <- as.factor(paste(Alleles$Allele)) @@ -52,19 +58,19 @@ tab_mark <- function(geno){ if(all(rapportools::is.empty(Alleles))==FALSE){ #add alleles to df_count - df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i, - paste(Alleles[1]), Allele_1)) + df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i, + paste(Alleles[1]), allele_1)) #count for homozygous for allele 1 - n1 <- geno %>% filter(SNP.Name==i) %>% + n1 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i, n1$n, n_HM1)) @@ -72,55 +78,55 @@ tab_mark <- function(geno){ #if marker not polymorphic if(is.na(Alleles[2])==TRUE){ - #NA as Allele_2 - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - NA, Allele_2)) + #NA as allele_2 + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + NA, allele_2)) #NA as n_HM2 - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, NA, n_HM2)) #NA as n_HT - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, NA, n_HT)) } else { #add alleles to df_count - df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i, - paste(Alleles[2]), Allele_2)) + df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i, + paste(Alleles[2]), allele_2)) #count for homozygous for allele 2 - n2 <- geno %>% filter(SNP.Name==i) %>% + n2 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i, n2$n, n_HM2)) #count for heterozygous - n3 <- geno %>% filter(SNP.Name==i) %>% + n3 <- geno %>% filter(marker==i) %>% filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>% summarise(n=n()) #add count for homozygous for allele 1 to df_count - df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_HT = ifelse(marker == i, n3$n, n_HT)) } #count for NA - n4 <- geno %>% filter(SNP.Name==i) %>% + n4 <- geno %>% filter(marker==i) %>% filter(Geno == "--" | Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") | Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>% summarise(n=n()) #add count for NA to df_count - df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i, + df_count <- df_count %>% mutate(n_NA = ifelse(marker == i, n4$n, n_NA)) } #change class of counts as numeric : diff --git a/data/stuart_tab.rda b/data/stuart_tab.rda index b8b8e8f4ec7299f686eb136dc11793732cee6dd5..2fc099ceef03b0a32a1b3929ed785f48bb2ee821 100644 Binary files a/data/stuart_tab.rda and b/data/stuart_tab.rda differ diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz index e1bbd31b1b0d8f51dd273855fe20ed9970f99e97..e4e3cefe63a27dd1f3522e92722e9461ed3d3bc7 100644 Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd index de0ce5aceffd208490965bc9efd0d4b4ec8e78fe..123928d6aded4403210eb7a59eeca6c059d7f7f0 100755 --- a/vignettes/stuaRt.Rmd +++ b/vignettes/stuaRt.Rmd @@ -86,7 +86,7 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "Strains The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. -```{r tab_mark,eval=F} +```{r tab_mark} data(stuart_tab) summary(stuart_tab) ```