diff --git a/.Rproj.user/9DAE6990/sources/prop/1416C0B5 b/.Rproj.user/9DAE6990/sources/prop/1416C0B5
new file mode 100644
index 0000000000000000000000000000000000000000..a6d7a30c22455e70e2ae615bad99a076580ac071
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/1416C0B5
@@ -0,0 +1,4 @@
+{
+    "cursorPosition" : "24,47",
+    "scrollLine" : "2"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/75F49619 b/.Rproj.user/9DAE6990/sources/prop/75F49619
new file mode 100644
index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/75F49619
@@ -0,0 +1,2 @@
+{
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/7FA3B215 b/.Rproj.user/9DAE6990/sources/prop/7FA3B215
new file mode 100644
index 0000000000000000000000000000000000000000..c951d78668261554276d96efbe60d26cfb9de090
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/7FA3B215
@@ -0,0 +1,5 @@
+{
+    "cursorPosition" : "38,30",
+    "scrollLine" : "25",
+    "source_window_id" : ""
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/8F7B714A b/.Rproj.user/9DAE6990/sources/prop/8F7B714A
new file mode 100644
index 0000000000000000000000000000000000000000..9f60283e46214e9b88630e93870613833e66dcac
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/8F7B714A
@@ -0,0 +1,4 @@
+{
+    "cursorPosition" : "34,0",
+    "scrollLine" : "21"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/A609F054 b/.Rproj.user/9DAE6990/sources/prop/A609F054
new file mode 100644
index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/A609F054
@@ -0,0 +1,2 @@
+{
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/C10FF5C8 b/.Rproj.user/9DAE6990/sources/prop/C10FF5C8
new file mode 100644
index 0000000000000000000000000000000000000000..7a73a41bfdf76d6f793007240d80983a52f15f97
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/prop/C10FF5C8
@@ -0,0 +1,2 @@
+{
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/D602FFE4 b/.Rproj.user/9DAE6990/sources/prop/D602FFE4
index af5f2e48960c4e1a4aac4a30a1c7f41c9a094e04..600309fb192ff468987ccbe8766414d03b4905d8 100644
--- a/.Rproj.user/9DAE6990/sources/prop/D602FFE4
+++ b/.Rproj.user/9DAE6990/sources/prop/D602FFE4
@@ -1,5 +1,5 @@
 {
-    "cursorPosition" : "44,0",
+    "cursorPosition" : "78,3",
     "last_setup_crc32" : "39B546A65bfca283",
-    "scrollLine" : "44"
+    "scrollLine" : "63"
 }
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/prop/INDEX b/.Rproj.user/9DAE6990/sources/prop/INDEX
index 2ae9108e07b4ff0d7c0f0fc9a79a4e96343a7e78..c6af6e3d2d6a09efc4edfc266fc0ef9609fb5bc5 100644
--- a/.Rproj.user/9DAE6990/sources/prop/INDEX
+++ b/.Rproj.user/9DAE6990/sources/prop/INDEX
@@ -1,6 +1,12 @@
 ~%2Fstuart_package%2Fstuart%2FDESCRIPTION="BEB7232"
 ~%2Fstuart_package%2Fstuart%2FNAMESPACE="AE613167"
+~%2Fstuart_package%2Fstuart%2FR%2Fgeno_strains.R="8F7B714A"
+~%2Fstuart_package%2Fstuart%2FR%2Fgenos-data.R="C10FF5C8"
+~%2Fstuart_package%2Fstuart%2FR%2Fmark_allele.R="1416C0B5"
+~%2Fstuart_package%2Fstuart%2FR%2Fmark_match.R="75F49619"
 ~%2Fstuart_package%2Fstuart%2FR%2Fmark_poly.R="3A328548"
+~%2Fstuart_package%2Fstuart%2FR%2Fmark_prop.R="A609F054"
+~%2Fstuart_package%2Fstuart%2FR%2Ftab_mark.R="7FA3B215"
 ~%2Fstuart_package%2Fstuart%2FR%2Fwrite_rqtl.R="5B8691C7"
 ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.R="EBD625D2"
 ~%2Fstuart_package%2Fstuart%2Fvignettes%2FstuaRt.Rmd="D602FFE4"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312 b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312
new file mode 100644
index 0000000000000000000000000000000000000000..d1aa6bb250c6ae441fb0f74cda1c05bd75f9c6b7
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312
@@ -0,0 +1,23 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1622636142238.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "387034705",
+    "id" : "42D37312",
+    "lastKnownWriteTime" : 1622465534,
+    "last_content_update" : 1622465534,
+    "path" : "~/stuart_package/stuart/R/mark_prop.R",
+    "project_path" : "R/mark_prop.R",
+    "properties" : {
+    },
+    "read_only" : false,
+    "read_only_alternatives" : [
+    ],
+    "relative_order" : 7,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents
new file mode 100644
index 0000000000000000000000000000000000000000..a0827e8c27b02c9d65a6cc3af34cf595f50a5840
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/42D37312-contents
@@ -0,0 +1,50 @@
+#' @title Exclude markers depending on proportions of homo/heterozygous
+#'
+#' @description Uses the data frame produced by the tab_mark function and flags, in a new "exclude_prop" column, all the markers that present odd proportions of each genotype. You can define these proportions thanks to the arguments of the function.
+#' @param tab data frame obtained with tab_mark function (the columns n_HM1, n_HM2, n_HT and n_NA are used).
+#' @param cross "F2" or "N2".
+#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on the first one (n_HM1) for N2.
+#' @param hetero proportion of heterozygous individuals under which the marker is excluded.
+#' @param na proportion of non-genotyped individuals above which the marker is excluded.
+#' @return tab with an additional numeric column "exclude_prop": 1 if the marker is excluded, 0 otherwise.
+#' @details Both homo and hetero must be specified, for F2 as well as for N2 crosses; an error is raised otherwise.
+#'
+#' @import dplyr
+#'
+#' @export
+#'
+
+#### mark_prop ####
+## excludes markers depending on proportions of homo/heterozygous
+mark_prop <- function(tab,cross,homo=NA,hetero=NA,na=0.5){
+  #validate the arguments first so that a bad call fails before any computation
+  #stop if cross != "F2" or "N2" (cross must be a single value)
+  if(length(cross)!=1 || !cross %in% c("F2","N2")){
+    stop("Cross must be F2 or N2")
+  }
+
+  #stop if no value for "homo" or "hetero": both thresholds are required for F2 and N2 crosses
+  if(is.na(homo) || is.na(hetero)){
+    stop(paste("Arguments homo and hetero must be specified for",cross,"crosses"))
+  }
+
+  #calculate total number of individuals genotyped for each marker (n_geno)
+  #and the proportion of each genotype; p_NA is relative to all individuals
+  tab <- tab %>% mutate(n_geno = rowSums(select(tab,n_HM1,n_HM2,n_HT),na.rm=TRUE),
+                        p_HM1 = n_HM1/n_geno,
+                        p_HM2 = n_HM2/n_geno,
+                        p_HT = n_HT/n_geno,
+                        p_NA = n_NA/(n_geno+n_NA))
+
+  #flag markers to exclude (1) or to keep (0); the first matching condition wins
+  #a condition that evaluates to NA is treated by case_when as not matching
+  tab <- tab %>% mutate(exclude_prop=case_when(
+    p_NA > na ~ 1, #exclude markers according to proportion of na
+    cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, #F2 cross: both homozygous genotypes and the heterozygous genotype are checked
+    cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, #N2 cross: only the first homozygous genotype and the heterozygous genotype are checked
+    TRUE ~ 0))
+
+  #drop the intermediate columns so that only exclude_prop is added to tab
+  tab <- tab %>% select(-c(p_HM1,p_HM2,p_HT,p_NA,n_geno))
+  return(tab)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58 b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58
index 60c39ac66e63aef8037f21693027f0e9f18946db..48318c85d383a55f9cdaf128250d2a2e36637f50 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/45D91D58
@@ -18,7 +18,7 @@
     "read_only" : false,
     "read_only_alternatives" : [
     ],
-    "relative_order" : 5,
+    "relative_order" : 9,
     "source_on_save" : false,
     "source_window" : "",
     "type" : "r_source"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E b/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E
deleted file mode 100644
index 2576ef9c04c31f42f598b3ab49d0632a0f8a8001..0000000000000000000000000000000000000000
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/4A9D04E
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-    "collab_server" : "",
-    "contents" : "",
-    "created" : 1622538256440.000,
-    "dirty" : false,
-    "encoding" : "",
-    "folds" : "",
-    "hash" : "0",
-    "id" : "4A9D04E",
-    "lastKnownWriteTime" : 140548509794304,
-    "last_content_update" : 1622538256440,
-    "path" : null,
-    "project_path" : null,
-    "properties" : {
-        "cacheKey" : "F7A5FD33",
-        "caption" : "strains",
-        "contentUrl" : "grid_resource/gridviewer.html?env=&obj=strains&cache_key=F7A5FD33",
-        "displayedObservations" : "11125",
-        "environment" : "",
-        "expression" : "strains",
-        "object" : "strains",
-        "preview" : "0",
-        "totalObservations" : "11125",
-        "variables" : "14"
-    },
-    "read_only" : false,
-    "read_only_alternatives" : [
-    ],
-    "relative_order" : 3,
-    "source_on_save" : false,
-    "source_window" : "",
-    "type" : "r_dataframe"
-}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B
new file mode 100644
index 0000000000000000000000000000000000000000..973ea910f5a3794d58e3d19c741e359421fe09b3
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B
@@ -0,0 +1,26 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1622637863181.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "1306976036",
+    "id" : "65C9B72B",
+    "lastKnownWriteTime" : 1622462353,
+    "last_content_update" : 1622462353,
+    "path" : "~/stuart_package/stuart/R/tab_mark.R",
+    "project_path" : "R/tab_mark.R",
+    "properties" : {
+        "cursorPosition" : "38,30",
+        "scrollLine" : "25",
+        "source_window_id" : ""
+    },
+    "read_only" : false,
+    "read_only_alternatives" : [
+    ],
+    "relative_order" : 3,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents
new file mode 100644
index 0000000000000000000000000000000000000000..612c27db930cb54d481e09a9a7ab5efd4912420f
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/65C9B72B-contents
@@ -0,0 +1,137 @@
+#' @title Create the summary table for all markers from the genotype data frame
+#'
+#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals.
+#' @param geno data frame with the genotyping results for your cross; the columns SNP.Name, Sample.ID, Allele1...Forward and Allele2...Forward are used
+#' @return a tibble with one row per marker and the columns SNP.Name, Allele_1, Allele_2, n_HM1, n_HM2, n_HT and n_NA
+#' @import dplyr
+#' @import tidyr
+#'
+#' @export
+#'
+
+#### tab_mark function ####
+## create table with markers and counts
+tab_mark <- function(geno){
+  #create the Geno column in geno df by pasting the two alleles together (ex: "A" and "T" give "AT")
+  geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE)
+
+  #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
+  geno <- geno %>% mutate(Geno=recode(Geno,
+                                      "TA" = "AT",
+                                      "GA" = "AG",
+                                      "CA" = "AC",
+                                      "GT" = "TG",
+                                      "CT" = "TC",
+                                      "GC" = "CG"))
+
+
+  #create df with one row per marker; alleles and counts are filled in the loop below
+  df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)),
+                     Allele_1 = NA,
+                     Allele_2 = NA,
+                     n_HM1 = NA,
+                     n_HM2 = NA,
+                     n_HT = NA,
+                     n_NA = NA)
+
+
+  ## loop over the markers to count genotypes
+  for(i in df_count$SNP.Name){
+    #extract the distinct alleles observed for this marker, ignoring missing calls ("-")
+    Alleles <- geno %>% filter(SNP.Name==i) %>%
+      select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>%
+      pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>%
+      distinct(Allele) %>% filter(Allele != "-")
+    Alleles <- as.factor(paste(Alleles$Allele))
+
+    #sort alleles in the order A, T, C, G (same order as the recoding above); values outside these levels become NA and are dropped by sort()
+    Alleles <- factor(Alleles,levels=c("A","T","C","G"))
+    Alleles <- sort(Alleles)
+
+    #add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
+    if(all(rapportools::is.empty(Alleles))==FALSE){
+      #NOTE(review): rapportools is called with "::"; confirm that it is listed in the DESCRIPTION of the package
+      #add allele 1 to df_count
+      df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i,
+                                                        paste(Alleles[1]), Allele_1))
+
+
+
+      #count for homozygous for allele 1
+      n1 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
+        summarise(n=n())
+
+
+      #add count for homozygous for allele 1 to df_count
+      df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i,
+                                                     n1$n, n_HM1))
+
+
+    }
+
+    #if marker not polymorphic (fewer than two alleles observed)
+    if(is.na(Alleles[2])==TRUE){
+      #NA as Allele_2
+      df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
+                                                        NA, Allele_2))
+
+      #NA as n_HM2
+      df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
+                                                     NA, n_HM2))
+
+      #NA as n_HT
+      df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
+                                                    NA, n_HT))
+    } else {
+      #add allele 2 to df_count
+      df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
+                                                        paste(Alleles[2]), Allele_2))
+
+
+      #count for homozygous for allele 2
+      n2 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
+        summarise(n=n())
+
+      #add count for homozygous for allele 2 to df_count
+      df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
+                                                     n2$n, n_HM2))
+
+
+      #count for heterozygous
+      n3 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
+        summarise(n=n())
+
+
+      #add count for heterozygous to df_count
+      df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
+                                                    n3$n, n_HT))
+
+
+    }
+
+    #count for NA: both alleles missing ("--") or only one of the two alleles called
+    n4 <- geno %>% filter(SNP.Name==i) %>%
+      filter(Geno == "--" |
+               Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
+               Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
+      summarise(n=n())
+
+    #add count for NA to df_count
+    df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i,
+                                                  n4$n, n_NA))
+  }
+  #change class of counts as numeric :
+  df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
+  df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
+  df_count$n_HT <- df_count$n_HT %>% as.numeric()
+  df_count$n_NA <- df_count$n_NA %>% as.numeric()
+
+  #replace the counts that are still NA by 0
+  df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
+
+  #return
+  return(df_count)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/6B5DE0D4-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/6B5DE0D4-contents
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC
new file mode 100644
index 0000000000000000000000000000000000000000..d073da46fdf7205ce479a88abf6977c7b119cce4
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC
@@ -0,0 +1,23 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1622636138213.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "897610086",
+    "id" : "76AC3EC",
+    "lastKnownWriteTime" : 1622462353,
+    "last_content_update" : 1622462353,
+    "path" : "~/stuart_package/stuart/R/mark_match.R",
+    "project_path" : "R/mark_match.R",
+    "properties" : {
+    },
+    "read_only" : false,
+    "read_only_alternatives" : [
+    ],
+    "relative_order" : 5,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents
new file mode 100644
index 0000000000000000000000000000000000000000..8781242a7568eb4081bcd0170f5ff17a049f2358
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/76AC3EC-contents
@@ -0,0 +1,25 @@
+#' @title Exclude markers that were not genotyped in the reference strains
+#'
+#' @description This function uses the data frame produced by the tab_mark function and adds an "exclude_match" column flagging all the markers that were genotyped in the individuals of the cross but not in the reference strains. This is useful if the parental strains of the cross were not genotyped with the individuals and a previous genotyping result is used. Indeed, changes in the markers of the array may have happened. We recommend always using this function in order to avoid errors.
+#' @param tab data frame obtained with tab_mark function
+#' @param ref data frame with the reference genotypes of mouse lines
+#'
+#' @import dplyr
+#'
+#' @export
+#'
+mark_match <- function(tab,  #tab_mark df
+                       ref){  #strain ref geno file
+
+  #marker names of each file, compared as character strings
+  ref_markers <- as.character(ref$marker)
+  cross_markers <- as.character(tab$SNP.Name)
+  #markers that are present in both files
+  shared <- intersect(ref_markers,cross_markers)
+
+  #exclude_match is 0 for markers found in both files, 1 for markers missing from ref
+  flagged <- tab %>% mutate(exclude_match=ifelse(SNP.Name %in% shared,
+                                                 0,
+                                                 1))
+  return(flagged)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents
new file mode 100644
index 0000000000000000000000000000000000000000..fd50eb0ea6a1188c1f619aa7510b6b276c9c2895
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/7E4482C6-contents
@@ -0,0 +1,8 @@
+#' Data frame with miniMUGA genotyping of F2 individuals and parental strains
+#'
+#' A dataset containing the genotypes of 176 F2 individuals
+#'
+#' @format A data frame with 2002493 observations of 11 variables
+
+
+"genos"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34 b/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34
deleted file mode 100644
index bb5c4d7f601ff6996edbba477057227ab1fcc07c..0000000000000000000000000000000000000000
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/806AAC34
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-    "collab_server" : "",
-    "contents" : "",
-    "created" : 1622538162413.000,
-    "dirty" : false,
-    "encoding" : "",
-    "folds" : "",
-    "hash" : "0",
-    "id" : "806AAC34",
-    "lastKnownWriteTime" : 5,
-    "last_content_update" : 1622538162413,
-    "path" : null,
-    "project_path" : null,
-    "properties" : {
-        "cacheKey" : "634A6953",
-        "caption" : "stuart_tab",
-        "contentUrl" : "grid_resource/gridviewer.html?env=&obj=stuart_tab&cache_key=634A6953",
-        "displayedObservations" : 11125,
-        "environment" : "",
-        "expression" : "stuart_tab",
-        "object" : "stuart_tab",
-        "preview" : 0,
-        "totalObservations" : 11125,
-        "variables" : 7
-    },
-    "read_only" : false,
-    "read_only_alternatives" : [
-    ],
-    "relative_order" : 4,
-    "source_on_save" : false,
-    "source_window" : "",
-    "type" : "r_dataframe"
-}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents
new file mode 100644
index 0000000000000000000000000000000000000000..612c27db930cb54d481e09a9a7ab5efd4912420f
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/81311FDE-contents
@@ -0,0 +1,137 @@
+#' @title Create the summary table for all markers from the genotype data frame
+#'
+#' @description This function creates a table with all the markers that were genotyped in the array, the alleles for these markers, the number of homozygous and heterozygous animals, as well as the number of non genotyped animals.
+#' @param geno data frame with the genotyping results for your cross; the columns SNP.Name, Sample.ID, Allele1...Forward and Allele2...Forward are used
+#' @return a tibble with one row per marker and the columns SNP.Name, Allele_1, Allele_2, n_HM1, n_HM2, n_HT and n_NA
+#' @import dplyr
+#' @import tidyr
+#'
+#' @export
+#'
+
+#### tab_mark function ####
+## create table with markers and counts
+tab_mark <- function(geno){
+  #create the Geno column in geno df by pasting the two alleles together (ex: "A" and "T" give "AT")
+  geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE)
+
+  #recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
+  geno <- geno %>% mutate(Geno=recode(Geno,
+                                      "TA" = "AT",
+                                      "GA" = "AG",
+                                      "CA" = "AC",
+                                      "GT" = "TG",
+                                      "CT" = "TC",
+                                      "GC" = "CG"))
+
+
+  #create df with one row per marker; alleles and counts are filled in the loop below
+  df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)),
+                     Allele_1 = NA,
+                     Allele_2 = NA,
+                     n_HM1 = NA,
+                     n_HM2 = NA,
+                     n_HT = NA,
+                     n_NA = NA)
+
+
+  ## loop over the markers to count genotypes
+  for(i in df_count$SNP.Name){
+    #extract the distinct alleles observed for this marker, ignoring missing calls ("-")
+    Alleles <- geno %>% filter(SNP.Name==i) %>%
+      select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>%
+      pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>%
+      distinct(Allele) %>% filter(Allele != "-")
+    Alleles <- as.factor(paste(Alleles$Allele))
+
+    #sort alleles in the order A, T, C, G (same order as the recoding above); values outside these levels become NA and are dropped by sort()
+    Alleles <- factor(Alleles,levels=c("A","T","C","G"))
+    Alleles <- sort(Alleles)
+
+    #add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
+    if(all(rapportools::is.empty(Alleles))==FALSE){
+      #NOTE(review): rapportools is called with "::"; confirm that it is listed in the DESCRIPTION of the package
+      #add allele 1 to df_count
+      df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i,
+                                                        paste(Alleles[1]), Allele_1))
+
+
+
+      #count for homozygous for allele 1
+      n1 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
+        summarise(n=n())
+
+
+      #add count for homozygous for allele 1 to df_count
+      df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i,
+                                                     n1$n, n_HM1))
+
+
+    }
+
+    #if marker not polymorphic (fewer than two alleles observed)
+    if(is.na(Alleles[2])==TRUE){
+      #NA as Allele_2
+      df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
+                                                        NA, Allele_2))
+
+      #NA as n_HM2
+      df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
+                                                     NA, n_HM2))
+
+      #NA as n_HT
+      df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
+                                                    NA, n_HT))
+    } else {
+      #add allele 2 to df_count
+      df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
+                                                        paste(Alleles[2]), Allele_2))
+
+
+      #count for homozygous for allele 2
+      n2 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
+        summarise(n=n())
+
+      #add count for homozygous for allele 2 to df_count
+      df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
+                                                     n2$n, n_HM2))
+
+
+      #count for heterozygous
+      n3 <- geno %>% filter(SNP.Name==i) %>%
+        filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
+        summarise(n=n())
+
+
+      #add count for heterozygous to df_count
+      df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
+                                                    n3$n, n_HT))
+
+
+    }
+
+    #count for NA: both alleles missing ("--") or only one of the two alleles called
+    n4 <- geno %>% filter(SNP.Name==i) %>%
+      filter(Geno == "--" |
+               Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
+               Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
+      summarise(n=n())
+
+    #add count for NA to df_count
+    df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i,
+                                                  n4$n, n_NA))
+  }
+  #change class of counts as numeric :
+  df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
+  df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
+  df_count$n_HT <- df_count$n_HT %>% as.numeric()
+  df_count$n_NA <- df_count$n_NA %>% as.numeric()
+
+  #replace the counts that are still NA by 0
+  df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
+
+  #return
+  return(df_count)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3 b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3
new file mode 100644
index 0000000000000000000000000000000000000000..dce483d417ea42db70cc869b57eed8ef24b40124
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3
@@ -0,0 +1,25 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1622636127574.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "0",
+    "id" : "907DF1D3",
+    "lastKnownWriteTime" : 1622640147,
+    "last_content_update" : 1622640147854,
+    "path" : "~/stuart_package/stuart/R/geno_strains.R",
+    "project_path" : "R/geno_strains.R",
+    "properties" : {
+        "cursorPosition" : "34,0",
+        "scrollLine" : "21"
+    },
+    "read_only" : false,
+    "read_only_alternatives" : [
+    ],
+    "relative_order" : 2,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents
new file mode 100644
index 0000000000000000000000000000000000000000..d4104d0f3af4780745e4939ce649d7e90e8c995e
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/907DF1D3-contents
@@ -0,0 +1,55 @@
+#' @title Create haplotype for a new mouse strain into a reference dataframe
+#'
+#' @description This function adds columns for the parental strains used in the cross to the annotation data frame, from the genotype data frame in which one or several animals of the parental strains were genotyped.
+#' If several animals of one strain were genotyped, a consensus is created from these animals.
+#' The consensus is created as follows: if the individuals carry the same allele, this allele is kept, otherwise, the allele is noted as "N". If individuals show residual heterozygosity, it is encoded as "H".
+#' @param ref data frame with the reference genotypes of mouse lines
+#' @param geno data frame with the genotyping results for your cross from miniMUGA array; its first four columns are used as marker, individual id, allele 1 and allele 2
+#' @param par1 first parental strain used in the cross, the name must be written as in the geno data frame
+#' @param par2 second parental strain used in the cross, the name must be written as in the geno data frame
+#' @param name1 name of the first parental strain to use as the column name in the ref data frame
+#' @param name2 name of the second parental strain to use as the column name in the ref data frame
+#'
+#' @import dplyr
+#' @import tidyr
+#'
+#' @export
+#'
+geno_strains <- function(ref,geno,par1,par2,name1,name2){
+  #rename the first four columns of geno, selected by position
+  geno <- geno %>% rename("marker"=1,
+                          "id"=2,
+                          "allele_1"=3,
+                          "allele_2"=4)
+
+  #keep only the individuals listed in par1 and par2
+  #recode genotypes from 2 alleles to 1: "N" if an allele is missing ("-"), the allele if homozygous, "H" if heterozygous for A/T/G/C
+  geno <- geno %>% mutate_all(as.character)
+  geno <- geno %>% filter(id %in% c(par1,par2))
+  geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N",
+                                         allele_1 == allele_2 ~ allele_1,
+                                         allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H"))
+  #reshape to one row per marker and one column per parental individual
+  geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno)
+
+  #NOTE(review): when par1 (or par2) has more than one id, only its first two ids are compared; further ids are ignored - confirm this is intended
+  #create consensus: the shared genotype if the two individuals agree, "N" otherwise
+  if(length(par1)!=1){
+    geno <- geno %>% mutate(parent1 = ifelse(!!sym(par1[1])==!!sym(par1[2]),!!sym(par1[1]),"N"))
+  } else {
+    geno <- geno %>% rename(parent1=!!sym(par1[1]))
+  }
+
+  if(length(par2)!=1){
+    geno <- geno %>% mutate(parent2 = ifelse(!!sym(par2[1])==!!sym(par2[2]),!!sym(par2[1]),"N"))
+  } else {
+    geno <- geno %>% rename(parent2=!!sym(par2[1]))
+  }
+  #keep the marker and consensus columns, and name the consensus columns name1 and name2
+  geno <- geno %>% select(marker,parent1,parent2)
+  colnames(geno) <- c("marker",name1,name2)
+
+  #merge with ref file (full join on marker: the markers of both data frames are kept)
+  ref <- full_join(ref,geno,by=c("marker"="marker"))
+  return(ref)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736 b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736
index 601d204391765df78d1b564a3375e7d83f05883c..3f7a50343a4fe7a5bd0c3a2449f07f72de7f3f6f 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736
@@ -7,14 +7,14 @@
     "folds" : "",
     "hash" : "0",
     "id" : "96AB3736",
-    "lastKnownWriteTime" : 1622624631,
-    "last_content_update" : 1622624631694,
+    "lastKnownWriteTime" : 1622640207,
+    "last_content_update" : 1622640207996,
     "path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd",
     "project_path" : "vignettes/stuaRt.Rmd",
     "properties" : {
-        "cursorPosition" : "44,0",
+        "cursorPosition" : "78,3",
         "last_setup_crc32" : "39B546A65bfca283",
-        "scrollLine" : "44"
+        "scrollLine" : "63"
     },
     "read_only" : false,
     "read_only_alternatives" : [
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents
index 92709f484cb21eb508c53c98fe3f975f8e90ee70..de0ce5aceffd208490965bc9efd0d4b4ec8e78fe 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/96AB3736-contents
@@ -17,11 +17,11 @@ knitr::opts_chunk$set(
 
 Marie Bourdon
 
-April 2021
+June 2021
 
 ## Goal
 
-stuart is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc.
+stuart is an R package which formats genotyping results. It was developed to analyse data from MUGA arrays (Neogen) for Rqtl analysis, for backcross or F2 crosses, but can be used to analyse data from other laboratory animal strains genotyped with other arrays. It filters the markers of an array from a genetic point of view: markers are selected depending on the proportion of each genotype, the correspondence between the alleles of the F2 or N2 individuals and the alleles of the parental strains, etc.
 
 The examples shown here require the use of dplyr package.
 
@@ -34,17 +34,24 @@ library(stuart)
 
 ## Annotation files
 
+In order to map the markers on the genome of the individuals, you need to load a table with the position of all markers in the array. The data frame must contain the following columns: `marker` with the marker names, `chr` with the chromosome of each marker, and a column with the position of the marker on the chromosome. For Rqtl analysis, you need to provide positions in cM. The data frame can contain any other columns that you find helpful.
+
 The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations).
 
 We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R.
 
-Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuart package.
 
 ```{r annot}
 annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
 ```
 
 
+Here, we will present an example of the use of stuart with the results of an F2 cross genotyped with miniMUGA. Example genotype and phenotype data frames are available in the stuart package.
+
+The genotype data frame must contain a first column with marker names, a second column with sample IDs, a third column with the first allele and a fourth column with the second allele. This format corresponds to the MUGA results. If your data differ, make sure to have these columns in this order.
+
+We load the result of Neogen genotyping: `genos` (only useful columns with marker name, sample ID and alleles were kept) and the phenotype dataset produced by the lab: `phenos`.
+
 
 ```{r load}
 data(genos)
@@ -79,14 +86,14 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "Strains
 The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function.
 
 
-```{r tab_mark}
+```{r tab_mark,eval=F}
 data(stuart_tab)
 summary(stuart_tab)
 ```
 
 Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.
 
-```{r mark_match}
+```{r mark_match,eval=F}
 tab2 <- mark_match(stuart_tab,ref=strains)
 
 
@@ -97,28 +104,28 @@ Here the reference strains were genotyped with the same version of the chip as t
 
 Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.
 
-```{r mark_poly ex}
+```{r mark_poly ex,eval=F}
 tab2 <- mark_poly(tab2)
 head(tab2)
 ```
 
 The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.
 
-```{r mark_prop ex}
+```{r mark_prop ex,eval=F}
 tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
 head(tab2)
 ```
 
 Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
 
-```{r mark_allele}
+```{r mark_allele,eval=F}
 tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
 tab2 %>% arrange(desc(exclude_allele)) %>% head()
 ```
 
 Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.
 
-```{r mark_allele-strains}
+```{r mark_allele-strains,eval=F}
 strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2)
 ```
 
@@ -126,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
 
 After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. 
 
-```{r write_qtl}
+```{r write_qtl,eval=F}
 rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
 
 rqtl_file[1:10,1:7]
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD b/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD
deleted file mode 100644
index 19d6a0284a4f80e652f82039e649457b4cb04245..0000000000000000000000000000000000000000
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/9E3EC0FD
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-    "collab_server" : "",
-    "contents" : "",
-    "created" : 1622622953119.000,
-    "dirty" : false,
-    "encoding" : "UTF-8",
-    "folds" : "",
-    "hash" : "2767839831",
-    "id" : "9E3EC0FD",
-    "lastKnownWriteTime" : 1622539469,
-    "last_content_update" : 1622539469,
-    "path" : "~/stuart_package/stuart/NAMESPACE",
-    "project_path" : "NAMESPACE",
-    "properties" : {
-    },
-    "read_only" : true,
-    "read_only_alternatives" : [
-    ],
-    "relative_order" : 8,
-    "source_on_save" : false,
-    "source_window" : "",
-    "type" : "r_namespace"
-}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8 b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8
new file mode 100644
index 0000000000000000000000000000000000000000..0069b153fb956ccf9e32d32f2dbbaadf3ba90a0f
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8
@@ -0,0 +1,25 @@
+{
+    "collab_server" : "",
+    "contents" : "",
+    "created" : 1622636135728.000,
+    "dirty" : false,
+    "encoding" : "UTF-8",
+    "folds" : "",
+    "hash" : "2531250629",
+    "id" : "B86C81D8",
+    "lastKnownWriteTime" : 1622462353,
+    "last_content_update" : 1622462353,
+    "path" : "~/stuart_package/stuart/R/mark_allele.R",
+    "project_path" : "R/mark_allele.R",
+    "properties" : {
+        "cursorPosition" : "24,47",
+        "scrollLine" : "2"
+    },
+    "read_only" : false,
+    "read_only_alternatives" : [
+    ],
+    "relative_order" : 8,
+    "source_on_save" : false,
+    "source_window" : "",
+    "type" : "r_source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents
new file mode 100644
index 0000000000000000000000000000000000000000..b679916993328de1dfd7b34b3325bf8066424b8e
--- /dev/null
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/B86C81D8-contents
@@ -0,0 +1,46 @@
+#' @title Exclude markers that have different alleles in the individuals of the cross and in parental strains
+#'
+#' @description This function uses the data frame produced by the tab_mark function and adds an "exclude_allele" column, set to 1 for all the markers which have alleles observed in the individuals of the cross that do not correspond to the alleles observed in the parental strains, and to 0 otherwise. For example, a marker which is not polymorphic between the two parental strains but which has two alleles in the cross individuals will be excluded.
+#' @param tab data frame obtained with tab_mark function; the columns "SNP.Name", "Allele_1" and "Allele_2" are used
+#' @param ref data frame with the reference genotypes of mouse lines; it must contain a "marker" column and one column per parental strain
+#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame
+#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame
+#' @return the "tab" data frame with an additional "exclude_allele" column (1 = excluded, 0 = kept); because "tab" and "ref" are combined with a full join, markers found only in "ref" are added as extra rows
+#' @import dplyr
+#'
+#' @export
+#'
+mark_allele <- function(tab,ref,par1,par2){
+
+  #markers of ref df as characters
+  ref$marker <- as.character(ref$marker)
+  colnames(ref) <- make.names(colnames(ref))
+
+  #recode parents' names to match column names nomenclature
+  par1 <- make.names(par1)
+  par2 <- make.names(par2)
+
+  #join tab and ref genotypes (full join: markers present in only one of the two tables are kept)
+  ref <- ref %>% select(marker,!!sym(par1),!!sym(par2))
+  tab <- full_join(tab,ref,by=c("SNP.Name"="marker"))
+  #function core: flag markers (exclude_allele = 1) whose cross alleles do not match the parental genotypes;
+  #the rule applied depends on whether Allele_2 is NA and on whether a parent is coded "N" or "H"
+  tab <- tab %>% mutate(exclude_allele = case_when(!is.na(Allele_2) & #Allele_2 present, no parent "N"/"H": at least one allele matches neither parent
+                                                     !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" &
+                                                     ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1,
+                                                   !is.na(Allele_2) & #Allele_2 present, a parent is "N"/"H": both alleles match neither parent
+                                                     (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") &
+                                                     ((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1,
+                                                   is.na(Allele_2) & #Allele_2 missing, no parent "N"/"H": Allele_1 differs from at least one parent
+                                                     !!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" &
+                                                     (Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1,
+                                                   is.na(Allele_2) & #Allele_2 missing, a parent is "N"/"H": Allele_1 matches neither parent
+                                                     (!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") &
+                                                     Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1,
+                                                   TRUE ~ 0) #every other marker is kept
+  )
+
+  tab <- tab %>% select(-c(!!sym(par1),!!sym(par2)))
+
+  return(tab)
+}
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA
similarity index 74%
rename from .Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64
rename to .Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA
index 1d0bce58ef73f88ca7ce09fdac2b8dce157f7dd9..defadb8ff5fcdf70afd49209fd05f87fc86b8aa0 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/47AFB64
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA
@@ -1,20 +1,20 @@
 {
     "collab_server" : "",
     "contents" : "",
-    "created" : 1622538246256.000,
+    "created" : 1622638761182.000,
     "dirty" : false,
     "encoding" : "",
     "folds" : "",
     "hash" : "0",
-    "id" : "47AFB64",
-    "lastKnownWriteTime" : 140548509794308,
-    "last_content_update" : 1622538246256,
+    "id" : "C2CE7FEA",
+    "lastKnownWriteTime" : 1622638687,
+    "last_content_update" : 1622638761182,
     "path" : null,
     "project_path" : null,
     "properties" : {
-        "cacheKey" : "6294E01A",
+        "cacheKey" : "862FB1D9",
         "caption" : "annot_mini",
-        "contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=6294E01A",
+        "contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=862FB1D9",
         "displayedObservations" : 11125,
         "environment" : "",
         "expression" : "annot_mini",
@@ -26,7 +26,7 @@
     "read_only" : false,
     "read_only_alternatives" : [
     ],
-    "relative_order" : 6,
+    "relative_order" : 4,
     "source_on_save" : false,
     "source_window" : "",
     "type" : "r_dataframe"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA-contents b/.Rproj.user/9DAE6990/sources/s-39B546A6/C2CE7FEA-contents
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18
index 117fe9108f5fc30ed9cf2f206ee651fb7bbbdea7..cd989e9e5e74c809b83e1a70f8fa676bb1689a28 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C5228C18
@@ -16,7 +16,7 @@
     "read_only" : false,
     "read_only_alternatives" : [
     ],
-    "relative_order" : 7,
+    "relative_order" : 6,
     "source_on_save" : false,
     "source_window" : "",
     "type" : "r_source"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6 b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6
index 1372ca65acf3f5a950452689a001c8527c71eefb..925c2cddb67cefd4110ca6a3c2cbdb821e30d3ab 100644
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6
+++ b/.Rproj.user/9DAE6990/sources/s-39B546A6/C81C94E6
@@ -5,7 +5,7 @@
     "dirty" : false,
     "encoding" : "UTF-8",
     "folds" : "",
-    "hash" : "0",
+    "hash" : "1425873394",
     "id" : "C81C94E6",
     "lastKnownWriteTime" : 1622623114,
     "last_content_update" : 1622623114332,
@@ -18,7 +18,7 @@
     "read_only" : false,
     "read_only_alternatives" : [
     ],
-    "relative_order" : 9,
+    "relative_order" : 10,
     "source_on_save" : false,
     "source_window" : "",
     "type" : "dcf"
diff --git a/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C b/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C
deleted file mode 100644
index 1351a95a5c791e2a51e36a6271a6addd1c959c84..0000000000000000000000000000000000000000
--- a/.Rproj.user/9DAE6990/sources/s-39B546A6/D49EE59C
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-    "collab_server" : "",
-    "contents" : "",
-    "created" : 1622538242843.000,
-    "dirty" : false,
-    "encoding" : "",
-    "folds" : "",
-    "hash" : "0",
-    "id" : "D49EE59C",
-    "lastKnownWriteTime" : 140551864679664,
-    "last_content_update" : 1622538242843,
-    "path" : null,
-    "project_path" : null,
-    "properties" : {
-        "cacheKey" : "4C070D0B",
-        "caption" : "genos",
-        "contentUrl" : "grid_resource/gridviewer.html?env=&obj=genos&cache_key=4C070D0B",
-        "displayedObservations" : 1957993,
-        "environment" : "",
-        "expression" : "genos",
-        "object" : "genos",
-        "preview" : 0,
-        "totalObservations" : 1957993,
-        "variables" : 11
-    },
-    "read_only" : false,
-    "read_only_alternatives" : [
-    ],
-    "relative_order" : 2,
-    "source_on_save" : false,
-    "source_window" : "",
-    "type" : "r_dataframe"
-}
\ No newline at end of file
diff --git a/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata b/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata
deleted file mode 100644
index c7f59bfdcb70ea156f7c5abe0a7f47d4bdd858f8..0000000000000000000000000000000000000000
Binary files a/.Rproj.user/9DAE6990/viewer-cache/4C070D0B.Rdata and /dev/null differ
diff --git a/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata b/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata
deleted file mode 100644
index 6d9cbeb3f64ff88d5fcca84cc5ff97afd9cbb75f..0000000000000000000000000000000000000000
Binary files a/.Rproj.user/9DAE6990/viewer-cache/6294E01A.Rdata and /dev/null differ
diff --git a/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata b/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata
deleted file mode 100644
index ad0ad30a49aa244ba94b04ed2893c0739e7226ae..0000000000000000000000000000000000000000
Binary files a/.Rproj.user/9DAE6990/viewer-cache/634A6953.Rdata and /dev/null differ
diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json
index 131e086e4826cd672b68c9364909576dad9ae87b..b86d0151df31c4b6323160db108efc8e5041c995 100644
--- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json
+++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/9DAE699039B546A6/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622538645,"working_dir":null}
\ No newline at end of file
+{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622638573,"working_dir":null}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json
index e3364a47c8c6f23c449ad55269d52ead3facee93..b86d0151df31c4b6323160db108efc8e5041c995 100644
--- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json
+++ b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"ct8u35p5h48pa","chunk_label":"annot","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"annot"},"row":44,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":53,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":66,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":71,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":84,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":93,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":102,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":116,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":122,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":132,"row_count":1,"visible":true}],"doc_write_time":1622538645}
\ No newline at end of file
+{"chunk_definitions":[{"chunk_id":"cugiprfbptcaw","chunk_label":"unnamed-chunk-1","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":31,"row_count":1,"visible":true},{"chunk_id":"cystga685ux9r","chunk_label":"load","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"load"},"row":60,"row_count":1,"visible":true},{"chunk_id":"cc71rfo54vvou","chunk_label":"strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"strains"},"row":73,"row_count":1,"visible":true},{"chunk_id":"coar8mvardv1z","chunk_label":"no_parent","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"no_parent"},"row":78,"row_count":1,"visible":true},{"chunk_id":"cyqo4jk1414tp","chunk_label":"tab_mark","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"tab_mark"},"row":91,"row_count":1,"visible":true},{"chunk_id":"ci1zg9xosgth8","chunk_label":"mark_match","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_match"},"row":100,"row_count":1,"visible":true},{"chunk_id":"c4j6ei29p4187","chunk_label":"mark_poly ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_poly ex"},"row":109,"row_count":1,"visible":true},{"chunk_id":"cndnl4vh4xyj8","chunk_label":"mark_prop ex","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_prop 
ex"},"row":116,"row_count":1,"visible":true},{"chunk_id":"cgrscnwnyajvi","chunk_label":"mark_allele","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele"},"row":123,"row_count":1,"visible":true},{"chunk_id":"c45rvmci4gaoy","chunk_label":"mark_allele-strains","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"mark_allele-strains"},"row":129,"row_count":1,"visible":true},{"chunk_id":"cv6d9nrsrzqfm","chunk_label":"write_qtl","document_id":"96AB3736","expansion_state":0,"options":{"engine":"r","label":"write_qtl"},"row":139,"row_count":1,"visible":true}],"default_chunk_options":{},"doc_write_time":1622638573,"working_dir":null}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv b/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv
deleted file mode 100644
index e9239946eb49c9294b861127284a8f387ff55d1b..0000000000000000000000000000000000000000
--- a/.Rproj.user/shared/notebooks/4D49CCFD-stuaRt/1/s/ct8u35p5h48pa/000008.csv
+++ /dev/null
@@ -1,22 +0,0 @@
-"0","annot_mini <- read.csv(url(""https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv""))
-"
-"2","Warning messages:
-"
-"2","1: "
-"2","In doTryCatch(return(expr), name, parentenv, handler) :"
-"2","
- "
-"2"," fermeture de la connexion inutilisÃ©e 5 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv)
-"
-"2","2: "
-"2","In doTryCatch(return(expr), name, parentenv, handler) :"
-"2","
- "
-"2"," fermeture de la connexion inutilisÃ©e 4 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv)
-"
-"2","3: "
-"2","In doTryCatch(return(expr), name, parentenv, handler) :"
-"2","
- "
-"2"," fermeture de la connexion inutilisÃ©e 3 (https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv)
-"
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index f9ea644cfa136d27e66b59656b2c94c427126af1..3a875c8f655b8c166e29857ed66aa4638147112f 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1 +1,4 @@
+/Users/mariebourdon/stuart_package/stuart/NAMESPACE="6A276B5"
+/Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B"
+/Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F"
 /Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD"
diff --git a/R/geno_strains.R b/R/geno_strains.R
index a0ac113461536b3410adb9bc4e176c06600024a6..d4104d0f3af4780745e4939ce649d7e90e8c995e 100755
--- a/R/geno_strains.R
+++ b/R/geno_strains.R
@@ -16,14 +16,21 @@
 #' @export
 #'
 geno_strains <- function(ref,geno,par1,par2,name1,name2){
+  #rename df columns
+  geno <- geno %>% rename("marker"=1,
+                          "id"=2,
+                          "allele_1"=3,
+                          "allele_2"=4)
+
+
   #recode genotypes from 2 alleles to 1
   geno <- geno %>% mutate_all(as.character)
-  geno <- geno %>% filter(Sample.ID %in% c(par1,par2))
-  geno <- geno %>% mutate(Geno=case_when(Allele1...Forward == "-" | Allele2...Forward == "-" ~ "N",
-                                         Allele1...Forward == Allele2...Forward ~ Allele1...Forward,
-                                         Allele1...Forward %in% c("A","T","G","C") & Allele2...Forward %in% c("A","T","G","C") ~ "H"))
+  geno <- geno %>% filter(id %in% c(par1,par2))
+  geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N",
+                                         allele_1 == allele_2 ~ allele_1,
+                                         allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H"))
 
-  geno <- geno %>% select(SNP.Name,Sample.ID,Geno) %>% pivot_wider(names_from = Sample.ID, values_from = Geno)
+  geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno)
 
 
   #create consensus
@@ -39,10 +46,10 @@ geno_strains <- function(ref,geno,par1,par2,name1,name2){
     geno <- geno %>% rename(parent2=!!sym(par2[1]))
   }
 
-  geno <- geno %>% select(SNP.Name,parent1,parent2)
-  colnames(geno) <- c("SNP.Name",name1,name2)
+  geno <- geno %>% select(marker,parent1,parent2)
+  colnames(geno) <- c("marker",name1,name2)
 
   #merge with ref file
-  ref <- full_join(ref,geno,by=c("marker"="SNP.Name"))
+  ref <- full_join(ref,geno,by=c("marker"="marker"))
   return(ref)
 }
diff --git a/R/ref_strains_mini-data.R b/R/ref_strains_mini-data.R
deleted file mode 100755
index 3fdffa9ee1a5665666e7c7b9c25aa92d3d144cbf..0000000000000000000000000000000000000000
--- a/R/ref_strains_mini-data.R
+++ /dev/null
@@ -1,29 +0,0 @@
-#' Data frame with miniMUGA genotyping of classical lab strains.
-#'
-#' A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur.
-#'
-#' @format A data frame with 11299 rows and 18 variables
-#' \describe{
-#'   \item{CC001}{CC001 mouse strain}
-#'   \item{CC005}{CC005 mouse strain}
-#'   \item{CC042}{CC042 mouse strain}
-#'   \item{CC071}{CC071 mouse strain}
-#'   \item{Ifnar.KO.129}{Ifnar KO 129 mouse strain}
-#'   \item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain}
-#'   \item{Rvfs2.1}{Rvfs2-1 mouse strain}
-#'   \item{Rvfs2.2}{Rvfs2-2 mouse strain}
-#'   \item{Rvfs2.6}{Rvfs2-6 mouse strain}
-#'   \item{Rvfs2.7}{Rvfs2-7 mouse strain}
-#'   \item{marker}{name of the marker}
-#'   \item{chr}{chromosome}
-#'   \item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)}
-#'   \item{cM_cox}{localisation on chromosome in cM (from Cox et al.)}
-#'   \item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)}
-#'   \item{snp}{marker alleles}
-#'   \item{unique}{indicates if the marker maps uniquely on mm10}
-#'   \item{multi}{indicates if the marker maps more than one time on mm10}
-#'   \item{unmapped}{indicates if the marker does not map perfectly on mm10}
-#' }
-
-
-"ref_strains_mini"
diff --git a/data/ref_strains_mini.rda b/data/ref_strains_mini.rda
deleted file mode 100755
index e5673c4faec5cb1bc70625c468ebeba059709c02..0000000000000000000000000000000000000000
Binary files a/data/ref_strains_mini.rda and /dev/null differ
diff --git a/man/ref_strains_mini.Rd b/man/ref_strains_mini.Rd
deleted file mode 100755
index e9117d1075cc3823b7511e4b219a5656a5cf348a..0000000000000000000000000000000000000000
--- a/man/ref_strains_mini.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ref_strains_mini-data.R
-\docType{data}
-\name{ref_strains_mini}
-\alias{ref_strains_mini}
-\title{Data frame with miniMUGA genotyping of classical lab strains.}
-\format{
-A data frame with 11299 rows and 18 variables
-\describe{
-\item{CC001}{CC001 mouse strain}
-\item{CC005}{CC005 mouse strain}
-\item{CC042}{CC042 mouse strain}
-\item{CC071}{CC071 mouse strain}
-\item{Ifnar.KO.129}{Ifnar KO 129 mouse strain}
-\item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain}
-\item{Rvfs2.1}{Rvfs2-1 mouse strain}
-\item{Rvfs2.2}{Rvfs2-2 mouse strain}
-\item{Rvfs2.6}{Rvfs2-6 mouse strain}
-\item{Rvfs2.7}{Rvfs2-7 mouse strain}
-\item{marker}{name of the marker}
-\item{chr}{chromosome}
-\item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)}
-\item{cM_cox}{localisation on chromosome in cM (from Cox et al.)}
-\item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)}
-\item{snp}{marker alleles}
-\item{unique}{indicates if the marker maps uniquely on mm10}
-\item{multi}{indicates if the marker maps more than one time on mm10}
-\item{unmapped}{indicates if the marker does not map perfectly on mm10}
-}
-}
-\usage{
-ref_strains_mini
-}
-\description{
-A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur.
-}
-\keyword{datasets}
diff --git a/stuart_0.1.0.pdf b/stuart_0.1.0.pdf
index f9ea2268c3cc25004f91a18c5cf147e5cbdf9992..d491cf6b1adcae0e1263384dac12110650ad862c 100644
Binary files a/stuart_0.1.0.pdf and b/stuart_0.1.0.pdf differ
diff --git a/stuart_0.1.0.tar.gz b/stuart_0.1.0.tar.gz
index 16a4e742ef6458742380c88032957e328b217275..e1bbd31b1b0d8f51dd273855fe20ed9970f99e97 100644
Binary files a/stuart_0.1.0.tar.gz and b/stuart_0.1.0.tar.gz differ
diff --git a/vignettes/stuaRt.Rmd b/vignettes/stuaRt.Rmd
index 92709f484cb21eb508c53c98fe3f975f8e90ee70..de0ce5aceffd208490965bc9efd0d4b4ec8e78fe 100755
--- a/vignettes/stuaRt.Rmd
+++ b/vignettes/stuaRt.Rmd
@@ -17,11 +17,11 @@ knitr::opts_chunk$set(
 
 Marie Bourdon
 
-April 2021
+June 2021
 
 ## Goal
 
-stuart is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc.
+stuart is an R package that formats genotyping results. It was developed to analyse data from MUGA arrays (Neogen) for Rqtl analysis of backcrosses or F2 crosses, but it can also be used to analyse data from other laboratory animal strains genotyped with other arrays. It filters the markers of an array from a genetic point of view: markers are selected depending on the proportion of each genotype, the correspondence between the alleles of the F2 or N2 individuals and the alleles of the parental strains, etc.
 
 The examples shown here require the use of dplyr package.
 
@@ -34,17 +34,24 @@ library(stuart)
 
 ## Annotation files
 
+In order to map the markers on the genome of the individuals, you need to load a table with the positions of all markers in the array. The data frame must contain the following columns: `marker` with the marker names, `chr` with the chromosome of each marker, and a column with the position of the marker on the chromosome. For Rqtl analysis, you need to provide positions in cM. The data frame can contain any other columns that you find helpful.
+
 The developer of Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain some informations about the markers including the chromosome and position where the probe of the marker matchs on the genome, wether the marker maps uniquely or not, etc. These files also contains the genetic position of the markers calculated with two methods : "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more informations).
 
 We recommand to use these annotation files to reconstruct the file use for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R.
 
-Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuart package.
 
 ```{r annot}
 annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
 ```
 
 
+Here, we will present an example of the use of stuart with the results of an F2 cross genotyped with miniMUGA. Example genotype and phenotype data frames are available in the stuart package.
+
+The genotype data frame must contain a first column with marker names, a second column with sample IDs, a third column with the first allele and a fourth column with the second allele. This format corresponds to the MUGA results. If your data are organised differently, reorder the columns to match this layout, because the columns are identified by their position rather than by their name.
+
+We load the result of Neogen genotyping: `genos` (only useful columns with marker name, sample ID and alleles were kept) and the phenotype dataset produced by the lab: `phenos`.
+
 
 ```{r load}
 data(genos)
@@ -79,14 +86,14 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "Strains
 The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function.
 
 
-```{r tab_mark}
+```{r tab_mark,eval=F}
 data(stuart_tab)
 summary(stuart_tab)
 ```
 
 Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.
 
-```{r mark_match}
+```{r mark_match,eval=F}
 tab2 <- mark_match(stuart_tab,ref=strains)
 
 
@@ -97,28 +104,28 @@ Here the reference strains were genotyped with the same version of the chip as t
 
 Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.
 
-```{r mark_poly ex}
+```{r mark_poly ex,eval=F}
 tab2 <- mark_poly(tab2)
 head(tab2)
 ```
 
 The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.
 
-```{r mark_prop ex}
+```{r mark_prop ex,eval=F}
 tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
 head(tab2)
 ```
 
 Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
 
-```{r mark_allele}
+```{r mark_allele,eval=F}
 tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
 tab2 %>% arrange(desc(exclude_allele)) %>% head()
 ```
 
 Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.
 
-```{r mark_allele-strains}
+```{r mark_allele-strains,eval=F}
 strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2)
 ```
 
@@ -126,7 +133,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
 
 After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross. 
 
-```{r write_qtl}
+```{r write_qtl,eval=F}
 rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
 
 rqtl_file[1:10,1:7]