diff --git a/article/.RData b/article/.RData
index 439a2e1d9b2f72071735afc73410e1969b8bb902..a664efc58c763b9f841bd59d020adcf0a278cc6a 100644
Binary files a/article/.RData and b/article/.RData differ
diff --git a/article/article_figures.Rmd b/article/article_figures.Rmd
index 2ad0c9169d548c929dc995123fc326a279b0f1a4..0d6d24e6550143d29602c578ec73c9332f1e6b24 100644
--- a/article/article_figures.Rmd
+++ b/article/article_figures.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "article_figures.Rmd"
 author: "Marie Bourdon"
-date: "July 2021"
+date: "June 2022"
 output: html_document
 ---
 
@@ -246,6 +246,27 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05"
 pheno_before_plot
 ```
 
+```{r before_ann}
+chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X")
+ann_dat_text<-data.frame(
+    chr=factor(chrs,
+               levels=chrs),
+    lod=c(NA,7.5,NA,NA,15,NA,10.5,10.5,NA,5.5,NA,NA,18.5,NA,NA,NA,NA,NA,NA,NA),
+    label=c(NA,"1",NA,NA,"2",NA,"3","4",NA,"5",NA,NA,"6",NA,NA,NA,NA,NA,NA,NA),
+    x=c(NA,17,NA,NA,27,NA,70,10,NA,30,NA,NA,33,NA,NA,NA,NA,NA,NA,NA)
+)
+
+pheno_before_plot +  geom_text(
+    # the new dataframe for annotating text
+    data = ann_dat_text,
+    mapping = aes(x = x,
+                  y = lod,
+                  label = label,
+                  color="blue")
+)
+```
+
+
 
 ```{r before_plot}
 pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
@@ -447,18 +468,29 @@ functions_plot
 rm(none,allele,match,poly,prop,barplot_df)
 ```
 
-# Pheno data format
+## Pheno data format
+
 ```{r pheno}
 format_pheno <- phenos[1:6,]
 write_csv(format_pheno,file="sup/tableS1.csv")
 print(xtable::xtable(format_pheno, type = "latex"), file = "tables/tab_alleles.tex",include.rownames=FALSE)
 ```
 
+## est rf
+
+```{r}
+source("files/find_linked_markers.R")
+estrf_matrix_after <- pull.rf(cross_after, what=c("lod"))
+
+find_linked_markers(estrf_matrix_after,mark="S6J011498219",annot=annot_mini)
+```
+
+
 ## Narrow peaks
 
 Investigation of high lod score peaks 
 
-### Chromosome 2
+### Peak 1
 
 1 peak on 1 pseudomarker : c2.loc10, postionned between gUNC2731905 and gUNCHS004244.
 
@@ -471,14 +503,6 @@ tab_before %>% filter(marker %in% c("gUNC2731905","gUNCHS004244")) %>% select(ma
 For gUNC2731905, all individuals except 1 are homozygous so this marker should be removed. The proportions for gUNCHS004244 seem correct.
 
 
-```{r parents_geno_chr2}
-strns_ref %>% filter(marker %in% c("gUNCHS004244"))
-strains %>% filter(marker %in% c("gUNCHS004244"))
-```
-
-The alleles of parental strains seem correct too and are the same in the reference alleles data and in our genotypes.
-
-
 Graph:
 
 ```{r geno_plot_chr2}
@@ -507,15 +531,16 @@ geno_plot2 <- pgmap %>% filter(pos > 10 & pos < 20) %>%
 geno_plot2
 ```
 
-However, it seems to be an excess of recombinants for gUNCHS004244. Let's look at the calculated map for this marker :
+This region contains many markers with non Mendelian proportions (many have an excess of homozygous). This leads to the creation of a pseudomarker between  gUNC2731905 and gUNCHS004244 with incorrect genotypic probabilities which leads to a spurious peak.
 
 
 ```{r}
-newmap_before[["2"]][11:16]
+newmap_before[["2"]][8:20]
 ```
-There is indeed an increase recombinants for gUNCHS004244 as the calculated distance with the previous marker is 4428.269-4277.043=151.226 cM and the calculated distance with the following marker is 4569.736-4428.269=141.467 cM.
 
-### Chromosome 5
+Recombination fractions between adjacent markers in this regions are indeed high.
+
+### Peak 2
 
 1 peak on 1 marker : mUNC050096588
 
@@ -525,7 +550,7 @@ Here are the infos on genotype counts for these markers:
 tab_before %>% filter(marker %in% c("mUNC050096588")) %>% select(marker:n_NA)
 ```
 
-All individuals heterozygous so this marker should be removed
+All individuals heterozygous so this marker should be removed.
 
 ```{r}
 newmap_before[["5"]][18:23]
@@ -561,7 +586,7 @@ test_plot <- pgmap %>% filter(pos > 20 & pos < 30) %>%
 test_plot 
 ```
 
-### Chromosome 7
+### Peak 3
 
 2 peaks, one on 1 pseudomarker : c7.loc74, between UNC13823755 and S3J075374098; and one on 1 marker and 1 pseudomarker : c7.loc82 and gUNC13998623, c7.loc82 being between gUNC13979374 and gUNC13998623.
 
@@ -607,7 +632,7 @@ test_plot <- pgmap %>% filter(pos > 65 & pos < 90) %>%
 test_plot 
 ```
 
-### Chromosome 8
+### Peak 4
 
 1 peaks, one on 1 marker : mbackupJAX00158395
 
@@ -654,7 +679,7 @@ test_plot
 ```
 
 
-### Chromosome 10
+### Peak 5
 
 1 peaks, one on 1 marker : S6J102311553
 
@@ -710,7 +735,7 @@ newmap_before[["10"]][80:110]
 
 Indeed, there is not an increased distance between S6J102311553 and the adjacent markers; but S6J102311553 is in the middle of a region with enormous calculated with its adjacent markers, between S2T101968115 (showing 19084.57-18083.06=1001.51 cM with the previous marker) and S2T102642258 (showing 20373.93-19372.42=1001.51 cM with the following marker)
 
-### Chromosome 13
+### Peak 6
 
 1 peak, one on 1 marker : SAC132487883
 
@@ -817,6 +842,8 @@ compar_pos_after <- tibble(marker=compar_pos_after$marker,
          dif_fol=calc_fol/cox_fol)
 
 mean(compar_pos_before$dif_prev,na.rm=TRUE)
+sd(compar_pos_before$dif_prev,na.rm=TRUE)
 mean(compar_pos_after$dif_prev,na.rm=TRUE)
+sd(compar_pos_after$dif_prev,na.rm=TRUE)
 ```
 
diff --git a/article/data2/.RData b/article/data2/.RData
index cc719da40b4c5bbfd84e1b13b3e0c48bdc0be988..a927b648399c261a1f7ac6b98c65a591677efd80 100644
Binary files a/article/data2/.RData and b/article/data2/.RData differ
diff --git a/article/data2/data2.Rmd b/article/data2/data2.Rmd
index 5befcc1b728413b4f9a2643a3a5ac3d877c659c6..091637ce0bc696a0c58bd5047d58457e51e10383 100644
--- a/article/data2/data2.Rmd
+++ b/article/data2/data2.Rmd
@@ -83,6 +83,27 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05"
 pheno_before_plot
 ```
 
+```{r before_ann}
+chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X")
+ann_dat_text<-data.frame(
+    chr=factor(chrs,
+               levels=chrs),
+    lod=c(rep(NA,10),8,rep(NA,9)),
+    label=c(rep(NA,10),7,rep(NA,9)),
+    x=c(rep(NA,10),22,rep(NA,9))
+)
+
+pheno_before_plot +  geom_text(
+    # the new dataframe for annotating text
+    data = ann_dat_text,
+    mapping = aes(x = x,
+                  y = lod,
+                  label = label,
+                  color="blue")
+)
+```
+
+
 ## create file for parental strains genotyped
 
 ```{r}
@@ -187,4 +208,50 @@ functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) +
 
 functions_plot
 rm(none,allele,match,poly,prop,functions_df)
-```
\ No newline at end of file
+```
+
+## Narrow peaks
+
+Investigation of high lod score peaks 
+
+### Peak 7
+
+1 peak on 1 pseudomarker : c11.loc18, postionned between SNT111392585 and mJAX00308021.
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_chr2}
+tab_before %>% filter(marker %in% c("SNT111392585","mJAX00308021")) %>% select(marker:n_NA)
+```
+
+For SNT111392585, all individuals except 1 are homozygous so this marker should be removed. The proportions for mJAX00308021 seem correct.
+
+Graph:
+
+```{r geno_plot_chr2}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["11"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["11"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(mbackupUNC110000218:gUNC20538837,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot2 <- pgmap %>% filter(pos > 15 & pos < 25) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 15 & pos < 25) %>% pull(pos),
+           label = map %>% filter(pos > 15 & pos < 25) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot2
+```
+
+The two markers before the pseudomarker have an excess of homozygous.
\ No newline at end of file
diff --git a/article/data4/.RData b/article/data4/.RData
index bd38fc1fce23514a1b3e0c337f5bca8fca3be212..5fbfda8e7679ec0598f0472bf1461ab10406d80d 100644
Binary files a/article/data4/.RData and b/article/data4/.RData differ
diff --git a/article/data4/data4.Rmd b/article/data4/data4.Rmd
index 989a3d4378e121b1f3602b7b814d27fa7d952839..f3715c8b837d0b658b463e22e9326c9aa6f19c50 100644
--- a/article/data4/data4.Rmd
+++ b/article/data4/data4.Rmd
@@ -82,6 +82,26 @@ pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05"
 pheno_before_plot
 ```
 
+```{r before_ann}
+chrs <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X")
+ann_dat_text<-data.frame(
+    chr=factor(chrs,
+               levels=chrs),
+    lod=c(9,NA,NA,NA,11,NA,NA,NA,NA,13,NA,NA,8.5,NA,NA,NA,NA,7,NA,NA),
+    label=c(8,NA,NA,NA,9,NA,NA,NA,NA,10,NA,NA,11,NA,NA,NA,NA,12,NA,NA),
+    x=c(45,NA,NA,NA,20,NA,NA,NA,NA,27,NA,NA,30,NA,NA,NA,NA,35,NA,NA)
+)
+
+pheno_before_plot +  geom_text(
+    # the new dataframe for annotating text
+    data = ann_dat_text,
+    mapping = aes(x = x,
+                  y = lod,
+                  label = label,
+                  color="blue")
+)
+```
+
 ```{r}
 #our genotypes
 
@@ -200,4 +220,215 @@ functions_plot <- functions_df %>% ggplot(aes(x=markers,y=fct)) +
 
 functions_plot
 rm(none,allele,match,poly,prop)
-```
\ No newline at end of file
+```
+
+
+## Narrow peaks
+
+Investigation of high lod score peaks 
+
+### Peak 8
+
+1 peak on 1 pseudomarker : c1.loc42, postionned between gUNC1177319 and S6J013976867.
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_peak8}
+tab_before %>% filter(marker %in% c("gUNC1177319", "S6J013976867")) %>% select(marker:n_NA)
+```
+
+
+For gUNC1177319, all individuals heterozygous so this marker should be removed. The proportions for S6J013976867 seem correct.
+
+Graph:
+
+```{r geno_plot_peak8}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["1"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["1"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(SAH010136363:gUNC2460751,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot8 <- pgmap %>% filter(pos > 42 & pos < 52) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 42 & pos < 52) %>% pull(pos),
+           label = map %>% filter(pos > 42 & pos < 52) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot8
+```
+
+### Peak 9
+
+1 peak on 1 pseudomarker : c5.loc16, in a region with very few markers, postionned between S6J050685107 and mUNC050096588.
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_peak9}
+tab_before %>% filter(marker %in% c("S6J050685107","mUNC050096588")) %>% select(marker:n_NA)
+```
+
+
+For mUNC050096588, all individuals heterozygous so this marker should be removed. The proportions for S6J050685107 seem correct.
+
+Graph:
+
+```{r geno_plot_peak9}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["5"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["5"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(mUNC050013072:gUNC10448854,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot9 <- pgmap %>% filter(pos > 1 & pos < 30) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 1 & pos < 30) %>% pull(pos),
+           label = map %>% filter(pos > 1 & pos < 30) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot9
+```
+
+### Peak 10
+
+1 peak on 1 pseudomarker : c10.loc30, in a region with very few markers, postionned between SAH102097335 and S2C102505843.
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_peak10}
+tab_before %>% filter(marker %in% c("SAH102097335","S2C102505843")) %>% select(marker:n_NA)
+```
+
+
+For S2C102505843, all individuals except 1 are homozygous so this marker should be removed. The proportions for SAH102097335 seem correct.
+
+Graph:
+
+```{r geno_plot_peak10}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["10"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["10"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(gICR258:S6J105101847,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot10 <- pgmap %>% filter(pos > 25 & pos < 35) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 25 & pos < 35) %>% pull(pos),
+           label = map %>% filter(pos > 25 & pos < 35) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot10
+```
+
+
+### Peak 11
+
+1 peak on 1 pseudomarker : c13.loc28, postionned between S6J132182752 and SAC132487883.
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_peak11}
+tab_before %>% filter(marker %in% c("S6J132182752","SAC132487883")) %>% select(marker:n_NA)
+```
+
+
+For SAC132487883, all individuals are heterozygous so this marker should be removed. The proportions for S6J132182752 seem correct.
+
+Graph:
+
+```{r geno_plot_peak11}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["13"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["13"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(gB6_rs13481676:SFH134765844,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot11 <- pgmap %>% filter(pos > 20 & pos < 35) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 20 & pos < 35) %>% pull(pos),
+           label = map %>% filter(pos > 20 & pos < 35) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot11
+```
+
+### Peak 12
+
+1 peak on 1 marker : S3C182557441
+
+Here are the infos on genotype counts for these markers:
+
+```{r summary_geno_peak12}
+tab_before %>% filter(marker %in% c("S3C182557441")) %>% select(marker:n_NA)
+```
+
+
+For this marker, all individuals except one are homozygous so this marker should be removed.
+
+Graph:
+
+```{r geno_plot_peak12}
+phenotypes <- cross_before[["pheno"]]
+map <- cross_before[["geno"]][["18"]][["map"]] 
+map <- tibble(marker=names(map),pos=map)
+genotypes <- cross_before[["geno"]][["18"]][["data"]]
+genotypes <- as_tibble(genotypes)
+phenogeno <- cbind(phenotypes,genotypes)
+phenogeno %<>% pivot_longer(mJAX00450679:gUNC29817480,names_to="marker",values_to="genotype")
+pgmap <- full_join(phenogeno,map,by="marker")
+
+geno_plot12 <- pgmap %>% filter(pos > 32 & pos < 42) %>%
+  filter(id %in% sample(phenotypes$id,10)) %>%
+  ggplot(aes(x=pos,y=as.factor(id))) +
+  geom_point(aes(color=as.factor(genotype))) +
+  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
+  annotate(geom="text",y=-1,size=3,
+           x = map %>% filter(pos > 32 & pos < 42) %>% pull(pos),
+           label = map %>% filter(pos > 32 & pos < 42) %>% pull(marker),
+           angle=90) +
+  labs(x="Position (cM)",y="Individual",color="Genotype") +
+  theme_bw() +
+  theme(plot.margin = unit(c(1, 1, 1, 1), "lines"),
+        axis.title.x = element_text(margin = margin(t = 50)))
+geno_plot12
+```
diff --git a/article/files/find_linked_markers.R b/article/files/find_linked_markers.R
new file mode 100644
index 0000000000000000000000000000000000000000..7c8b6b75782648edf53bb0b2baf1c0e4bd1b5016
--- /dev/null
+++ b/article/files/find_linked_markers.R
@@ -0,0 +1,13 @@
+find_linked_markers <- function(rf,annot,mark,maxit=10000,tol=1e-6,lod=3){
+  #calculate matrix with estimated RF
+
+  names <- rownames(rf)  
+  estrf_df <- as_tibble(rf)
+  estrf_df <- tibble(marker=names,
+                     estrf_df)
+  
+  
+  estrf_df %<>% select(marker,all_of(mark))
+  estrf_df <- full_join(estrf_df,annot,by=c("marker"="marker"))
+  estrf_df %>% filter(!!sym(mark)>3)
+}
\ No newline at end of file