Commit 93e5fbeb authored by mariefbourdon's avatar mariefbourdon
Browse files

data3-4

parent 6a644002
No preview for this file type
......@@ -247,11 +247,11 @@ pheno_before_plot
```
```{r before_scan}
```{r before_plot}
pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
chrs = "13",
chrs = "7",
size=0.6) +
theme(legend.position = "none",
strip.background = element_blank(),
......@@ -281,7 +281,6 @@ tab2 <- mark_na(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strains,cross="F2",par1="parent1",par2="parent2")
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_after.csv")
```
......@@ -454,3 +453,369 @@ write_csv(format_pheno,file="sup/tableS1.csv")
print(xtable::xtable(format_pheno, type = "latex"), file = "tables/tab_alleles.tex",include.rownames=FALSE)
```
## Narrow peaks
Investigation of high lod score peaks
### Chromosome 2
1 peak on 1 pseudomarker: c2.loc10, positioned between gUNC2731905 and gUNCHS004244.
Here are the infos on genotype counts for these markers:
```{r summary_geno_chr2}
# Genotype-count columns (marker through n_NA) of tab_before for the two
# markers flanking pseudomarker c2.loc10.
tab_before %>% filter(marker %in% c("gUNC2731905","gUNCHS004244")) %>% select(marker:n_NA)
```
For gUNC2731905, all individuals except 1 are homozygous so this marker should be removed. The proportions for gUNCHS004244 seem correct.
```{r parents_geno_chr2}
# Parental alleles at gUNCHS004244, printed from the reference genotypes
# (strns_ref) and from our own genotype data (strains) for side-by-side comparison.
strns_ref %>% filter(marker %in% c("gUNCHS004244"))
strains %>% filter(marker %in% c("gUNCHS004244"))
```
The alleles of parental strains seem correct too and are the same in the reference alleles data and in our genotypes.
Graph:
```{r geno_plot_chr2}
# Genotype dot plot for chromosome 2, restricted to the 10-20 cM window and
# to the individuals whose id falls in a random draw of 10 ids. Marker names
# are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["2"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(SNJ020344053:mUNC4608754,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 10, pos < 20)
shown_ids <- sample(phenotypes$id, 10)

geno_plot2 <- pgmap %>%
  filter(pos > 10, pos < 20, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 50))
  )
geno_plot2
```
However, there seems to be an excess of recombinants for gUNCHS004244. Let's look at the calculated map for this marker:
```{r}
# Re-estimated map positions for entries 11 to 16 of chromosome 2
# (the stretch around gUNCHS004244 discussed in the surrounding text).
newmap_before[["2"]][11:16]
```
There is indeed an increase in recombinants for gUNCHS004244, as the calculated distance to the previous marker is 4428.269-4277.043=151.226 cM and the calculated distance to the following marker is 4569.736-4428.269=141.467 cM.
### Chromosome 5
1 peak on 1 marker : mUNC050096588
Here are the infos on genotype counts for this marker:
```{r summary_geno_chr5}
# Genotype-count columns (marker through n_NA) of tab_before for the
# chromosome 5 peak marker.
tab_before %>% filter(marker %in% c("mUNC050096588")) %>% select(marker:n_NA)
```
All individuals are heterozygous, so this marker should be removed.
```{r}
# Re-estimated map positions for entries 18 to 23 of chromosome 5
# (the stretch around mUNC050096588 discussed in the surrounding text).
newmap_before[["5"]][18:23]
```
This leads to an enormous calculated distance to each adjacent marker: 1186.0871-184.5811 = 2187.5930-1186.0871 = 1001.506 cM to both the previous and the following marker.
Graph:
```{r geno_plot_chr5}
# Genotype dot plot for chromosome 5, restricted to the 20-30 cM window and
# to the individuals whose id falls in a random draw of 10 ids. Marker names
# are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["5"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(gUNCHS013469:SAC056009450,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 20, pos < 30)
shown_ids <- sample(phenotypes$id, 10)

test_plot <- pgmap %>%
  filter(pos > 20, pos < 30, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 50))
  )
test_plot
```
### Chromosome 7
2 peaks, one on 1 pseudomarker : c7.loc74, between UNC13823755 and S3J075374098; and one on 1 marker and 1 pseudomarker : c7.loc82 and gUNC13998623, c7.loc82 being between gUNC13979374 and gUNC13998623.
Here are the infos on genotype counts for these markers:
```{r summary_geno_chr7}
# Genotype-count columns (marker through n_NA) of tab_before for the four
# markers flanking the two chromosome 7 peaks (c7.loc74 and c7.loc82).
tab_before %>% filter(marker %in% c("UNC13823755", "S3J075374098","gUNC13979374","gUNC13998623")) %>% select(marker:n_NA)
```
There are no homozygous individuals for one allele for S3J075374098, gUNC13979374 and gUNC13998623 so these markers should be removed. Proportions seem correct for UNC13823755.
```{r}
# Re-estimated map positions for entries 206 to 212 of chromosome 7
# (the stretch whose inter-marker distances are discussed just below).
newmap_before[["7"]][206:212]
```
Indeed, there is no increase in the calculated distance to the markers before UNC13823755, but there are 29066.20-28864.88=201.32 cM between UNC13823755 and S3J075374098 and 29116.71-29076.90=39.81 cM between gUNC13979374 and the previous marker.
Graph:
```{r geno_plot_chr7}
# Genotype dot plot for chromosome 7, restricted to the 65-90 cM window and
# to the individuals whose id falls in a random draw of 10 ids. Marker names
# are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["7"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(SBJ070191318:gUNCHS021959,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 65, pos < 90)
shown_ids <- sample(phenotypes$id, 10)

test_plot <- pgmap %>%
  filter(pos > 65, pos < 90, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1.5,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 50))
  )
test_plot
```
### Chromosome 8
1 peak, on 1 marker: mbackupJAX00158395
Here are the infos on genotype counts for this marker:
```{r summary_geno_chr8}
# Genotype-count columns (marker through n_NA) of tab_before for the
# chromosome 8 peak marker.
tab_before %>% filter(marker %in% c("mbackupJAX00158395")) %>% select(marker:n_NA)
```
All individuals are homozygous except one so this marker should be removed.
```{r}
# Re-estimated map positions for the first five entries of chromosome 8
# (mbackupJAX00158395 and the markers that follow it, per the text below).
newmap_before[["8"]][1:5]
```
Indeed there are 189.3831-2.1670=187.2161 cM between mbackupJAX00158395 and the following marker.
Graph:
```{r geno_plot_chr8}
# Genotype dot plot for chromosome 8, restricted to the 0-10 cM window and
# to the individuals whose id falls in a random draw of 10 ids. Marker names
# are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["8"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(mbackupJAX00158395:S6J085066393,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 0, pos < 10)
shown_ids <- sample(phenotypes$id, 10)

test_plot <- pgmap %>%
  filter(pos > 0, pos < 10, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1.5,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 50))
  )
test_plot
```
### Chromosome 10
1 peak, on 1 marker: S6J102311553
Here are the infos on genotype counts for this marker:
```{r summary_geno_chr10}
# Genotype-count columns (marker through n_NA) of tab_before for the
# chromosome 10 peak marker.
tab_before %>% filter(marker %in% c("S6J102311553")) %>% select(marker:n_NA)
```
The genotypic proportions for this marker seem correct.
```{r parents_geno_chr10}
# Parental alleles at S6J102311553, printed from the reference genotypes
# (strns_ref) and from our own genotype data (strains) for side-by-side comparison.
strns_ref %>% filter(marker %in% c("S6J102311553"))
strains %>% filter(marker %in% c("S6J102311553"))
```
The alleles of parental strains seem correct too and are the same in the reference alleles data and in our genotypes.
Graph:
```{r geno_plot_chr10}
# Genotype dot plot for chromosome 10, restricted to the 22-36.5 cM window
# and to the individuals whose id falls in a random draw of 10 ids. Marker
# names are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["10"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(gICR258:gUNC18984159,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 22, pos < 36.5)
shown_ids <- sample(phenotypes$id, 10)

test_plot <- pgmap %>%
  filter(pos > 22, pos < 36.5, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1.5,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 50))
  )
test_plot
```
S6J102311553 seems to be surrounded by markers with an increased proportion of homozygous individuals.
```{r}
# Re-estimated map positions for entries 80 to 110 of chromosome 10
# (the region around S6J102311553 discussed in the surrounding text).
newmap_before[["10"]][80:110]
```
Indeed, there is no increased distance between S6J102311553 and its adjacent markers; but S6J102311553 lies in the middle of a region bounded by markers with enormous calculated distances to their neighbours: S2T101968115 (19084.57-18083.06=1001.51 cM from the previous marker) and S2T102642258 (20373.93-19372.42=1001.51 cM from the following marker).
### Chromosome 13
1 peak, one on 1 marker : SAC132487883
Here are the infos on genotype counts for this marker:
```{r summary_geno_chr13}
# Genotype-count columns (marker through n_NA) of tab_before for the
# chromosome 13 peak marker.
tab_before %>% filter(marker %in% c("SAC132487883")) %>% select(marker:n_NA)
```
All individuals are heterozygous at this locus, so this marker should be removed.
```{r}
# Re-estimated map positions for entries 48 to 50 of chromosome 13
# (SAC132487883 and its two neighbours, per the distances quoted below).
newmap_before[["13"]][48:50]
```
This leads to enormous calculated distance with adjacent markers: 9521.842-8520.336 = 8520.336-7518.830 = 1001.506 cM with the previous and the following marker.
Graph:
```{r geno_plot_chr13}
# Genotype dot plot for chromosome 13, restricted to the 25-35 cM window and
# to the individuals whose id falls in a random draw of 10 ids. Marker names
# are written vertically below the panel (drawn outside it, clip = "off").
phenotypes <- cross_before[["pheno"]]
chr_geno <- cross_before[["geno"]][["13"]]
map <- tibble(marker = names(chr_geno[["map"]]), pos = chr_geno[["map"]])
genotypes <- as_tibble(chr_geno[["data"]])

# One row per individual x marker, then attach each marker's map position.
phenogeno <- cbind(phenotypes, genotypes) %>%
  pivot_longer(gUNCHS034900:S3H134792711,
               names_to = "marker", values_to = "genotype")
pgmap <- full_join(phenogeno, map, by = "marker")

# Markers of the window (used for the labels) and the ids to display.
map_window <- filter(map, pos > 25, pos < 35)
shown_ids <- sample(phenotypes$id, 10)

test_plot <- pgmap %>%
  filter(pos > 25, pos < 35, id %in% shown_ids) %>%
  ggplot(aes(x = pos, y = as.factor(id))) +
  geom_point(aes(color = as.factor(genotype))) +
  coord_cartesian(ylim = c(1, 10), expand = TRUE, clip = "off") +
  annotate(
    geom = "text",
    x = pull(map_window, pos),
    y = -1.7,
    label = pull(map_window, marker),
    size = 3,
    angle = 90
  ) +
  labs(x = "Position (cM)", y = "Individual", color = "Genotype") +
  theme_bw() +
  theme(
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    # extra top margin on the axis title leaves room for the marker labels
    axis.title.x = element_text(margin = margin(t = 55))
  )
test_plot
```
## Fold change distance between adjacent markers
```{r fold_change}
# Compare the re-estimated genetic maps (before and after marker curation)
# with the known cM_cox positions taken from annot_mini.
#
# For every marker row the comparison table holds the known and estimated
# position of the previous and of the following row, plus the ratio
# estimated / known for those two neighbours (dif_prev, dif_fol).
#
# NOTE(review): dif_prev / dif_fol are ratios of neighbouring *positions*,
# not of the distances between adjacent markers, and the previous/following
# row is taken across chromosome boundaries. Confirm this is the
# "fold change distance" the section title announces.

# Chromosomes, in the order in which their maps are concatenated.
map_chrs <- c(as.character(1:19), "X")

# Flatten a per-chromosome map (named position vectors) into one tibble
# with columns marker (vector names) and cM_calc (estimated positions).
flatten_map <- function(map, chrs = map_chrs) {
  pos <- do.call(c, lapply(chrs, function(chr) map[[chr]]))
  tibble(marker = names(pos), cM_calc = pos)
}

# Join a flattened map with annot_mini and add neighbour positions/ratios.
# The shifts are length-independent (the previous version hard-coded
# 11124/11125 rows and failed as soon as the marker set changed).
compare_with_cox <- function(map_tbl) {
  # No `by`: natural join on the shared column names, as before.
  joined <- full_join(map_tbl, annot_mini) %>%
    select(marker, chr, cM_calc, cM_cox)
  shift_prev <- function(x) c(NA, head(x, -1)) # value of the previous row
  shift_fol <- function(x) c(tail(x, -1), NA)  # value of the following row
  tibble(
    marker = joined$marker,
    chr = joined$chr,
    cM_cox = joined$cM_cox,
    cox_prev = shift_prev(joined$cM_cox),
    cox_fol = shift_fol(joined$cM_cox),
    cM_calc = joined$cM_calc,
    calc_prev = shift_prev(joined$cM_calc),
    calc_fol = shift_fol(joined$cM_calc)
  ) %>%
    mutate(
      dif_prev = calc_prev / cox_prev,
      dif_fol = calc_fol / cox_fol
    )
}

#before
tibble_newmap_before <- flatten_map(newmap_before)
compar_pos_before <- compare_with_cox(tibble_newmap_before)

#after
tibble_newmap_after <- flatten_map(newmap_after2)
compar_pos_after <- compare_with_cox(tibble_newmap_after)

# Not used in this chunk; kept with their previous final values (after map)
# in case a later chunk reads them.
know <- compar_pos_after$cM_cox
calc <- compar_pos_after$cM_calc

# Mean ratio over all rows, before vs after curation.
mean(compar_pos_before$dif_prev, na.rm = TRUE)
mean(compar_pos_after$dif_prev, na.rm = TRUE)
```
This diff is collapsed.
This diff is collapsed.
......@@ -25,7 +25,7 @@ parents <- read_csv("parents_data2.csv",show_col_types = FALSE) #genotypes of pa
strns_ref <- read_csv("ref_geno_data2.csv",show_col_types = FALSE) #reference genotypes of parental strains
```
```{r}
```{r,cache=TRUE}
tab <- tab_mark(genos,annot_mini,"cM_cox")
```
......@@ -129,7 +129,7 @@ cross_after <- read.cross(format="csv",file="cluster/cross_after.csv",
load("cluster/newmap_after.rda")
plotMap(cross_after,newmap_after,shift=TRUE)
tab2 <- mark_estmap(tab=tab2,map=newmap_after,annot=annot_mini)
tab2 <- mark_estmap(tab=tab2,map=newmap_after,annot=annot_mini) #0 marker removed
```
......
This diff is collapsed.
base
methods
datasets
utils
grDevices
graphics
stats
dplyr
tidyr
stuart
magrittr
readr
stringr
qtl
ggplot2
ggrepel
grid
No preview for this file type
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment