Commit 99dc483f authored by Marie Bourdon's avatar Marie Bourdon
Browse files

article figures

parent adc44da6
/home/marie/Documents/stuart_package/stuart/NAMESPACE="2ECA567D" /home/marie/Documents/stuart/stuart_package/stuart/vignettes/stuart.Rmd="EE74038B"
/home/marie/Documents/stuart_package/stuart/R/geno_strains.R="AF4CFAD2"
/home/marie/Documents/stuart_package/stuart/R/genos-data.R="1CBE4D7A"
/home/marie/Documents/stuart_package/stuart/R/mark_allele.R="32D90792"
/home/marie/Documents/stuart_package/stuart/R/mark_estmap.R="2E43F0E2"
/home/marie/Documents/stuart_package/stuart/R/mark_match.R="39634C04"
/home/marie/Documents/stuart_package/stuart/R/mark_poly.R="D19CB401"
/home/marie/Documents/stuart_package/stuart/R/mark_prop.R="31DD9C22"
/home/marie/Documents/stuart_package/stuart/R/phenos-data.R="753628A1"
/home/marie/Documents/stuart_package/stuart/R/stuart_cross-data.R="2A7132AD"
/home/marie/Documents/stuart_package/stuart/R/stuart_newmap-data.R="86A2EB27"
/home/marie/Documents/stuart_package/stuart/R/stuart_tab-data.R="D77DF427"
/home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220"
/home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1"
/home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2"
/mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="6CD0FCD2" /mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="6CD0FCD2"
/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="E9861925" /mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="E9861925"
No preview for this file type
...@@ -5,7 +5,7 @@ date: "July 2021" ...@@ -5,7 +5,7 @@ date: "July 2021"
output: html_document output: html_document
--- ---
## Goal and raw data # Goal and raw data
The goal of this script is to produce figure for the stuart package manuscript. The goal of this script is to produce figure for the stuart package manuscript.
...@@ -28,30 +28,30 @@ library(stuart) ...@@ -28,30 +28,30 @@ library(stuart)
source("files/QTL_plot.R") source("files/QTL_plot.R")
``` ```
## Data load and use of stuart functions # Data load and use of stuart functions
### genos dataframe ## genos dataframe
Data frame from stuart with genotypes of 176 F2 individuals and parental strains. Data frame from stuart with genotypes of 176 F2 individuals and parental strains.
```{r genos} ```{r genos}
data(genos) data(genos)
summary(genos) summary(genos)
``` ```
### phenos dataframe ## phenos dataframe
Data frame from stuart with phenotypes of 176 F2 individuals for a quantitative trait. Data frame from stuart with phenotypes of 176 F2 individuals for a quantitative trait.
```{r phenos} ```{r phenos}
data(phenos) data(phenos)
summary(phenos) summary(phenos)
``` ```
### annotation file ## annotation file
Annotation file from K.Broman: https://kbroman.org/MUGAarrays/mini_annotations.html Annotation file from K.Broman: https://kbroman.org/MUGAarrays/mini_annotations.html
```{r annot} ```{r annot}
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
summary(annot_mini) summary(annot_mini)
``` ```
### parental strains genotypes ## parental strains genotypes
2 genotypes tables will be used: 2 genotypes tables will be used:
...@@ -91,7 +91,7 @@ strns_ref <- strns_ref %>% select(name,parent1,parent2) %>% right_join(annot_min ...@@ -91,7 +91,7 @@ strns_ref <- strns_ref %>% select(name,parent1,parent2) %>% right_join(annot_min
summary(strns_ref) summary(strns_ref)
``` ```
### Use of stuart's functions ## Use of stuart's functions
```{r} ```{r}
data(stuart_tab) data(stuart_tab)
summary(stuart_tab) summary(stuart_tab)
...@@ -99,9 +99,304 @@ summary(stuart_tab) ...@@ -99,9 +99,304 @@ summary(stuart_tab)
tab2 <- mark_match(stuart_tab,ref=strns_ref) tab2 <- mark_match(stuart_tab,ref=strns_ref)
tab2 <- mark_poly(tab2) tab2 <- mark_poly(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1)) tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strns_ref,par1="parent1",par2="parent2") tab2 <- mark_allele(tab2,ref=strns_ref,cross="F2",par1="parent1",par2="parent2")
data(stuart_newmap) data(stuart_newmap)
tab2 <- mark_estmap(tab2,map=stuart_newmap,dist=100) tab2 <- mark_estmap(tab2,map=stuart_newmap,dist=100)
``` ```
# Graphs
## Grid mark_prop
### Graph missing genotypes
```{r graph_NA}
na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
geom_histogram() +
scale_y_log10() +
theme_bw() +
labs(title="Proportion of missing genotyped",
x="Proportion of NA",y="Number of markers") +
theme(
aspect.ratio=0.8,
plot.title = element_text(hjust = 0.4,face="bold",size=14))
na_plot
```
### Graph proportion of genotypes
```{r graph_prop}
prop_plot <- tab2 %>% filter(n_NA<88) %>% ggplot(aes(x=n_HM1/(n_HM1+n_HM2+n_HT),y=n_HM2/(n_HM1+n_HM2+n_HT),color=as.factor(exclude_prop))) +
geom_point() +
scale_color_manual(values=c("#66bd63","#b2182b")) +
#geom_text(aes(label=ifelse(exclude_prop=="1",SNP.Name,'')),hjust=0, vjust=0,size=2) +
labs(title="Exclusion of markers with mark_prop()",
x="Proportion of homozygous individuals AA",
y="Proportion of homozygous individuals BB",
color="Exclusion") +
theme_classic() +
theme(
aspect.ratio=0.8,
legend.position=c(0.8,0.8)) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
prop_plot
```
### Grid
Figure with graphs proportion of NA and exclusion depending on genotype proportions
```{r grid_na_prop}
grid <- plot_grid(na_plot,prop_plot,
ncol=1,
labels = c('A', 'B'),
label_size = 20,
byrow = TRUE
)
grid
ggsave("figures/naprop.pdf",grid,width=5,height=8)
rm(na_plot,prop_plot)
```
## Table alleles different between parental strains and F2s
```{r allele}
#investigation of the role of mark_allele function
#prove that some marker with non corresponding alleles between parents and F2
#keep only markers that are exlcuded with mark_allele
allele <- tab2%>%
filter(exclude_allele==1&exclude_poly==0&exclude_prop==0)
strains_allele <- strns_ref %>% filter(marker %in% allele$marker)
#join with strains genotypes to have parental strains
allele <- left_join(allele,strains_allele,by=c("marker"="marker"))
#most of markers excluded with mark_allele that were not excluded with other functions have N/H as genotype for parents
#keep only those with non missing/heterozygous genotypes
allele %<>% filter(parent1 != "N" & parent2 != "N")
allele %<>% select(marker,parent1,parent2,allele_1,allele_2,n_HM1,n_HM2,n_HT)
#keep only beggining of the table
allele <- allele[1:6,]
print(allele)
print(xtable::xtable(allele, type = "latex"), file = "tables/tab_alleles.tex",include.rownames=FALSE)
rm(allele,strains_allele)
```
## Graph number of markers kept after each function
```{r barplot}
none <- tab2 %>% nrow()
match <- tab2 %>% filter(exclude_match==0) %>% nrow()
poly <- tab2 %>% filter(exclude_match==0&exclude_poly==0) %>% nrow()
prop <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0) %>% nrow()
allele <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0&exclude_allele==0) %>% nrow()
barplot_df <- tibble(
fct = c("none","match","poly","prop","allele"),
markers = c(none, match, poly, prop, allele)
)
functions_plot <- barplot_df %>% ggplot(aes(x=markers,y=fct)) +
geom_bar(stat="identity",width=0.6) +
geom_text(aes(label=markers), hjust=1.3, color="white", size=3.5) +
scale_y_discrete(limits=c("allele", "prop", "poly","match","none")) +
theme(aspect.ratio=0.7) +
labs(title="Number of markers kept after each step",
x="Number of markers",
y="Function used") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
functions_plot
ggsave("figures/functions.pdf",functions_plot,width=5,height=3)
rm(none,allele,match,poly,prop,barplot_df)
```
## Graph before after
### Before: creation of Rqtl csv file
```{r cross_before}
# filter at minima: remove non polymorphic and NA>0.5
tab_before <- mark_poly(stuart_tab)
tab_before <- mark_prop(tab_before,cross="F2",homo=0,hetero=0)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab_before,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_before.csv")
```
### Before: newmap and permutation
These objects were produced on our cluster with the following script: /files/cluster/before_after.R
### Before: plot estimated map
```{r before_map}
# import cross
cross_before <- read.cross(format="csv",file="files/cluster/cross_before.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster/newmap_before.rda")
# plot
plotMap(cross_before,newmap_before,shift=TRUE)
plotmap_before <- ~plotMap(cross_before,newmap_before,shift=TRUE,main="Before stuart")
```
### Before: plot genome scan
```{r before_scan}
# load rda with perm
load("files/cluster/before_1000p.rda")
# load("files/before_1000p.rda")
# calculate thresholds
threshold_before <- summary(before_1000p,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
# scanone
cross_before <- calc.genoprob(cross_before, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
pheno_before <- scanone(cross=cross_before, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
summary(pheno_before)
# Plot
pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
title="QTL mapping",
x.label = element_blank(),
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_before_plot
```
### After: creation of Rqtl csv file
```{r cross_after}
# filter with stuart functions: use the good data for parental strains (strains df)
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 <- mark_poly(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strains,cross="F2",par1="parent1",par2="parent2")
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_after.csv")
```
### After: newmap
These objects were produced on our cluster with the following script: /files/cluster/before_after.R
### After: plot estimated map
```{r after_map}
# import cross
cross_after <- read.cross(format="csv",file="files/cluster/cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster/newmap_after.rda")
# plot
plotMap(cross_after,newmap_after,shift=TRUE)
plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart")
```
### Remove last problematic markers with mark_estmap
```{r after_estmap}
#filter with mark_estmap
tab2 <- mark_estmap(tab2,newmap_after)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster2/cross_after.csv")
```
### After: plot estimated map 2
```{r after_map2}
# import cross
cross_after <- read.cross(format="csv",file="files/cluster2/cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster2/newmap_after.rda")
# plot
plotMap(cross_after,newmap_after,shift=TRUE)
plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart")
```
### After: plot genome scan
```{r after_scan}
# load rda with perm
load("files/cluster2/after_1000p.rda")
# load("files/after_1000p.rda")
# calculate thresholds
threshold_after <- summary(after_1000p,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
# scanone
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
pheno_after <- scanone(cross=cross_after, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
summary(pheno_after)
# Plot
pheno_after_plot <- qtl_plot(pheno_after,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_after[1:3]),
ylab="LOD score",
title="QTL mapping",
x.label = element_blank(),
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_after_plot
```
### Grid
```{r grid fig1, fig.height = 7, fig.width = 13, fig.align = "center"}
grid <- plot_grid(as_grob(plotmap_before),as_grob(plotmap_after),pheno_before_plot,pheno_after_plot,ncol=2,labels=c("A","B","C","D"),label_size=20)
grid
ggsave("figures/before_after.pdf",grid,width=11,height = 7)
```
# Differences between real and reference genotypes for parental strains
```{r dif_number}
dif <- full_join(strains,strns_ref,by=c("marker","chr","cM_cox")) %>%
mutate(dif=case_when((!parent1.x%in%c("N","H") &
!parent1.y%in%c("N","H") &
parent1.x!=parent1.y) ~ 1,
(!parent2.x%in%c("N","H") &
!parent2.y%in%c("N","H") &
parent2.x!=parent2.y) ~ 1,
T~0))
dif %>% filter(dif==1) %>% count()
```
```{r dif_table}
table_dif <- dif %>% filter(dif==1) %>% select(marker,parent1_ref=parent1.y,parent1_geno=parent1.x,parent2_ref=parent2.y,parent2_geno=parent2.x) %>% head()
knitr::kable(table_dif)
```
\ No newline at end of file
This diff is collapsed.
##################################################################
########### before_after.R
########### R script for stuart article figures
##################################################################
## Goal and data
###########################
# The goal of this script is to calculate est.map and permutations of cross_before et cross_after
#
# It imports the csv files "cross_before.csv" and "cross_after.csv"
## Import packages
library(qtl)
##############################
## Before
##############################
## read cross
cross_before <- read.cross(format="csv",file="./cross_before.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_before <- calc.genoprob(cross_before, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
# newmap_before <- est.map(cross=cross_before,error.prob=0.01)
# save(newmap_before,file="./newmap_before.rda")
## calculate 1000p
before_1000p <- scanone(cross=cross_before,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(before_1000p,file="./before_1000p.rda")
##############################
## After
##############################
## read cross
cross_after <- read.cross(format="csv",file="./cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
newmap_after <- est.map(cross=cross_after,error.prob=0.01)
save(newmap_after,file="./newmap_after.rda")
## calculate 1000p
after_1000p <- scanone(cross=cross_after,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(after_1000p,file="./after_1000p.rda")
\ No newline at end of file
Warning messages:
1: In fixXgeno.f2(cross, alleles) :
--Omitting 1120 male heterozygote genotypes on the X chromosome.
2: In fixXgeno.f2(cross, alleles) :
--There appear to be some individuals of each cross direction, but "pgm" is not provided.
Check the X chr genotype data and include a "pgm" column in the phenotype data.
"pgm" was inferred (probably poorly).
3: In summary.cross(cross) :
Some markers at the same position on chr 1,2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X; use jittermap().
Warning message:
In matchchr(chr, names(x$geno)) : Chr Y not found
Warning messages:
1: In fixXgeno.f2(cross, alleles) :
--Omitting 236 male heterozygote genotypes on the X chromosome.
2: In fixXgeno.f2(cross, alleles) :
--There appear to be some individuals of each cross direction, but "pgm" is not provided.
Check the X chr genotype data and include a "pgm" column in the phenotype data.
"pgm" was inferred (probably poorly).
3: In summary.cross(cross) :
Some markers at the same position on chr 1,2,5,6,7,8,9,10,11,12,13,14,15,17,18,19,X; use jittermap().
Warning message:
In est.map(cross = cross_after, error.prob = 0.01) : Didn't converge!
Warning message:
In matchchr(chr, names(x$geno)) : Chr Y not found
--Read the following data:
176 individuals
3323 markers
4 phenotypes
--Cross type: f2
--Read the following data:
176 individuals
2238 markers
4 phenotypes
--Cross type: f2
#!/bin/sh
#SBATCH -J "before_after"
#SBATCH --qos=normal
#SBATCH --mem=1000
#SBATCH -o before_after.out -e before_after.err
#SBATCH --mail-type=END --mail-user=mbourdon@pasteur.fr
Rscript before_after.R || exit 1
exit 0
This diff is collapsed.
This diff is collapsed.
##################################################################
########### after.R
########### R script for stuart article figures
##################################################################
## Goal and data
###########################
# The goal of this script is to calculate est.map and permutations of cross_after
#
# It imports the csv file "cross_after.csv"
## Import packages
library(qtl)
##############################
## read cross
cross_after <- read.cross(format="csv",file="./cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
newmap_after <- est.map(cross=cross_after,error.prob=0.01)
save(newmap_after,file="./newmap_after.rda")
## calculate 1000p
after_1000p <- scanone(cross=cross_after,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(after_1000p,file="./after_1000p.rda")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment