Commit 99dc483f authored by Marie Bourdon's avatar Marie Bourdon
Browse files

article figures

parent adc44da6
/home/marie/Documents/stuart_package/stuart/NAMESPACE="2ECA567D"
/home/marie/Documents/stuart_package/stuart/R/geno_strains.R="AF4CFAD2"
/home/marie/Documents/stuart_package/stuart/R/genos-data.R="1CBE4D7A"
/home/marie/Documents/stuart_package/stuart/R/mark_allele.R="32D90792"
/home/marie/Documents/stuart_package/stuart/R/mark_estmap.R="2E43F0E2"
/home/marie/Documents/stuart_package/stuart/R/mark_match.R="39634C04"
/home/marie/Documents/stuart_package/stuart/R/mark_poly.R="D19CB401"
/home/marie/Documents/stuart_package/stuart/R/mark_prop.R="31DD9C22"
/home/marie/Documents/stuart_package/stuart/R/phenos-data.R="753628A1"
/home/marie/Documents/stuart_package/stuart/R/stuart_cross-data.R="2A7132AD"
/home/marie/Documents/stuart_package/stuart/R/stuart_newmap-data.R="86A2EB27"
/home/marie/Documents/stuart_package/stuart/R/stuart_tab-data.R="D77DF427"
/home/marie/Documents/stuart_package/stuart/R/tab_mark.R="4023F220"
/home/marie/Documents/stuart_package/stuart/R/write_rqtl.R="DA3DDAC1"
/home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="568884A2"
/home/marie/Documents/stuart/stuart_package/stuart/vignettes/stuart.Rmd="EE74038B"
/mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="6CD0FCD2"
/mnt/zeus/zeus_mouselab/marie/stuart_estmap/stuart_estmap.R="E9861925"
No preview for this file type
......@@ -5,7 +5,7 @@ date: "July 2021"
output: html_document
---
## Goal and raw data
# Goal and raw data
The goal of this script is to produce figure for the stuart package manuscript.
......@@ -28,30 +28,30 @@ library(stuart)
source("files/QTL_plot.R")
```
## Data load and use of stuart functions
# Data load and use of stuart functions
### genos dataframe
## genos dataframe
Data frame from stuart with genotypes of 176 F2 individuals and parental strains.
```{r genos}
data(genos)
summary(genos)
```
### phenos dataframe
## phenos dataframe
Data frame from stuart with phenotypes of 176 F2 individuals for a quantitative trait.
```{r phenos}
data(phenos)
summary(phenos)
```
### annotation file
## annotation file
Annotation file from K.Broman: https://kbroman.org/MUGAarrays/mini_annotations.html
```{r annot}
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
summary(annot_mini)
```
### parental strains genotypes
## parental strains genotypes
2 genotypes tables will be used:
......@@ -91,7 +91,7 @@ strns_ref <- strns_ref %>% select(name,parent1,parent2) %>% right_join(annot_min
summary(strns_ref)
```
### Use of stuart's functions
## Use of stuart's functions
```{r}
data(stuart_tab)
summary(stuart_tab)
......@@ -99,9 +99,304 @@ summary(stuart_tab)
tab2 <- mark_match(stuart_tab,ref=strns_ref)
tab2 <- mark_poly(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strns_ref,par1="parent1",par2="parent2")
tab2 <- mark_allele(tab2,ref=strns_ref,cross="F2",par1="parent1",par2="parent2")
data(stuart_newmap)
tab2 <- mark_estmap(tab2,map=stuart_newmap,dist=100)
```
# Graphs
## Grid mark_prop
### Graph missing genotypes
```{r graph_NA}
na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
geom_histogram() +
scale_y_log10() +
theme_bw() +
labs(title="Proportion of missing genotyped",
x="Proportion of NA",y="Number of markers") +
theme(
aspect.ratio=0.8,
plot.title = element_text(hjust = 0.4,face="bold",size=14))
na_plot
```
### Graph proportion of genotypes
```{r graph_prop}
prop_plot <- tab2 %>% filter(n_NA<88) %>% ggplot(aes(x=n_HM1/(n_HM1+n_HM2+n_HT),y=n_HM2/(n_HM1+n_HM2+n_HT),color=as.factor(exclude_prop))) +
geom_point() +
scale_color_manual(values=c("#66bd63","#b2182b")) +
#geom_text(aes(label=ifelse(exclude_prop=="1",SNP.Name,'')),hjust=0, vjust=0,size=2) +
labs(title="Exclusion of markers with mark_prop()",
x="Proportion of homozygous individuals AA",
y="Proportion of homozygous individuals BB",
color="Exclusion") +
theme_classic() +
theme(
aspect.ratio=0.8,
legend.position=c(0.8,0.8)) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
prop_plot
```
### Grid
Figure with graphs proportion of NA and exclusion depending on genotype proportions
```{r grid_na_prop}
grid <- plot_grid(na_plot,prop_plot,
ncol=1,
labels = c('A', 'B'),
label_size = 20,
byrow = TRUE
)
grid
ggsave("figures/naprop.pdf",grid,width=5,height=8)
rm(na_plot,prop_plot)
```
## Table alleles different between parental strains and F2s
```{r allele}
#investigation of the role of mark_allele function
#prove that some marker with non corresponding alleles between parents and F2
#keep only markers that are exlcuded with mark_allele
allele <- tab2%>%
filter(exclude_allele==1&exclude_poly==0&exclude_prop==0)
strains_allele <- strns_ref %>% filter(marker %in% allele$marker)
#join with strains genotypes to have parental strains
allele <- left_join(allele,strains_allele,by=c("marker"="marker"))
#most of markers excluded with mark_allele that were not excluded with other functions have N/H as genotype for parents
#keep only those with non missing/heterozygous genotypes
allele %<>% filter(parent1 != "N" & parent2 != "N")
allele %<>% select(marker,parent1,parent2,allele_1,allele_2,n_HM1,n_HM2,n_HT)
#keep only beggining of the table
allele <- allele[1:6,]
print(allele)
print(xtable::xtable(allele, type = "latex"), file = "tables/tab_alleles.tex",include.rownames=FALSE)
rm(allele,strains_allele)
```
## Graph number of markers kept after each function
```{r barplot}
none <- tab2 %>% nrow()
match <- tab2 %>% filter(exclude_match==0) %>% nrow()
poly <- tab2 %>% filter(exclude_match==0&exclude_poly==0) %>% nrow()
prop <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0) %>% nrow()
allele <- tab2 %>% filter(exclude_match==0&exclude_poly==0&exclude_prop==0&exclude_allele==0) %>% nrow()
barplot_df <- tibble(
fct = c("none","match","poly","prop","allele"),
markers = c(none, match, poly, prop, allele)
)
functions_plot <- barplot_df %>% ggplot(aes(x=markers,y=fct)) +
geom_bar(stat="identity",width=0.6) +
geom_text(aes(label=markers), hjust=1.3, color="white", size=3.5) +
scale_y_discrete(limits=c("allele", "prop", "poly","match","none")) +
theme(aspect.ratio=0.7) +
labs(title="Number of markers kept after each step",
x="Number of markers",
y="Function used") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
functions_plot
ggsave("figures/functions.pdf",functions_plot,width=5,height=3)
rm(none,allele,match,poly,prop,barplot_df)
```
## Graph before after
### Before: creation of Rqtl csv file
```{r cross_before}
# filter at minima: remove non polymorphic and NA>0.5
tab_before <- mark_poly(stuart_tab)
tab_before <- mark_prop(tab_before,cross="F2",homo=0,hetero=0)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab_before,ref=strns_ref,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_before.csv")
```
### Before: newmap and permutation
These objects were produced on our cluster with the following script: /files/cluster/before_after.R
### Before: plot estimated map
```{r before_map}
# import cross
cross_before <- read.cross(format="csv",file="files/cluster/cross_before.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster/newmap_before.rda")
# plot
plotMap(cross_before,newmap_before,shift=TRUE)
plotmap_before <- ~plotMap(cross_before,newmap_before,shift=TRUE,main="Before stuart")
```
### Before: plot genome scan
```{r before_scan}
# load rda with perm
load("files/cluster/before_1000p.rda")
# load("files/before_1000p.rda")
# calculate thresholds
threshold_before <- summary(before_1000p,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
# scanone
cross_before <- calc.genoprob(cross_before, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
pheno_before <- scanone(cross=cross_before, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
summary(pheno_before)
# Plot
pheno_before_plot <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
title="QTL mapping",
x.label = element_blank(),
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_before_plot
```
### After: creation of Rqtl csv file
```{r cross_after}
# filter with stuart functions: use the good data for parental strains (strains df)
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 <- mark_poly(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strains,cross="F2",par1="parent1",par2="parent2")
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster/cross_after.csv")
```
### After: newmap
These objects were produced on our cluster with the following script: /files/cluster/before_after.R
### After: plot estimated map
```{r after_map}
# import cross
cross_after <- read.cross(format="csv",file="files/cluster/cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster/newmap_after.rda")
# plot
plotMap(cross_after,newmap_after,shift=TRUE)
plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart")
```
### Remove last problematic markers with mark_estmap
```{r after_estmap}
#filter with mark_estmap
tab2 <- mark_estmap(tab2,newmap_after)
# create rqtl csv file
write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path="files/cluster2/cross_after.csv")
```
### After: plot estimated map 2
```{r after_map2}
# import cross
cross_after <- read.cross(format="csv",file="files/cluster2/cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
# load rda with newmap
load("files/cluster2/newmap_after.rda")
# plot
plotMap(cross_after,newmap_after,shift=TRUE)
plotmap_after <- ~plotMap(cross_after,newmap_after,shift=TRUE,main="After stuart")
```
### After: plot genome scan
```{r after_scan}
# load rda with perm
load("files/cluster2/after_1000p.rda")
# load("files/after_1000p.rda")
# calculate thresholds
threshold_after <- summary(after_1000p,alpha=c(0.05,0.1,0.63)) #donne lod score pour risque 0.05, 0.1, 0.63
# scanone
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
pheno_after <- scanone(cross=cross_after, chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"), pheno.col="Pheno", model="normal", method="em")
summary(pheno_after)
# Plot
pheno_after_plot <- qtl_plot(pheno_after,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_after[1:3]),
ylab="LOD score",
title="QTL mapping",
x.label = element_blank(),
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_after_plot
```
### Grid
```{r grid fig1, fig.height = 7, fig.width = 13, fig.align = "center"}
grid <- plot_grid(as_grob(plotmap_before),as_grob(plotmap_after),pheno_before_plot,pheno_after_plot,ncol=2,labels=c("A","B","C","D"),label_size=20)
grid
ggsave("figures/before_after.pdf",grid,width=11,height = 7)
```
# Differences between real and reference genotypes for parental strains
```{r dif_number}
dif <- full_join(strains,strns_ref,by=c("marker","chr","cM_cox")) %>%
mutate(dif=case_when((!parent1.x%in%c("N","H") &
!parent1.y%in%c("N","H") &
parent1.x!=parent1.y) ~ 1,
(!parent2.x%in%c("N","H") &
!parent2.y%in%c("N","H") &
parent2.x!=parent2.y) ~ 1,
T~0))
dif %>% filter(dif==1) %>% count()
```
```{r dif_table}
table_dif <- dif %>% filter(dif==1) %>% select(marker,parent1_ref=parent1.y,parent1_geno=parent1.x,parent2_ref=parent2.y,parent2_geno=parent2.x) %>% head()
knitr::kable(table_dif)
```
\ No newline at end of file
This diff is collapsed.
##################################################################
########### before_after.R
########### R script for stuart article figures
##################################################################
## Goal and data
###########################
# The goal of this script is to calculate est.map and permutations of cross_before et cross_after
#
# It imports the csv files "cross_before.csv" and "cross_after.csv"
## Import packages
library(qtl)
##############################
## Before
##############################
## read cross
cross_before <- read.cross(format="csv",file="./cross_before.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_before <- calc.genoprob(cross_before, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
# newmap_before <- est.map(cross=cross_before,error.prob=0.01)
# save(newmap_before,file="./newmap_before.rda")
## calculate 1000p
before_1000p <- scanone(cross=cross_before,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(before_1000p,file="./before_1000p.rda")
##############################
## After
##############################
## read cross
cross_after <- read.cross(format="csv",file="./cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
newmap_after <- est.map(cross=cross_after,error.prob=0.01)
save(newmap_after,file="./newmap_after.rda")
## calculate 1000p
after_1000p <- scanone(cross=cross_after,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(after_1000p,file="./after_1000p.rda")
\ No newline at end of file
Warning messages:
1: In fixXgeno.f2(cross, alleles) :
--Omitting 1120 male heterozygote genotypes on the X chromosome.
2: In fixXgeno.f2(cross, alleles) :
--There appear to be some individuals of each cross direction, but "pgm" is not provided.
Check the X chr genotype data and include a "pgm" column in the phenotype data.
"pgm" was inferred (probably poorly).
3: In summary.cross(cross) :
Some markers at the same position on chr 1,2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X; use jittermap().
Warning message:
In matchchr(chr, names(x$geno)) : Chr Y not found
Warning messages:
1: In fixXgeno.f2(cross, alleles) :
--Omitting 236 male heterozygote genotypes on the X chromosome.
2: In fixXgeno.f2(cross, alleles) :
--There appear to be some individuals of each cross direction, but "pgm" is not provided.
Check the X chr genotype data and include a "pgm" column in the phenotype data.
"pgm" was inferred (probably poorly).
3: In summary.cross(cross) :
Some markers at the same position on chr 1,2,5,6,7,8,9,10,11,12,13,14,15,17,18,19,X; use jittermap().
Warning message:
In est.map(cross = cross_after, error.prob = 0.01) : Didn't converge!
Warning message:
In matchchr(chr, names(x$geno)) : Chr Y not found
--Read the following data:
176 individuals
3323 markers
4 phenotypes
--Cross type: f2
--Read the following data:
176 individuals
2238 markers
4 phenotypes
--Cross type: f2
#!/bin/sh
#SBATCH -J "before_after"
#SBATCH --qos=normal
#SBATCH --mem=1000
#SBATCH -o before_after.out -e before_after.err
#SBATCH --mail-type=END --mail-user=mbourdon@pasteur.fr
Rscript before_after.R || exit 1
exit 0
This diff is collapsed.
This diff is collapsed.
##################################################################
########### after.R
########### R script for stuart article figures
##################################################################
## Goal and data
###########################
# The goal of this script is to calculate est.map and permutations of cross_after
#
# It imports the csv file "cross_after.csv"
## Import packages
library(qtl)
##############################
## read cross
cross_after <- read.cross(format="csv",file="./cross_after.csv",
genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE)
## calc geno prob
cross_after <- calc.genoprob(cross_after, step=2.0, off.end=0.0,
error.prob=1.0E-4, map.function="haldane", stepwidth="fixed")
## calculate est map
newmap_after <- est.map(cross=cross_after,error.prob=0.01)
save(newmap_after,file="./newmap_after.rda")
## calculate 1000p
after_1000p <- scanone(cross=cross_after,
chr=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "X", "Y"),
pheno.col="Pheno", model="normal", method="em", n.perm=1000, perm.Xsp=FALSE, verbose=FALSE)
save(after_1000p,file="./after_1000p.rda")
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment