Commit 186d1d08 authored by mariefbourdon's avatar mariefbourdon
Browse files

220126 small modifs vignette

parent 94899005
{
"path" : "~/Documents/PhD/stuart_package/stuart/vignettes",
"path" : "~/Documents/PhD/stuart_package/stuart",
"sortOrder" : [
{
"ascending" : true,
......
No preview for this file type
......@@ -112,7 +112,7 @@ tab2 <- mark_estmap(tab=tab2,map=stuart_newmap,annot=annot_mini)
### Graph missing genotypes
```{r graph_NA}
na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
na_plot <- tab2 %>% mutate(prop_NA=n_NA/176) %>% ggplot(aes(x=prop_NA)) +
geom_histogram() +
scale_y_log10() +
theme_bw() +
......@@ -125,25 +125,30 @@ na_plot <- tab2 %>% mutate(prop_NA=n_NA/180) %>% ggplot(aes(x=prop_NA)) +
na_plot
```
Proportion of markers with more than 75% missing genotypes:
```{r prop_missing}
# Among all rows of tab2, compute the fraction whose n_NA / 176 exceeds 0.75:
# the numerator is the number of rows kept by the filter, the denominator is
# the total row count of tab2.
tab2 %>%
  mutate(prop_NA = n_NA / 176) %>%
  filter(prop_NA > 0.75) %>%
  summarise(p = n() / pull(count(tab2)))
```
### Graph proportion of genotypes
```{r graph_prop}
prop_plot <- tab2 %>% filter(n_NA<88) %>%
prop_plot <- tab2 %>% filter(n_NA<88) %>% filter(!chr %in% c("M","X","Y")) %>%
ggplot(aes(x=n_HM1/(n_HM1+n_HM2+n_HT),y=n_HM2/(n_HM1+n_HM2+n_HT),color=as.factor(exclude_prop))) +
geom_point() +
scale_color_manual(values=c("#66bd63","#b2182b")) +
scale_color_manual(values=c("#66bd63","#b2182b"),labels = c("Retained", "Excluded")) +
#geom_text(aes(label=ifelse(exclude_prop=="1",SNP.Name,'')),hjust=0, vjust=0,size=2) +
labs(title="Exclusion of markers with mark_prop()",
x="Proportion of homozygous individuals AA",
y="Proportion of homozygous individuals BB",
color="Exclusion") +
theme_classic() +
theme(
aspect.ratio=0.8,
legend.position=c(0.8,0.8)) +
theme(aspect.ratio=0.8,
legend.position=c(0.8,0.8),
legend.title = element_blank()) +
theme(plot.title = element_text(hjust = 0.4,face="bold",size=14))
prop_plot
```
### Grid
......@@ -183,7 +188,12 @@ allele <- left_join(allele,strains_allele,by=c("marker"="marker"))
#most of markers excluded with mark_allele that were not excluded with other functions have N/H as genotype for parents
#keep only those with non missing/heterozygous genotypes
allele %<>% filter(parent1 != "N" & parent2 != "N")
allele %<>% select(marker,parent1,parent2,allele_1,allele_2,n_HM1,n_HM2,n_HT)
allele %<>% select(marker,parent1,parent2,allele_1,allele_2)
#number of markers in such situation
count(tab2%>%
filter(exclude_allele==1))
#keep only the beginning of the table
allele <- allele[1:6,]
......@@ -297,31 +307,6 @@ pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05"
pheno_before_zoom
ggsave("figures/zoom_peak_13.pdf",pheno_before_zoom,width=3)
pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
title="QTL mapping",
chrs = "5",
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_before_zoom
ggsave("figures/zoom_peak_5.pdf",pheno_before_zoom,width=3)
pheno_before_zoom <- qtl_plot(pheno_before,lod=data.frame(group = c("alpha=0.05", "alpha=0.1","alpha=0.63"),
lod = threshold_before[1:3]),
ylab="LOD score",
title="QTL mapping",
chrs = "7",
size=0.6) +
theme(legend.position = "none") +
ggtitle("")
pheno_before_zoom
ggsave("figures/zoom_peak_7.pdf",pheno_before_zoom,width=3)
```
......@@ -331,6 +316,7 @@ ggsave("figures/zoom_peak_7.pdf",pheno_before_zoom,width=3)
# filter with stuart functions: use the good data for parental strains (strains df)
tab2 <- mark_match(stuart_tab,ref=strains)
tab2 <- mark_poly(tab2)
tab2 <- mark_na(tab2)
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1,homo1X=c(0.1,1),homo2X=c(0.1,1),heteroX=c(0.1,1))
tab2 <- mark_allele(tab2,ref=strains,cross="F2",par1="parent1",par2="parent2")
......@@ -438,4 +424,13 @@ dif %>% filter(dif==1) %>% count()
```{r dif_table}
# Build a short preview (first rows, via head()) of the rows of `dif` where
# dif == 1, renaming the joined .y / .x parental columns to *_ref / *_geno.
table_dif <- dif %>%
  filter(dif == 1) %>%
  select(
    marker,
    parent1_ref = parent1.y,
    parent1_geno = parent1.x,
    parent2_ref = parent2.y,
    parent2_geno = parent2.x
  ) %>%
  head()

# Render the preview as a table in the knitted document.
knitr::kable(table_dif)
```
\ No newline at end of file
```
# Pheno data format
```{r pheno}
# Keep the first rows of the phenotype table to illustrate the expected format.
# head() is used rather than phenos[1:6, ] so that a table with fewer than six
# rows is not padded with all-NA rows.
format_pheno <- head(phenos, 6)

# Export the preview as a LaTeX table without row names.
# `type` is an argument of print.xtable(), not of xtable() (where it was
# silently absorbed by `...`), so it is passed to print() here.
# NOTE(review): the output path is "tables/tab_alleles.tex" although this chunk
# exports the phenotype preview -- looks like a copy-paste leftover that would
# overwrite the alleles table; confirm the intended file name.
print(
  xtable::xtable(format_pheno),
  type = "latex",
  file = "tables/tab_alleles.tex",
  include.rownames = FALSE
)
```
This diff is collapsed.
% latex table generated in R 4.0.4 by xtable 1.8-4 package
% Tue Jan 4 10:53:15 2022
% Wed Jan 26 15:27:47 2022
\begin{table}[ht]
\centering
\begin{tabular}{lllllrrr}
\begin{tabular}{lllll}
\hline
marker & parent1 & parent2 & allele\_1 & allele\_2 & n\_HM1 & n\_HM2 & n\_HT \\
marker & parent1 & parent2 & allele\_1 & allele\_2 \\
\hline
S6J010381992 & T & T & T & C & 52.00 & 24.00 & 83.00 \\
S6J011498219 & T & T & T & G & 52.00 & 33.00 & 86.00 \\
S6H012742425 & G & G & A & G & 30.00 & 52.00 & 94.00 \\
mUNC010317552 & G & G & A & G & 34.00 & 67.00 & 75.00 \\
gUNC1973125 & T & T & T & C & 67.00 & 34.00 & 74.00 \\
S6J032196596 & A & A & A & G & 45.00 & 41.00 & 74.00 \\
S6J010381992 & T & T & T & C \\
S6J011498219 & T & T & T & G \\
S6H012742425 & G & G & A & G \\
mUNC010317552 & G & G & A & G \\
gUNC1973125 & T & T & T & C \\
S6J032196596 & A & A & A & G \\
\hline
\end{tabular}
\end{table}
% latex table generated in R 4.0.4 by xtable 1.8-4 package
% Wed Jan 26 16:11:09 2022
\begin{table}[ht]
\centering
\begin{tabular}{llrr}
\hline
Ind & Sex & Age & Pheno \\
\hline
201 & M & 7 & 10.53 \\
210 & M & 7 & 10.49 \\
308 & F & 7 & 10.97 \\
309 & M & 7 & 10.85 \\
310 & M & 7 & 11.07 \\
311 & F & 9 & 10.58 \\
\hline
\end{tabular}
\end{table}
......@@ -80,7 +80,7 @@ To use genotyping result for Rqtl analysis, we need to recode the genotypes of t
We recommend always genotyping the parental strains of the cross. Here, their genotypes are in the `genos` file and correspond to the Sample.ID "StrainsA_1", "StrainsA_2", "StrainsB_1" and "StrainsB_2". Two individuals were genotyped for each parental strain. The first step is to create a consensus genotype for each strain from the two genotyped individuals. The consensus genotype is then added to the annotation dataset in order to obtain a dataset with both the annotation and the reference genotype of the parental strains, which will be used for recoding the genotypes of the F2 individuals.
This is done with the `geno_strains` function.
This is done with the `geno_strains()` function. If the parental genotypes were in a different dataset from the one containing the second-generation individuals' genotypes (from a previous genotyping run, for example), that dataset would be passed to this function through the `geno` argument. If you use reference genotype data for your parental strains, you must create a table with the genotypes of the parental strains and the marker information by merging the `annot_mini` table with the reference genotypes table. This can be done with the `merge()` or `dplyr::full_join()` functions.
```{r strains}
parental_strains <- tibble::tibble(parent1 = c("StrainsA_1","StrainsA_2"),
......@@ -93,6 +93,7 @@ head(strains) %>% print.data.frame()
```
After this step, we need to remove the genotyping result for these individuals from the `genos` dataset.
```{r no_parent}
genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2",
"StrainsB_1","StrainsB_2"))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment