Commit 0c2a19f1 authored by Marie  BOURDON's avatar Marie BOURDON
Browse files

Merge branch 'develop2' into 'develop'

Develop2

See merge request !2
parents 6145aaab 8629fd3f
"0","data(stuart_tab)"
"0","summary(stuart_tab)"
"1",""
"1"," SNP.Name "
"1"," Allele_1 "
"1"," Allele_2 "
"1"," n_HM1 "
"1"," n_HM2 "
"1"," n_HT "
"1"," n_NA "
"1","
"
"1"," Length:11125 "
"1"," Length:11125 "
"1"," Length:11125 "
"1"," Min. : 0.0 "
"1"," Min. : 0.00 "
"1"," Min. : 0.00 "
"1"," Min. : 0.00 "
"1","
"
"1"," Class :character "
"1"," Class :character "
"1"," Class :character "
"1"," 1st Qu.: 44.0 "
"1"," 1st Qu.: 0.00 "
"1"," 1st Qu.: 0.00 "
"1"," 1st Qu.: 0.00 "
"1","
"
"1"," Mode :character "
"1"," Mode :character "
"1"," Mode :character "
"1"," Median :174.0 "
"1"," Median : 0.00 "
"1"," Median : 0.00 "
"1"," Median : 1.00 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," Mean :123.9 "
"1"," Mean : 19.92 "
"1"," Mean : 19.24 "
"1"," Mean : 12.91 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," 3rd Qu.:176.0 "
"1"," 3rd Qu.: 34.00 "
"1"," 3rd Qu.: 5.00 "
"1"," 3rd Qu.: 5.00 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," Max. :176.0 "
"1"," Max. :175.00 "
"1"," Max. :176.00 "
"1"," Max. :176.00 "
"1","
"
"0","tab <- mark_tab(genos)
"
"2","Error in mark_tab(genos) : impossible de trouver la fonction ""mark_tab""
"
{"frames":[],"message":"Error in mark_tab(genos) : impossible de trouver la fonction \"mark_tab\"\n"}
\ No newline at end of file
/Users/mariebourdon/stuart_package/stuart/NAMESPACE="6A276B5"
/Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5"
/Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B"
/Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F"
/Users/mariebourdon/stuart_package/stuart/doc/stuaRt.R="E6241391"
/Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD"
......@@ -16,14 +16,21 @@
#' @export
#'
geno_strains <- function(ref,geno,par1,par2,name1,name2){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#recode genotypes from 2 alleles to 1
geno <- geno %>% mutate_all(as.character)
geno <- geno %>% filter(Sample.ID %in% c(par1,par2))
geno <- geno %>% mutate(Geno=case_when(Allele1...Forward == "-" | Allele2...Forward == "-" ~ "N",
Allele1...Forward == Allele2...Forward ~ Allele1...Forward,
Allele1...Forward %in% c("A","T","G","C") & Allele2...Forward %in% c("A","T","G","C") ~ "H"))
geno <- geno %>% filter(id %in% c(par1,par2))
geno <- geno %>% mutate(Geno=case_when(allele_1 == "-" | allele_2 == "-" ~ "N",
allele_1 == allele_2 ~ allele_1,
allele_1 %in% c("A","T","G","C") & allele_2 %in% c("A","T","G","C") ~ "H"))
geno <- geno %>% select(SNP.Name,Sample.ID,Geno) %>% pivot_wider(names_from = Sample.ID, values_from = Geno)
geno <- geno %>% select(marker,id,Geno) %>% pivot_wider(names_from = id, values_from = Geno)
#create consensus
......@@ -39,10 +46,10 @@ geno_strains <- function(ref,geno,par1,par2,name1,name2){
geno <- geno %>% rename(parent2=!!sym(par2[1]))
}
geno <- geno %>% select(SNP.Name,parent1,parent2)
colnames(geno) <- c("SNP.Name",name1,name2)
geno <- geno %>% select(marker,parent1,parent2)
colnames(geno) <- c("marker",name1,name2)
#merge with ref file
ref <- full_join(ref,geno,by=c("marker"="SNP.Name"))
ref <- full_join(ref,geno,by=c("marker"="marker"))
return(ref)
}
......@@ -22,21 +22,21 @@ mark_allele <- function(tab,ref,par1,par2){
#join tab and ref genotypes
ref <- ref %>% select(marker,!!sym(par1),!!sym(par2))
tab <- full_join(tab,ref,by=c("SNP.Name"="marker"))
tab <- full_join(tab,ref,by=c("marker"="marker"))
#function core
tab <- tab %>% mutate(exclude_allele = case_when(is.na(Allele_2)==FALSE &
tab <- tab %>% mutate(exclude_allele = case_when(is.na(allele_2)==FALSE &
!!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" &
((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) | (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1,
is.na(Allele_2)==FALSE &
((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) | (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1,
is.na(allele_2)==FALSE &
(!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") &
((Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2)) & (Allele_2!=!!sym(par1) & Allele_2!=!!sym(par2))) ~ 1,
is.na(Allele_2)==TRUE &
((allele_1!=!!sym(par1) & allele_1!=!!sym(par2)) & (allele_2!=!!sym(par1) & allele_2!=!!sym(par2))) ~ 1,
is.na(allele_2)==TRUE &
!!sym(par1) != "N" & !!sym(par2) != "N" & !!sym(par1) != "H" & !!sym(par2) != "H" &
(Allele_1!=!!sym(par1) | Allele_1!=!!sym(par2)) ~ 1,
is.na(Allele_2)==TRUE &
(allele_1!=!!sym(par1) | allele_1!=!!sym(par2)) ~ 1,
is.na(allele_2)==TRUE &
(!!sym(par1)=="N" | !!sym(par2)=="N" | !!sym(par1)=="H" | !!sym(par2)=="H") &
Allele_1!=!!sym(par1) & Allele_1!=!!sym(par2) ~ 1,
allele_1!=!!sym(par1) & allele_1!=!!sym(par2) ~ 1,
T ~ 0)
)
......
......@@ -13,12 +13,12 @@ mark_match <- function(tab, #tab_mark df
#finds SNPs that are in both files:
snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file
snp_genfile <- as.character(tab$SNP.Name) #extracts SNPs in cross geno file
snp_genfile <- as.character(tab$marker) #extracts SNPs in cross geno file
snp <- intersect(snp_strains,snp_genfile) #take intercept
#add results in exclude column
return(tab %>% mutate(exclude_match=ifelse(!SNP.Name %in% snp,
return(tab %>% mutate(exclude_match=ifelse(!marker %in% snp,
1,
0)))
......
......@@ -7,7 +7,7 @@
#'
#' @export
mark_poly <- function(tab){
# Returns tab with an added exclude_poly column: 1 where the second-allele
# column is NA, 0 otherwise.
# NOTE(review): the two consecutive return( lines below look like the
# before/after versions of the Allele_2 -> allele_2 column rename as shown
# by the diff view; only one of them exists in the real file -- confirm.
return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE,
return(tab %>% mutate(exclude_poly=ifelse(is.na(allele_2)==TRUE,
1,
0)))
}
# NOTE(review): the \describe list below has 19 items (10 strains + 9
# annotation columns) while @format states 18 variables -- confirm against
# the dataset and align the two.
#' Data frame with miniMUGA genotyping of classical lab strains.
#'
#' A dataset containing the genotypes of 10 mouse strains of the Institut Pasteur. Marker positions and other information are from Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). The genotyped strains are from the Institut Pasteur.
#'
#' @format A data frame with 11299 rows and 18 variables
#' \describe{
#' \item{CC001}{CC001 mouse strain}
#' \item{CC005}{CC005 mouse strain}
#' \item{CC042}{CC042 mouse strain}
#' \item{CC071}{CC071 mouse strain}
#' \item{Ifnar.KO.129}{Ifnar KO 129 mouse strain}
#' \item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain}
#' \item{Rvfs2.1}{Rvfs2-1 mouse strain}
#' \item{Rvfs2.2}{Rvfs2-2 mouse strain}
#' \item{Rvfs2.6}{Rvfs2-6 mouse strain}
#' \item{Rvfs2.7}{Rvfs2-7 mouse strain}
#' \item{marker}{name of the marker}
#' \item{chr}{chromosome}
#' \item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)}
#' \item{cM_cox}{localisation on chromosome in cM (from Cox et al.)}
#' \item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)}
#' \item{snp}{marker alleles}
#' \item{unique}{indicates if the marker maps uniquely on mm10}
#' \item{multi}{indicates if the marker maps more than one time on mm10}
#' \item{unmapped}{indicates if the marker does not map perfectly on mm10}
#' }
"ref_strains_mini"
......@@ -4,9 +4,9 @@
#'
#' @format A data frame with 11125 rows and 7 variables
#' \describe{
#' \item{SNP.Name}{name of the marker}
#' \item{Allele_1}{first allele of the marker}
#' \item{Allele_2}{second allele of the marker}
#' \item{marker}{name of the marker}
#' \item{allele_1}{first allele of the marker}
#' \item{allele_2}{second allele of the marker}
#' \item{n_HM1}{number of homozygous individuals for the first allele}
#' \item{n_HM2}{number of homozygous individuals for the second allele}
#' \item{n_HT}{number of heterozygous individuals}
......
......@@ -12,8 +12,14 @@
#### tab_mark function ####
## create table with markers and counts
tab_mark <- function(geno){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE)
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
......@@ -26,9 +32,9 @@ tab_mark <- function(geno){
#create df with counts for each genotype
df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)),
Allele_1 = NA,
Allele_2 = NA,
df_count <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
......@@ -36,11 +42,11 @@ tab_mark <- function(geno){
## loop to count genotype
for(i in df_count$SNP.Name){
for(i in df_count$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(SNP.Name==i) %>%
select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>%
pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>%
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
......@@ -52,19 +58,19 @@ tab_mark <- function(geno){
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to df_count
df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i,
paste(Alleles[1]), Allele_1))
df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(SNP.Name==i) %>%
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
......@@ -72,55 +78,55 @@ tab_mark <- function(geno){
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as Allele_2
df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
NA, Allele_2))
#NA as allele_2
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to df_count
df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
paste(Alleles[2]), Allele_2))
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(SNP.Name==i) %>%
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(SNP.Name==i) %>%
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(SNP.Name==i) %>%
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to df_count
df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
......
......@@ -21,6 +21,12 @@
#### write_rqtl ####
## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#extract snps non excluded
if("exclude_match" %in% colnames(tab)){
tab <- tab %>% filter(exclude_match==0)
......@@ -40,7 +46,7 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#filter genotypes for non excluded markers in geno file
geno <- geno %>% select(c(SNP.Name,Sample.ID,Allele1...Forward,Allele2...Forward)) %>% filter(SNP.Name %in% tab$SNP.Name)
geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker)
#recode parents' names to match column names nomenclature
par1 <- make.names(par1)
......@@ -51,33 +57,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2))
#merge genotypes with parents
geno <- left_join(geno,ref,by=c("SNP.Name"="marker"))
geno <- left_join(geno,ref,by=c("marker"="marker"))
#recode "-" in "N" in geno file
geno <- geno %>% mutate(Allele1...Forward = recode(Allele1...Forward,
geno <- geno %>% mutate(allele_1 = recode(allele_1,
"-" = "N"))
geno <- geno %>% mutate(Allele2...Forward = recode(Allele2...Forward,
geno <- geno %>% mutate(allele_2 = recode(allele_2,
"-" = "N"))
#recode geno in factors with same levels
geno <- geno %>% mutate(Allele1...Forward = factor(Allele1...Forward,levels=c("A","C","G","H","N","T")))
geno <- geno %>% mutate(Allele2...Forward = factor(Allele2...Forward,levels=c("A","C","G","H","N","T")))
geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T")))
geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T")))
#recode genotypes depending on parents' genotypes
geno <- geno %>% mutate(Geno = case_when(
#if one allele not genotyped:
Allele1...Forward=="N" | Allele2...Forward=="N" ~ "NA",
allele_1=="N" | allele_2=="N" ~ "NA",
#if both alleles genotyped
##homozygous 0
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par1) ~ "0",
allele_1==allele_2 & allele_1==!!sym(par1) ~ "0",
##homozygous 2
Allele1...Forward==Allele2...Forward & Allele1...Forward==!!sym(par2) ~ "2",
allele_1==allele_2 & allele_1==!!sym(par2) ~ "2",
##heterozygous
Allele1...Forward!=Allele2...Forward ~ "1",
allele_1!=allele_2 ~ "1",
#if parental strains are N/H
##homozygous for parent that is N/H
......@@ -92,33 +98,33 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
#keep positions of markers
markers <- geno %>% select(SNP.Name,chr,!!sym(pos)) %>% distinct()
markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct()
markers <- markers %>% arrange(chr,!!sym(pos))
#keep only interesting columns in geno file
geno <- geno %>% arrange(chr,!!sym(pos))
geno <- geno %>% select(SNP.Name,Sample.ID,Geno)
geno <- geno %>% select(marker,id,Geno)
#remove prefix
geno <- geno %>% mutate(Sample.ID=str_remove(Sample.ID,prefix))
geno <- geno %>% mutate(id=str_remove(id,prefix))
#keep only non excluded markers and merge with positions
markers <- markers %>% mutate(SNP.Name=as.character(SNP.Name))
markers <- markers %>% mutate(marker=as.character(marker))
markers <- markers %>% mutate(chr=as.character(chr))
geno <- markers %>% select(SNP.Name,chr,!!sym(pos)) %>% full_join(.,geno,by="SNP.Name")
geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker")
#pivoting
geno <- geno %>% pivot_wider(names_from = c(SNP.Name,chr,!!sym(pos)),values_from = Geno,names_sep=",")
geno <- geno %>% mutate(Sample.ID=as.character(Sample.ID))
geno <- geno %>% rename("Sample.ID,,"=Sample.ID)
geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",")
geno <- geno %>% mutate(id=as.character(id))
geno <- geno %>% rename("id,,"=id)
#merge with phenotype file
pheno <- pheno %>% mutate_all(as.character)
colnames(pheno) <- str_c(colnames(pheno),",,")
qtl_file <- right_join(pheno,geno,by=c("Ind,,"="Sample.ID,,"))
qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,"))
#prepare file
qtl_file <- rbind(colnames(qtl_file),qtl_file)
......
No preview for this file type
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ref_strains_mini-data.R
\docType{data}
\name{ref_strains_mini}
\alias{ref_strains_mini}
\title{Data frame with miniMUGA genotyping of classical lab strains.}
\format{
A data frame with 11299 rows and 18 variables
\describe{
\item{CC001}{CC001 mouse strain}
\item{CC005}{CC005 mouse strain}
\item{CC042}{CC042 mouse strain}
\item{CC071}{CC071 mouse strain}
\item{Ifnar.KO.129}{Ifnar KO 129 mouse strain}
\item{Ifnar.KO.B6}{Ifnar KO B6 mouse strain}
\item{Rvfs2.1}{Rvfs2-1 mouse strain}
\item{Rvfs2.2}{Rvfs2-2 mouse strain}
\item{Rvfs2.6}{Rvfs2-6 mouse strain}
\item{Rvfs2.7}{Rvfs2-7 mouse strain}
\item{marker}{name of the marker}
\item{chr}{chromosome}
\item{bp_mm10}{localisation on chromosome in bp (mm10 assembly)}
\item{cM_cox}{localisation on chromosome in cM (from Cox et al.)}
\item{cM_g2f1}{localisation on chromosome in cM (from Liu et al.)}
\item{snp}{marker alleles}
\item{unique}{indicates if the marker maps uniquely on mm10}
\item{multi}{indicates if the marker maps more than one time on mm10}
\item{unmapped}{indicates if the marker does not map perfectly on mm10}
}
}
\usage{
ref_strains_mini
}
\description{
A dataset containing the genotypes of 10 mouse strains of the Institut pasteur. Markers positions and other information are from by Karl Broman (https://kbroman.org/MUGAarrays/mini_revisited.html). Strains genotyped from Institut Pasteur.
}
\keyword{datasets}
No preview for this file type
No preview for this file type
......@@ -17,11 +17,11 @@ knitr::opts_chunk$set(
Marie Bourdon
April 2021
June 2021
## Goal
stuart is a R package which formats the genotyping data from MUGA arrays (Neogen) to use it in Rqtl, for backcross or F2 crosses. It allows to filter the markers in MUGA arrays that can or cannot be used for Rqtl analysis, from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondance between F2 or N2 individuals alleles and parental strains alleles, etc.
stuart is an R package which formats genotyping results. It was developed to analyse data from MUGA arrays (Neogen) for Rqtl analysis, for backcross or F2 crosses, but can be used to analyse data from other laboratory animal strains with other arrays. It allows you to filter the markers in arrays from a genetic point of view. Indeed, markers will be selected depending on their proportion of each genotype, correspondence between F2 or N2 individuals' alleles and parental strains' alleles, etc.
The examples shown here require the use of dplyr package.
......@@ -34,17 +34,24 @@ library(stuart)
## Annotation files
In order to map the markers on the genome of the individuals, you need to load a table with the position of all markers in the array. The data frame must contain the following columns: `marker` with the markers names, `chr` with the chromosome of each marker, and a column with the position of the marker on the chromosome. For Rqtl analysis, you need to provide positions in cM. The data frame can contain other columns that you judge helpful.
The developer of the Rqtl and Rqtl2 packages, Karl Broman, realised that the annotation of the MUGA arrays was not correct for some markers. Thus, he produced new annotation files for the MUGA, miniMUGA, megaMUGA and gigaMUGA arrays. These files contain information about the markers, including the chromosome and position where the probe of the marker matches the genome, whether the marker maps uniquely or not, etc. These files also contain the genetic position of the markers calculated with two methods: "cM_cox" and "cM_g2f1" (see https://kbroman.org/MUGAarrays/mini_revisited.html for more information).
We recommend using these annotation files to build the file used for Rqtl analysis. You can load the datasets with these annotations from GitHub (https://github.com/kbroman/MUGAarrays/tree/master/UWisc). Choose the file corresponding to the MUGA array that you used and use the URL to load the dataset in R.
Here, we will present an example of the use of stuart with results of a F2 cross genotyped with miniMUGA. We load the result of Neogen genotyping: `genos` and thephenotype dataset produced by the lab: `phenos`. All these datasets are available for example in stuart package.
```{r annot}
annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"))
```
Here, we will present an example of the use of stuart with results of an F2 cross genotyped with miniMUGA. Example genotype and phenotype data frames are available in the stuart package.
The genotype data frame must contain a first column with marker names, a second column with sample IDs, a third column with the first allele and a fourth column with the second allele. This format corresponds to the MUGA results. If your data differ, make sure to have these columns in this order.
We load the result of Neogen genotyping: `genos` (only useful columns with marker name, sample ID and alleles were kept) and the phenotype dataset produced by the lab: `phenos`.
```{r load}
data(genos)
......@@ -84,7 +91,7 @@ data(stuart_tab)
summary(stuart_tab)
```
Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.
Then we will use the different mark_* functions in order to filter the markers. First, we can use the `mark_match()` function. Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. The `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change.
```{r mark_match}
tab2 <- mark_match(stuart_tab,ref=strains)
......@@ -95,28 +102,28 @@ tab2 %>% filter(exclude_match==1)
Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded.
Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.
Then, we can use the `mark_poly()` function, which will exclude the markers that are not polymorphic.
```{r mark_poly ex}
tab2 <- mark_poly(tab2)
head(tab2)
```
The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.
The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have an F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotypes. If we have an N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows filtering markers depending on the proportion of non-genotyped animals. By default, markers for which more than 50% of individuals were not genotyped are excluded.
```{r mark_prop ex}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
head(tab2)
```
Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.