Commit d5572d89 authored by Marie Bourdon's avatar Marie Bourdon
Browse files

modif names tab_mark

parent c62185f3
"0","data(stuart_tab)"
"0","summary(stuart_tab)"
"1",""
"1"," SNP.Name "
"1"," Allele_1 "
"1"," Allele_2 "
"1"," n_HM1 "
"1"," n_HM2 "
"1"," n_HT "
"1"," n_NA "
"1","
"
"1"," Length:11125 "
"1"," Length:11125 "
"1"," Length:11125 "
"1"," Min. : 0.0 "
"1"," Min. : 0.00 "
"1"," Min. : 0.00 "
"1"," Min. : 0.00 "
"1","
"
"1"," Class :character "
"1"," Class :character "
"1"," Class :character "
"1"," 1st Qu.: 44.0 "
"1"," 1st Qu.: 0.00 "
"1"," 1st Qu.: 0.00 "
"1"," 1st Qu.: 0.00 "
"1","
"
"1"," Mode :character "
"1"," Mode :character "
"1"," Mode :character "
"1"," Median :174.0 "
"1"," Median : 0.00 "
"1"," Median : 0.00 "
"1"," Median : 1.00 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," Mean :123.9 "
"1"," Mean : 19.92 "
"1"," Mean : 19.24 "
"1"," Mean : 12.91 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," 3rd Qu.:176.0 "
"1"," 3rd Qu.: 34.00 "
"1"," 3rd Qu.: 5.00 "
"1"," 3rd Qu.: 5.00 "
"1","
"
"1"," "
"1"," "
"1"," "
"1"," Max. :176.0 "
"1"," Max. :175.00 "
"1"," Max. :176.00 "
"1"," Max. :176.00 "
"1","
"
"0","tab <- mark_tab(genos)
"
"2","Error in mark_tab(genos) : impossible de trouver la fonction ""mark_tab""
"
{"frames":[],"message":"Error in mark_tab(genos) : impossible de trouver la fonction \"mark_tab\"\n"}
\ No newline at end of file
/Users/mariebourdon/stuart_package/stuart/NAMESPACE="6A276B5"
/Users/mariebourdon/stuart_package/stuart/R/geno_strains.R="1F9B28F5"
/Users/mariebourdon/stuart_package/stuart/R/genos-data.R="9943E26B"
/Users/mariebourdon/stuart_package/stuart/R/tab_mark.R="DEC9867F"
/Users/mariebourdon/stuart_package/stuart/vignettes/stuaRt.Rmd="4D49CCFD"
......@@ -4,9 +4,9 @@
#'
#' @format A data frame with 11125 rows and 7 variables
#' \describe{
#' \item{SNP.Name}{name of the marker}
#' \item{Allele_1}{first allele of the marker}
#' \item{Allele_2}{second allele of the marker}
#' \item{marker}{name of the marker}
#' \item{allele_1}{first allele of the marker}
#' \item{allele_2}{second allele of the marker}
#' \item{n_HM1}{number of homozygous individuals for the first allele}
#' \item{n_HM2}{number of homozygous individuals for the second allele}
#' \item{n_HT}{number of heterozygous individuals}
......
......@@ -12,8 +12,14 @@
#### tab_mark function ####
## create table with markers and counts
tab_mark <- function(geno){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("Allele1...Forward","Allele2...Forward"),sep="",remove=FALSE)
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
......@@ -26,9 +32,9 @@ tab_mark <- function(geno){
#create df with counts for each genotype
df_count <- tibble(SNP.Name = as.character(unique(geno$SNP.Name)),
Allele_1 = NA,
Allele_2 = NA,
df_count <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
......@@ -36,11 +42,11 @@ tab_mark <- function(geno){
## loop to count genotype
for(i in df_count$SNP.Name){
for(i in df_count$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(SNP.Name==i) %>%
select(c(SNP.Name,Sample.ID,Geno,Allele1...Forward,Allele2...Forward)) %>%
pivot_longer(c(Allele1...Forward,Allele2...Forward),names_to="Allele_name",values_to="Allele") %>%
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
......@@ -52,19 +58,19 @@ tab_mark <- function(geno){
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to df_count
df_count <- df_count %>% mutate(Allele_1 = ifelse(SNP.Name == i,
paste(Alleles[1]), Allele_1))
df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(SNP.Name==i) %>%
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM1 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
......@@ -72,55 +78,55 @@ tab_mark <- function(geno){
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as Allele_2
df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
NA, Allele_2))
#NA as allele_2
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to df_count
df_count <- df_count %>% mutate(Allele_2 = ifelse(SNP.Name == i,
paste(Alleles[2]), Allele_2))
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(SNP.Name==i) %>%
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 2 to df_count
df_count <- df_count %>% mutate(n_HM2 = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(SNP.Name==i) %>%
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for heterozygous to df_count
df_count <- df_count %>% mutate(n_HT = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(SNP.Name==i) %>%
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to df_count
df_count <- df_count %>% mutate(n_NA = ifelse(SNP.Name == i,
df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
......
No preview for this file type
No preview for this file type
......@@ -86,7 +86,7 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", "Strains
The first step of marker sorting is to create the marker data frame with the `tab_mark()` function. This data frame contains, for each marker, the two alleles found in the F2/N2 population (`allele_1` and `allele_2`), the number of individuals for each genotype (homozygous for each allele, `n_HM1` and `n_HM2`, and heterozygous, `n_HT`), and the number of non-genotyped individuals (`n_NA`). This step can take several minutes. You can also load the output of this function directly.
```{r tab_mark,eval=F}
```{r tab_mark}
data(stuart_tab)
summary(stuart_tab)
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment