Commit df999311 authored by Marie Bourdon's avatar Marie Bourdon
Browse files

start ignoring .Rhistory

parent 070023a0
#create geno column in geno df
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
"TA" = "AT",
"GA" = "AG",
"CA" = "AC",
"GT" = "TG",
"CT" = "TC",
"GC" = "CG"))
#create df with counts for each genotype
df_count <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
n_NA = NA)
## loop to count genotype
for(i in df_count$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
#sort alleles
Alleles <- factor(Alleles,levels=c("A","T","C","G"))
Alleles <- sort(Alleles)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to df_count
df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
}
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as allele_2
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to df_count
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to df_count
df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
df_count$n_HT <- df_count$n_HT %>% as.numeric()
df_count$n_NA <- df_count$n_NA %>% as.numeric()
#add 0 for null counts
df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
#save useful columns in annot dataframe
annot <- annot %>% select(marker,chr,!!sym(pos))
print(annot)
#return
return(df_count)
}
tab_mark(genos,annot_mini,"cM_cox")
library(tidyr)
tab_mark(genos,annot_mini,"cM_cox")
tab_mark <- function(geno,annot,pos){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
"TA" = "AT",
"GA" = "AG",
"CA" = "AC",
"GT" = "TG",
"CT" = "TC",
"GC" = "CG"))
#create df with counts for each genotype
df_count <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
n_NA = NA)
## loop to count genotype
for(i in df_count$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
#sort alleles
Alleles <- factor(Alleles,levels=c("A","T","C","G"))
Alleles <- sort(Alleles)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to df_count
df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
}
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as allele_2
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to df_count
df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to df_count
df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to df_count
df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
df_count$n_HT <- df_count$n_HT %>% as.numeric()
df_count$n_NA <- df_count$n_NA %>% as.numeric()
#add 0 for null counts
df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
#save useful columns in annot dataframe
annot <- annot %>% select(marker,chr,!!sym(pos))
tab <- left_join(tab,annot)
#return
return(df_count)
}
tab_mark(genos,annot_mini,"cM_cox")
tab_mark <- function(geno,annot,pos){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
"TA" = "AT",
"GA" = "AG",
"CA" = "AC",
"GT" = "TG",
"CT" = "TC",
"GC" = "CG"))
#create df with counts for each genotype
tab <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
n_NA = NA)
## loop to count genotype
for(i in tab$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
#sort alleles
Alleles <- factor(Alleles,levels=c("A","T","C","G"))
Alleles <- sort(Alleles)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to tab
tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
}
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as allele_2
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to tab
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to tab
tab <- tab %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
tab$n_HT <- tab$n_HT %>% as.numeric()
tab$n_NA <- tab$n_NA %>% as.numeric()
#add 0 for null counts
tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
#save useful columns in annot dataframe
annot <- annot %>% select(marker,chr,!!sym(pos))
tab <- left_join(tab,annot)
#return
return(tab)
}
tab_mark(genos,annot_mini,"cM_cox")
tab_mark <- function(geno,annot,pos){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
"TA" = "AT",
"GA" = "AG",
"CA" = "AC",
"GT" = "TG",
"CT" = "TC",
"GC" = "CG"))
#create df with counts for each genotype
tab <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
n_NA = NA)
## loop to count genotype
for(i in tab$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
#sort alleles
Alleles <- factor(Alleles,levels=c("A","T","C","G"))
Alleles <- sort(Alleles)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to tab
tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
}
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as allele_2
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to tab
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to tab
tab <- tab %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
tab$n_HT <- tab$n_HT %>% as.numeric()
tab$n_NA <- tab$n_NA %>% as.numeric()
#add 0 for null counts
tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
#save useful columns in annot dataframe
annot <- annot %>% select(marker,chr,!!sym(pos))
tab <- left_join(tab,annot,by="marker")
#return
return(tab)
}
tab_mark(genos,annot_mini,"cM_cox")
tab_mark <- function(geno,annot,pos){
#rename df columns
geno <- geno %>% rename("marker"=1,
"id"=2,
"allele_1"=3,
"allele_2"=4)
#create geno column in geno df
geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno <- geno %>% mutate(Geno=recode(Geno,
"TA" = "AT",
"GA" = "AG",
"CA" = "AC",
"GT" = "TG",
"CT" = "TC",
"GC" = "CG"))
#create df with counts for each genotype
tab <- tibble(marker = as.character(unique(geno$marker)),
allele_1 = NA,
allele_2 = NA,
n_HM1 = NA,
n_HM2 = NA,
n_HT = NA,
n_NA = NA)
## loop to count genotype
for(i in tab$marker){
#extract alleles for each marker
Alleles <- geno %>% filter(marker==i) %>%
select(c(marker,id,Geno,allele_1,allele_2)) %>%
pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
distinct(Allele) %>% filter(Allele != "-")
Alleles <- as.factor(paste(Alleles$Allele))
#sort alleles
Alleles <- factor(Alleles,levels=c("A","T","C","G"))
Alleles <- sort(Alleles)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if(all(rapportools::is.empty(Alleles))==FALSE){
#add alleles to tab
tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
paste(Alleles[1]), allele_1))
#count for homozygous for allele 1
n1 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
n1$n, n_HM1))
}
#if marker not polymorphic
if(is.na(Alleles[2])==TRUE){
#NA as allele_2
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
NA, allele_2))
#NA as n_HM2
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
NA, n_HM2))
#NA as n_HT
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
NA, n_HT))
} else {
#add alleles to tab
tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
paste(Alleles[2]), allele_2))
#count for homozygous for allele 2
n2 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
n2$n, n_HM2))
#count for heterozygous
n3 <- geno %>% filter(marker==i) %>%
filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
summarise(n=n())
#add count for homozygous for allele 1 to tab
tab <- tab %>% mutate(n_HT = ifelse(marker == i,
n3$n, n_HT))
}
#count for NA
n4 <- geno %>% filter(marker==i) %>%
filter(Geno == "--" |
Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
summarise(n=n())
#add count for NA to tab
tab <- tab %>% mutate(n_NA = ifelse(marker == i,
n4$n, n_NA))
}
#change class of counts as numeric :
tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
tab$n_HT <- tab$n_HT %>% as.numeric()
tab$n_NA <- tab$n_NA %>% as.numeric()
#add 0 for null counts
tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
#save useful columns in annot dataframe
annot <- annot %>% select(marker,chr,!!sym(pos))
tab <- right_join(annot,tab,by="marker")
#return
return(tab)
}
tab_mark(genos,annot_mini,"cM_cox")
# how to use the function:
# stuart_tab <- tab_mark(genos,annot_mini,"cM_cox")
tab <- tab_mark(genos,annot_mini,"cM_cox")
View(tab)
View(genos)
View(tab)
......@@ -4,3 +4,4 @@ Meta
/doc/
/Meta/
.Rhistory
.Rhistory
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment