diff --git a/.Rhistory b/.Rhistory
deleted file mode 100644
index a6361541dd90ae181bc81517277060a6416e03cd..0000000000000000000000000000000000000000
--- a/.Rhistory
+++ /dev/null
@@ -1,512 +0,0 @@
-#create geno column in geno df
-geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
-#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
-geno <- geno %>% mutate(Geno=recode(Geno,
-"TA" = "AT",
-"GA" = "AG",
-"CA" = "AC",
-"GT" = "TG",
-"CT" = "TC",
-"GC" = "CG"))
-#create df with counts for each genotype
-df_count <- tibble(marker = as.character(unique(geno$marker)),
-allele_1 = NA,
-allele_2 = NA,
-n_HM1 = NA,
-n_HM2 = NA,
-n_HT = NA,
-n_NA = NA)
-## loop to count genotype
-for(i in df_count$marker){
-#extract alleles for each marker
-Alleles <- geno %>% filter(marker==i) %>%
-select(c(marker,id,Geno,allele_1,allele_2)) %>%
-pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
-distinct(Allele) %>% filter(Allele != "-")
-Alleles <- as.factor(paste(Alleles$Allele))
-#sort alleles
-Alleles <- factor(Alleles,levels=c("A","T","C","G"))
-Alleles <- sort(Alleles)
-#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
-if(all(rapportools::is.empty(Alleles))==FALSE){
-#add alleles to df_count
-df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
-paste(Alleles[1]), allele_1))
-#count for homozygous for allele 1
-n1 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
-n1$n, n_HM1))
-}
-#if marker not polymorphic
-if(is.na(Alleles[2])==TRUE){
-#NA as allele_2
-df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
-NA, allele_2))
-#NA as n_HM2
-df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
-NA, n_HM2))
-#NA as n_HT
-df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
-NA, n_HT))
-} else {
-#add alleles to df_count
-df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
-paste(Alleles[2]), allele_2))
-#count for homozygous for allele 2
-n2 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
-n2$n, n_HM2))
-#count for heterozygous
-n3 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
-n3$n, n_HT))
-}
-#count for NA
-n4 <- geno %>% filter(marker==i) %>%
-filter(Geno == "--" |
-Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
-Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for NA to df_count
-df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
-n4$n, n_NA))
-}
-#change class of counts as numeric :
-df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
-df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
-df_count$n_HT <- df_count$n_HT %>% as.numeric()
-df_count$n_NA <- df_count$n_NA %>% as.numeric()
-#add 0 for null counts
-df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
-#save useful columns in annot dataframe
-annot <- annot %>% select(marker,chr,!!sym(pos))
-print(annot)
-#return
-return(df_count)
-}
-tab_mark(genos,annot_mini,"cM_cox")
-library(tidyr)
-tab_mark(genos,annot_mini,"cM_cox")
-tab_mark <- function(geno,annot,pos){
-#rename df columns
-geno <- geno %>% rename("marker"=1,
-"id"=2,
-"allele_1"=3,
-"allele_2"=4)
-#create geno column in geno df
-geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
-#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
-geno <- geno %>% mutate(Geno=recode(Geno,
-"TA" = "AT",
-"GA" = "AG",
-"CA" = "AC",
-"GT" = "TG",
-"CT" = "TC",
-"GC" = "CG"))
-#create df with counts for each genotype
-df_count <- tibble(marker = as.character(unique(geno$marker)),
-allele_1 = NA,
-allele_2 = NA,
-n_HM1 = NA,
-n_HM2 = NA,
-n_HT = NA,
-n_NA = NA)
-## loop to count genotype
-for(i in df_count$marker){
-#extract alleles for each marker
-Alleles <- geno %>% filter(marker==i) %>%
-select(c(marker,id,Geno,allele_1,allele_2)) %>%
-pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
-distinct(Allele) %>% filter(Allele != "-")
-Alleles <- as.factor(paste(Alleles$Allele))
-#sort alleles
-Alleles <- factor(Alleles,levels=c("A","T","C","G"))
-Alleles <- sort(Alleles)
-#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
-if(all(rapportools::is.empty(Alleles))==FALSE){
-#add alleles to df_count
-df_count <- df_count %>% mutate(allele_1 = ifelse(marker == i,
-paste(Alleles[1]), allele_1))
-#count for homozygous for allele 1
-n1 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HM1 = ifelse(marker == i,
-n1$n, n_HM1))
-}
-#if marker not polymorphic
-if(is.na(Alleles[2])==TRUE){
-#NA as allele_2
-df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
-NA, allele_2))
-#NA as n_HM2
-df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
-NA, n_HM2))
-#NA as n_HT
-df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
-NA, n_HT))
-} else {
-#add alleles to df_count
-df_count <- df_count %>% mutate(allele_2 = ifelse(marker == i,
-paste(Alleles[2]), allele_2))
-#count for homozygous for allele 2
-n2 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HM2 = ifelse(marker == i,
-n2$n, n_HM2))
-#count for heterozygous
-n3 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to df_count
-df_count <- df_count %>% mutate(n_HT = ifelse(marker == i,
-n3$n, n_HT))
-}
-#count for NA
-n4 <- geno %>% filter(marker==i) %>%
-filter(Geno == "--" |
-Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
-Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for NA to df_count
-df_count <- df_count %>% mutate(n_NA = ifelse(marker == i,
-n4$n, n_NA))
-}
-#change class of counts as numeric :
-df_count$n_HM1 <- df_count$n_HM1 %>% as.numeric()
-df_count$n_HM2 <- df_count$n_HM2 %>% as.numeric()
-df_count$n_HT <- df_count$n_HT %>% as.numeric()
-df_count$n_NA <- df_count$n_NA %>% as.numeric()
-#add 0 for null counts
-df_count <- df_count %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
-#save useful columns in annot dataframe
-annot <- annot %>% select(marker,chr,!!sym(pos))
-tab <- left_join(tab,annot)
-#return
-return(df_count)
-}
-tab_mark(genos,annot_mini,"cM_cox")
-tab_mark <- function(geno,annot,pos){
-#rename df columns
-geno <- geno %>% rename("marker"=1,
-"id"=2,
-"allele_1"=3,
-"allele_2"=4)
-#create geno column in geno df
-geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
-#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
-geno <- geno %>% mutate(Geno=recode(Geno,
-"TA" = "AT",
-"GA" = "AG",
-"CA" = "AC",
-"GT" = "TG",
-"CT" = "TC",
-"GC" = "CG"))
-#create df with counts for each genotype
-tab <- tibble(marker = as.character(unique(geno$marker)),
-allele_1 = NA,
-allele_2 = NA,
-n_HM1 = NA,
-n_HM2 = NA,
-n_HT = NA,
-n_NA = NA)
-## loop to count genotype
-for(i in tab$marker){
-#extract alleles for each marker
-Alleles <- geno %>% filter(marker==i) %>%
-select(c(marker,id,Geno,allele_1,allele_2)) %>%
-pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
-distinct(Allele) %>% filter(Allele != "-")
-Alleles <- as.factor(paste(Alleles$Allele))
-#sort alleles
-Alleles <- factor(Alleles,levels=c("A","T","C","G"))
-Alleles <- sort(Alleles)
-#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
-if(all(rapportools::is.empty(Alleles))==FALSE){
-#add alleles to tab
-tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
-paste(Alleles[1]), allele_1))
-#count for homozygous for allele 1
-n1 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
-n1$n, n_HM1))
-}
-#if marker not polymorphic
-if(is.na(Alleles[2])==TRUE){
-#NA as allele_2
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-NA, allele_2))
-#NA as n_HM2
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-NA, n_HM2))
-#NA as n_HT
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-NA, n_HT))
-} else {
-#add alleles to tab
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-paste(Alleles[2]), allele_2))
-#count for homozygous for allele 2
-n2 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-n2$n, n_HM2))
-#count for heterozygous
-n3 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-n3$n, n_HT))
-}
-#count for NA
-n4 <- geno %>% filter(marker==i) %>%
-filter(Geno == "--" |
-Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
-Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for NA to tab
-tab <- tab %>% mutate(n_NA = ifelse(marker == i,
-n4$n, n_NA))
-}
-#change class of counts as numeric :
-tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
-tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
-tab$n_HT <- tab$n_HT %>% as.numeric()
-tab$n_NA <- tab$n_NA %>% as.numeric()
-#add 0 for null counts
-tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
-#save useful columns in annot dataframe
-annot <- annot %>% select(marker,chr,!!sym(pos))
-tab <- left_join(tab,annot)
-#return
-return(tab)
-}
-tab_mark(genos,annot_mini,"cM_cox")
-tab_mark <- function(geno,annot,pos){
-#rename df columns
-geno <- geno %>% rename("marker"=1,
-"id"=2,
-"allele_1"=3,
-"allele_2"=4)
-#create geno column in geno df
-geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
-#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
-geno <- geno %>% mutate(Geno=recode(Geno,
-"TA" = "AT",
-"GA" = "AG",
-"CA" = "AC",
-"GT" = "TG",
-"CT" = "TC",
-"GC" = "CG"))
-#create df with counts for each genotype
-tab <- tibble(marker = as.character(unique(geno$marker)),
-allele_1 = NA,
-allele_2 = NA,
-n_HM1 = NA,
-n_HM2 = NA,
-n_HT = NA,
-n_NA = NA)
-## loop to count genotype
-for(i in tab$marker){
-#extract alleles for each marker
-Alleles <- geno %>% filter(marker==i) %>%
-select(c(marker,id,Geno,allele_1,allele_2)) %>%
-pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
-distinct(Allele) %>% filter(Allele != "-")
-Alleles <- as.factor(paste(Alleles$Allele))
-#sort alleles
-Alleles <- factor(Alleles,levels=c("A","T","C","G"))
-Alleles <- sort(Alleles)
-#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
-if(all(rapportools::is.empty(Alleles))==FALSE){
-#add alleles to tab
-tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
-paste(Alleles[1]), allele_1))
-#count for homozygous for allele 1
-n1 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
-n1$n, n_HM1))
-}
-#if marker not polymorphic
-if(is.na(Alleles[2])==TRUE){
-#NA as allele_2
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-NA, allele_2))
-#NA as n_HM2
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-NA, n_HM2))
-#NA as n_HT
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-NA, n_HT))
-} else {
-#add alleles to tab
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-paste(Alleles[2]), allele_2))
-#count for homozygous for allele 2
-n2 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-n2$n, n_HM2))
-#count for heterozygous
-n3 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-n3$n, n_HT))
-}
-#count for NA
-n4 <- geno %>% filter(marker==i) %>%
-filter(Geno == "--" |
-Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
-Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for NA to tab
-tab <- tab %>% mutate(n_NA = ifelse(marker == i,
-n4$n, n_NA))
-}
-#change class of counts as numeric :
-tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
-tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
-tab$n_HT <- tab$n_HT %>% as.numeric()
-tab$n_NA <- tab$n_NA %>% as.numeric()
-#add 0 for null counts
-tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
-#save useful columns in annot dataframe
-annot <- annot %>% select(marker,chr,!!sym(pos))
-tab <- left_join(tab,annot,by="marker")
-#return
-return(tab)
-}
-tab_mark(genos,annot_mini,"cM_cox")
-tab_mark <- function(geno,annot,pos){
-#rename df columns
-geno <- geno %>% rename("marker"=1,
-"id"=2,
-"allele_1"=3,
-"allele_2"=4)
-#create geno column in geno df
-geno <- geno %>% unite(Geno,c("allele_1","allele_2"),sep="",remove=FALSE)
-#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
-geno <- geno %>% mutate(Geno=recode(Geno,
-"TA" = "AT",
-"GA" = "AG",
-"CA" = "AC",
-"GT" = "TG",
-"CT" = "TC",
-"GC" = "CG"))
-#create df with counts for each genotype
-tab <- tibble(marker = as.character(unique(geno$marker)),
-allele_1 = NA,
-allele_2 = NA,
-n_HM1 = NA,
-n_HM2 = NA,
-n_HT = NA,
-n_NA = NA)
-## loop to count genotype
-for(i in tab$marker){
-#extract alleles for each marker
-Alleles <- geno %>% filter(marker==i) %>%
-select(c(marker,id,Geno,allele_1,allele_2)) %>%
-pivot_longer(c(allele_1,allele_2),names_to="Allele_name",values_to="Allele") %>%
-distinct(Allele) %>% filter(Allele != "-")
-Alleles <- as.factor(paste(Alleles$Allele))
-#sort alleles
-Alleles <- factor(Alleles,levels=c("A","T","C","G"))
-Alleles <- sort(Alleles)
-#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
-if(all(rapportools::is.empty(Alleles))==FALSE){
-#add alleles to tab
-tab <- tab %>% mutate(allele_1 = ifelse(marker == i,
-paste(Alleles[1]), allele_1))
-#count for homozygous for allele 1
-n1 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[1],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM1 = ifelse(marker == i,
-n1$n, n_HM1))
-}
-#if marker not polymorphic
-if(is.na(Alleles[2])==TRUE){
-#NA as allele_2
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-NA, allele_2))
-#NA as n_HM2
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-NA, n_HM2))
-#NA as n_HT
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-NA, n_HT))
-} else {
-#add alleles to tab
-tab <- tab %>% mutate(allele_2 = ifelse(marker == i,
-paste(Alleles[2]), allele_2))
-#count for homozygous for allele 2
-n2 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[2],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HM2 = ifelse(marker == i,
-n2$n, n_HM2))
-#count for heterozygous
-n3 <- geno %>% filter(marker==i) %>%
-filter(Geno == paste(Alleles[1],Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for homozygous for allele 1 to tab
-tab <- tab %>% mutate(n_HT = ifelse(marker == i,
-n3$n, n_HT))
-}
-#count for NA
-n4 <- geno %>% filter(marker==i) %>%
-filter(Geno == "--" |
-Geno == paste(Alleles[1],"-",sep="") | Geno == paste(Alleles[2],"-",sep="") |
-Geno == paste("-",Alleles[1],sep="") | Geno == paste("-",Alleles[2],sep="")) %>%
-summarise(n=n())
-#add count for NA to tab
-tab <- tab %>% mutate(n_NA = ifelse(marker == i,
-n4$n, n_NA))
-}
-#change class of counts as numeric :
-tab$n_HM1 <- tab$n_HM1 %>% as.numeric()
-tab$n_HM2 <- tab$n_HM2 %>% as.numeric()
-tab$n_HT <- tab$n_HT %>% as.numeric()
-tab$n_NA <- tab$n_NA %>% as.numeric()
-#add 0 for null counts
-tab <- tab %>% mutate_at(.vars=vars(n_HM1,n_HM2,n_HT,n_NA),~replace(., is.na(.), 0))
-#save useful columns in annot dataframe
-annot <- annot %>% select(marker,chr,!!sym(pos))
-tab <- right_join(annot,tab,by="marker")
-#return
-return(tab)
-}
-tab_mark(genos,annot_mini,"cM_cox")
-# how to use the function:
-# stuart_tab <- tab_mark(genos,annot_mini,"cM_cox")
-tab <- tab_mark(genos,annot_mini,"cM_cox")
-View(tab)
-View(genos)
-View(tab)
diff --git a/.gitignore b/.gitignore
index 42d718175c9ea808fa1d65491df803f76914cfb2..e934e149a702dc6cf7e0ccb24cb264857131d614 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ Meta
 /doc/
 /Meta/
 .Rhistory
+.Rhistory