diff --git a/.Rhistory b/.Rhistory index 85815d3515adc7b44826ae11d44aea4a9fb4a9b1..1e181379d8497f5334411cb095f9584c62aa9e30 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,254 +1,3 @@ -pos = pos, -place = place) -marker <- c() -chr <- c() -pos <- c() -place <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])),"last")) -} -marker <- c() -chr <- c() -pos <- c() -place <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -} -test <- tibble(marker = marker, -chr = chr, -pos = pos, -place = place) -test$pos -test$pos[1:] -test$pos[1:10] -test$pos[1:length(test$pos)] -test$pos[2:length(test$pos)] -marker <- c() -chr <- c() -pos <- c() -place <- c() -previous <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -prev <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -} -marker <- c() -chr <- c() -pos <- c() -place <- c() -previous <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -prev <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -previous <- c(previous,prev) -} -test <- tibble(marker = marker, -chr = chr, -pos = pos, -place = place, -previous = previous) -marker <- c() -chr <- c() -pos <- c() -place <- c() -follow <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -fol <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -follow <- c(follow,fol) -} -test <- tibble(marker = marker, -chr = chr, -pos = pos, -place = place, -pfollow = follow) -marker <- c() -chr <- c() -pos <- c() -place <- c() -follow <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -fol <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -follow <- c(follow,fol) -} -test <- tibble(marker = marker, -chr = chr, -place = place, -pos = pos, -pfollow = follow) -marker <- c() -chr <- c() -pos <- c() -place <- c() -follow <- c() -previous <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -fol <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -follow <- c(follow,fol) -prev <- c(NA,unname(newmap_after[[i]])[1:length(newmap_after[[i]])-1]) -previous <- c(previous,prev) -} -test <- tibble(marker = marker, -chr = chr, -place = place, -pos = pos, -pfollow = follow) -marker <- c() -chr <- c() -pos <- c() -place <- c() -follow <- c() -previous <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -fol <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -follow <- c(follow,fol) -prev <- c(NA,unname(newmap_after[[i]])[1:length(newmap_after[[i]])-1]) -previous <- c(previous,prev) -} -test <- tibble(marker = marker, -chr = chr, -place = place, -pos = pos, -follow = follow, -previous = previous) -marker <- c() -chr <- c() -pos <- c() -place <- c() -previous <- c() -follow <- c() -for(i in names(newmap_after)){ -marker <- c(marker,names(newmap_after[[i]])) -chr <- c(chr,rep(i,times=length(newmap_after[[i]]))) -pos <- c(pos,unname(newmap_after[[i]])) -place <- c(place,"first",rep("middle",times=(length(newmap_after[[i]])-2)),"last") -prev <- c(NA,unname(newmap_after[[i]])[1:length(newmap_after[[i]])-1]) -previous <- c(previous,prev) -fol <- c(unname(newmap_after[[i]])[2:length(newmap_after[[i]])],NA) -follow <- c(follow,fol) -} -test <- tibble(marker = marker, -chr = chr, -place = place, -pos = pos, -previous = previous, -follow = follow) -class(test$previous) -class(test$follow) -test %>% mutate(exclude=case_when(pos == "first" & follow > 100 ~ 1, -pos == "middle" & previous > 100 & follow > 100 ~ 1, -pos == "last" & previous > 100 ~ 1, -T ~ 0)) -test %<>% mutate(exclude=case_when(pos == "first" & follow > 100 ~ 1, -pos == "middle" & previous > 100 & follow > 100 ~ 1, -pos == "last" & previous > 100 ~ 1, -T ~ 0)) -test <- test %>% mutate(exclude=case_when(pos == "first" & follow > 100 ~ 1, -pos == "middle" & previous > 100 & follow > 100 ~ 1, -pos == "last" & previous > 100 ~ 1, -T ~ 0)) -test <- test %>% mutate(exclude=case_when(place == "first" & follow > 100 ~ 1, -place == "middle" & previous > 100 & follow > 100 ~ 1, -place == "last" & previous > 100 ~ 1, -T ~ 0)) -test <- tibble(marker = marker, -chr = chr, -place = place, -pos = pos, -previous = pos-previous, -follow = follow-pos) -test <- test %>% mutate(exclude=case_when(place == "first" & follow > 100 ~ 1, -place == "middle" & previous > 100 & follow > 100 ~ 1, -place == "last" & previous > 100 ~ 1, -T ~ 0)) -test %>% pull(exclude) -test %>% filter(exclude == 1) %>% pull(marker) -class(newmap_after) -load("/mnt/gaia/gaia_mouselab/Marie/Package_stuaRt/Article/Figures/Rdata/many_files.rda") -rm(after_1000p,before_neogen_1000p,before_us_1000p,newmap_before_neogen) -cross <- read.cross(format="csv",file="rqtl_file.csv", -genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) -library(qtl) -cross <- read.cross(format="csv",file="rqtl_file.csv", -genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) -setwd("~/Documents/stuart_package/stuart") -cross <- read.cross(format="csv",file="rqtl_file.csv", -genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) -library(qtl) -cross <- read.cross(format="csv",file="rqtl_file.csv", -genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) -rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, -ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path = "./rqtl_file.csv") -knitr::opts_chunk$set( -collapse = TRUE, -comment = "#>" -) -library(dplyr) -library(stuart) -annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) -data(genos) -summary(genos) -data(phenos) -summary(phenos) -strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), -par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") -head(strains) %>% print.data.frame() -genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", -"StrainsB_1","StrainsB_2")) -data(stuart_tab) -summary(stuart_tab) -tab2 <- mark_match(stuart_tab,ref=strains) -tab2 %>% filter(exclude_match==1) %>% print.data.frame() -tab2 <- mark_poly(tab2) -head(tab2) %>% print.data.frame() -tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) -head(tab2) %>% print.data.frame() -tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") -tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() -strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", -"gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% -select(marker,parent1,parent2) %>% print.data.frame() -rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, -ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path = "rqtl_file.csv") -cross <- read.cross(format="csv",file="rqtl_file.csv", -genotypes=c("0","1","2"),na.strings=c("NA"), convertXdata=TRUE) -bad_markers <- test %>% filter(exclude == 1) %>% pull(marker) -drop.markers(cross=cross,markers=bad_markers) -View(cross) -newcross <- drop.markers(cross=cross,markers=bad_markers) -View(newcross) -mark_estmap <- function(map,dist=100){ -#initialize variables -marker <- c() -chr <- c() -pos <- c() place <- c() previous <- c() follow <- c() @@ -510,3 +259,254 @@ devtools::build(path=".",vignettes=FALSE) devtools::build_vignettes() devtools::build_manual(path=".") library(stuart) +devtools::build_vignettes() +test <- tibble(sample.id=c(1,2,3),pheno=c(4,5,6)) +library(dplyr) +library(dplyr) +test <- tibble(sample.id=c(1,2,3),pheno=c(4,5,6)) +View(test) +test %>% rename(1="id") +test %>% rename("id"=1) +library(dplyr) +library(stuart) +write_rqtl <- function(geno,pheno,tab,ref,par1,par2,par_N=TRUE,prefix,pos,path=NA){ +#rename df columns +geno <- geno %>% rename("marker"=1, +"id"=2, +"allele_1"=3, +"allele_2"=4) +#extract snps non excluded +if("exclude_match" %in% colnames(tab)){ +tab <- tab %>% filter(exclude_match==0) +} +if("exclude_poly" %in% colnames(tab)){ +tab <- tab %>% filter(exclude_poly==0) +} +if("exclude_prop" %in% colnames(tab)){ +tab <- tab %>% filter(exclude_prop==0) +} +if("exclude_allele" %in% colnames(tab)){ +tab <- tab %>% filter(exclude_allele==0) +} +#filter genotypes for non excluded markers in geno file +geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker) +#recode parents' names to match column names nomenclature +par1 <- make.names(par1) +par2 <- make.names(par2) +#keep parental lines genotypes +colnames(ref) <- make.names(colnames(ref)) +ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2)) +#merge genotypes with parents +geno <- left_join(geno,ref,by=c("marker"="marker")) +#remove snps with no position +geno <- geno %>% filter(is.na(chr)==FALSE) %>% filter(is.na(!!sym(pos))==FALSE) +#recode "-" in "N" in geno file +geno <- geno %>% mutate(allele_1 = recode(allele_1, +"-" = "N")) +geno <- geno %>% mutate(allele_2 = recode(allele_2, +"-" = "N")) +#recode geno in factors with same levels +geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) +geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T"))) +#recode genotypes depending on parents' genotypes +geno <- geno %>% mutate(Geno = case_when( +#if one allele not genotyped: +allele_1=="N" | allele_2=="N" ~ "NA", +#if both alleles genotyped +##homozygous 0 +allele_1==allele_2 & allele_1==!!sym(par1) ~ "0", +##homozygous 2 +allele_1==allele_2 & allele_1==!!sym(par2) ~ "2", +##heterozygous +allele_1!=allele_2 ~ "1", +#if parental strains are N/H +##homozygous for parent that is N/H +###homozygous 0 +(!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) & +!!sym(par1)%in%c("H","N") ~ "0", +###homozygous 2 +(!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) & +!!sym(par2)%in%c("H","N") ~ "2" +) +) +#keep positions of markers +markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct() +markers <- markers %>% arrange(chr,!!sym(pos)) +#keep only interesting columns in geno file +geno <- geno %>% arrange(chr,!!sym(pos)) +geno <- geno %>% select(marker,id,Geno) +#remove prefix +geno <- geno %>% mutate(id=str_remove(id,prefix)) +#keep only non excluded markers and merge with positions +markers <- markers %>% mutate(marker=as.character(marker)) +markers <- markers %>% mutate(chr=as.character(chr)) +geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker") +#pivoting +geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",") +geno <- geno %>% mutate(id=as.character(id)) +geno <- geno %>% rename("id,,"=id) +#merge with phenotype file +pheno <- pheno %>% rename("id"=1) +pheno <- pheno %>% mutate_all(as.character) +colnames(pheno) <- str_c(colnames(pheno),",,") +qtl_file <- right_join(pheno,geno,by=c("id,,"="id,,")) +#prepare file +qtl_file <- rbind(colnames(qtl_file),qtl_file) +qtl_file <- separate_rows(qtl_file,everything(),sep=",") +colnames(qtl_file) <- qtl_file[1,] +qtl_file <- qtl_file %>% slice(-1) +if(is.na(path)==FALSE){ +write.csv(qtl_file,file=path,quote=FALSE,row.names = FALSE) +} +return(qtl_file) +} +data(phenos) +summary(phenos) +View(phenos) +pheno %>% rename("sample"="Ind") +phenos %>% rename("sample"="Ind") +phenos <- pheno %>% rename("sample"="Ind") +phenos <- phenos %>% rename("sample"="Ind") +knitr::opts_chunk$set( +collapse = TRUE, +comment = "#>" +) +library(dplyr) +library(stuart) +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) +data(genos) +summary(genos) +data(phenos) +summary(phenos) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), +par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) %>% print.data.frame() +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", +"StrainsB_1","StrainsB_2")) +data(stuart_tab) +summary(stuart_tab) +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) %>% print.data.frame() +tab2 <- mark_poly(tab2) +head(tab2) %>% print.data.frame() +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) %>% print.data.frame() +mark_prop(tab2,cross="F2",pval=0.05) %>% head() %>% print.data.frame() +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", +"gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% +select(marker,parent1,parent2) %>% print.data.frame() +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +library(dplyr) +library(stuart) +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +library(stuart) +library(dplyr) +library(stuart) +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +library(stuart) +library(stringr) +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +detach("package:stuart", unload = TRUE) +install.packages(stuart) +library(tidyverse) +rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +detach("package:stringr", unload = TRUE) +detach("package:tibble", unload = TRUE) +detach("package:tidyr", unload = TRUE) +detach("package:tidyverse", unload = TRUE) +detach("package:readr", unload = TRUE) +detach("package:purrr", unload = TRUE) +detach("package:forcats", unload = TRUE) +detach("package:ggplot2", unload = TRUE) +detach("package:dplyr", unload = TRUE) +devtools::build(path=".",vignettes=FALSE) +devtools::build_vignettes() +devtools::build_manual(as.package()) +devtools::build_manual( +) +library(stuart) +knitr::opts_chunk$set( +collapse = TRUE, +comment = "#>" +) +library(dplyr) +library(stuart) +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) +data(genos) +summary(genos) +data(phenos) +summary(phenos) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), +par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) %>% print.data.frame() +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", +"StrainsB_1","StrainsB_2")) +# how to use the function: +# stuart_tab <- tab_mark(genos) +data(stuart_tab) +summary(stuart_tab) +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) %>% print.data.frame() +tab2 <- mark_poly(tab2) +head(tab2) %>% print.data.frame() +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) %>% print.data.frame() +mark_prop(tab2,cross="F2",pval=0.05) %>% head() %>% print.data.frame() +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", +"gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% +select(marker,parent1,parent2) %>% print.data.frame() +stuart_cross <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +stuart_cross[1:10,1:7] %>% print.data.frame() +knitr::opts_chunk$set( +collapse = TRUE, +comment = "#>" +) +library(dplyr) +library(stuart) +annot_mini <- read.csv(url("https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv")) +data(genos) +summary(genos) +data(phenos) +summary(phenos) +strains <- geno_strains(ref=annot_mini,geno=genos,par1=c("StrainsA_1","StrainsA_2"), +par2=c("StrainsB_1","StrainsB_2"),name1="parent1",name2="parent2") +head(strains) %>% print.data.frame() +genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", +"StrainsB_1","StrainsB_2")) +# how to use the function: +# stuart_tab <- tab_mark(genos) +data(stuart_tab) +summary(stuart_tab) +tab2 <- mark_match(stuart_tab,ref=strains) +tab2 %>% filter(exclude_match==1) %>% print.data.frame() +tab2 <- mark_poly(tab2) +head(tab2) %>% print.data.frame() +tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1) +head(tab2) %>% print.data.frame() +mark_prop(tab2,cross="F2",pval=0.05) %>% head() %>% print.data.frame() +tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") +tab2 %>% arrange(desc(exclude_allele)) %>% head() %>% print.data.frame() +strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", +"gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% +select(marker,parent1,parent2) %>% print.data.frame() +stuart_cross <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, +ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") +stuart_cross[1:10,1:7] %>% print.data.frame() +rm(stuart_cross) +library(qtl) +data(stuart_cross) +summary(stuart_cross) diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 6186be1988b8e4b6049fe6bc27f364a0638eb922..1d8c157013cb2edfe8baabe96e6f7ecaf0710755 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -20,3 +20,5 @@ /home/marie/Documents/stuart_package/stuart/man/genos.Rd="383A8DC0" /home/marie/Documents/stuart_package/stuart/vignettes/stuart.Rmd="1F99EBA0" /mnt/gaia/gaia_mouselab/Marie/Package_stuaRt/Article/Figures/Maestro/map_before_neogen/map_before_neogen.R="36D40A63" +/mnt/zeus/zeus_mouselab/anais/newmap_perm_anais.R="545E1544" +/mnt/zeus/zeus_mouselab/marie/map_after/map_after.R="697AFFBF" diff --git a/DESCRIPTION b/DESCRIPTION index e06c81a5fd1af10cbbee799bb8017425c6854a1e..97383236f652fd10d1d0f78be665df2b2c4680dd 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stuart Title: stuart -Version: 1.0.3 +Version: 1.0.3.9000 Authors@R: person(given = "Marie", family = "Bourdon", diff --git a/R/mark_prop.R b/R/mark_prop.R index a632fe9ac853ee95d5cff21faf5c8b5e0ac2e2c7..744cc5cf4fa54d4eae63c1bbe46f4ef1b41b3a5e 100755 --- a/R/mark_prop.R +++ b/R/mark_prop.R @@ -54,7 +54,10 @@ mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,na=0.5){ tab <- tab %>% mutate(exclude_prop=case_when(p_NA > na ~ 1, cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1, - cross=="N2" & (p_HM1 < homo | p_HT < hetero) ~ 1, + cross=="N2" & ((p_HM2 == 0 & p_HM1 < homo) | + (p_HM1 == 0 & p_HM2 < homo) | + (p_HT < hetero) | + (p_HM1 !=0 & p_HM2 != 0)) ~ 1, T ~ exclude_prop )) diff --git a/R/write_rqtl.R b/R/write_rqtl.R index d08f63624348c3e10815e30a5a3bdc0cdf6bd72f..d74a85b38ad95d200fa0cb44e2916e07b6c58d0d 100755 --- a/R/write_rqtl.R +++ b/R/write_rqtl.R @@ -64,10 +64,10 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,par_N=TRUE,prefix,pos,path=N #recode "-" in "N" in geno file geno <- geno %>% mutate(allele_1 = recode(allele_1, - "-" = "N")) + "-" = "N")) geno <- geno %>% mutate(allele_2 = recode(allele_2, - "-" = "N")) + "-" = "N")) #recode geno in factors with same levels geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T"))) @@ -125,9 +125,10 @@ write_rqtl <- function(geno,pheno,tab,ref,par1,par2,par_N=TRUE,prefix,pos,path=N #merge with phenotype file + pheno <- pheno %>% rename("id"=1) pheno <- pheno %>% mutate_all(as.character) colnames(pheno) <- str_c(colnames(pheno),",,") - qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,")) + qtl_file <- right_join(pheno,geno,by=c("id,,"="id,,")) #prepare file qtl_file <- rbind(colnames(qtl_file),qtl_file) diff --git a/stuart_1.0.3.pdf b/stuart_1.0.3.9000.pdf similarity index 98% rename from stuart_1.0.3.pdf rename to stuart_1.0.3.9000.pdf index 02bc406eb8ceaa55a9a9c780e86393c8b3c567b3..f73c11f6354d02bcdc80df8099b12eab3771c0e9 100644 Binary files a/stuart_1.0.3.pdf and b/stuart_1.0.3.9000.pdf differ diff --git a/stuart_1.0.3.tar.gz b/stuart_1.0.3.9000.tar.gz similarity index 85% rename from stuart_1.0.3.tar.gz rename to stuart_1.0.3.9000.tar.gz index 15f65acabeb7277f7cc4c22f5e181271aae52fd4..9c60309bfe6312beb5c1ad305f6b7c405003048a 100644 Binary files a/stuart_1.0.3.tar.gz and b/stuart_1.0.3.9000.tar.gz differ diff --git a/vignettes/stuart.Rmd b/vignettes/stuart.Rmd index 485be9906ac4a55ef1e306adf3bf6a7e1ff6567e..75a039d28f2461052660359f88279a100e4e441e 100755 --- a/vignettes/stuart.Rmd +++ b/vignettes/stuart.Rmd @@ -87,10 +87,13 @@ genos <- genos %>% filter(!Sample.ID %in% c("StrainsA_1", "StrainsA_2", ### Marker tab -The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes. You can also load the output of this function. +The first step of the markers sorting is to create the marker dataframe with the tab_mark() function. This dataframe contains for each marker the two alleles that can be found in the F2/N2 population (`Allele_1` and `Allele_2`), the number of individuals for each genotype (homozygous for each allele (`n_HM1` and `n_HM2`) and heterozygous (`n_HT`)), and the number of non genotyped individuals (`n_NA`) This step can take several minutes, so you can also load the example output of this function. ```{r tab_mark} +# how to use the function: +# stuart_tab <- tab_mark(genos) + data(stuart_tab) summary(stuart_tab) ``` @@ -134,7 +137,7 @@ mark_prop(tab2,cross="F2",pval=0.05) %>% head() %>% print.data.frame() ### mark_allele -Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. +Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded. ```{r mark_allele} tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2") @@ -154,10 +157,10 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354", After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with `qtl::read.cross`. ```{r write_qtl} -rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, - ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path = "rqtl_file.csv") +stuart_cross <- write_rqtl(geno=genos,pheno=phenos,tab=tab2, + ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox") -rqtl_file[1:10,1:7] %>% print.data.frame() +stuart_cross[1:10,1:7] %>% print.data.frame() ``` ## Check markers with est.map() @@ -172,7 +175,7 @@ data(stuart_cross) summary(stuart_cross) ##### TO RUN ##### -# newmap <- est.map(cross=cross,error.prob=0.01) +# stuart_newmap <- est.map(cross=stuart_cross,error.prob=0.01) # save the map as data in the package # data(stuart_newmap) @@ -190,7 +193,7 @@ Then we can plot the newmap to compare it with the positions found in the annota It seems to be still 2 problematic markers. We can identify them with the `mark_estmap()`function. ```{r mark_estmap} -# bad_markers <- mark_estmap(map=newmap_after,dist=100) +# bad_markers <- mark_estmap(map=stuart_newmap,dist=100) # print(bad_markers) ``` @@ -229,3 +232,4 @@ Or if you prefere to have a correct CSV object that can always be used for QTL a # ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox",path = "rqtl_file.csv") ``` +