45D91D58-contents 5.34 KB
Newer Older
Marie Bourdon's avatar
Marie Bourdon committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#' @title Create data frame in Rqtl CSV format
#'
#' @description This function uses the table produced by tab_mark function filled by all the mark_* functions in order to create a data frame in the right format for Rqtl read.cross function. Only the non-excluded markers will be kept and genotypeds will be encoded in "0", "1" and "2", "0" being homozygous for the first parental strain, "1" heterozygous and "2" homozygous for the second parental strain. Caution, this file create a data frame and a CSV file in the path of your choice if indicated by the "path" argument. This function does not create a "cross" object in your environment that can be directly used for QTL mapping. You will need to load the CSV file with qtl::read.cross.
#' @param geno data frame with the genotyping results for your cross
#' @param pheno data frame with phenotypes of the individuals (individuals must have the same ID in the geno data frame and in the pheno data frame)
#' @param prefix potential prefix present in the names of the individuals in the geno data frame to be removed in ordere to have the same names as in the pheno file
#' @param tab data frame obtained with tab_mark function
#' @param ref data frame with the reference genotypes of mouse lines
#' @param par1 first parental strain used in the cross, the name must be written as in the "ref" data frame
#' @param par2 second parental strain used in the cross, the name must be written as in the "ref" data frame
#' @param pos column with marker positions
#' @param path if indicated, the data frame will be exported in this path
#'
#' @import dplyr
#' @import tidyr
#' @import utils
#' @import stringr
#'
#' @export
#'
#### write_rqtl ####
## write data frame in rqtl format (csv), if path != NA writes the file in the path indicated
write_rqtl <- function(geno,pheno,tab,ref,par1,par2,prefix,pos,path=NA){
Marie Bourdon's avatar
Marie Bourdon committed
24
25
26
27
28
29
  #rename df columns
  geno <- geno %>% rename("marker"=1,
                          "id"=2,
                          "allele_1"=3,
                          "allele_2"=4)

Marie Bourdon's avatar
Marie Bourdon committed
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  #extract snps non excluded
  if("exclude_match" %in% colnames(tab)){
    tab <- tab %>% filter(exclude_match==0)
  }

  if("exclude_poly" %in% colnames(tab)){
    tab <- tab %>% filter(exclude_poly==0)
  }

  if("exclude_prop" %in% colnames(tab)){
    tab <- tab %>% filter(exclude_prop==0)
  }

  if("exclude_allele" %in% colnames(tab)){
    tab <- tab %>% filter(exclude_allele==0)
  }


  #filter genotypes for non excluded markers in geno file
Marie Bourdon's avatar
Marie Bourdon committed
49
  geno <- geno %>% select(c(marker,id,allele_1,allele_2)) %>% filter(marker %in% tab$marker)
Marie Bourdon's avatar
Marie Bourdon committed
50
51
52
53
54
55
56

  #recode parents' names to match column names nomenclature
  par1 <- make.names(par1)
  par2 <- make.names(par2)

  #keep parental lines genotypes
  colnames(ref) <- make.names(colnames(ref))
Marie Bourdon's avatar
Marie Bourdon committed
57
  ref <- ref %>% select(marker,chr,!!sym(pos),!!sym(par1),!!sym(par2))
Marie Bourdon's avatar
Marie Bourdon committed
58
59

  #merge genotypes with parents
Marie Bourdon's avatar
Marie Bourdon committed
60
  geno <- left_join(geno,ref,by=c("marker"="marker"))
Marie Bourdon's avatar
Marie Bourdon committed
61
62

  #recode "-" in "N" in geno file
Marie Bourdon's avatar
Marie Bourdon committed
63
  geno <- geno %>% mutate(allele_1 = recode(allele_1,
Marie Bourdon's avatar
Marie Bourdon committed
64
65
                                                     "-" = "N"))

Marie Bourdon's avatar
Marie Bourdon committed
66
  geno <- geno %>% mutate(allele_2 = recode(allele_2,
Marie Bourdon's avatar
Marie Bourdon committed
67
68
69
                                                     "-" = "N"))

  #recode geno in factors with same levels
Marie Bourdon's avatar
Marie Bourdon committed
70
71
  geno <- geno %>% mutate(allele_1 = factor(allele_1,levels=c("A","C","G","H","N","T")))
  geno <- geno %>% mutate(allele_2 = factor(allele_2,levels=c("A","C","G","H","N","T")))
Marie Bourdon's avatar
Marie Bourdon committed
72
73
74
75
76
77



  #recode genotypes depending on parents' genotypes
  geno <- geno %>% mutate(Geno = case_when(
    #if one allele not genotyped:
Marie Bourdon's avatar
Marie Bourdon committed
78
    allele_1=="N" | allele_2=="N" ~ "NA",
Marie Bourdon's avatar
Marie Bourdon committed
79
80
81

    #if both alleles genotyped
    ##homozygous 0
Marie Bourdon's avatar
Marie Bourdon committed
82
    allele_1==allele_2 & allele_1==!!sym(par1) ~ "0",
Marie Bourdon's avatar
Marie Bourdon committed
83
    ##homozygous 2
Marie Bourdon's avatar
Marie Bourdon committed
84
    allele_1==allele_2 & allele_1==!!sym(par2) ~ "2",
Marie Bourdon's avatar
Marie Bourdon committed
85
    ##heterozygous
Marie Bourdon's avatar
Marie Bourdon committed
86
    allele_1!=allele_2 ~ "1",
Marie Bourdon's avatar
Marie Bourdon committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100

    #if parental strains are N/H
    ##homozygous for parent that is N/H
    ###homozygous 0
    (!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) &
      !!sym(par1)%in%c("H","N") ~ "0",
    ###homozygous 2
    (!!sym(par1)%in%c("H","N") | !!sym(par2)%in%c("H","N")) &
      !!sym(par2)%in%c("H","N") ~ "2",
  )
  )


  #keep positions of markers
Marie Bourdon's avatar
Marie Bourdon committed
101
  markers <- geno %>% select(marker,chr,!!sym(pos)) %>% distinct()
Marie Bourdon's avatar
Marie Bourdon committed
102
  markers <- markers %>% arrange(chr,!!sym(pos))
Marie Bourdon's avatar
Marie Bourdon committed
103
104
105


  #keep only interesting columns in geno file
Marie Bourdon's avatar
Marie Bourdon committed
106
  geno <- geno %>% arrange(chr,!!sym(pos))
Marie Bourdon's avatar
Marie Bourdon committed
107
  geno <- geno %>% select(marker,id,Geno)
Marie Bourdon's avatar
Marie Bourdon committed
108
109

  #remove prefix
Marie Bourdon's avatar
Marie Bourdon committed
110
  geno <- geno %>% mutate(id=str_remove(id,prefix))
Marie Bourdon's avatar
Marie Bourdon committed
111
112

  #keep only non excluded markers and merge with positions
Marie Bourdon's avatar
Marie Bourdon committed
113
  markers <- markers %>% mutate(marker=as.character(marker))
Marie Bourdon's avatar
Marie Bourdon committed
114
  markers <- markers %>% mutate(chr=as.character(chr))
Marie Bourdon's avatar
Marie Bourdon committed
115
  geno <- markers %>% select(marker,chr,!!sym(pos)) %>% full_join(.,geno,by="marker")
Marie Bourdon's avatar
Marie Bourdon committed
116
117
118


  #pivoting
Marie Bourdon's avatar
Marie Bourdon committed
119
120
121
  geno <- geno %>% pivot_wider(names_from = c(marker,chr,!!sym(pos)),values_from = Geno,names_sep=",")
  geno <- geno %>% mutate(id=as.character(id))
  geno <- geno %>% rename("id,,"=id)
Marie Bourdon's avatar
Marie Bourdon committed
122
123
124
125
126


  #merge with phenotype file
  pheno <- pheno %>% mutate_all(as.character)
  colnames(pheno) <- str_c(colnames(pheno),",,")
Marie Bourdon's avatar
Marie Bourdon committed
127
  qtl_file <- right_join(pheno,geno,by=c("Ind,,"="id,,"))
Marie Bourdon's avatar
Marie Bourdon committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

  #prepare file
  qtl_file <- rbind(colnames(qtl_file),qtl_file)
  qtl_file <- separate_rows(qtl_file,everything(),sep=",")
  colnames(qtl_file) <- qtl_file[1,]
  qtl_file <- qtl_file %>% slice(-1)

  if(is.na(path)==FALSE){
    write.csv(qtl_file,file=path,quote=FALSE,row.names = FALSE)
  }




  return(qtl_file)
}