mark_prop.R 5.39 KB
Newer Older
Marie  BOURDON's avatar
Marie BOURDON committed
1
2
#' @title Exclude markers depending on proportions of homo/hetorozygous
#'
3
#' @description This function uses the dataframe produced by the tab_mark function and fills the "exclude" column for all the markers that present too much missing genotypes or odd proportions of each genotype. You can define these proportions thanks to the arguments of the function. The filter on genotype proportions applies to autosomes only (and not the chromosomes encoded as "X", "Y" and "M")
Marie Bourdon's avatar
Marie Bourdon committed
4
5
6
7
#' @param tab data frame obtained with tab_mark function.
#' @param cross F2 or N2.
#' @param homo proportion of homozygous individuals under which the marker is excluded. Will apply on both homozygous genotypes for a F2, but only on one for N2.
#' @param hetero proportion of heterozygous individuals under which the marker is excluded.
Marie Bourdon's avatar
Marie Bourdon committed
8
9
10
#' @param homo1X a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the highest expected proportion.
#' @param homo2X a vector of two numbers. The lower and upper limits for the proportion of homozygous individuals for markers on X chromosome. This argument is for homozygous genotype with the lowest expected proportion.
#' @param heteroX a vector of two numbers. The lower and upper limits for the proportion of heterozygous individuals for markers on X chromosome.
Marie Bourdon's avatar
Marie Bourdon committed
11
#' @param na proportion of non-genotyped individuals above which the marker is excluded.
Marie  BOURDON's avatar
Marie BOURDON committed
12
13
#'
#' @import dplyr
Marie Bourdon's avatar
Marie Bourdon committed
14
#' @import tidyselect
Marie  BOURDON's avatar
Marie BOURDON committed
15
16
17
18
19
20
#'
#' @export
#'

#### mark_prop ####
## excludes markers depending on proportions of homo/hetorozygous
Marie Bourdon's avatar
Marie Bourdon committed
21
mark_prop <- function(tab,cross,homo=NA,hetero=NA,pval=NA,homo1X=NA,homo2X=NA,heteroX=NA,na=0.5){
Marie Bourdon's avatar
Marie Bourdon committed
22
23
  #calculate total number of individuals genotyped for each marker
  tab <- tab %>% mutate(n_geno = (n_HM1 + n_HM2 + n_HT))
mariefbourdon's avatar
mariefbourdon committed
24
25


Marie  BOURDON's avatar
Marie BOURDON committed
26
27
28
29
30
  #stop if cross != "F2" or "N2"
  if(!cross %in% c("F2","N2")){
    stop("Cross must be F2 or N2")
  }

mariefbourdon's avatar
mariefbourdon committed
31
32
33
  #stop of homo&hetero or pval not specified
  if((is.na(homo)==TRUE | is.na(hetero)==TRUE) & is.na(pval)==TRUE){
    stop("Arguments homo and hetero or argument pval must be specified")
Marie  BOURDON's avatar
Marie BOURDON committed
34
35
  }

Marie Bourdon's avatar
Marie Bourdon committed
36
37
38
39
  #warn if chrX not used
  if(is.na(homo1X)==TRUE & is.na(homo2X)==TRUE & is.na(heteroX)==TRUE){
    warning("Arguments homo1X, homo1X and heteroX are not specified: proportions for markers on X chromosomes will not be tested")
  }
mariefbourdon's avatar
mariefbourdon committed
40
41
42


  #stop with prop of na
Marie Bourdon's avatar
Marie Bourdon committed
43
44
45
  #calculate proportion
  tab <- tab %>% mutate(p_NA = n_NA/(n_geno+n_NA))

mariefbourdon's avatar
mariefbourdon committed
46
47
48
49
  tab <- tab %>%
    mutate(exclude_prop=case_when(p_NA > na ~ 1,
                                  T ~ 0))

Marie Bourdon's avatar
Marie Bourdon committed
50
51
52
  #stock colnames to join
  names <- colnames(tab)

Marie Bourdon's avatar
Marie Bourdon committed
53
  #exclude with prop of homo/hetero
mariefbourdon's avatar
mariefbourdon committed
54
  if(is.na(pval)==TRUE){
Marie Bourdon's avatar
Marie Bourdon committed
55
56
57
58
59
    #calculate proportion of each genotype
    tab <- tab %>% mutate(p_HM1 = n_HM1/n_geno)
    tab <- tab %>% mutate(p_HM2 = n_HM2/n_geno)
    tab <- tab %>% mutate(p_HT = n_HT/n_geno)

mariefbourdon's avatar
mariefbourdon committed
60
    tab <- tab %>%
Marie Bourdon's avatar
Marie Bourdon committed
61
62
63
64
65
66
67
68
69
70
71
      mutate(exclude_prop=case_when(p_NA > na ~ 1,
                                    !chr %in% c("X","Y","M") & cross=="F2" & (p_HM1 < homo | p_HM2 < homo | p_HT < hetero) ~ 1,
                                    !chr %in% c("X","Y","M") & cross=="N2" & ((p_HM2 == 0 & p_HM1 < homo) |
                                                                                (p_HM1 == 0 & p_HM2 < homo) |
                                                                                (p_HT < hetero) |
                                                                                (p_HM1 !=0 & p_HM2 != 0)) ~ 1,
                                    chr == "X" & p_HM1 >= p_HM2 & (!between(p_HM1,homo1X[1],homo1X[2]) |
                                                                     !between(p_HM2,homo2X[1],homo2X[2])) ~ 1,
                                    chr == "X" & p_HM2 > p_HM1 & (!between(p_HM2,homo1X[1],homo1X[2]) |
                                                                    !between(p_HM1,homo2X[1],homo2X[2])) ~ 1,
                                    chr == "X" & !between(p_HT,heteroX[1],heteroX[2]) ~ 1,
mariefbourdon's avatar
mariefbourdon committed
72
73
                                    T ~ exclude_prop
      ))
Marie  BOURDON's avatar
Marie BOURDON committed
74

Marie Bourdon's avatar
Marie Bourdon committed
75
    print(tab)
Marie Bourdon's avatar
Marie Bourdon committed
76
    #exclude with pval chisq.test
Marie Bourdon's avatar
Marie Bourdon committed
77
78
79
80
  } else if(is.na(pval)==FALSE){

    #if cross F2
    if(cross=="F2"){
Marie Bourdon's avatar
Marie Bourdon committed
81
      #chisq test
Marie Bourdon's avatar
Marie Bourdon committed
82
83
84
85
86
      tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
        mutate(.,chi_pval = tibble(n_HM1,n_HM2,n_HT) %>%
                 chisq.test(p=c(0.25,0.25,0.5)) %>% .$p.value) %>%
        full_join(.,tab,by=all_of(names))

Marie Bourdon's avatar
Marie Bourdon committed
87
      #convert in score
Marie Bourdon's avatar
Marie Bourdon committed
88
      tab <- tab %>% mutate(exclude_prop=case_when(!chr %in% c("X","Y","M") & chi_pval < pval ~ 1,
Marie Bourdon's avatar
Marie Bourdon committed
89
90
91
92
                                                   T ~ exclude_prop))

      #if cross N2
    } else if(cross=="N2"){
Marie Bourdon's avatar
Marie Bourdon committed
93
94
95
96
97
98

      #stock homozygous data in a new column (either n_HM1 or n-<hm2)
      tab <- tab %>% mutate(n_hm = case_when(n_HM1>n_HM2 ~ n_HM1,
                                             T ~ n_HM2))

      #chisq test
Marie Bourdon's avatar
Marie Bourdon committed
99
      tab <- tab %>% filter(p_NA != 1) %>% rowwise() %>%
Marie Bourdon's avatar
Marie Bourdon committed
100
101
        mutate(.,chi_pval =chisq.test(x=c(n_hm,n_HT),p=c(0.5,0.5)) %>% .$p.value) %>%
        full_join(.,tab,by=c(all_of(names),"n_hm"))
Marie Bourdon's avatar
Marie Bourdon committed
102

Marie Bourdon's avatar
Marie Bourdon committed
103
      #convert in score
Marie Bourdon's avatar
Marie Bourdon committed
104
105
      tab <- tab %>% mutate(exclude_prop=case_when(!chr %in% c("X","Y","M") & chi_pval < pval ~ 1,
                                                   !chr %in% c("X","Y","M") & (n_HM1 !=0 & n_HM2 != 0) ~ 1,
Marie Bourdon's avatar
Marie Bourdon committed
106
107
                                                   T ~ exclude_prop))
    }
mariefbourdon's avatar
mariefbourdon committed
108
109
110


  }
Marie Bourdon's avatar
Marie Bourdon committed
111
112
  tab <- tab %>% select(all_of(names)) %>% select(-c(n_geno,p_NA))
  return(tab)
Marie  BOURDON's avatar
Marie BOURDON committed
113
}