Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mouselab
stuart
Commits
df999311
Commit
df999311
authored
Jul 22, 2021
by
Marie Bourdon
Browse files
start ignoring .Rhistory
parent
070023a0
Changes
2
Hide whitespace changes
Inline
Side-by-side
.Rhistory
deleted
100644 → 0
View file @
070023a0
#create geno column in geno df
geno
<-
geno
%>%
unite
(
Geno
,
c
(
"allele_1"
,
"allele_2"
),
sep
=
""
,
remove
=
FALSE
)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno
<-
geno
%>%
mutate
(
Geno
=
recode
(
Geno
,
"TA"
=
"AT"
,
"GA"
=
"AG"
,
"CA"
=
"AC"
,
"GT"
=
"TG"
,
"CT"
=
"TC"
,
"GC"
=
"CG"
))
#create df with counts for each genotype
df_count
<-
tibble
(
marker
=
as.character
(
unique
(
geno
$
marker
)),
allele_1
=
NA
,
allele_2
=
NA
,
n_HM1
=
NA
,
n_HM2
=
NA
,
n_HT
=
NA
,
n_NA
=
NA
)
## loop to count genotype
for
(
i
in
df_count
$
marker
){
#extract alleles for each marker
Alleles
<-
geno
%>%
filter
(
marker
==
i
)
%>%
select
(
c
(
marker
,
id
,
Geno
,
allele_1
,
allele_2
))
%>%
pivot_longer
(
c
(
allele_1
,
allele_2
),
names_to
=
"Allele_name"
,
values_to
=
"Allele"
)
%>%
distinct
(
Allele
)
%>%
filter
(
Allele
!=
"-"
)
Alleles
<-
as.factor
(
paste
(
Alleles
$
Allele
))
#sort alleles
Alleles
<-
factor
(
Alleles
,
levels
=
c
(
"A"
,
"T"
,
"C"
,
"G"
))
Alleles
<-
sort
(
Alleles
)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if
(
all
(
rapportools
::
is.empty
(
Alleles
))
==
FALSE
){
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_1
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
1
]),
allele_1
))
#count for homozygous for allele 1
n1
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
1
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM1
=
ifelse
(
marker
==
i
,
n1
$
n
,
n_HM1
))
}
#if marker not polymorphic
if
(
is.na
(
Alleles
[
2
])
==
TRUE
){
#NA as allele_2
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
NA
,
allele_2
))
#NA as n_HM2
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
NA
,
n_HM2
))
#NA as n_HT
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
NA
,
n_HT
))
}
else
{
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
2
]),
allele_2
))
#count for homozygous for allele 2
n2
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
2
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
n2
$
n
,
n_HM2
))
#count for heterozygous
n3
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
n3
$
n
,
n_HT
))
}
#count for NA
n4
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
"--"
|
Geno
==
paste
(
Alleles
[
1
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
Alleles
[
2
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
1
],
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for NA to df_count
df_count
<-
df_count
%>%
mutate
(
n_NA
=
ifelse
(
marker
==
i
,
n4
$
n
,
n_NA
))
}
#change class of counts as numeric :
df_count
$
n_HM1
<-
df_count
$
n_HM1
%>%
as.numeric
()
df_count
$
n_HM2
<-
df_count
$
n_HM2
%>%
as.numeric
()
df_count
$
n_HT
<-
df_count
$
n_HT
%>%
as.numeric
()
df_count
$
n_NA
<-
df_count
$
n_NA
%>%
as.numeric
()
#add 0 for null counts
df_count
<-
df_count
%>%
mutate_at
(
.vars
=
vars
(
n_HM1
,
n_HM2
,
n_HT
,
n_NA
),
~
replace
(
.
,
is.na
(
.
),
0
))
#save useful columns in annot dataframe
annot
<-
annot
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
))
print
(
annot
)
#return
return
(
df_count
)
}
# Run tab_mark() on the example genotypes and annotation, using the
# "cM_cox" annotation column.
tab_mark(genos, annot_mini, "cM_cox")

# Attach tidyr (tab_mark() calls unite() and pivot_longer()), then run the
# same call again.
library(tidyr)
tab_mark(genos, annot_mini, "cM_cox")
# Build a per-marker table of alleles and genotype counts, with the marker
# annotation attached.
#
# Arguments:
#   geno   data frame; its first four columns are renamed by position to
#          marker, id, allele_1 and allele_2.
#   annot  data frame with columns `marker`, `chr` and the column named by
#          `pos`.
#   pos    string naming the column of `annot` to keep next to marker/chr.
#
# Returns a tibble with one row per distinct marker of `geno` and columns
# marker, allele_1, allele_2, n_HM1 (homozygous for allele_1), n_HM2
# (homozygous for allele_2), n_HT (heterozygous), n_NA (missing genotypes),
# followed by `chr` and the `pos` column joined from `annot` by marker.
tab_mark <- function(geno, annot, pos) {
  # Fixed allele ordering: it decides which allele is reported as allele_1
  # and agrees with the heterozygote spelling produced by recode() below.
  allele_order <- c("A", "T", "C", "G")

  # rename df columns, create the two-letter genotype and recode it so that
  # all heterozygous are encoded the same way (ex: only "AT", no "TA")
  geno <- geno %>%
    rename("marker" = 1, "id" = 2, "allele_1" = 3, "allele_2" = 4) %>%
    unite(Geno, c("allele_1", "allele_2"), sep = "", remove = FALSE) %>%
    mutate(
      Geno = recode(
        Geno,
        "TA" = "AT", "GA" = "AG", "CA" = "AC",
        "GT" = "TG", "CT" = "TC", "GC" = "CG"
      )
    )

  markers <- as.character(unique(geno$marker))

  # Alleles and genotype counts for one marker, returned as a named list.
  count_marker <- function(i) {
    # which() drops NA comparisons, so an NA marker matches no row instead
    # of turning every comparison into NA
    rows <- which(geno$marker == i)
    calls <- geno$Geno[rows]
    seen <- c(
      as.character(geno$allele_1[rows]),
      as.character(geno$allele_2[rows])
    )
    # alleles observed for this marker, in the fixed order; "-" and any
    # symbol outside A/T/C/G are ignored
    alleles <- allele_order[allele_order %in% seen]
    a1 <- alleles[1] # NA when no allele was observed
    a2 <- alleles[2] # NA when the marker is not polymorphic

    # genotypes counted as missing: "--" or one retained allele with "-".
    # paste0() spells a missing allele as "NA", so "NA-"/"-NA" also match
    # when fewer than two alleles were found (same as the original code).
    missing_calls <- c(
      "--",
      paste0(a1, "-"), paste0(a2, "-"),
      paste0("-", a1), paste0("-", a2)
    )

    list(
      allele_1 = a1,
      allele_2 = a2,
      n_HM1 = if (is.na(a1)) 0 else as.numeric(sum(calls == paste0(a1, a1))),
      n_HM2 = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a2, a2))),
      n_HT = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a1, a2))),
      n_NA = as.numeric(sum(calls %in% missing_calls))
    )
  }

  stats <- lapply(markers, count_marker)
  chr_field <- function(field) {
    vapply(stats, function(s) s[[field]], character(1))
  }
  num_field <- function(field) {
    vapply(stats, function(s) s[[field]], numeric(1))
  }

  # create df with counts for each genotype (0 where nothing was counted)
  df_count <- tibble(
    marker = markers,
    allele_1 = chr_field("allele_1"),
    allele_2 = chr_field("allele_2"),
    n_HM1 = num_field("n_HM1"),
    n_HM2 = num_field("n_HM2"),
    n_HT = num_field("n_HT"),
    n_NA = num_field("n_NA")
  )

  # save useful columns in annot dataframe
  annot <- annot %>%
    select(marker, chr, !!sym(pos))

  # The original joined an undefined `tab` and returned the unjoined counts;
  # join the count table itself and return the annotated result.
  left_join(df_count, annot, by = "marker")
}
# Run tab_mark() on the example data; "cM_cox" names the annotation column
# kept alongside marker and chr.
tab_mark(geno = genos, annot = annot_mini, pos = "cM_cox")
# Build a per-marker table of alleles and genotype counts, with the marker
# annotation attached.
#
# Arguments:
#   geno   data frame; its first four columns are renamed by position to
#          marker, id, allele_1 and allele_2.
#   annot  data frame with columns `marker`, `chr` and the column named by
#          `pos`.
#   pos    string naming the column of `annot` to keep next to marker/chr.
#
# Returns a tibble with one row per distinct marker of `geno` and columns
# marker, allele_1, allele_2, n_HM1 (homozygous for allele_1), n_HM2
# (homozygous for allele_2), n_HT (heterozygous), n_NA (missing genotypes),
# followed by `chr` and the `pos` column joined from `annot` by marker.
tab_mark <- function(geno, annot, pos) {
  # Fixed allele ordering: it decides which allele is reported as allele_1
  # and agrees with the heterozygote spelling produced by recode() below.
  allele_order <- c("A", "T", "C", "G")

  # rename df columns, create the two-letter genotype and recode it so that
  # all heterozygous are encoded the same way (ex: only "AT", no "TA")
  geno <- geno %>%
    rename("marker" = 1, "id" = 2, "allele_1" = 3, "allele_2" = 4) %>%
    unite(Geno, c("allele_1", "allele_2"), sep = "", remove = FALSE) %>%
    mutate(
      Geno = recode(
        Geno,
        "TA" = "AT", "GA" = "AG", "CA" = "AC",
        "GT" = "TG", "CT" = "TC", "GC" = "CG"
      )
    )

  markers <- as.character(unique(geno$marker))

  # Alleles and genotype counts for one marker, returned as a named list.
  count_marker <- function(i) {
    # which() drops NA comparisons, so an NA marker matches no row instead
    # of turning every comparison into NA
    rows <- which(geno$marker == i)
    calls <- geno$Geno[rows]
    seen <- c(
      as.character(geno$allele_1[rows]),
      as.character(geno$allele_2[rows])
    )
    # alleles observed for this marker, in the fixed order; "-" and any
    # symbol outside A/T/C/G are ignored
    alleles <- allele_order[allele_order %in% seen]
    a1 <- alleles[1] # NA when no allele was observed
    a2 <- alleles[2] # NA when the marker is not polymorphic

    # genotypes counted as missing: "--" or one retained allele with "-".
    # paste0() spells a missing allele as "NA", so "NA-"/"-NA" also match
    # when fewer than two alleles were found (same as the original code).
    missing_calls <- c(
      "--",
      paste0(a1, "-"), paste0(a2, "-"),
      paste0("-", a1), paste0("-", a2)
    )

    list(
      allele_1 = a1,
      allele_2 = a2,
      n_HM1 = if (is.na(a1)) 0 else as.numeric(sum(calls == paste0(a1, a1))),
      n_HM2 = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a2, a2))),
      n_HT = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a1, a2))),
      n_NA = as.numeric(sum(calls %in% missing_calls))
    )
  }

  stats <- lapply(markers, count_marker)
  chr_field <- function(field) {
    vapply(stats, function(s) s[[field]], character(1))
  }
  num_field <- function(field) {
    vapply(stats, function(s) s[[field]], numeric(1))
  }

  # create df with counts for each genotype (0 where nothing was counted)
  tab <- tibble(
    marker = markers,
    allele_1 = chr_field("allele_1"),
    allele_2 = chr_field("allele_2"),
    n_HM1 = num_field("n_HM1"),
    n_HM2 = num_field("n_HM2"),
    n_HT = num_field("n_HT"),
    n_NA = num_field("n_NA")
  )

  # save useful columns in annot dataframe
  annot <- annot %>%
    select(marker, chr, !!sym(pos))

  # Name the join key explicitly instead of relying on the natural join:
  # no "Joining with" message, and no accidental extra key if `pos` happens
  # to share a name with one of the count columns.
  left_join(tab, annot, by = "marker")
}
# Run tab_mark() on the example data; "cM_cox" names the annotation column
# kept alongside marker and chr.
tab_mark(geno = genos, annot = annot_mini, pos = "cM_cox")
# Build a per-marker table of alleles and genotype counts, with the marker
# annotation attached.
#
# Arguments:
#   geno   data frame; its first four columns are renamed by position to
#          marker, id, allele_1 and allele_2.
#   annot  data frame with columns `marker`, `chr` and the column named by
#          `pos`.
#   pos    string naming the column of `annot` to keep next to marker/chr.
#
# Returns a tibble with one row per distinct marker of `geno` and columns
# marker, allele_1, allele_2, n_HM1 (homozygous for allele_1), n_HM2
# (homozygous for allele_2), n_HT (heterozygous), n_NA (missing genotypes),
# followed by `chr` and the `pos` column joined from `annot` by marker.
tab_mark <- function(geno, annot, pos) {
  # Fixed allele ordering: it decides which allele is reported as allele_1
  # and agrees with the heterozygote spelling produced by recode() below.
  allele_order <- c("A", "T", "C", "G")

  # rename df columns, create the two-letter genotype and recode it so that
  # all heterozygous are encoded the same way (ex: only "AT", no "TA")
  geno <- geno %>%
    rename("marker" = 1, "id" = 2, "allele_1" = 3, "allele_2" = 4) %>%
    unite(Geno, c("allele_1", "allele_2"), sep = "", remove = FALSE) %>%
    mutate(
      Geno = recode(
        Geno,
        "TA" = "AT", "GA" = "AG", "CA" = "AC",
        "GT" = "TG", "CT" = "TC", "GC" = "CG"
      )
    )

  markers <- as.character(unique(geno$marker))

  # Alleles and genotype counts for one marker, returned as a named list.
  # Computing each marker independently replaces the original loop, which
  # rewrote every column of the table with ifelse() once per marker.
  count_marker <- function(i) {
    # which() drops NA comparisons, so an NA marker matches no row instead
    # of turning every comparison into NA
    rows <- which(geno$marker == i)
    calls <- geno$Geno[rows]
    seen <- c(
      as.character(geno$allele_1[rows]),
      as.character(geno$allele_2[rows])
    )
    # alleles observed for this marker, in the fixed order; "-" and any
    # symbol outside A/T/C/G are ignored
    alleles <- allele_order[allele_order %in% seen]
    a1 <- alleles[1] # NA when no allele was observed
    a2 <- alleles[2] # NA when the marker is not polymorphic

    # genotypes counted as missing: "--" or one retained allele with "-".
    # paste0() spells a missing allele as "NA", so "NA-"/"-NA" also match
    # when fewer than two alleles were found (same as the original code).
    missing_calls <- c(
      "--",
      paste0(a1, "-"), paste0(a2, "-"),
      paste0("-", a1), paste0("-", a2)
    )

    list(
      allele_1 = a1,
      allele_2 = a2,
      n_HM1 = if (is.na(a1)) 0 else as.numeric(sum(calls == paste0(a1, a1))),
      n_HM2 = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a2, a2))),
      n_HT = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a1, a2))),
      n_NA = as.numeric(sum(calls %in% missing_calls))
    )
  }

  stats <- lapply(markers, count_marker)
  chr_field <- function(field) {
    vapply(stats, function(s) s[[field]], character(1))
  }
  num_field <- function(field) {
    vapply(stats, function(s) s[[field]], numeric(1))
  }

  # create df with counts for each genotype (0 where nothing was counted)
  tab <- tibble(
    marker = markers,
    allele_1 = chr_field("allele_1"),
    allele_2 = chr_field("allele_2"),
    n_HM1 = num_field("n_HM1"),
    n_HM2 = num_field("n_HM2"),
    n_HT = num_field("n_HT"),
    n_NA = num_field("n_NA")
  )

  # save useful columns in annot dataframe
  annot <- annot %>%
    select(marker, chr, !!sym(pos))

  # keep every counted marker; annotation columns are appended on the right
  left_join(tab, annot, by = "marker")
}
# Run tab_mark() on the example data; "cM_cox" names the annotation column
# kept alongside marker and chr.
tab_mark(geno = genos, annot = annot_mini, pos = "cM_cox")
# Build a per-marker table of alleles and genotype counts, with the marker
# annotation placed in front.
#
# Arguments:
#   geno   data frame; its first four columns are renamed by position to
#          marker, id, allele_1 and allele_2.
#   annot  data frame with columns `marker`, `chr` and the column named by
#          `pos`.
#   pos    string naming the column of `annot` to keep next to marker/chr.
#
# Returns a tibble holding every distinct marker of `geno`, with columns
# marker, chr and the `pos` column (from `annot`), then allele_1, allele_2,
# n_HM1 (homozygous for allele_1), n_HM2 (homozygous for allele_2),
# n_HT (heterozygous) and n_NA (missing genotypes).
tab_mark <- function(geno, annot, pos) {
  # Fixed allele ordering: it decides which allele is reported as allele_1
  # and agrees with the heterozygote spelling produced by recode() below.
  allele_order <- c("A", "T", "C", "G")

  # rename df columns, create the two-letter genotype and recode it so that
  # all heterozygous are encoded the same way (ex: only "AT", no "TA")
  geno <- geno %>%
    rename("marker" = 1, "id" = 2, "allele_1" = 3, "allele_2" = 4) %>%
    unite(Geno, c("allele_1", "allele_2"), sep = "", remove = FALSE) %>%
    mutate(
      Geno = recode(
        Geno,
        "TA" = "AT", "GA" = "AG", "CA" = "AC",
        "GT" = "TG", "CT" = "TC", "GC" = "CG"
      )
    )

  markers <- as.character(unique(geno$marker))

  # Alleles and genotype counts for one marker, returned as a named list.
  # Computing each marker independently replaces the original loop, which
  # rewrote every column of the table with ifelse() once per marker.
  count_marker <- function(i) {
    # which() drops NA comparisons, so an NA marker matches no row instead
    # of turning every comparison into NA
    rows <- which(geno$marker == i)
    calls <- geno$Geno[rows]
    seen <- c(
      as.character(geno$allele_1[rows]),
      as.character(geno$allele_2[rows])
    )
    # alleles observed for this marker, in the fixed order; "-" and any
    # symbol outside A/T/C/G are ignored
    alleles <- allele_order[allele_order %in% seen]
    a1 <- alleles[1] # NA when no allele was observed
    a2 <- alleles[2] # NA when the marker is not polymorphic

    # genotypes counted as missing: "--" or one retained allele with "-".
    # paste0() spells a missing allele as "NA", so "NA-"/"-NA" also match
    # when fewer than two alleles were found (same as the original code).
    missing_calls <- c(
      "--",
      paste0(a1, "-"), paste0(a2, "-"),
      paste0("-", a1), paste0("-", a2)
    )

    list(
      allele_1 = a1,
      allele_2 = a2,
      n_HM1 = if (is.na(a1)) 0 else as.numeric(sum(calls == paste0(a1, a1))),
      n_HM2 = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a2, a2))),
      n_HT = if (is.na(a2)) 0 else as.numeric(sum(calls == paste0(a1, a2))),
      n_NA = as.numeric(sum(calls %in% missing_calls))
    )
  }

  stats <- lapply(markers, count_marker)
  chr_field <- function(field) {
    vapply(stats, function(s) s[[field]], character(1))
  }
  num_field <- function(field) {
    vapply(stats, function(s) s[[field]], numeric(1))
  }

  # create df with counts for each genotype (0 where nothing was counted)
  tab <- tibble(
    marker = markers,
    allele_1 = chr_field("allele_1"),
    allele_2 = chr_field("allele_2"),
    n_HM1 = num_field("n_HM1"),
    n_HM2 = num_field("n_HM2"),
    n_HT = num_field("n_HT"),
    n_NA = num_field("n_NA")
  )

  # save useful columns in annot dataframe
  annot <- annot %>%
    select(marker, chr, !!sym(pos))

  # right_join keeps every counted marker while putting the annotation
  # columns first
  right_join(annot, tab, by = "marker")
}
# Print the marker table for the example data.
tab_mark(geno = genos, annot = annot_mini, pos = "cM_cox")

# how to use the function:
# stuart_tab <- tab_mark(genos,annot_mini,"cM_cox")

# Keep the result and inspect it, and the input genotypes, in the viewer.
tab <- tab_mark(geno = genos, annot = annot_mini, pos = "cM_cox")
View(tab)
View(genos)
View(tab)
.gitignore
View file @
df999311
...
...
@@ -4,3 +4,4 @@ Meta
/doc/
/Meta/
.Rhistory
.Rhistory
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment