Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mouselab
stuart
Commits
070023a0
Commit
070023a0
authored
Jul 21, 2021
by
mariefbourdon
Browse files
modif tab_mark to add chr and pos
parent
1367ad7d
Changes
37
Hide whitespace changes
Inline
Side-by-side
.Rhistory
View file @
070023a0
annot_mini
<-
read.csv
(
url
(
"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"
))
data
(
genos
)
summary
(
genos
)
data
(
phenos
)
summary
(
phenos
)
strains
<-
geno_strains
(
ref
=
annot_mini
,
geno
=
genos
,
par1
=
c
(
"StrainsA_1"
,
"StrainsA_2"
),
par2
=
c
(
"StrainsB_1"
,
"StrainsB_2"
),
name1
=
"parent1"
,
name2
=
"parent2"
)
head
(
strains
)
%>%
print.data.frame
()
genos
<-
genos
%>%
filter
(
!
Sample.ID
%in%
c
(
"StrainsA_1"
,
"StrainsA_2"
,
"StrainsB_1"
,
"StrainsB_2"
))
data
(
stuart_tab
)
summary
(
stuart_tab
)
tab2
<-
mark_match
(
stuart_tab
,
ref
=
strains
)
tab2
%>%
filter
(
exclude_match
==
1
)
%>%
print.data.frame
()
tab2
<-
mark_poly
(
tab2
)
head
(
tab2
)
%>%
print.data.frame
()
tab2
<-
mark_prop
(
tab2
,
cross
=
"F2"
,
homo
=
0.1
,
hetero
=
0.1
)
head
(
tab2
)
%>%
print.data.frame
()
tab2
<-
mark_allele
(
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
)
tab2
%>%
arrange
(
desc
(
exclude_allele
))
%>%
head
()
%>%
print.data.frame
()
strains
%>%
filter
(
marker
%in%
c
(
"gJAX00038569"
,
"gJAX00425031"
,
"gUNC12245354"
,
"gUNC15530876"
,
"gUNC21555204"
,
"gUNC21596600"
))
%>%
arrange
(
marker
)
%>%
select
(
marker
,
parent1
,
parent2
)
%>%
print.data.frame
()
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
,
path
=
"rqtl_file.csv"
)
rqtl_file
[
1
:
10
,
1
:
7
]
%>%
print.data.frame
()
bad_markers
<-
mark_estmap
(
map
=
newmap_after
,
dist
=
100
)
tab2
%>%
filter
(
marker
%in%
c
(
"S6J010381992"
,
"S6J205609960"
))
View
(
annot_mini
)
annot_mini
%>%
filter
(
marker
%in%
c
(
"S6J010381992"
,
"S6J205609960"
))
# annot_mini %>% filter(marker %in% bad_markers)
View
(
annot_mini
)
View
(
strains
)
strains
%>%
filter
(
marker
%in%
c
(
"S6J010381992"
,
"S6J205609960"
))
data
(
stuart_cross
)
devtools
::
build
(
path
=
"."
,
vignettes
=
FALSE
)
devtools
::
build_vignettes
()
devtools
::
build_manual
(
path
=
"."
)
knitr
::
opts_chunk
$
set
(
collapse
=
TRUE
,
comment
=
"#>"
)
library
(
dplyr
)
library
(
stuart
)
annot_mini
<-
read.csv
(
url
(
"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"
))
data
(
genos
)
summary
(
genos
)
data
(
phenos
)
summary
(
phenos
)
strains
<-
geno_strains
(
ref
=
annot_mini
,
geno
=
genos
,
par1
=
c
(
"StrainsA_1"
,
"StrainsA_2"
),
par2
=
c
(
"StrainsB_1"
,
"StrainsB_2"
),
name1
=
"parent1"
,
name2
=
"parent2"
)
head
(
strains
)
%>%
print.data.frame
()
genos
<-
genos
%>%
filter
(
!
Sample.ID
%in%
c
(
"StrainsA_1"
,
"StrainsA_2"
,
"StrainsB_1"
,
"StrainsB_2"
))
data
(
stuart_tab
)
summary
(
stuart_tab
)
tab2
<-
mark_match
(
stuart_tab
,
ref
=
strains
)
tab2
%>%
filter
(
exclude_match
==
1
)
%>%
print.data.frame
()
tab2
<-
mark_poly
(
tab2
)
head
(
tab2
)
%>%
print.data.frame
()
tab2
<-
mark_prop
(
tab2
,
cross
=
"F2"
,
homo
=
0.1
,
hetero
=
0.1
)
head
(
tab2
)
%>%
print.data.frame
()
mark_prop
(
tab2
,
cross
=
"F2"
,
pval
=
0.05
)
%>%
head
()
%>%
print.data.frame
()
a
<-
mark_prop
(
tab2
,
cross
=
"F2"
,
pval
=
0.05
)
View
(
tab2
)
View
(
a
)
identical
(
a
,
tab2
)
library
(
stuart
)
devtools
::
build
(
path
=
"."
,
vignettes
=
FALSE
)
devtools
::
build_vignettes
()
devtools
::
build_manual
(
path
=
"."
)
library
(
stuart
)
devtools
::
build_vignettes
()
test
<-
tibble
(
sample.id
=
c
(
1
,
2
,
3
),
pheno
=
c
(
4
,
5
,
6
))
library
(
dplyr
)
library
(
dplyr
)
test
<-
tibble
(
sample.id
=
c
(
1
,
2
,
3
),
pheno
=
c
(
4
,
5
,
6
))
View
(
test
)
test
%>%
rename
(
1
=
"id"
)
test
%>%
rename
(
"id"
=
1
)
library
(
dplyr
)
library
(
stuart
)
write_rqtl
<-
function
(
geno
,
pheno
,
tab
,
ref
,
par1
,
par2
,
par_N
=
TRUE
,
prefix
,
pos
,
path
=
NA
){
#create geno column in geno df
geno
<-
geno
%>%
unite
(
Geno
,
c
(
"allele_1"
,
"allele_2"
),
sep
=
""
,
remove
=
FALSE
)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno
<-
geno
%>%
mutate
(
Geno
=
recode
(
Geno
,
"TA"
=
"AT"
,
"GA"
=
"AG"
,
"CA"
=
"AC"
,
"GT"
=
"TG"
,
"CT"
=
"TC"
,
"GC"
=
"CG"
))
#create df with counts for each genotype
df_count
<-
tibble
(
marker
=
as.character
(
unique
(
geno
$
marker
)),
allele_1
=
NA
,
allele_2
=
NA
,
n_HM1
=
NA
,
n_HM2
=
NA
,
n_HT
=
NA
,
n_NA
=
NA
)
## loop to count genotype
for
(
i
in
df_count
$
marker
){
#extract alleles for each marker
Alleles
<-
geno
%>%
filter
(
marker
==
i
)
%>%
select
(
c
(
marker
,
id
,
Geno
,
allele_1
,
allele_2
))
%>%
pivot_longer
(
c
(
allele_1
,
allele_2
),
names_to
=
"Allele_name"
,
values_to
=
"Allele"
)
%>%
distinct
(
Allele
)
%>%
filter
(
Allele
!=
"-"
)
Alleles
<-
as.factor
(
paste
(
Alleles
$
Allele
))
#sort alleles
Alleles
<-
factor
(
Alleles
,
levels
=
c
(
"A"
,
"T"
,
"C"
,
"G"
))
Alleles
<-
sort
(
Alleles
)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if
(
all
(
rapportools
::
is.empty
(
Alleles
))
==
FALSE
){
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_1
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
1
]),
allele_1
))
#count for homozygous for allele 1
n1
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
1
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM1
=
ifelse
(
marker
==
i
,
n1
$
n
,
n_HM1
))
}
#if marker not polymorphic
if
(
is.na
(
Alleles
[
2
])
==
TRUE
){
#NA as allele_2
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
NA
,
allele_2
))
#NA as n_HM2
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
NA
,
n_HM2
))
#NA as n_HT
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
NA
,
n_HT
))
}
else
{
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
2
]),
allele_2
))
#count for homozygous for allele 2
n2
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
2
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
n2
$
n
,
n_HM2
))
#count for heterozygous
n3
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
n3
$
n
,
n_HT
))
}
#count for NA
n4
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
"--"
|
Geno
==
paste
(
Alleles
[
1
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
Alleles
[
2
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
1
],
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for NA to df_count
df_count
<-
df_count
%>%
mutate
(
n_NA
=
ifelse
(
marker
==
i
,
n4
$
n
,
n_NA
))
}
#change class of counts as numeric :
df_count
$
n_HM1
<-
df_count
$
n_HM1
%>%
as.numeric
()
df_count
$
n_HM2
<-
df_count
$
n_HM2
%>%
as.numeric
()
df_count
$
n_HT
<-
df_count
$
n_HT
%>%
as.numeric
()
df_count
$
n_NA
<-
df_count
$
n_NA
%>%
as.numeric
()
#add 0 for null counts
df_count
<-
df_count
%>%
mutate_at
(
.vars
=
vars
(
n_HM1
,
n_HM2
,
n_HT
,
n_NA
),
~
replace
(
.
,
is.na
(
.
),
0
))
#save useful columns in annot dataframe
annot
<-
annot
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
))
print
(
annot
)
#return
return
(
df_count
)
}
tab_mark
(
genos
,
annot_mini
,
"cM_cox"
)
library
(
tidyr
)
tab_mark
(
genos
,
annot_mini
,
"cM_cox"
)
tab_mark
<-
function
(
geno
,
annot
,
pos
){
#rename df columns
geno
<-
geno
%>%
rename
(
"marker"
=
1
,
"id"
=
2
,
"allele_1"
=
3
,
"allele_2"
=
4
)
#extract snps non excluded
if
(
"exclude_match"
%in%
colnames
(
tab
)){
tab
<-
tab
%>%
filter
(
exclude_match
==
0
)
#create geno column in geno df
geno
<-
geno
%>%
unite
(
Geno
,
c
(
"allele_1"
,
"allele_2"
),
sep
=
""
,
remove
=
FALSE
)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno
<-
geno
%>%
mutate
(
Geno
=
recode
(
Geno
,
"TA"
=
"AT"
,
"GA"
=
"AG"
,
"CA"
=
"AC"
,
"GT"
=
"TG"
,
"CT"
=
"TC"
,
"GC"
=
"CG"
))
#create df with counts for each genotype
df_count
<-
tibble
(
marker
=
as.character
(
unique
(
geno
$
marker
)),
allele_1
=
NA
,
allele_2
=
NA
,
n_HM1
=
NA
,
n_HM2
=
NA
,
n_HT
=
NA
,
n_NA
=
NA
)
## loop to count genotype
for
(
i
in
df_count
$
marker
){
#extract alleles for each marker
Alleles
<-
geno
%>%
filter
(
marker
==
i
)
%>%
select
(
c
(
marker
,
id
,
Geno
,
allele_1
,
allele_2
))
%>%
pivot_longer
(
c
(
allele_1
,
allele_2
),
names_to
=
"Allele_name"
,
values_to
=
"Allele"
)
%>%
distinct
(
Allele
)
%>%
filter
(
Allele
!=
"-"
)
Alleles
<-
as.factor
(
paste
(
Alleles
$
Allele
))
#sort alleles
Alleles
<-
factor
(
Alleles
,
levels
=
c
(
"A"
,
"T"
,
"C"
,
"G"
))
Alleles
<-
sort
(
Alleles
)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if
(
all
(
rapportools
::
is.empty
(
Alleles
))
==
FALSE
){
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_1
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
1
]),
allele_1
))
#count for homozygous for allele 1
n1
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
1
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM1
=
ifelse
(
marker
==
i
,
n1
$
n
,
n_HM1
))
}
if
(
"exclude_poly"
%in%
colnames
(
tab
)){
tab
<-
tab
%>%
filter
(
exclude_poly
==
0
)
#if marker not polymorphic
if
(
is.na
(
Alleles
[
2
])
==
TRUE
){
#NA as allele_2
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
NA
,
allele_2
))
#NA as n_HM2
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
NA
,
n_HM2
))
#NA as n_HT
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
NA
,
n_HT
))
}
else
{
#add alleles to df_count
df_count
<-
df_count
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
2
]),
allele_2
))
#count for homozygous for allele 2
n2
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
2
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
n2
$
n
,
n_HM2
))
#count for heterozygous
n3
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to df_count
df_count
<-
df_count
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
n3
$
n
,
n_HT
))
}
if
(
"exclude_prop"
%in%
colnames
(
tab
)){
tab
<-
tab
%>%
filter
(
exclude_prop
==
0
)
#count for NA
n4
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
"--"
|
Geno
==
paste
(
Alleles
[
1
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
Alleles
[
2
],
"-"
,
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
1
],
sep
=
""
)
|
Geno
==
paste
(
"-"
,
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for NA to df_count
df_count
<-
df_count
%>%
mutate
(
n_NA
=
ifelse
(
marker
==
i
,
n4
$
n
,
n_NA
))
}
if
(
"exclude_allele"
%in%
colnames
(
tab
)){
tab
<-
tab
%>%
filter
(
exclude_allele
==
0
)
#change class of counts as numeric :
df_count
$
n_HM1
<-
df_count
$
n_HM1
%>%
as.numeric
()
df_count
$
n_HM2
<-
df_count
$
n_HM2
%>%
as.numeric
()
df_count
$
n_HT
<-
df_count
$
n_HT
%>%
as.numeric
()
df_count
$
n_NA
<-
df_count
$
n_NA
%>%
as.numeric
()
#add 0 for null counts
df_count
<-
df_count
%>%
mutate_at
(
.vars
=
vars
(
n_HM1
,
n_HM2
,
n_HT
,
n_NA
),
~
replace
(
.
,
is.na
(
.
),
0
))
#save useful columns in annot dataframe
annot
<-
annot
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
))
tab
<-
left_join
(
tab
,
annot
)
#return
return
(
df_count
)
}
#filter genotypes for non excluded markers in geno file
geno
<-
geno
%>%
select
(
c
(
marker
,
id
,
allele_1
,
allele_2
))
%>%
filter
(
marker
%in%
tab
$
marker
)
#recode parents' names to match column names nomenclature
par1
<-
make.names
(
par1
)
par2
<-
make.names
(
par2
)
#keep parental lines genotypes
colnames
(
ref
)
<-
make.names
(
colnames
(
ref
))
ref
<-
ref
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
),
!!
sym
(
par1
),
!!
sym
(
par2
))
#merge genotypes with parents
geno
<-
left_join
(
geno
,
ref
,
by
=
c
(
"marker"
=
"marker"
))
#remove snps with no position
geno
<-
geno
%>%
filter
(
is.na
(
chr
)
==
FALSE
)
%>%
filter
(
is.na
(
!!
sym
(
pos
))
==
FALSE
)
#recode "-" in "N" in geno file
geno
<-
geno
%>%
mutate
(
allele_1
=
recode
(
allele_1
,
"-"
=
"N"
))
geno
<-
geno
%>%
mutate
(
allele_2
=
recode
(
allele_2
,
"-"
=
"N"
))
#recode geno in factors with same levels
geno
<-
geno
%>%
mutate
(
allele_1
=
factor
(
allele_1
,
levels
=
c
(
"A"
,
"C"
,
"G"
,
"H"
,
"N"
,
"T"
)))
geno
<-
geno
%>%
mutate
(
allele_2
=
factor
(
allele_2
,
levels
=
c
(
"A"
,
"C"
,
"G"
,
"H"
,
"N"
,
"T"
)))
#recode genotypes depending on parents' genotypes
geno
<-
geno
%>%
mutate
(
Geno
=
case_when
(
#if one allele not genotyped:
allele_1
==
"N"
|
allele_2
==
"N"
~
"NA"
,
#if both alleles genotyped
##homozygous 0
allele_1
==
allele_2
&
allele_1
==!!
sym
(
par1
)
~
"0"
,
##homozygous 2
allele_1
==
allele_2
&
allele_1
==!!
sym
(
par2
)
~
"2"
,
##heterozygous
allele_1
!=
allele_2
~
"1"
,
#if parental strains are N/H
##homozygous for parent that is N/H
###homozygous 0
(
!!
sym
(
par1
)
%in%
c
(
"H"
,
"N"
)
|
!!
sym
(
par2
)
%in%
c
(
"H"
,
"N"
))
&
!!
sym
(
par1
)
%in%
c
(
"H"
,
"N"
)
~
"0"
,
###homozygous 2
(
!!
sym
(
par1
)
%in%
c
(
"H"
,
"N"
)
|
!!
sym
(
par2
)
%in%
c
(
"H"
,
"N"
))
&
!!
sym
(
par2
)
%in%
c
(
"H"
,
"N"
)
~
"2"
)
)
#keep positions of markers
markers
<-
geno
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
))
%>%
distinct
()
markers
<-
markers
%>%
arrange
(
chr
,
!!
sym
(
pos
))
#keep only interesting columns in geno file
geno
<-
geno
%>%
arrange
(
chr
,
!!
sym
(
pos
))
geno
<-
geno
%>%
select
(
marker
,
id
,
Geno
)
#remove prefix
geno
<-
geno
%>%
mutate
(
id
=
str_remove
(
id
,
prefix
))
#keep only non excluded markers and merge with positions
markers
<-
markers
%>%
mutate
(
marker
=
as.character
(
marker
))
markers
<-
markers
%>%
mutate
(
chr
=
as.character
(
chr
))
geno
<-
markers
%>%
select
(
marker
,
chr
,
!!
sym
(
pos
))
%>%
full_join
(
.
,
geno
,
by
=
"marker"
)
#pivoting
geno
<-
geno
%>%
pivot_wider
(
names_from
=
c
(
marker
,
chr
,
!!
sym
(
pos
)),
values_from
=
Geno
,
names_sep
=
","
)
geno
<-
geno
%>%
mutate
(
id
=
as.character
(
id
))
geno
<-
geno
%>%
rename
(
"id,,"
=
id
)
#merge with phenotype file
pheno
<-
pheno
%>%
rename
(
"id"
=
1
)
pheno
<-
pheno
%>%
mutate_all
(
as.character
)
colnames
(
pheno
)
<-
str_c
(
colnames
(
pheno
),
",,"
)
qtl_file
<-
right_join
(
pheno
,
geno
,
by
=
c
(
"id,,"
=
"id,,"
))
#prepare file
qtl_file
<-
rbind
(
colnames
(
qtl_file
),
qtl_file
)
qtl_file
<-
separate_rows
(
qtl_file
,
everything
(),
sep
=
","
)
colnames
(
qtl_file
)
<-
qtl_file
[
1
,]
qtl_file
<-
qtl_file
%>%
slice
(
-1
)
if
(
is.na
(
path
)
==
FALSE
){
write.csv
(
qtl_file
,
file
=
path
,
quote
=
FALSE
,
row.names
=
FALSE
)
tab_mark
(
genos
,
annot_mini
,
"cM_cox"
)
tab_mark
<-
function
(
geno
,
annot
,
pos
){
#rename df columns
geno
<-
geno
%>%
rename
(
"marker"
=
1
,
"id"
=
2
,
"allele_1"
=
3
,
"allele_2"
=
4
)
#create geno column in geno df
geno
<-
geno
%>%
unite
(
Geno
,
c
(
"allele_1"
,
"allele_2"
),
sep
=
""
,
remove
=
FALSE
)
#recode genotypes to have all heterozygous encoded the same way (ex: only "AT", no "TA")
geno
<-
geno
%>%
mutate
(
Geno
=
recode
(
Geno
,
"TA"
=
"AT"
,
"GA"
=
"AG"
,
"CA"
=
"AC"
,
"GT"
=
"TG"
,
"CT"
=
"TC"
,
"GC"
=
"CG"
))
#create df with counts for each genotype
tab
<-
tibble
(
marker
=
as.character
(
unique
(
geno
$
marker
)),
allele_1
=
NA
,
allele_2
=
NA
,
n_HM1
=
NA
,
n_HM2
=
NA
,
n_HT
=
NA
,
n_NA
=
NA
)
## loop to count genotype
for
(
i
in
tab
$
marker
){
#extract alleles for each marker
Alleles
<-
geno
%>%
filter
(
marker
==
i
)
%>%
select
(
c
(
marker
,
id
,
Geno
,
allele_1
,
allele_2
))
%>%
pivot_longer
(
c
(
allele_1
,
allele_2
),
names_to
=
"Allele_name"
,
values_to
=
"Allele"
)
%>%
distinct
(
Allele
)
%>%
filter
(
Allele
!=
"-"
)
Alleles
<-
as.factor
(
paste
(
Alleles
$
Allele
))
#sort alleles
Alleles
<-
factor
(
Alleles
,
levels
=
c
(
"A"
,
"T"
,
"C"
,
"G"
))
Alleles
<-
sort
(
Alleles
)
#add alleles and counts, only for markers with alleles (not markers with no genotyped ind)
if
(
all
(
rapportools
::
is.empty
(
Alleles
))
==
FALSE
){
#add alleles to tab
tab
<-
tab
%>%
mutate
(
allele_1
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
1
]),
allele_1
))
#count for homozygous for allele 1
n1
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
1
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to tab
tab
<-
tab
%>%
mutate
(
n_HM1
=
ifelse
(
marker
==
i
,
n1
$
n
,
n_HM1
))
}
return
(
qtl_file
)
#if marker not polymorphic
if
(
is.na
(
Alleles
[
2
])
==
TRUE
){
#NA as allele_2
tab
<-
tab
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
NA
,
allele_2
))
#NA as n_HM2
tab
<-
tab
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
NA
,
n_HM2
))
#NA as n_HT
tab
<-
tab
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
NA
,
n_HT
))
}
else
{
#add alleles to tab
tab
<-
tab
%>%
mutate
(
allele_2
=
ifelse
(
marker
==
i
,
paste
(
Alleles
[
2
]),
allele_2
))
#count for homozygous for allele 2
n2
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
2
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to tab
tab
<-
tab
%>%
mutate
(
n_HM2
=
ifelse
(
marker
==
i
,
n2
$
n
,
n_HM2
))
#count for heterozygous
n3
<-
geno
%>%
filter
(
marker
==
i
)
%>%
filter
(
Geno
==
paste
(
Alleles
[
1
],
Alleles
[
2
],
sep
=
""
))
%>%
summarise
(
n
=
n
())
#add count for homozygous for allele 1 to tab
tab
<-
tab
%>%
mutate
(
n_HT
=
ifelse
(
marker
==
i
,
n3
$
n
,
n_HT
))
}
data
(
phenos
)
summary
(
phenos
)
View
(
phenos
)
pheno
%>%
rename
(
"sample"
=
"Ind"
)
phenos
%>%
rename
(
"sample"
=
"Ind"
)
phenos
<-
pheno
%>%
rename
(
"sample"
=
"Ind"
)
phenos
<-
phenos
%>%
rename
(
"sample"
=
"Ind"
)
knitr
::
opts_chunk
$
set
(
collapse
=
TRUE
,
comment
=
"#>"
)
library
(
dplyr
)
library
(
stuart
)
annot_mini
<-
read.csv
(
url
(
"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"
))
data
(
genos
)
summary
(
genos
)
data
(
phenos
)
summary
(
phenos
)
strains
<-
geno_strains
(
ref
=
annot_mini
,
geno
=
genos
,
par1
=
c
(
"StrainsA_1"
,
"StrainsA_2"
),
par2
=
c
(
"StrainsB_1"
,
"StrainsB_2"
),
name1
=
"parent1"
,
name2
=
"parent2"
)
head
(
strains
)
%>%
print.data.frame
()
genos
<-
genos
%>%
filter
(
!
Sample.ID
%in%
c
(
"StrainsA_1"
,
"StrainsA_2"
,
"StrainsB_1"
,
"StrainsB_2"
))
data
(
stuart_tab
)
summary
(
stuart_tab
)
tab2
<-
mark_match
(
stuart_tab
,
ref
=
strains
)
tab2
%>%
filter
(
exclude_match
==
1
)
%>%
print.data.frame
()
tab2
<-
mark_poly
(
tab2
)
head
(
tab2
)
%>%
print.data.frame
()
tab2
<-
mark_prop
(
tab2
,
cross
=
"F2"
,
homo
=
0.1
,
hetero
=
0.1
)
head
(
tab2
)
%>%
print.data.frame
()
mark_prop
(
tab2
,
cross
=
"F2"
,
pval
=
0.05
)
%>%
head
()
%>%
print.data.frame
()
tab2
<-
mark_allele
(
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
)
tab2
%>%
arrange
(
desc
(
exclude_allele
))
%>%
head
()
%>%
print.data.frame
()
strains
%>%
filter
(
marker
%in%
c
(
"gJAX00038569"
,
"gJAX00425031"
,
"gUNC12245354"
,
"gUNC15530876"
,
"gUNC21555204"
,
"gUNC21596600"
))
%>%
arrange
(
marker
)
%>%
select
(
marker
,
parent1
,
parent2
)
%>%
print.data.frame
()
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
library
(
dplyr
)
library
(
stuart
)
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
library
(
stuart
)
library
(
dplyr
)
library
(
stuart
)
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
library
(
stuart
)
library
(
stringr
)
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
detach
(
"package:stuart"
,
unload
=
TRUE
)
install.packages
(
stuart
)
library
(
tidyverse
)
rqtl_file
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
detach
(
"package:stringr"
,
unload
=
TRUE
)
detach
(
"package:tibble"
,
unload
=
TRUE
)
detach
(
"package:tidyr"
,
unload
=
TRUE
)
detach
(
"package:tidyverse"
,
unload
=
TRUE
)
detach
(
"package:readr"
,
unload
=
TRUE
)
detach
(
"package:purrr"
,
unload
=
TRUE
)
detach
(
"package:forcats"
,
unload
=
TRUE
)
detach
(
"package:ggplot2"
,
unload
=
TRUE
)
detach
(
"package:dplyr"
,
unload
=
TRUE
)
devtools
::
build
(
path
=
"."
,
vignettes
=
FALSE
)
devtools
::
build_vignettes
()
devtools
::
build_manual
(
as.package
())
devtools
::
build_manual
(
)
library
(
stuart
)
knitr
::
opts_chunk
$
set
(
collapse
=
TRUE
,
comment
=
"#>"
)
library
(
dplyr
)
library
(
stuart
)
annot_mini
<-
read.csv
(
url
(
"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"
))
data
(
genos
)
summary
(
genos
)
data
(
phenos
)
summary
(
phenos
)
strains
<-
geno_strains
(
ref
=
annot_mini
,
geno
=
genos
,
par1
=
c
(
"StrainsA_1"
,
"StrainsA_2"
),
par2
=
c
(
"StrainsB_1"
,
"StrainsB_2"
),
name1
=
"parent1"
,
name2
=
"parent2"
)
head
(
strains
)
%>%
print.data.frame
()
genos
<-
genos
%>%
filter
(
!
Sample.ID
%in%
c
(
"StrainsA_1"
,
"StrainsA_2"
,
"StrainsB_1"
,
"StrainsB_2"
))
# how to use the function:
# stuart_tab <- tab_mark(genos)
data
(
stuart_tab
)
summary
(
stuart_tab
)
tab2
<-
mark_match
(
stuart_tab
,
ref
=
strains
)
tab2
%>%
filter
(
exclude_match
==
1
)
%>%
print.data.frame
()
tab2
<-
mark_poly
(
tab2
)
head
(
tab2
)
%>%
print.data.frame
()
tab2
<-
mark_prop
(
tab2
,
cross
=
"F2"
,
homo
=
0.1
,
hetero
=
0.1
)
head
(
tab2
)
%>%
print.data.frame
()
mark_prop
(
tab2
,
cross
=
"F2"
,
pval
=
0.05
)
%>%
head
()
%>%
print.data.frame
()
tab2
<-
mark_allele
(
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
)
tab2
%>%
arrange
(
desc
(
exclude_allele
))
%>%
head
()
%>%
print.data.frame
()
strains
%>%
filter
(
marker
%in%
c
(
"gJAX00038569"
,
"gJAX00425031"
,
"gUNC12245354"
,
"gUNC15530876"
,
"gUNC21555204"
,
"gUNC21596600"
))
%>%
arrange
(
marker
)
%>%
select
(
marker
,
parent1
,
parent2
)
%>%
print.data.frame
()
stuart_cross
<-
write_rqtl
(
geno
=
genos
,
pheno
=
phenos
,
tab
=
tab2
,
ref
=
strains
,
par1
=
"parent1"
,
par2
=
"parent2"
,
prefix
=
"ind_"
,
pos
=
"cM_cox"
)
stuart_cross
[
1
:
10
,
1
:
7
]
%>%
print.data.frame
()
knitr
::
opts_chunk
$
set
(
collapse
=
TRUE
,
comment
=
"#>"
)
library
(
dplyr
)
library
(
stuart
)
annot_mini
<-
read.csv
(
url
(
"https://raw.githubusercontent.com/kbroman/MUGAarrays/master/UWisc/mini_uwisc_v2.csv"
))
data
(
genos
)
summary
(
genos
)
data
(
phenos
)
summary
(
phenos
)
strains
<-
geno_strains
(
ref
=
annot_mini
,
geno
=
genos
,
par1
=
c
(
"StrainsA_1"
,
"StrainsA_2"
),
par2
=
c
(
"StrainsB_1"
,
"StrainsB_2"
),
name1
=
"parent1"
,
name2
=
"parent2"
)
head
(
strains
)
%>%
print.data.frame
()
genos
<-
genos
%>%
filter
(
!
Sample.ID
%in%
c
(
"StrainsA_1"
,
"StrainsA_2"
,
"StrainsB_1"
,
"StrainsB_2"
))
# how to use the function:
# stuart_tab <- tab_mark(genos)
data
(
stuart_tab
)
summary
(
stuart_tab
)