Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bioinfo_utils
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
bioinfo_utils
Commits
7d9b00ae
Commit
7d9b00ae
authored
4 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Saving old modifications (not used).
parent
d6d6f365
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
deseq2.bli.graphs/R/deseq2_graphs.R
+4
-2
4 additions, 2 deletions
deseq2.bli.graphs/R/deseq2_graphs.R
deseq2.bli.utils/R/deseq2_utils.R
+95
-34
95 additions, 34 deletions
deseq2.bli.utils/R/deseq2_utils.R
with
99 additions
and
36 deletions
deseq2.bli.graphs/R/deseq2_graphs.R
+
4
−
2
View file @
7d9b00ae
...
...
@@ -182,13 +182,15 @@ scatter_and_volcano <- function(exp_name, p_size=0.2) {
legend
(
"topright"
,
legend
=
c
(
"22G_down"
,
"22G_up"
),
pch
=
c
(
20
,
20
),
col
=
c
(
"blue"
,
"red"
),
cex
=
.7
)
}
#TODO: adapt this to any number of replicates (put the scatterplots for pairs of replicates in a par(mfrow=c(n, n)) grid)
#' Generating scatterplots between two replicates
#'
#' @param exp_name The name of the experiment
#' @param cond The condition
#' @export
replicates_scatterplot
<-
function
(
exp_name
,
cond
)
{
counts
<-
get_counts
(
exp_name
,
cond
,
count_type
=
"counts"
)
replicates_scatterplot
<-
function
(
exp_name
,
cond
,
sample_table
)
{
counts
<-
get_counts
(
exp_name
,
cond
,
sample_table
,
count_type
=
"counts"
)
tau
<-
cor
(
counts
[[
"1"
]],
counts
[[
"2"
]],
...
...
This diff is collapsed.
Click to expand it.
deseq2.bli.utils/R/deseq2_utils.R
+
95
−
34
View file @
7d9b00ae
...
...
@@ -66,7 +66,7 @@ load_dds <- function(exp_name, out_dir, sample_table, design_string, references)
print
(
"Loading pre-computed data"
)
load
(
data_filename
)
}
else
{
print
(
exp_name
)
flog.debug
(
exp_name
)
print
(
paste
(
"Reading count data for"
,
exp_name
))
#print(sample_table)
dds_raw
<-
DESeqDataSetFromHTSeqCount
(
...
...
@@ -171,7 +171,7 @@ after.under <- function(s) {
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
make_subtable
<-
function
(
exp_name
,
ref
,
other
,
wormid2cosmid
,
wormid2name
)
{
#
print
(exp_name)
#
flog.debug
(exp_name)
fold_field
<-
quote
(
paste0
(
exp_name
,
"_log2FoldChange"
))
padj_field
<-
quote
(
paste0
(
exp_name
,
"_padj"
))
results_table
<-
data.frame
(
...
...
@@ -179,7 +179,7 @@ make_subtable <- function(exp_name, ref, other, wormid2cosmid, wormid2name) {
all_results
[,
eval
(
fold_field
)],
all_results
[,
eval
(
padj_field
)]
)
print
(
paste
(
"Renaming column names for"
,
exp_name
))
flog.debug
(
paste
(
"Renaming column names for"
,
exp_name
))
colnames
(
results_table
)
<-
c
(
"gene"
,
"log2FoldChange"
,
...
...
@@ -266,32 +266,56 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
envir
=
parent.frame
()
)
)[[
count_type
]]
# http://stackoverflow.com/a/40746161/1878788
flog.debug
(
"counts_data is a:"
)
flog.debug
(
class
(
counts_data
))
flog.debug
(
"counts_data columns:"
)
flog.debug
(
colnames
(
counts_data
))
flog.debug
(
"sample_table columns:"
)
flog.debug
(
colnames
(
sample_table
))
flog.debug
(
"condition characteristics:"
)
flog.debug
(
names
(
cond
))
if
(
FALSE
)
{
print
(
"setting keys"
)
print
(
names
(
cond
)
)
# http://stackoverflow.com/a/40746161/1878788
flog.debug
(
"setting keys"
)
setkeyv
(
sample_table
,
names
(
cond
))
print
(
class
(
sample_table
))
print
(
names
(
sample_table
))
print
(
"getting library names"
)
flog.debug
(
class
(
sample_table
))
flog.debug
(
names
(
sample_table
))
flog.debug
(
"getting library names"
)
# Error in `[.data.frame`(x, i, j) : object 'lib' not found
lib_names
<-
sample_table
[
cond
,
lib
]
print
(
lib_names
)
flog.debug
(
lib_names
)
counts_1
<-
counts_data
[,
lib_names
[
1
]]
counts_2
<-
counts_data
[,
lib_names
[
2
]]
return
(
list
(
"1"
=
counts_1
,
"2"
=
counts_2
))
}
else
{
print
(
"Selecting columns corresponding to condition variables"
)
cond_variables
<-
sample_table
[,
names
(
cond
)]
print
(
cond_variables
)
print
(
"Generating boolean vector to select lines matching the condition"
)
flog.debug
(
"Selecting columns corresponding to condition variables"
)
# drop=FALSE because http://stackoverflow.com/questions/40867703
cond_variables
<-
sample_table
[,
names
(
cond
),
drop
=
FALSE
]
flog.debug
(
cond_variables
)
flog.debug
(
cond_variables
==
cond
)
flog.debug
(
"Generating boolean vector to select lines matching the condition"
)
selection_vector
<-
apply
((
cond_variables
==
cond
),
1
,
all
)
print
(
"Doing the selection"
)
flog.debug
(
"Doing the selection"
)
lines_with_condition
<-
sample_table
[
selection_vector
,]
print
(
"Getting the counts"
)
counts_1
<-
counts_data
[,
filter
(
lines_with_condition
,
replicate
==
"1"
)
%>%
select
(
lib
)
%>%
unlist
]
counts_2
<-
counts_data
[,
filter
(
lines_with_condition
,
replicate
==
"2"
)
%>%
select
(
lib
)
%>%
unlist
]
flog.debug
(
"Determining replicates list"
)
replicate_names
<-
levels
(
sample_table
$
replicate
)
flog.debug
(
"Pre-allocating counts list"
)
replicate_counts
<-
vector
(
mode
=
"list"
,
length
=
length
(
replicate_names
))
#counts_1 <- counts_data[, filter(lines_with_condition, replicate=="1") %>% select(lib) %>% unlist]
#counts_2 <- counts_data[, filter(lines_with_condition, replicate=="2") %>% select(lib) %>% unlist]
for
(
i
in
seq_along
(
replicate_names
))
{
rep
<-
replicate_names
[
i
]
flog.debug
(
paste
(
"Finding library name for replicate"
,
rep
))
# as.character: see comment under http://stackoverflow.com/a/15031603/1878788
lib_id
<-
as.character
(
filter
(
lines_with_condition
,
replicate
==
rep
)
%>%
select
(
lib
)
%>%
unlist
)
flog.debug
(
paste
(
"Getting the counts for library"
,
lib_id
))
replicate_counts
[[
i
]]
<-
counts_data
[,
lib_id
]
#replicate_counts[[rep]] <- counts_data[, filter(lines_with_condition, replicate==rep) %>% select(lib) %>% unlist]
}
return
(
list
(
"1"
=
counts_1
,
"2"
=
counts_2
))
names
(
replicate_counts
)
<-
replicate_names
return
(
replicate_counts
)
}
#return(list("1"=counts_1, "2"=counts_2))
}
#' Computes the means over replicates of two conditions
...
...
@@ -299,12 +323,12 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
#' @param exp_name The name of the experiment
#' @param out_dir The directory in which to write the data
#' @param sample_table The corresponding sample table
#' @param ref The reference condition
#' @param other The compared condition
#' @param ref
_condition
The reference condition
#' @param other
_condition
The compared condition
#' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
compute_means
<-
function
(
exp_name
,
out_dir
,
sample_table
,
ref
,
other
,
wormid2cosmid
,
wormid2name
){
compute_means
<-
function
(
exp_name
,
out_dir
,
sample_table
,
ref
_condition
,
other_condition
,
wormid2cosmid
,
wormid2name
){
#mu_data <- assays(
# get(paste(exp_name, "dds", sep="_"),
# envir=parent.frame()
...
...
@@ -315,13 +339,14 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
# Should we use the raw counts instead ("counts" instead of "mu")?
# We use "mu" because it may not make sense to compute an average between raw un-normalized counts.
ref_counts
<-
get_counts
(
exp_name
,
ref
,
sample_table
,
count_type
=
"mu"
)
other_counts
<-
get_counts
(
exp_name
,
other
,
sample_table
,
count_type
=
"mu"
)
ref_counts
<-
get_counts
(
exp_name
,
ref
_condition
,
sample_table
,
count_type
=
"mu"
)
other_counts
<-
get_counts
(
exp_name
,
other
_condition
,
sample_table
,
count_type
=
"mu"
)
mean_ref
<-
Reduce
(
`+`
,
ref_counts
)
/
length
(
ref_counts
)
mean_other
<-
Reduce
(
`+`
,
other_counts
)
/
length
(
other_counts
)
mean_ref_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
ref
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
mean_other_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
other
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
print
(
paste
(
"Assigning"
,
mean_ref_name
,
"and"
,
mean_other_name
))
#stopifnot(FALSE)
mean_ref_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
ref_condition
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
mean_other_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
other_condition
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
flog.debug
(
paste
(
"Assigning"
,
mean_ref_name
,
"and"
,
mean_other_name
))
assign
(
mean_ref_name
,
mean_ref
,
...
...
@@ -360,31 +385,33 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
#'
#' @param exp_name The name of the experiment
#' @param out_dir The directory in which to write the data
#' @param ref The reference condition
#' @param other The compared condition
#' @param ref
_condition
The reference condition
#' @param other
_condition
The compared condition
#' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
make_results_table
<-
function
(
exp_name
,
out_dir
,
ref
,
other
,
wormid2cosmid
,
wormid2name
)
{
make_results_table
<-
function
(
exp_name
,
out_dir
,
ref
_condition
,
other_condition
,
wormid2cosmid
,
wormid2name
)
{
# To use select and %>%
#require(dplyr)
require
(
dtplyr
)
# To use results
require
(
DESeq2
)
print
(
paste
(
"Computing results for"
,
exp_name
))
flog.debug
(
paste
(
"Computing results for"
,
exp_name
))
results_table
<-
results
(
get
(
paste0
(
exp_name
,
"_dds"
),
envir
=
parent.frame
()
),
tidy
=
TRUE
,
alpha
=
0.05
)
%>%
select
(
row
,
log2FoldChange
,
padj
)
flog.debug
(
paste
(
"Setting column names for"
,
exp_name
))
colnames
(
results_table
)
<-
c
(
"gene"
,
"log2FoldChange"
,
"padj"
)
mean_ref_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
ref
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
mean_other_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
other
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
flog.debug
(
paste
(
"Retrieving means for"
,
exp_name
))
mean_ref_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
ref_condition
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
mean_other_name
<-
paste
(
exp_name
,
"mean"
,
paste
(
other_condition
,
collapse
=
""
),
"counts"
,
sep
=
"_"
)
#mean_ref_name <- paste(exp_name, "mean", ref, "counts", sep="_")
#mean_other_name <- paste(exp_name, "mean", other, "counts", sep="_")
mean_ref
<-
get
(
...
...
@@ -395,6 +422,35 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
mean_other_name
,
envir
=
parent.frame
()
)
flog.debug
(
mean_ref
)
flog.debug
(
mean_other
)
flog.debug
(
paste
(
"Adding means as extra columns for"
,
exp_name
))
flog.debug
(
"dimensions"
)
flog.debug
(
dim
(
results_table
$
gene
))
flog.debug
(
dim
(
wormid2cosmid
$
find
(
results_table
$
gene
)))
flog.debug
(
dim
(
wormid2name
$
find
(
results_table
$
gene
)))
flog.debug
(
dim
(
results_table
[
2
:
3
]))
flog.debug
(
dim
(
mean_ref
))
flog.debug
(
dim
(
mean_other
))
flog.debug
(
"lengths"
)
flog.debug
(
length
(
results_table
$
gene
))
flog.debug
(
length
(
wormid2cosmid
$
find
(
results_table
$
gene
)))
flog.debug
(
length
(
wormid2name
$
find
(
results_table
$
gene
)))
flog.debug
(
length
(
results_table
[
2
:
3
]))
flog.debug
(
length
(
mean_ref
))
flog.debug
(
length
(
mean_other
))
extended_results_table
<-
cbind
(
results_table
$
gene
,
wormid2cosmid
$
find
(
results_table
$
gene
),
...
...
@@ -411,6 +467,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
# envir=parent.frame()
# )
)
flog.debug
(
paste
(
"Updating column names for"
,
exp_name
))
colnames
(
extended_results_table
)
<-
c
(
"gene"
,
"cosmid"
,
"name"
,
"log2FoldChange"
,
"padj"
,
...
...
@@ -418,7 +475,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
"mean_other_counts"
)
if
(
biotype
%in%
c
(
"transposable_elements_wormbase"
,
"transposable_elements_wormbase_reverse"
)){
print
(
"Appending TE family names"
)
flog.debug
(
"Appending TE family names"
)
results_table
<-
merge
(
extended_results_table
,
cosmid_wormid_transposon_family
,
by
=
"gene"
)
}
table_filename
<-
paste
(
...
...
@@ -427,16 +484,19 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
"results.txt"
,
sep
=
"_"
)
print
(
paste
(
"Writing results to"
,
table_filename
))
write.table
(
extended_results_table
,
file.path
(
out_dir
,
table_filename
)
)
print
(
paste
(
"Assigning"
,
paste0
(
exp_name
,
"_results"
)))
assign
(
paste0
(
exp_name
,
"_results"
),
extended_results_table
,
envir
=
parent.frame
()
#pos=1
)
print
(
paste
(
"Assigning"
,
paste0
(
exp_name
,
"_lfc_percentiles"
)))
assign
(
paste0
(
exp_name
,
"_lfc_percentiles"
),
quantile
(
...
...
@@ -447,6 +507,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
#pos=1
)
#lfc_percentiles <- get(paste0(exp_name, "lfc_percentiles"))
print
(
paste
(
"Assigning"
,
paste0
(
exp_name
,
"_padj_percentiles"
)))
assign
(
paste0
(
exp_name
,
"_padj_percentiles"
),
quantile
(
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment