Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bioinfo_utils
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
bioinfo_utils
Commits
0372a1b8
Commit
0372a1b8
authored
5 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Skipping size factor graph for small data.
parent
49776bd1
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
RNA_Seq_Cecere/RNA-seq.snakefile
+63
-59
63 additions, 59 deletions
RNA_Seq_Cecere/RNA-seq.snakefile
with
63 additions
and
59 deletions
RNA_Seq_Cecere/RNA-seq.snakefile
+
63
−
59
View file @
0372a1b8
...
@@ -1617,66 +1617,70 @@ rule test_size_factor:
...
@@ -1617,66 +1617,70 @@ rule test_size_factor:
# The filter amounts to counts_data.mean(axis=1) > 4
# The filter amounts to counts_data.mean(axis=1) > 4
#np.log10(counts_data[counts_data.sum(axis=1) > 4 * len(counts_data.columns)] + 1).plot.kde()
#np.log10(counts_data[counts_data.sum(axis=1) > 4 * len(counts_data.columns)] + 1).plot.kde()
#np.log10(counts_data[counts_data.prod(axis=1) > 0]).plot.kde()
#np.log10(counts_data[counts_data.prod(axis=1) > 0]).plot.kde()
assert len(counts_data) > 1, "Counts data with only one row cannot have its distribution estimated using KDE."
# assert len(counts_data) > 1, "Counts data with only one row cannot have its distribution estimated using KDE."
pp = PdfPages(output.norm_counts_distrib_plot)
if len(counts_data) > 1:
for normalizer in params.size_factor_types:
pp = PdfPages(output.norm_counts_distrib_plot)
if normalizer == "median_ratio_to_pseudo_ref":
for normalizer in params.size_factor_types:
size_factors = median_ratio_to_pseudo_ref_size_factors(counts_data)
if normalizer == "median_ratio_to_pseudo_ref":
else:
size_factors = median_ratio_to_pseudo_ref_size_factors(counts_data)
size_factors = summaries.loc[normalizer]
by_norm = counts_data / size_factors
data = np.log10(by_norm[counts_data.prod(axis=1) > 0])
try:
xlabel = "log10(normalized counts)"
save_plot(pp, plot_counts_distribution, data, xlabel,
format="pdf",
title=params.counts_distrib_plot_title.format(normalizer))
except TypeError as e:
if str(e) in NO_DATA_ERRS:
warn("\n".join([
"Got TypeError:",
f"{str(e)}",
f"No data to plot for {normalizer}\n"]))
else:
else:
raise
size_factors = summaries.loc[normalizer]
except LinAlgError as e:
by_norm = counts_data / size_factors
if str(e) == "singular matrix":
data = np.log10(by_norm[counts_data.prod(axis=1) > 0])
warn("\n".join([
try:
"Got LinAlgError:", f"{str(e)}",
xlabel = "log10(normalized counts)"
f"Data cannot be plotted for {normalizer}",
save_plot(pp, plot_counts_distribution, data, xlabel,
f"{data}\n"]))
format="pdf",
else:
title=params.counts_distrib_plot_title.format(normalizer))
raise
except TypeError as e:
except ValueError as e:
if str(e) in NO_DATA_ERRS:
if str(e) == "`dataset` input should have multiple elements.":
warn("\n".join([
warn("\n".join([
"Got TypeError:",
"Got ValueError:", f"{str(e)}",
f"{str(e)}",
f"Data cannot be plotted for {normalizer}",
f"No data to plot for {normalizer}\n"]))
f"{data}\n"]))
else:
else:
raise
raise
except LinAlgError as e:
# xlabel = "log10(normalized counts)"
if str(e) == "singular matrix":
# if len(data) < 2:
warn("\n".join([
# msg = "\n".join([
"Got LinAlgError:", f"{str(e)}",
# "It seems that normalization led to data loss.",
f"Data cannot be plotted for {normalizer}",
# "Cannot use KDE to estimate distribution."])
f"{data}\n"]))
# assert len(by_norm) > 1, msg
else:
# msg = "".join([
raise
# f"Only {len(by_norm[counts_data.prod(axis=1) > 0])} rows have no zeros",
except ValueError as e:
# "and can be log-transformed."])
if str(e) == "`dataset` input should have multiple elements.":
# warnings.warn(
warn("\n".join([
# msg + "\nSkipping %s_%s" % (wildcards.orientation, wildcards.biotype))
"Got ValueError:", f"{str(e)}",
# else:
f"Data cannot be plotted for {normalizer}",
# try:
f"{data}\n"]))
# save_plot(pp, plot_counts_distribution, data, xlabel,
else:
# format="pdf",
raise
# title="Normalized %s_%s counts distributions\n(size factor: %s)" % (wildcards.orientation, wildcards.biotype, normalizer))
# xlabel = "log10(normalized counts)"
# except np.linalg.linalg.LinAlgError as e:
# if len(data) < 2:
# msg = "".join([
# msg = "\n".join([
# "There seems to be a problem with the data.\n",
# "It seems that normalization led to data loss.",
# "The data matrix has %d lines and %d columns.\n" % (len(data), len(data.columns))])
# "Cannot use KDE to estimate distribution."])
# warnings.warn(msg + "\nSkipping %s_%s" % (wildcards.orientation, wildcards.biotype))
# assert len(by_norm) > 1, msg
pp.close()
# msg = "".join([
# f"Only {len(by_norm[counts_data.prod(axis=1) > 0])} rows have no zeros",
# "and can be log-transformed."])
# warnings.warn(
# msg + "\nSkipping %s_%s" % (wildcards.orientation, wildcards.biotype))
# else:
# try:
# save_plot(pp, plot_counts_distribution, data, xlabel,
# format="pdf",
# title="Normalized %s_%s counts distributions\n(size factor: %s)" % (wildcards.orientation, wildcards.biotype, normalizer))
# except np.linalg.linalg.LinAlgError as e:
# msg = "".join([
# "There seems to be a problem with the data.\n",
# "The data matrix has %d lines and %d columns.\n" % (len(data), len(data.columns))])
# warnings.warn(msg + "\nSkipping %s_%s" % (wildcards.orientation, wildcards.biotype))
pp.close()
else:
# Make the file empty
open(output.norm_counts_distrib_plot, "w").close()
# TODO: Deal with 0-counts cases:
# TODO: Deal with 0-counts cases:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment