diff --git a/backend/metagenedb/api/catalog/views/gene.py b/backend/metagenedb/api/catalog/views/gene.py index ba1c3ff4511869cc62eb9572b34796faba2886ec..4b23b38f18605a9823a51b6dd27349288f3322bd 100644 --- a/backend/metagenedb/api/catalog/views/gene.py +++ b/backend/metagenedb/api/catalog/views/gene.py @@ -52,53 +52,17 @@ class GeneViewSet(BulkViewSet): response['Content-Disposition'] = 'attachment; filename=%s' % filename return response - def _extract_taxonomy_info(self, gene): - if gene.taxonomy is None: - return ['', '', '', ''] - return [ - gene.taxonomy.tax_id, gene.taxonomy.name, - gene.taxonomy.rank, gene.taxonomy.one_line_detailed_taxonomy - ] - - def _extract_function_info(self, gene): - if not gene.functions.all(): - return ['', ''] - function_ids = { - 'kegg': [], - 'eggnog': [] - } - for function in gene.functions.all(): - function_ids.get(function.source).append(function.function_id) - return [ - ';'.join(function_ids['kegg']), - ';'.join(function_ids['eggnog']) - ] - - def _get_metadata_line(self, gene): - """ - Transform gene content to a line for metadata extract - """ - gene_items = [ - gene.gene_id, gene.name, gene.source, gene.length, - ] - gene_items = gene_items + self._extract_taxonomy_info(gene) - gene_items = gene_items + self._extract_function_info(gene) - return ','.join([str(item) for item in gene_items]) - def _build_csv_response(self): queryset = self.filter_queryset(self.get_queryset()) queryset = queryset.select_related("taxonomy").prefetch_related("functions") - # if self._check_too_many_genes(queryset): - # return self.too_many_genes_error_response + if self._check_too_many_genes(queryset): + return self.too_many_genes_error_response with StringIO() as csv_file: # Write header - header = ",".join([ - 'gene_id', 'gene_name', 'gene_source', 'length', 'tax_id', 'tax_name', 'tax_rank', 'tax_full' - 'kegg_id', 'eggnog_id', - ]) + header = Gene.CSV_HEADER csv_file.write(f"{header}\n") - for gene in queryset: - csv_file.write(f"{self._get_metadata_line(gene)}\n") + for gene in queryset.iterator(): + csv_file.write(f"{gene.csv}\n") # generate the file response = HttpResponse(csv_file.getvalue(), content_type='text/csv') filename = 'metagenedb.csv' diff --git a/backend/metagenedb/api/catalog/views/test_gene.py b/backend/metagenedb/api/catalog/views/test_gene.py index 41977fd91d36a23b392db3a59a0011250e9a17b3..2700261c250996019c44d77399591f0c92978b9d 100644 --- a/backend/metagenedb/api/catalog/views/test_gene.py +++ b/backend/metagenedb/api/catalog/views/test_gene.py @@ -3,9 +3,6 @@ from django.urls import reverse from rest_framework import status from metagenedb.api.catalog.views import GeneViewSet -from metagenedb.apps.catalog.factory import ( - GeneFactory, GeneWithEggNOGFactory, GeneWithKeggFactory, GeneWithTaxonomyFactory -) class GeneViewSetMock(GeneViewSet): @@ -26,54 +23,3 @@ class TestGenes(TestCase): url = reverse('api:catalog:v1:genes-list') resp = self.client.get(url) self.assertEqual(resp.status_code, status.HTTP_200_OK) - - def test_get_metadata_line_no_functions(self): - gene = GeneFactory() - expected_items = [ - gene.gene_id, gene.name, gene.source, gene.length, - '', '', '', '', '', '' - ] - expected_line = ','.join([str(item) for item in expected_items]) - # Make test with method from GeneViewSet - viewset = GeneViewSetMock() - tested_line = viewset._get_metadata_line(gene) - self.assertEqual(tested_line, expected_line) - - def test_get_metadata_line_with_taxonomy(self): - gene = GeneWithTaxonomyFactory() - expected_items = [ - gene.gene_id, gene.name, gene.source, gene.length, - gene.taxonomy.tax_id, gene.taxonomy.name, gene.taxonomy.rank, gene.taxonomy.one_line_detailed_taxonomy, - '', '' - ] - expected_line = ','.join([str(item) for item in expected_items]) - # Make test with method from GeneViewSet - viewset = GeneViewSetMock() - tested_line = viewset._get_metadata_line(gene) - self.assertEqual(tested_line, expected_line) - - def test_get_metadata_line_with_kegg(self): - gene = GeneWithKeggFactory() - expected_items = [ - gene.gene_id, gene.name, gene.source, gene.length, - '', '', '', '', - gene.functions.all()[0].function_id, '' - ] - expected_line = ','.join([str(item) for item in expected_items]) - # Make test with method from GeneViewSet - viewset = GeneViewSetMock() - tested_line = viewset._get_metadata_line(gene) - self.assertEqual(tested_line, expected_line) - - def test_get_metadata_line_with_eggnog(self): - gene = GeneWithEggNOGFactory() - expected_items = [ - gene.gene_id, gene.name, gene.source, gene.length, - '', '', '', '', - '', gene.functions.all()[0].function_id, - ] - expected_line = ','.join([str(item) for item in expected_items]) - # Make test with method from GeneViewSet - viewset = GeneViewSetMock() - tested_line = viewset._get_metadata_line(gene) - self.assertEqual(tested_line, expected_line) diff --git a/backend/metagenedb/apps/catalog/models/gene.py b/backend/metagenedb/apps/catalog/models/gene.py index 27bde00f404a0b3523896766bb3ccd835df56d9f..1291e951de442b693921398336c8a82f2bdfbc09 100644 --- a/backend/metagenedb/apps/catalog/models/gene.py +++ b/backend/metagenedb/apps/catalog/models/gene.py @@ -1,3 +1,5 @@ +from itertools import repeat + from django.db import models from .function import Function @@ -12,6 +14,11 @@ class Gene(models.Model): (IGC, 'IGC'), (VIRGO, 'Virgo'), ] + CSV_HEADER = ','.join([ + 'gene_id', 'gene_name', 'gene_source', 'length', + 'tax_id', 'tax_name', 'tax_rank', 'tax_full', + 'kegg_id', 'eggnog_id', + ]) gene_id = models.SlugField(max_length=100, db_index=True, unique=True) name = models.CharField(max_length=100, unique=True) @@ -25,13 +32,54 @@ class Gene(models.Model): ) source = models.CharField(max_length=10, choices=SOURCE_CHOICES, default=UNDEFINED) - def __str__(self): + def __str__(self) -> str: return self.gene_id @property - def fasta(self): + def fasta(self) -> str: return f">{self.gene_id}\n{self.sequence}\n" + @property + def csv_header(self) -> str: + return self.CSV_HEADER + + @property + def csv_gene(self) -> str: + return ",".join([ + self.name, self.source, str(self.length), + ]) + + @property + def csv_tax(self) -> str: + if self.taxonomy is None: + return ",".join(list(repeat('', 4))) + else: + return self.taxonomy.csv + + @property + def csv_functions(self) -> str: + if not self.functions.all(): + function_list = list(repeat('', 2)) + else: + + function_ids = { + 'kegg': [], + 'eggnog': [] + } + for function in self.functions.all(): + function_ids.get(function.source).append(function.function_id) + function_list = [ + ';'.join(function_ids['kegg']), + ';'.join(function_ids['eggnog']) + ] + return ",".join(function_list) + + @property + def csv(self) -> str: + return ",".join( + [self.gene_id, self.csv_gene, self.csv_tax, self.csv_functions] + ) + class Meta: ordering = ['-gene_id'] diff --git a/backend/metagenedb/apps/catalog/models/taxonomy.py b/backend/metagenedb/apps/catalog/models/taxonomy.py index 459be18f2a3f7a951576308c0fe48dc931fbda2a..058182ecccaa0bed5e972f8f5720ea73026539af 100644 --- a/backend/metagenedb/apps/catalog/models/taxonomy.py +++ b/backend/metagenedb/apps/catalog/models/taxonomy.py @@ -42,6 +42,9 @@ class Taxonomy(models.Model): ('varietas', 'Varietas'), ('species_group', 'Species group'), ] + CSV_HEADER = ','.join([ + 'tax_id', 'tax_name', 'tax_rank', 'tax_full', + ]) tax_id = models.CharField(max_length=20, unique=True, db_index=True) name = models.CharField(max_length=200, default=NAME_DEFAULT) @@ -98,6 +101,14 @@ class Taxonomy(models.Model): self._one_line_detailed_taxonomy = self._compute_one_line_detailed_taxonomy() return self._one_line_detailed_taxonomy + @property + def csv_header(self) -> str: + return self.CSV_HEADER + + @property + def csv(self) -> str: + return ','.join([self.tax_id, self.name, self.rank, self.one_line_detailed_taxonomy]) + class Meta: verbose_name_plural = "Taxonomy" ordering = ['-tax_id'] diff --git a/backend/metagenedb/apps/catalog/models/test_gene.py b/backend/metagenedb/apps/catalog/models/test_gene.py new file mode 100644 index 0000000000000000000000000000000000000000..d8d173ae30d60424c1759aa3cd624cc6309d2374 --- /dev/null +++ b/backend/metagenedb/apps/catalog/models/test_gene.py @@ -0,0 +1,56 @@ +from rest_framework.test import APITestCase + +from metagenedb.apps.catalog.factory import ( + GeneWithEggNOGFactory, GeneWithKeggFactory, GeneWithTaxonomyFactory +) + + +class TestGeneCSV(APITestCase): + + @classmethod + def setUpTestData(cls): + """ + Build some test data for different tests + """ + cls.gene_eggnog = GeneWithEggNOGFactory.create() + cls.gene_kegg = GeneWithKeggFactory() + cls.gene_tax = GeneWithTaxonomyFactory() + + def test_csv_header(self): + expected_header = 'gene_id,gene_name,gene_source,length,tax_id,tax_name,tax_rank,tax_full,kegg_id,eggnog_id' + self.assertEqual(self.gene_tax.csv_header, expected_header) + + def test_csv_gene(self): + expected = f"{self.gene_tax.name},{self.gene_tax.source},{self.gene_tax.length}" + self.assertEqual(self.gene_tax.csv_gene, expected) + + def test_csv_tax(self): + expected = ( + f"{self.gene_tax.taxonomy.tax_id},{self.gene_tax.taxonomy.name}," + f"{self.gene_tax.taxonomy.rank},{self.gene_tax.taxonomy.one_line_detailed_taxonomy}" + ) + self.assertEqual(self.gene_tax.csv_tax, expected) + + def test_csv_tax_empty(self): + expected = ",,," + self.assertEqual(self.gene_kegg.csv_tax, expected) + self.assertEqual(self.gene_eggnog.csv_tax, expected) + + def test_csv_functions(self): + expected = f"{self.gene_kegg.functions.all()[0].function_id}," + self.assertEqual(self.gene_kegg.csv_functions, expected) + expected = f",{self.gene_eggnog.functions.all()[0].function_id}" + self.assertEqual(self.gene_eggnog.csv_functions, expected) + + def test_csv_functions_empty(self): + expected = "," + self.assertEqual(self.gene_tax.csv_functions, expected) + + def test_csv(self): + expected = ( + f"{self.gene_tax.gene_id},{self.gene_tax.name},{self.gene_tax.source},{self.gene_tax.length}," + f"{self.gene_tax.taxonomy.tax_id},{self.gene_tax.taxonomy.name}," + f"{self.gene_tax.taxonomy.rank},{self.gene_tax.taxonomy.one_line_detailed_taxonomy}," + f"," + ) + self.assertEqual(self.gene_tax.csv, expected) diff --git a/backend/metagenedb/apps/catalog/models/test_taxonomy.py b/backend/metagenedb/apps/catalog/models/test_taxonomy.py index b0d429539aab068b86a9405d87e310c2c371f8bb..16e6d47c7316125521c39ff2b0ee47c2ee8cb25f 100644 --- a/backend/metagenedb/apps/catalog/models/test_taxonomy.py +++ b/backend/metagenedb/apps/catalog/models/test_taxonomy.py @@ -60,3 +60,10 @@ class TestBuildHierarchy(APITestCase): self.assertEqual(self.phylum.one_line_detailed_taxonomy, expected_str) expected_str = "k__KINGDOM; p__PHYLUM; c__; o__; f__; g__; s__Species" self.assertEqual(self.species.one_line_detailed_taxonomy, expected_str) + + def test_csv(self): + expected = ( + f"{self.species.tax_id},{self.species.name}," + f"{self.species.rank},{self.species.one_line_detailed_taxonomy}" + ) + self.assertEqual(self.species.csv, expected)