...
 
Commits (3)
...@@ -79,6 +79,7 @@ def fetch_attribute(args): ...@@ -79,6 +79,7 @@ def fetch_attribute(args):
bio_samples_attributes = {k: v for k, v in bio_samples_attributes = {k: v for k, v in
sorted(bio_samples_attributes.items(), key=lambda item: item[1]["cpt"], reverse=True)} sorted(bio_samples_attributes.items(), key=lambda item: item[1]["cpt"], reverse=True)}
# Write the attribute summary as a tab-separated table: one header row, then
# one row per attribute holding its name, its "cpt" counter and up to five of
# its values.
with open("bio_samples_attributes.csv", 'w') as bio_samples_attributes_txt:
    # The header row must end with a newline; without it the first data row
    # is appended to the header line and every column is shifted.
    bio_samples_attributes_txt.write(
        "Attribute\tFilledCount\tExample1\tExample2\tExample3\tExample4\tExample5\n")
    for k, v in bio_samples_attributes.items():
        # Convert the values to a list so they can be sliced; the entry is
        # updated in place, exactly as before.
        v["values"] = list(v["values"])
        bio_samples_attributes_txt.write("%s\t%i\t%s\n" % (k, v["cpt"], "\t".join(v["values"][:5])))
......
...@@ -15,11 +15,6 @@ if __name__ == '__main__': ...@@ -15,11 +15,6 @@ if __name__ == '__main__':
output_header = [ output_header = [
"Strain", "BioSample", "Assembly", "Strain", "BioSample", "Assembly",
"Date",
"Location",
"Source",
"Origin",
"Host",
"Sequencing", "Sequencing",
"Assembling", "Assembling",
"Coverage", "Coverage",
...@@ -31,11 +26,6 @@ if __name__ == '__main__': ...@@ -31,11 +26,6 @@ if __name__ == '__main__':
strain_output_pos = output_header.index('Strain') strain_output_pos = output_header.index('Strain')
bio_sample_output_pos = output_header.index('BioSample') bio_sample_output_pos = output_header.index('BioSample')
assembly_output_pos = output_header.index('Assembly') assembly_output_pos = output_header.index('Assembly')
host_output_pos = output_header.index('Host')
location_output_pos = output_header.index('Location')
date_output_pos = output_header.index('Date')
source_output_pos = output_header.index('Source')
origin_output_pos = output_header.index('Origin')
sequencing_output_pos = output_header.index('Sequencing') sequencing_output_pos = output_header.index('Sequencing')
coverage_output_pos = output_header.index('Coverage') coverage_output_pos = output_header.index('Coverage')
assembling_output_pos = output_header.index('Assembling') assembling_output_pos = output_header.index('Assembling')
...@@ -61,11 +51,6 @@ if __name__ == '__main__': ...@@ -61,11 +51,6 @@ if __name__ == '__main__':
for relative_pos, biosample_attr in enumerate(output_header[selected_attributes_start_at:]): for relative_pos, biosample_attr in enumerate(output_header[selected_attributes_start_at:]):
results[relative_pos + selected_attributes_start_at] = strain.bio_sample_sample_data_attr( results[relative_pos + selected_attributes_start_at] = strain.bio_sample_sample_data_attr(
biosample_attr) biosample_attr)
# results[host_output_pos] = strain.host
# results[location_output_pos] = strain.location
# results[date_output_pos] = strain.isolation_date
# results[source_output_pos] = strain.source
# results[origin_output_pos] = strain.origin
results[sequencing_output_pos] = strain.sequencing results[sequencing_output_pos] = strain.sequencing
results[coverage_output_pos] = strain.coverage results[coverage_output_pos] = strain.coverage
results[assembling_output_pos] = strain.assembling results[assembling_output_pos] = strain.assembling
......
...@@ -10,13 +10,33 @@ class StrainWrapped: ...@@ -10,13 +10,33 @@ class StrainWrapped:
self.strain = strain.strip() self.strain = strain.strip()
self.bio_sample = bio_sample.strip() self.bio_sample = bio_sample.strip()
self.assembly = assembly.strip() self.assembly = assembly.strip()
self.__bio_sample_sample_data_xml = None
self.__assembly_records = None
self.__sra_records = None
@property
def _bio_sample_sample_data(self):
    """Return the parsed ``SampleData`` of this strain's BioSample record.

    On first access the record is fetched with ``bio_sample_record``, the
    ``SampleData`` field of its first ``DocumentSummary`` is parsed with
    ``fromstring`` and the result is stored on the instance. Later accesses
    return the stored object without fetching again.
    """
    parsed = self.__bio_sample_sample_data_xml
    if parsed is None:
        summary = bio_sample_record(self.bio_sample)["DocumentSummarySet"]["DocumentSummary"][0]
        parsed = fromstring(summary["SampleData"])
        self.__bio_sample_sample_data_xml = parsed
    return parsed
@property
def assembly_records(self):
    """Return the ``DocumentSummary`` entries of this strain's assembly record.

    The record is fetched with ``assembly_record`` the first time the
    property is read and kept on the instance for later reads.
    """
    if self.__assembly_records is None:
        document = assembly_record(self.assembly)
        self.__assembly_records = document["DocumentSummarySet"]["DocumentSummary"]
    return self.__assembly_records
@property
def sra_records(self):
    """Return the SRA records looked up for this strain's BioSample.

    The module-level ``sra_records`` function is called on first access and
    its result is kept on the instance for later reads.
    """
    # NOTE(review): if the module-level sra_records() returns a one-shot
    # iterator, the stored object is exhausted after the first loop over it;
    # confirm that it returns a reusable sequence.
    if self.__sra_records is None:
        self.__sra_records = sra_records(self.bio_sample)
    return self.__sra_records
def get_bio_sample_attributes(self): def get_bio_sample_attributes(self):
for attr in self._bio_sample_sample_data.iterfind(".//*[@attribute_name]"): for attr in self._bio_sample_sample_data.iterfind(".//*[@attribute_name]"):
aka = set(attr.attrib.values()) aka = set(attr.attrib.values())
...@@ -29,28 +49,23 @@ class StrainWrapped: ...@@ -29,28 +49,23 @@ class StrainWrapped:
@property
def host(self):
    """Return the BioSample attribute stored under the name ``host``."""
    attribute_name = "host"
    return self.bio_sample_sample_data_attr(attribute_name)
@property
def location(self):
    """Return the BioSample attribute stored under ``geo_loc_name``.

    This is where the sample was obtained (city, hospital).
    """
    attribute_name = "geo_loc_name"
    return self.bio_sample_sample_data_attr(attribute_name)
@property
def isolation_date(self):
    """Return the BioSample attribute stored under ``collection_date``.

    This is the date the sample was taken.
    """
    # An earlier variant read the record-level "Date" field of the BioSample
    # document summary instead of the "collection_date" attribute.
    attribute_name = "collection_date"
    return self.bio_sample_sample_data_attr(attribute_name)
@property
def origin(self):
# prélévement sanguin ? bucale ? ...
return self._bio_sample_sample_data_attr("isolation_source")
@property
def coverage(self):
    """Return the ``Coverage`` field of the first assembly record.

    Returns ``None`` when the lookup raises ``IndexError``, for example when
    there are no assembly records.
    """
    try:
        first_record = self.assembly_records[0]
        return first_record["Coverage"]
    except IndexError:
        return None
...@@ -67,7 +82,7 @@ class StrainWrapped: ...@@ -67,7 +82,7 @@ class StrainWrapped:
try: try:
my_xml = fromstringlist([ my_xml = fromstringlist([
"<wrap>", "<wrap>",
assembly_record(self.assembly)["DocumentSummarySet"]["DocumentSummary"][0]["Meta"], self.assembly_records[0]["Meta"],
"</wrap>", "</wrap>",
]) ])
return ", ".join([node.text for node in my_xml.iterfind(".//assembly-status")]) return ", ".join([node.text for node in my_xml.iterfind(".//assembly-status")])
...@@ -136,7 +151,7 @@ class StrainWrapped: ...@@ -136,7 +151,7 @@ class StrainWrapped:
return None return None
def _sra_attrs(self, attr_name): def _sra_attrs(self, attr_name):
for entry in sra_records(self.bio_sample): for entry in self.sra_records:
my_xml = fromstringlist([ my_xml = fromstringlist([
"<wrap>", "<wrap>",
entry["ExpXml"], entry["ExpXml"],
......