...
 
Commits (3)
......@@ -79,6 +79,7 @@ def fetch_attribute(args):
# Re-order the attributes from most to least frequently filled; a dict keeps
# insertion order, so iterating it below yields the rows already sorted.
bio_samples_attributes = dict(
    sorted(bio_samples_attributes.items(), key=lambda item: item[1]["cpt"], reverse=True)
)
with open("bio_samples_attributes.csv", 'w') as bio_samples_attributes_txt:
    # Header row. It must end with a newline, otherwise the first attribute
    # row is appended to the header line and the table is unreadable.
    bio_samples_attributes_txt.write(
        "Attribute\tFilledCount\tExample1\tExample2\tExample3\tExample4\tExample5\n")
    for k, v in bio_samples_attributes.items():
        # Materialise the values as a list so they can be sliced; the list is
        # stored back on the entry, exactly as before.
        # NOTE(review): presumably v["values"] is a set of strings - confirm.
        v["values"] = list(v["values"])
        # One row per attribute: name, fill count, then up to five example values.
        bio_samples_attributes_txt.write("%s\t%i\t%s\n" % (k, v["cpt"], "\t".join(v["values"][:5])))
......
......@@ -15,11 +15,6 @@ if __name__ == '__main__':
output_header = [
"Strain", "BioSample", "Assembly",
"Date",
"Location",
"Source",
"Origin",
"Host",
"Sequencing",
"Assembling",
"Coverage",
......@@ -31,11 +26,6 @@ if __name__ == '__main__':
strain_output_pos = output_header.index('Strain')
bio_sample_output_pos = output_header.index('BioSample')
assembly_output_pos = output_header.index('Assembly')
host_output_pos = output_header.index('Host')
location_output_pos = output_header.index('Location')
date_output_pos = output_header.index('Date')
source_output_pos = output_header.index('Source')
origin_output_pos = output_header.index('Origin')
sequencing_output_pos = output_header.index('Sequencing')
coverage_output_pos = output_header.index('Coverage')
assembling_output_pos = output_header.index('Assembling')
......@@ -61,11 +51,6 @@ if __name__ == '__main__':
for relative_pos, biosample_attr in enumerate(output_header[selected_attributes_start_at:]):
results[relative_pos + selected_attributes_start_at] = strain.bio_sample_sample_data_attr(
biosample_attr)
# results[host_output_pos] = strain.host
# results[location_output_pos] = strain.location
# results[date_output_pos] = strain.isolation_date
# results[source_output_pos] = strain.source
# results[origin_output_pos] = strain.origin
results[sequencing_output_pos] = strain.sequencing
results[coverage_output_pos] = strain.coverage
results[assembling_output_pos] = strain.assembling
......
......@@ -10,13 +10,33 @@ class StrainWrapped:
self.strain = strain.strip()
self.bio_sample = bio_sample.strip()
self.assembly = assembly.strip()
self.__bio_sample_sample_data_xml = None
self.__assembly_records = None
self.__sra_records = None
@property
def _bio_sample_sample_data(self):
    """Return the parsed ``SampleData`` document of this strain's BioSample.

    The document is built on first access from the first ``DocumentSummary``
    entry returned by ``bio_sample_record(self.bio_sample)`` and then kept on
    the instance, so later accesses reuse the same parsed object.
    """
    if self.__bio_sample_sample_data_xml is None:
        summaries = bio_sample_record(self.bio_sample)["DocumentSummarySet"]["DocumentSummary"]
        # "SampleData" is handed to fromstring() for parsing.
        # NOTE(review): presumably an XML string parsed by ElementTree - confirm.
        self.__bio_sample_sample_data_xml = fromstring(summaries[0]["SampleData"])
    return self.__bio_sample_sample_data_xml
@property
def assembly_records(self):
    """Return the ``DocumentSummary`` entries of this strain's assembly record.

    The entries are looked up with ``assembly_record(self.assembly)`` on first
    access and cached on the instance, so ``assembly_record`` is not called
    again once a non-``None`` result has been stored.
    """
    if self.__assembly_records is None:
        summary_set = assembly_record(self.assembly)["DocumentSummarySet"]
        self.__assembly_records = summary_set["DocumentSummary"]
    return self.__assembly_records
@property
def sra_records(self):
    """Return the SRA records looked up for this strain's BioSample.

    The result of the first lookup is cached on the instance and returned on
    every later access.
    """
    records = self.__sra_records
    if records is None:
        # Inside the method body the bare name ``sra_records`` is resolved in
        # the module scope, i.e. it is the lookup function, not this property.
        records = sra_records(self.bio_sample)
        self.__sra_records = records
    return records
def get_bio_sample_attributes(self):
for attr in self._bio_sample_sample_data.iterfind(".//*[@attribute_name]"):
aka = set(attr.attrib.values())
......@@ -29,28 +49,23 @@ class StrainWrapped:
@property
def host(self):
    """Return the value of the BioSample ``host`` attribute for this strain.

    The value is read through ``bio_sample_sample_data_attr``, the same
    accessor the export script calls for its selected attributes.
    """
    return self.bio_sample_sample_data_attr("host")
@property
def location(self):
    """Return the value of the BioSample ``geo_loc_name`` attribute.

    The value is read through ``bio_sample_sample_data_attr``, the same
    accessor the export script calls for its selected attributes.
    """
    # Where the sample was obtained (e.g. city, hospital).
    return self.bio_sample_sample_data_attr("geo_loc_name")
@property
def isolation_date(self):
    """Return the value of the BioSample ``collection_date`` attribute.

    The value is read through ``bio_sample_sample_data_attr``, the same
    accessor the export script calls for its selected attributes.
    """
    # Date the sample was taken.
    # NOTE: an earlier variant read the "Date" field of the BioSample summary
    # instead; the collection_date attribute is what is returned here.
    return self.bio_sample_sample_data_attr("collection_date")
@property
def origin(self):
    """Return the value of the BioSample ``isolation_source`` attribute.

    The value is read through ``bio_sample_sample_data_attr``, the same
    accessor the export script calls for its selected attributes.
    """
    # Kind of sample the isolate came from (blood sample? oral swab? ...).
    return self.bio_sample_sample_data_attr("isolation_source")
@property
def coverage(self):
    """Return the ``Coverage`` field of the first assembly record.

    Reads from the ``assembly_records`` property rather than calling
    ``assembly_record`` directly, so the lookup is shared with the other
    accessors that use that property.

    Returns:
        The ``Coverage`` value of the first record, or ``None`` when there
        are no assembly records.
    """
    try:
        return self.assembly_records[0]["Coverage"]
    except IndexError:
        # No assembly record is available for this strain.
        return None
......@@ -67,7 +82,7 @@ class StrainWrapped:
try:
my_xml = fromstringlist([
"<wrap>",
assembly_record(self.assembly)["DocumentSummarySet"]["DocumentSummary"][0]["Meta"],
self.assembly_records[0]["Meta"],
"</wrap>",
])
return ", ".join([node.text for node in my_xml.iterfind(".//assembly-status")])
......@@ -136,7 +151,7 @@ class StrainWrapped:
return None
def _sra_attrs(self, attr_name):
for entry in sra_records(self.bio_sample):
for entry in self.sra_records:
my_xml = fromstringlist([
"<wrap>",
entry["ExpXml"],
......