Skip to content
Snippets Groups Projects
Commit cec96a3d authored by Cosmin  SAVEANU's avatar Cosmin SAVEANU
Browse files

Update Get rows py3/Gt_rws_with_list.py

parent ab38db0d
No related branches found
No related tags found
No related merge requests found
"""
This script allows the user to obtain
rows from a tab delimited file in which
one of the elements in a column matches
exactly one element from a list of elements
(get the description of every ORF from
a list of ORFs for example).
A line by line filter version should be
used for very large files; beware.
Usage:
--tabfile, -t = description file (tab delimited large file)
--tobematched, -i = file with elements to be matched against
description file items
--help, -h = this help message
"""
import os, sys, getopt
class TabData:
    """In-memory table loaded from a tab-delimited text file.

    Attributes:
        data: list of rows; each row is the list of strings produced by
            splitting one input line on tab characters.
        nmblines: number of rows in ``data`` as of the last load or row
            deletion.
        filename: path passed to the most recent ``read_from_file`` call
            (the attribute only exists once that method has been called).
    """

    def __init__(self):
        self.nmblines = 0
        self.data = []

    def read_from_file(self, filename):
        """Append every line of the tab-delimited file *filename* to the table.

        CR/LF characters are stripped from both ends of each line before it
        is split on tabs.  Rows are appended to the existing ones, so
        calling this method twice accumulates the contents of both files.
        """
        self.filename = filename
        # The context manager closes the handle even if reading fails.
        with open(filename, 'r') as f:
            for line in f:
                self.data.append(line.strip('\n\r').split('\t'))
        self.nmblines = len(self.data)

    def first_line(self):
        """Return the cells of the first row, each one followed by a space.

        Raises IndexError when the table is empty.
        """
        return ''.join(item + ' ' for item in self.data[0])

    def number_of_columns(self):
        """Return the number of cells in the first row.

        Raises IndexError when the table is empty.
        """
        return len(self.data[0])

    def get_row(self, number):
        """Return the row stored at index *number* (the list itself, not a copy)."""
        return self.data[number]

    def get_row_by_name(self, row_name):
        """Find the first row whose first cell equals ``str(row_name)``.

        Returns a tuple ``(row, control)``: a copy of the matching row and 1
        when a row was found, or ``([], 0)`` otherwise.
        """
        r_name = str(row_name)
        for row in self.data:
            if row[0] == r_name:
                return list(row), 1
        return [], 0

    def get_row_by_colname(self, name, colnmb):
        """Find the first row whose cell at index *colnmb* matches *name*.

        *name* is upper-cased before the comparison while the table cells
        are compared as stored, so only cells that are already upper case
        can match.  Rows too short to have a cell at *colnmb* are skipped.

        Returns a tuple ``(row, control)``: the matching row and 1, or
        ``([], 0)`` when nothing matched.
        """
        search_name = name.upper()
        for row in self.data:
            if colnmb < len(row) and row[colnmb] == search_name:
                return row, 1
        return [], 0

    def get_row_withindex(self, name, column):
        """Look *name* up in a pre-extracted *column* and return that row.

        Arguments:
            name: value to be matched exactly (case-sensitive).
            column: list of cell values, e.g. as returned by ``get_column``;
                the position of *name* in it is used as the row index.

        Returns a tuple ``(row, control)``: the row at the position of the
        first occurrence of *name* and 1, or ``([], 0)`` when *name* is not
        in *column*.
        """
        try:
            idx = column.index(name)
        except ValueError:
            return [], 0
        return self.data[idx], 1

    def get_column(self, number):
        """Return the cell at index ``int(number)`` of every row, in row order.

        Raises IndexError if any row is too short to have that cell.
        """
        nmb = int(number)
        return [row[nmb] for row in self.data]

    def get_column_by_name(self, col_name):
        """Return the column whose first-row cell equals ``str(col_name)``.

        Returns a tuple ``(column, control)``.  ``control`` is 1 when the
        name was found in the first row.  When it was not found, column 0
        is returned together with ``control`` 0, so callers must check the
        flag.
        """
        c_name = str(col_name)
        c_nmb = 0
        control = 0
        for i, header_cell in enumerate(self.data[0]):
            if header_cell == c_name:
                c_nmb = i
                control = 1
                break
        return self.get_column(c_nmb), control

    def del_row(self, number):
        """Delete the row at index *number* and refresh ``nmblines``."""
        del self.data[number]
        # Keep the cached row count in sync with the data.
        self.nmblines = len(self.data)

    def del_column(self, number):
        """Delete the cell at index *number* from every row.

        Raises IndexError if a row is too short to have that cell.
        """
        for row in self.data:
            del row[number]

    def del_column_by_name(self, column_name):
        """Delete the column whose first-row cell equals ``str(column_name)``.

        If several first-row cells match, the last one is deleted.  If no
        such column is found, the table is left untouched and ``self.data``
        is returned; otherwise the result of ``del_column`` (None) is
        returned.
        """
        col_name = str(column_name)
        # None (rather than 0) marks "not found" so that column 0 can be
        # deleted as well.
        number = None
        for i, header_cell in enumerate(self.data[0]):
            if header_cell == col_name:
                number = i
        if number is None:
            return self.data
        return self.del_column(number)

    def del_row_by_name(self, row_name):
        """Delete the row whose first cell equals ``str(row_name)``.

        If several rows match, the last one is deleted.  If no such row is
        found, the table is left untouched and ``self.data`` is returned;
        otherwise the result of ``del_row`` (None) is returned.
        """
        r_name = str(row_name)
        # None (rather than 0) marks "not found" so that row 0 can be
        # deleted as well.
        number = None
        for i, row in enumerate(self.data):
            if row[0] == r_name:
                number = i
        if number is None:
            return self.data
        return self.del_row(number)

    def numb_of_lines(self):
        """Return the current number of rows."""
        return len(self.data)

    def __str__(self):
        """Return the table as text: cells joined by tabs, rows by newlines."""
        return '\n'.join(
            '\t'.join(str(cell) for cell in row) for row in self.data
        )

    def extract_column(self, column_name):
        """Return the cells located under first-row cells equal to *column_name*.

        The first row itself is included, so the result starts with the
        column name.  If the name occurs several times in the first row, the
        cells of every matching column are returned, row by row.  Rows too
        short to have a given cell are skipped for that column.
        """
        if not self.data:
            return []
        wanted = [j for j, cell in enumerate(self.data[0])
                  if cell == column_name]
        return [row[j] for row in self.data for j in wanted if j < len(row)]

    def transpose(self):
        """Return the transposed table as a new list of lists.

        The width is taken from the first row; raises IndexError when the
        table is empty or when a row is shorter than the first one.
        """
        return [[row[i] for row in self.data]
                for i in range(len(self.data[0]))]

    def transpose_z(self):
        """Return the transposed table as a list of tuples, built with zip.

        ``zip`` stops at the shortest row, so cells beyond that length are
        dropped.
        """
        return list(zip(*self.data))
#_________________________________
#main
def write_matrix(matrix, file_handle):
    """Write *matrix* to *file_handle*, one row per line.

    Every cell is written followed by a tab (so each line ends with a
    trailing tab), and every row is terminated by a newline.
    """
    emit = file_handle.write
    for row in matrix:
        for cell in row:
            emit(cell)
            emit('\t')
        emit('\n')
#__________________________________
class Usage(Exception):
    """Raised for command-line usage problems.

    Attributes:
        msg: the text (or underlying exception) to report to the user.
    """

    def __init__(self, msg):
        # Hand msg to Exception as well so that str(err) and err.args carry
        # it, not only the custom attribute.
        super().__init__(msg)
        self.msg = msg
def main(argv=None):
    """Command-line entry point: write the rows matching a list of items.

    Reads the tab-delimited description file (``-t``/``--tabfile``) into a
    ``TabData`` object and the items to look up (``-i``/``--tobematched``,
    one per line), asks on standard input which column to match against,
    and writes one output row per item to ``matchedrows.out`` in the
    directory of the items file.  An item that is not found produces a row
    holding the item and an empty string.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        2 when a usage problem was reported on stderr, otherwise None.
    """
    # The chosen file names are published as module globals, as before.
    global tabfile_fname, tobematched_fname
    tabfile_fname, tobematched_fname = "", ""
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, _args = getopt.getopt(
                argv[1:], "ht:i:", ["help", "tabfile=", "tobematched="])
        except getopt.error as msg:
            raise Usage(msg)
        # option processing
        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(__doc__)
            if option in ("-t", "--tabfile"):
                tabfile_fname = value
            if option in ("-i", "--tobematched"):
                tobematched_fname = value
        if tabfile_fname == "" or tobematched_fname == "":
            raise Usage("no input filenames!")

        # read data from tab file in TabData object
        file_info = os.stat(tabfile_fname)
        print("Reading ", tabfile_fname, ' of ', (file_info[6]/1024), ' kBytes .......... \n')
        print("The results will be written to a file called matchedrows.out")
        properties = TabData()
        properties.read_from_file(tabfile_fname)

        # read the items to be matched, one per line
        with open(tobematched_fname, 'r') as items_file:
            item_list = [line.strip('\r\n') for line in items_file]

        n_columns = properties.number_of_columns()
        print('Your description file has', n_columns, 'columns')
        print('The first line of the file says:')
        print(properties.first_line())
        print('Enter the number of the column used for match, (1 to '+str(n_columns)+'):\n')
        try:
            match_colnumber = int(input())
        except ValueError:
            raise Usage("the column number must be an integer") from None
        # Reject 0 and negatives explicitly: after the -1 shift below they
        # would silently select a column counted from the end.
        if not 1 <= match_colnumber <= n_columns:
            raise Usage("the column number must be between 1 and "
                        + str(n_columns))

        # output file goes next to the items file
        output_filename = os.path.join(
            os.path.split(tobematched_fname)[0], "matchedrows.out")

        result_data = []
        indexed_column = properties.get_column(match_colnumber - 1)
        for visual_counter, item in enumerate(item_list, start=1):
            row, found = properties.get_row_withindex(item, indexed_column)
            if found == 1:
                result_data.append(row)
            else:
                result_data.append([item, ""])
                print("Item", item, "not found")
            # progress display
            print(visual_counter, item)

        with open(output_filename, "w") as out_file:
            write_matrix(result_data, out_file)
    except Usage as err:
        print(os.path.basename(sys.argv[0]) + ": " + str(err.msg), file=sys.stderr)
        print("\t for help use --help", file=sys.stderr)
        return 2
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment