Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Data visualization in R and Python snippets
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Model registry
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Cosmin SAVEANU
Data visualization in R and Python snippets
Commits
cec96a3d
Commit
cec96a3d
authored
4 years ago
by
Cosmin SAVEANU
Browse files
Options
Downloads
Patches
Plain Diff
Update Get rows py3/Gt_rws_with_list.py
parent
ab38db0d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Get rows py3/Gt_rws_with_list.py
+279
-0
279 additions, 0 deletions
Get rows py3/Gt_rws_with_list.py
with
279 additions
and
0 deletions
Get rows py3/Gt_rws_with_list.py
0 → 100644
+
279
−
0
View file @
cec96a3d
"""
This script allows the user to obtain
rows from a tab delimited file in which
one of the elements in a column matches
exactly one element from a list of elements
(get the description of every ORF from
a list of ORFs for example).
A line by line filter version should be
used for very large files; beware.
Usage:
--tabfile, -t = description file (tab delimited large file)
--tobematched, -i = file with elements to be matched against
description file items
--help, -h = this help message
"""
import
os
,
sys
,
getopt
class TabData:
    """
    A tab separated data object class.

    Rows are kept in ``self.data`` as a list of lists of strings (one inner
    list per input line). ``self.nmblines`` caches the number of rows and is
    refreshed by ``read_from_file`` and ``del_row``.
    """

    def __init__(self):
        self.nmblines = 0
        self.data = []

    def read_from_file(self, filename):
        """
        Append the rows of a tab delimited file to ``self.data``.

        Each line is stripped of trailing/leading CR and LF characters and
        split on tabs. The file name is remembered in ``self.filename``.
        """
        self.filename = filename
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(filename, 'r') as f:
            for line in f:
                self.data.append(line.strip('\n\r').split('\t'))
        self.nmblines = len(self.data)

    def first_line(self):
        """
        Return the items of the first row, each followed by a space.

        Returns an empty string when no data has been loaded.
        """
        if not self.data:
            return ""
        return "".join(item + ' ' for item in self.data[0])

    def number_of_columns(self):
        """Return the length of the first row (0 when there is no data)."""
        if not self.data:
            return 0
        return len(self.data[0])

    def get_row(self, number):
        """Return the row stored at index ``number``."""
        return self.data[number]

    def get_row_by_name(self, row_name):
        """
        Find the first row whose first item equals ``str(row_name)``.

        Returns a tuple ``(row_copy, 1)`` when found, ``([], 0)`` otherwise.
        """
        r_name = str(row_name)
        # Iterate over the rows themselves so a stale nmblines cannot
        # cause an out-of-range access.
        for row in self.data:
            if row and row[0] == r_name:
                return list(row), 1
        return [], 0

    def get_row_by_colname(self, name, colnmb):
        """
        arguments : name - name to be matched, colnmb - position
        returns a tuple containing the row and 1 if OK or 0 if problem

        ``name`` is upper-cased before the comparison, so only cells that
        are already upper case can match. Rows too short to have a cell at
        ``colnmb`` are skipped.
        """
        search_name = name.upper()
        for row in self.data:
            if colnmb < len(row) and row[colnmb] == search_name:
                return row, 1
        return [], 0

    def get_row_withindex(self, name, column):
        """
        arguments : name - name to be matched, column - obtained by get_column
        returns a tuple containing the row and 1 if OK or 0 if problem

        The match is exact (no case folding); the position of the first
        occurrence of ``name`` in ``column`` selects the row.
        """
        try:
            idx = column.index(name)
        except ValueError:
            return [], 0
        return self.data[idx], 1

    def get_column(self, number):
        """
        Return the items at position ``int(number)`` of every row.

        Raises IndexError if any row is too short to have that position.
        """
        nmb = int(number)
        return [row[nmb] for row in self.data]

    def get_column_by_name(self, col_name):
        """
        Return ``(column, control)`` for the column whose header (first row
        item) equals ``str(col_name)``.

        ``control`` is 1 when the header was found. When it is not found,
        ``control`` is 0 and the column at position 0 is returned.
        """
        c_name = str(col_name)
        try:
            c_nmb = self.data[0].index(c_name)
            control = 1
        except ValueError:
            c_nmb = 0
            control = 0
        return self.get_column(c_nmb), control

    def del_row(self, number):
        """Delete the row at index ``number`` and refresh ``nmblines``."""
        del self.data[number]
        # Keep the cached row count in sync with the data.
        self.nmblines = len(self.data)

    def del_column(self, number):
        """
        delete column by number
        """
        for row in self.data:
            del row[number]

    def del_column_by_name(self, column_name):
        """
        delete column by name
        accepts a string as the column name
        if no such column found, returns the original matrix

        When several headers match, the last matching column is deleted.
        """
        # None (not 0) marks "not found", so the first column can be deleted.
        number = None
        col_name = str(column_name)
        header = self.data[0] if self.data else []
        for i, item in enumerate(header):
            if col_name == item:
                number = i
        if number is None:
            return self.data
        return self.del_column(number)

    def del_row_by_name(self, row_name):
        """
        delete row by name
        accepts a string as the row name
        if no such row found, returns the original matrix

        When several rows match, the last matching row is deleted.
        """
        # None (not 0) marks "not found", so the first row can be deleted.
        number = None
        r_name = str(row_name)
        for i, row in enumerate(self.data):
            if row and r_name == row[0]:
                number = i
        if number is None:
            return self.data
        return self.del_row(number)

    def numb_of_lines(self):
        """Return the current number of rows."""
        return len(self.data)

    def __str__(self):
        # __str__ must return a string; returning the list itself raises
        # TypeError as soon as str() or print() is used on the object.
        return str(self.data)

    def extract_column(self, column_name):
        """
        returns a list corresponding to column

        The header cell itself is the first element of the result. Rows
        that are too short to have the column are skipped.
        """
        if not self.data:
            return []
        # Locate the matching header positions once instead of per row.
        positions = [j for j, head in enumerate(self.data[0])
                     if head == column_name]
        result_list = []
        for row in self.data:
            for j in positions:
                if j < len(row):
                    result_list.append(row[j])
        return result_list

    def transpose(self):
        """
        transpose matrix

        Returns a list of lists with one inner list per column of the first
        row. Raises IndexError if a row is shorter than the first row.
        """
        if not self.data:
            return []
        return [[row[i] for row in self.data]
                for i in range(len(self.data[0]))]

    def transpose_z(self):
        """
        transpose matrix using zip

        Returns a list of tuples; zip stops at the shortest row.
        """
        # The rows live in self.data; the object itself is not iterable.
        return list(zip(*self.data))
#_________________________________
#main
def write_matrix(matrix, file_handle):
    """
    Write a matrix (iterable of rows of strings) to an open file handle.

    Every cell is followed by a tab, and every row is terminated by a
    newline, so each output line ends with a trailing tab.
    """
    emit = file_handle.write
    for row in matrix:
        for cell in row:
            emit(cell)
            emit('\t')
        emit('\n')
#__________________________________
class Usage(Exception):
    """
    Raised to abort the script with a usage or help message.

    The message is available as ``msg`` and, because it is passed on to
    ``Exception``, also through ``str(err)`` and ``err.args``.
    """

    def __init__(self, msg):
        # Without this call str(err) is empty and err.args is ().
        super().__init__(msg)
        self.msg = msg
def main(argv=None):
    """
    Command-line entry point.

    Parses ``-t/--tabfile`` and ``-i/--tobematched`` (and ``-h/--help``)
    from ``argv`` (defaults to ``sys.argv``), loads the tab delimited file
    into a TabData object, reads the items to be matched (one per line),
    asks on standard input for the 1-based column to match against, and
    writes one row per item to ``matchedrows.out`` in the directory of the
    to-be-matched file. Items without a match are written as the item
    followed by an empty cell.

    Returns 2 after printing the message to stderr when a Usage error is
    raised (bad options, missing file names, help request, empty
    description file, invalid column number); returns None on success.
    """
    global tabfile_fname, tobematched_fname
    tabfile_fname, tobematched_fname = "", ""
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(
                argv[1:], "ht:i:", ["help", "tabfile=", "tobematched="])
        except getopt.error as msg:
            raise Usage(msg)

        # option processing
        for option, value in opts:
            if option in ("-h", "--help"):
                raise Usage(__doc__)
            if option in ("-t", "--tabfile"):
                tabfile_fname = value
            if option in ("-i", "--tobematched"):
                tobematched_fname = value

        if tabfile_fname == "" or tobematched_fname == "":
            raise Usage("no input filenames!")

        # read data from tab file in TabData object
        file_info = os.stat(tabfile_fname)
        print("Reading", tabfile_fname, 'of',
              (file_info.st_size / 1024), 'kBytes ..........\n')
        print("The results will be written to a file called matchedrows.out")
        properties = TabData()
        properties.read_from_file(tabfile_fname)
        if properties.numb_of_lines() == 0:
            # An empty table has no first line and no columns to choose from.
            raise Usage("description file is empty: " + tabfile_fname)

        # read data from list of items in a list
        with open(tobematched_fname, 'r') as items_file:
            item_list = [line.strip('\r\n') for line in items_file]

        column_count = properties.number_of_columns()
        print('Your description file has', column_count, 'columns')
        print('The first line of the file says:')
        print(properties.first_line())
        print('Enter the number of the column used for match, (1 to '
              + str(column_count) + '):\n')
        answer = input()
        try:
            match_colnumber = int(answer)
        except ValueError:
            raise Usage("column number must be an integer, got %r" % answer)
        # 0 would become index -1 and silently select the last column.
        if not 1 <= match_colnumber <= column_count:
            raise Usage("column number must be between 1 and "
                        + str(column_count))

        # output file path
        path_to = os.path.split(tobematched_fname)[0]
        output_filename = os.path.join(path_to, "matchedrows.out")

        result_data = []
        visual_counter = 0
        indexed_column = properties.get_column(match_colnumber - 1)
        for item in item_list:
            int_result = properties.get_row_withindex(item, indexed_column)
            if int_result[1] == 1:
                # check for return
                result_data.append(int_result[0])
            else:
                result_data.append([item, ""])
                print("Item", item, "not found")
            visual_counter += 1
            print(visual_counter, item)

        with open(output_filename, "w") as out_file:
            write_matrix(result_data, out_file)

    except Usage as err:
        print(sys.argv[0].split("/")[-1] + ": " + str(err.msg),
              file=sys.stderr)
        print("\t for help use --help", file=sys.stderr)
        return 2
# Run main() only when executed as a script and use its return value
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment