Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Amine GHOZLANE
AlienTrimmer
Commits
55877c88
Commit
55877c88
authored
Sep 24, 2020
by
Amine GHOZLANE
Browse files
Enable compressed fastq reading and writing
parent
a80bec0d
Changes
3
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
55877c88
...
...
@@ -19,5 +19,5 @@ build_conda:
script
:
-
anaconda login --username "$DOCKER_USER" --password "$DOCKER_PASS"
-
conda build conda_inst
only
:
except
:
-
master
\ No newline at end of file
conda_inst/meta.yaml
View file @
55877c88
...
...
@@ -9,8 +9,10 @@ channels:
-
defaults
source
:
url
:
ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
md5
:
8ed76aafea9fef48f49d74b59025212d
-
git_url
:
https://gitlab.pasteur.fr/aghozlan/shaman.git
git_rev
:
{{
environ.get('GIT_DESCRIBE_TAG'
,
'
'
)
}}
#url: ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
#md5: 8ed76aafea9fef48f49d74b59025212d
requirements
:
host
:
...
...
src/AlienTrimmer.java
View file @
55877c88
...
...
@@ -70,6 +70,8 @@
import
java.io.*
;
import
java.util.*
;
import
java.util.zip.GZIPInputStream
;
import
java.util.zip.GZIPOutputStream
;
public
class
AlienTrimmer
{
...
...
@@ -336,6 +338,14 @@ public class AlienTrimmer {
System
.
exit
(
0
);
}
}
String
finfilename
=
finfile
.
toString
();
String
rinfilename
=
rinfile
.
toString
();
String
foutfilename
=
foutfile
.
toString
();
String
routfilename
=
routfile
.
toString
();
String
finext
=
finfilename
.
substring
(
finfilename
.
lastIndexOf
(
"."
)
+
1
,
finfilename
.
length
());
String
rinext
=
rinfilename
.
substring
(
rinfilename
.
lastIndexOf
(
"."
)
+
1
,
rinfilename
.
length
());
String
foutext
=
foutfilename
.
substring
(
foutfilename
.
lastIndexOf
(
"."
)
+
1
,
foutfilename
.
length
());
String
routext
=
routfilename
.
substring
(
routfilename
.
lastIndexOf
(
"."
)
+
1
,
routfilename
.
length
());
//### testing default variables ##############################################################################
if
(
finfile
.
toString
().
equals
(
"no.file"
)
)
{
System
.
out
.
println
(
" no input file"
);
System
.
exit
(
1
);
}
if
(
foutfile
.
toString
().
equals
(
"no.file"
)
)
foutfile
=
new
File
(
finfile
+
".at.fq"
);
...
...
@@ -346,14 +356,13 @@ public class AlienTrimmer {
}
if
(
mismatch
<
B0
)
{
mismatch
=
k
;
++
mismatch
;
mismatch
/=
B2
;
}
//### detecting Phred encoding ##############################################################################
if
(
finfile
.
toString
().
equals
(
".gz"
)
){
try
(
GZIPInputStream
in
=
new
GZIPInputStream
(
new
FileInputStream
(
input
))){
}
phred
=
0
;
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
phred
=
0
;
//fin = new BufferedReader(new FileReader(finfile));
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
"gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
cpt
=
B0
;
while
(
phred
==
0
)
{
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
case
B2:
case
B3:
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
i
=
(
short
)
fqsc
.
length
();
while
(
--
i
>=
0
)
{
if
(
fqsc
.
charAt
(
i
)
<=
'9'
)
{
phred
=
33
;
break
;
}
if
(
fqsc
.
charAt
(
i
)
>=
'L'
)
{
phred
=
64
;
break
;
}
}
}
...
...
@@ -509,22 +518,24 @@ public class AlienTrimmer {
//####################
//## trimming reads ##
//####################
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
"gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
if
(
foutext
.
equals
(
"gz"
)
||
foutext
.
equals
(
"gzip"
))
fout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
foutfile
))));
else
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
pcpt
=
0
;
ftcpt
=
0
;
frcpt
=
0
;
cpt
=
B0
;
score
=
new
byte
[
51
];
while
(
true
)
{
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
continue
;
case
B2:
fseq
=
line
;
continue
;
case
B3:
fid2
=
line
;
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
//## displaying info ##
while
(
true
)
{
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
continue
;
case
B2:
fseq
=
line
;
continue
;
case
B3:
fid2
=
line
;
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
//## displaying info ##
if
(
++
pcpt
%
1000000
==
0
)
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
//## matching k-mers... ##
if
(
(
lgt
=(
short
)
fseq
.
length
())
<
minLgt
)
{
++
frcpt
;
continue
;
}
// too small read
if
(
lgt
>=
score
.
length
)
{
score
=
new
byte
[++
lgt
];
--
lgt
;
}
else
Arrays
.
fill
(
score
,
B0
);
...
...
@@ -552,12 +563,12 @@ public class AlienTrimmer {
case
'G'
:
case
'g'
:
iptn
|=
B2
;
lptn
|=
B4
;
nptn
|=
B4
;
break
;
case
'T'
:
case
't'
:
iptn
|=
B3
;
lptn
|=
B8
;
nptn
|=
B8
;
break
;
default
:
n
=
k
;
nptn
|=
B15
;
}
}
switch
(
(((--
n
)
<
B0
)
?
B0
:
B1
)
)
{
case
B0:
if
(
bsfkmer
.
get
((
iptn
&=
imsk
))
)
{
b
=
(
score
[
i
]=(++
b
));
score
[
i
]
+=
bq
;
--
score
[++
j
];
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
b
=
(
score
[
i
]=(++
b
));
score
[
i
]
+=
bq
;
--
score
[++
j
];
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
//start = ( /*(i-start <= k2) &&*/ (j >= start0) && (scount+k2 >= j) ) ? j : ++start; --start;
start
=
(
(
j
>=
start0
)
&&
(
scount
+
k2
>=
j
)
)
?
((
start
<
B0
)
?
j
:
((
i
-
start
<=
mismatch
)
?
j
:
++
start
))
...
...
@@ -580,7 +591,7 @@ public class AlienTrimmer {
}
break
;
}
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
...
...
@@ -607,6 +618,7 @@ public class AlienTrimmer {
}
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
//Very strange
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
}
...
...
@@ -681,14 +693,15 @@ public class AlienTrimmer {
o
=
B_1
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
((
(
b
=
score
[
o
])
==
B0
)
?
" "
:
(
b
<
B10
)
?
b
:
(
b
==
B10
)
?
"0"
:
(
""
+(
char
)(
54
+
b
)))
);
System
.
out
.
println
(
""
);
o
=
B_1
;
while
(
++
o
<=
start
)
System
.
out
.
print
(
">"
);
--
o
;
while
(
++
o
<
end
)
System
.
out
.
print
(
" "
);
--
o
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
"<"
);
System
.
out
.
println
(
""
);
System
.
out
.
println
(
""
);
}
}
fout
.
close
();
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
}
}
...
...
@@ -961,12 +974,20 @@ public class AlienTrimmer {
//################################
//## trimming reads (fwd & rev) ##
//################################
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
rin
=
new
BufferedReader
(
new
FileReader
(
rinfile
));
rout
=
new
BufferedWriter
(
new
FileWriter
(
routfile
));
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
".gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
if
(
foutext
.
equals
(
"gz"
)
||
foutext
.
equals
(
".gzip"
))
fout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
foutfile
))));
else
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
if
(
rinext
.
equals
(
"gz"
)
||
rinext
.
equals
(
".gzip"
))
rin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
rinfile
))));
else
rin
=
new
BufferedReader
(
new
FileReader
(
rinfile
));
if
(
routext
.
equals
(
"gz"
)
||
routext
.
equals
(
".gzip"
))
rout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
routfile
))));
else
rout
=
new
BufferedWriter
(
new
FileWriter
(
routfile
));
//fin = new BufferedReader(new FileReader(finfile)); fout = new BufferedWriter(new FileWriter(foutfile));
//rin = new BufferedReader(new FileReader(rinfile)); rout = new BufferedWriter(new FileWriter(routfile));
sout
=
new
BufferedWriter
(
new
FileWriter
(
soutfile
));
pcpt
=
0
;
ftcpt
=
0
;
rtcpt
=
0
;
rtcpt
=
0
;
rrcpt
=
0
;
cpt
=
B0
;
score
=
new
byte
[
51
];
while
(
true
)
{
while
(
true
)
{
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
rin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
break
;
case
B2:
fseq
=
line
.
toUpperCase
();
break
;
case
B3:
fid2
=
line
;
break
;
case
B4:
fqsc
=
line
;
break
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment