Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Amine GHOZLANE
AlienTrimmer
Commits
55877c88
Commit
55877c88
authored
Sep 24, 2020
by
Amine GHOZLANE
Browse files
Enable compressed fastq reading and writing
parent
a80bec0d
Changes
3
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
55877c88
...
@@ -19,5 +19,5 @@ build_conda:
...
@@ -19,5 +19,5 @@ build_conda:
script
:
script
:
-
anaconda login --username "$DOCKER_USER" --password "$DOCKER_PASS"
-
anaconda login --username "$DOCKER_USER" --password "$DOCKER_PASS"
-
conda build conda_inst
-
conda build conda_inst
only
:
except
:
-
master
-
master
\ No newline at end of file
conda_inst/meta.yaml
View file @
55877c88
...
@@ -9,8 +9,10 @@ channels:
...
@@ -9,8 +9,10 @@ channels:
-
defaults
-
defaults
source
:
source
:
url
:
ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
-
git_url
:
https://gitlab.pasteur.fr/aghozlan/shaman.git
md5
:
8ed76aafea9fef48f49d74b59025212d
git_rev
:
{{
environ.get('GIT_DESCRIBE_TAG'
,
'
'
)
}}
#url: ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
#md5: 8ed76aafea9fef48f49d74b59025212d
requirements
:
requirements
:
host
:
host
:
...
...
src/AlienTrimmer.java
View file @
55877c88
...
@@ -70,6 +70,8 @@
...
@@ -70,6 +70,8 @@
import
java.io.*
;
import
java.io.*
;
import
java.util.*
;
import
java.util.*
;
import
java.util.zip.GZIPInputStream
;
import
java.util.zip.GZIPOutputStream
;
public
class
AlienTrimmer
{
public
class
AlienTrimmer
{
...
@@ -336,6 +338,14 @@ public class AlienTrimmer {
...
@@ -336,6 +338,14 @@ public class AlienTrimmer {
System
.
exit
(
0
);
System
.
exit
(
0
);
}
}
}
}
String
finfilename
=
finfile
.
toString
();
String
rinfilename
=
rinfile
.
toString
();
String
foutfilename
=
foutfile
.
toString
();
String
routfilename
=
routfile
.
toString
();
String
finext
=
finfilename
.
substring
(
finfilename
.
lastIndexOf
(
"."
)
+
1
,
finfilename
.
length
());
String
rinext
=
rinfilename
.
substring
(
rinfilename
.
lastIndexOf
(
"."
)
+
1
,
rinfilename
.
length
());
String
foutext
=
foutfilename
.
substring
(
foutfilename
.
lastIndexOf
(
"."
)
+
1
,
foutfilename
.
length
());
String
routext
=
routfilename
.
substring
(
routfilename
.
lastIndexOf
(
"."
)
+
1
,
routfilename
.
length
());
//### testing default variables ##############################################################################
//### testing default variables ##############################################################################
if
(
finfile
.
toString
().
equals
(
"no.file"
)
)
{
System
.
out
.
println
(
" no input file"
);
System
.
exit
(
1
);
}
if
(
finfile
.
toString
().
equals
(
"no.file"
)
)
{
System
.
out
.
println
(
" no input file"
);
System
.
exit
(
1
);
}
if
(
foutfile
.
toString
().
equals
(
"no.file"
)
)
foutfile
=
new
File
(
finfile
+
".at.fq"
);
if
(
foutfile
.
toString
().
equals
(
"no.file"
)
)
foutfile
=
new
File
(
finfile
+
".at.fq"
);
...
@@ -346,14 +356,13 @@ public class AlienTrimmer {
...
@@ -346,14 +356,13 @@ public class AlienTrimmer {
}
}
if
(
mismatch
<
B0
)
{
mismatch
=
k
;
++
mismatch
;
mismatch
/=
B2
;
}
if
(
mismatch
<
B0
)
{
mismatch
=
k
;
++
mismatch
;
mismatch
/=
B2
;
}
//### detecting Phred encoding ##############################################################################
//### detecting Phred encoding ##############################################################################
if
(
finfile
.
toString
().
equals
(
".gz"
)
){
phred
=
0
;
//fin = new BufferedReader(new FileReader(finfile));
try
(
GZIPInputStream
in
=
new
GZIPInputStream
(
new
FileInputStream
(
input
))){
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
"gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
}
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
phred
=
0
;
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
cpt
=
B0
;
cpt
=
B0
;
while
(
phred
==
0
)
{
while
(
phred
==
0
)
{
//## reading fastq ##
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
case
B2:
case
B3:
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
switch
(
++
cpt
)
{
case
B1:
case
B2:
case
B3:
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
i
=
(
short
)
fqsc
.
length
();
while
(
--
i
>=
0
)
{
if
(
fqsc
.
charAt
(
i
)
<=
'9'
)
{
phred
=
33
;
break
;
}
if
(
fqsc
.
charAt
(
i
)
>=
'L'
)
{
phred
=
64
;
break
;
}
}
i
=
(
short
)
fqsc
.
length
();
while
(
--
i
>=
0
)
{
if
(
fqsc
.
charAt
(
i
)
<=
'9'
)
{
phred
=
33
;
break
;
}
if
(
fqsc
.
charAt
(
i
)
>=
'L'
)
{
phred
=
64
;
break
;
}
}
}
}
...
@@ -509,22 +518,24 @@ public class AlienTrimmer {
...
@@ -509,22 +518,24 @@ public class AlienTrimmer {
//####################
//####################
//## trimming reads ##
//## trimming reads ##
//####################
//####################
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
"gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
if
(
foutext
.
equals
(
"gz"
)
||
foutext
.
equals
(
"gzip"
))
fout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
foutfile
))));
else
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
pcpt
=
0
;
ftcpt
=
0
;
frcpt
=
0
;
pcpt
=
0
;
ftcpt
=
0
;
frcpt
=
0
;
cpt
=
B0
;
score
=
new
byte
[
51
];
cpt
=
B0
;
score
=
new
byte
[
51
];
while
(
true
)
{
while
(
true
)
{
//## reading fastq ##
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
continue
;
case
B2:
fseq
=
line
;
continue
;
case
B3:
fid2
=
line
;
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
continue
;
case
B2:
fseq
=
line
;
continue
;
case
B3:
fid2
=
line
;
continue
;
case
B4:
fqsc
=
line
;
cpt
=
B0
;
break
;
}
//## displaying info ##
//## displaying info ##
if
(
++
pcpt
%
1000000
==
0
)
if
(
++
pcpt
%
1000000
==
0
)
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
//## matching k-mers... ##
//## matching k-mers... ##
if
(
(
lgt
=(
short
)
fseq
.
length
())
<
minLgt
)
{
++
frcpt
;
continue
;
}
// too small read
if
(
(
lgt
=(
short
)
fseq
.
length
())
<
minLgt
)
{
++
frcpt
;
continue
;
}
// too small read
if
(
lgt
>=
score
.
length
)
{
score
=
new
byte
[++
lgt
];
--
lgt
;
}
else
Arrays
.
fill
(
score
,
B0
);
if
(
lgt
>=
score
.
length
)
{
score
=
new
byte
[++
lgt
];
--
lgt
;
}
else
Arrays
.
fill
(
score
,
B0
);
...
@@ -552,12 +563,12 @@ public class AlienTrimmer {
...
@@ -552,12 +563,12 @@ public class AlienTrimmer {
case
'G'
:
case
'g'
:
iptn
|=
B2
;
lptn
|=
B4
;
nptn
|=
B4
;
break
;
case
'G'
:
case
'g'
:
iptn
|=
B2
;
lptn
|=
B4
;
nptn
|=
B4
;
break
;
case
'T'
:
case
't'
:
iptn
|=
B3
;
lptn
|=
B8
;
nptn
|=
B8
;
break
;
case
'T'
:
case
't'
:
iptn
|=
B3
;
lptn
|=
B8
;
nptn
|=
B8
;
break
;
default
:
n
=
k
;
nptn
|=
B15
;
default
:
n
=
k
;
nptn
|=
B15
;
}
}
switch
(
(((--
n
)
<
B0
)
?
B0
:
B1
)
)
{
switch
(
(((--
n
)
<
B0
)
?
B0
:
B1
)
)
{
case
B0:
case
B0:
if
(
bsfkmer
.
get
((
iptn
&=
imsk
))
)
{
if
(
bsfkmer
.
get
((
iptn
&=
imsk
))
)
{
b
=
(
score
[
i
]=(++
b
));
score
[
i
]
+=
bq
;
--
score
[++
j
];
b
=
(
score
[
i
]=(++
b
));
score
[
i
]
+=
bq
;
--
score
[++
j
];
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
//start = ( /*(i-start <= k2) &&*/ (j >= start0) && (scount+k2 >= j) ) ? j : ++start; --start;
//start = ( /*(i-start <= k2) &&*/ (j >= start0) && (scount+k2 >= j) ) ? j : ++start; --start;
start
=
(
(
j
>=
start0
)
&&
(
scount
+
k2
>=
j
)
)
start
=
(
(
j
>=
start0
)
&&
(
scount
+
k2
>=
j
)
)
?
((
start
<
B0
)
?
j
:
((
i
-
start
<=
mismatch
)
?
j
:
++
start
))
?
((
start
<
B0
)
?
j
:
((
i
-
start
<=
mismatch
)
?
j
:
++
start
))
...
@@ -580,7 +591,7 @@ public class AlienTrimmer {
...
@@ -580,7 +591,7 @@ public class AlienTrimmer {
}
}
break
;
break
;
}
}
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
...
@@ -607,6 +618,7 @@ public class AlienTrimmer {
...
@@ -607,6 +618,7 @@ public class AlienTrimmer {
}
}
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
b
=
(
score
[
i
]=((
b
<
B0
)
?
B0
:
b
));
++
j
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
if
(
(
score
[
i
]
+=
bq
)
==
B0
)
continue
;
//Very strange
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
scount
+=
B2
;
start0
=
(
start0
<
B0
)
?
((
short
)(
B2
*
i
))
:
start0
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
start
=
(
(
start
<
i
)
&&
((
x
=
i
)-
start
<=
mismatch
)
&&
((++
x
)
>=
start0
)
&&
(
scount
>
x
)
)
?
i
:
start
;
end
=
i
;
}
}
...
@@ -681,14 +693,15 @@ public class AlienTrimmer {
...
@@ -681,14 +693,15 @@ public class AlienTrimmer {
o
=
B_1
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
((
(
b
=
score
[
o
])
==
B0
)
?
" "
:
(
b
<
B10
)
?
b
:
(
b
==
B10
)
?
"0"
:
(
""
+(
char
)(
54
+
b
)))
);
System
.
out
.
println
(
""
);
o
=
B_1
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
((
(
b
=
score
[
o
])
==
B0
)
?
" "
:
(
b
<
B10
)
?
b
:
(
b
==
B10
)
?
"0"
:
(
""
+(
char
)(
54
+
b
)))
);
System
.
out
.
println
(
""
);
o
=
B_1
;
while
(
++
o
<=
start
)
System
.
out
.
print
(
">"
);
--
o
;
while
(
++
o
<
end
)
System
.
out
.
print
(
" "
);
--
o
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
"<"
);
o
=
B_1
;
while
(
++
o
<=
start
)
System
.
out
.
print
(
">"
);
--
o
;
while
(
++
o
<
end
)
System
.
out
.
print
(
" "
);
--
o
;
while
(
++
o
<
lgt
)
System
.
out
.
print
(
"<"
);
System
.
out
.
println
(
""
);
System
.
out
.
println
(
""
);
System
.
out
.
println
(
""
);
System
.
out
.
println
(
""
);
}
}
fout
.
close
();
fout
.
close
();
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
System
.
out
.
println
(
"["
+
String
.
format
(
Locale
.
US
,
"%02d"
,
new
Long
((
cur
=(
System
.
currentTimeMillis
()-
t
)/
1000
)/
60
))
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
":"
+
(
line
=
"0"
+(
cur
%
60
)).
substring
(
line
.
length
()-
2
)
+
"]"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
pcpt
))
+
" reads processed:"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
ftcpt
))
+
" trimmed"
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
+
String
.
format
(
Locale
.
US
,
"%,12d"
,
new
Integer
(
frcpt
))
+
" removed"
);
}
}
...
@@ -961,12 +974,20 @@ public class AlienTrimmer {
...
@@ -961,12 +974,20 @@ public class AlienTrimmer {
//################################
//################################
//## trimming reads (fwd & rev) ##
//## trimming reads (fwd & rev) ##
//################################
//################################
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
if
(
finext
.
equals
(
"gz"
)
||
finext
.
equals
(
".gzip"
))
fin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
finfile
))));
rin
=
new
BufferedReader
(
new
FileReader
(
rinfile
));
rout
=
new
BufferedWriter
(
new
FileWriter
(
routfile
));
else
fin
=
new
BufferedReader
(
new
FileReader
(
finfile
));
if
(
foutext
.
equals
(
"gz"
)
||
foutext
.
equals
(
".gzip"
))
fout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
foutfile
))));
else
fout
=
new
BufferedWriter
(
new
FileWriter
(
foutfile
));
if
(
rinext
.
equals
(
"gz"
)
||
rinext
.
equals
(
".gzip"
))
rin
=
new
BufferedReader
(
new
InputStreamReader
(
new
GZIPInputStream
(
new
FileInputStream
(
rinfile
))));
else
rin
=
new
BufferedReader
(
new
FileReader
(
rinfile
));
if
(
routext
.
equals
(
"gz"
)
||
routext
.
equals
(
".gzip"
))
rout
=
new
BufferedWriter
(
new
OutputStreamWriter
(
new
GZIPOutputStream
(
new
FileOutputStream
(
routfile
))));
else
rout
=
new
BufferedWriter
(
new
FileWriter
(
routfile
));
//fin = new BufferedReader(new FileReader(finfile)); fout = new BufferedWriter(new FileWriter(foutfile));
//rin = new BufferedReader(new FileReader(rinfile)); rout = new BufferedWriter(new FileWriter(routfile));
sout
=
new
BufferedWriter
(
new
FileWriter
(
soutfile
));
sout
=
new
BufferedWriter
(
new
FileWriter
(
soutfile
));
pcpt
=
0
;
ftcpt
=
0
;
rtcpt
=
0
;
rtcpt
=
0
;
rrcpt
=
0
;
pcpt
=
0
;
ftcpt
=
0
;
rtcpt
=
0
;
rtcpt
=
0
;
rrcpt
=
0
;
cpt
=
B0
;
score
=
new
byte
[
51
];
cpt
=
B0
;
score
=
new
byte
[
51
];
while
(
true
)
{
while
(
true
)
{
//## reading fastq ##
//## reading fastq ##
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
rin
.
close
();
break
;
}
try
{
line
=
fin
.
readLine
().
trim
();
}
catch
(
NullPointerException
e
)
{
fin
.
close
();
rin
.
close
();
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
break
;
case
B2:
fseq
=
line
.
toUpperCase
();
break
;
case
B3:
fid2
=
line
;
break
;
case
B4:
fqsc
=
line
;
break
;
}
switch
(
++
cpt
)
{
case
B1:
fid1
=
line
;
break
;
case
B2:
fseq
=
line
.
toUpperCase
();
break
;
case
B3:
fid2
=
line
;
break
;
case
B4:
fqsc
=
line
;
break
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment