Commit 55877c88 authored by Amine GHOZLANE
Browse files

Enable compressed fastq reading and writing

parent a80bec0d
......@@ -19,5 +19,5 @@ build_conda:
script:
- anaconda login --username "$DOCKER_USER" --password "$DOCKER_PASS"
- conda build conda_inst
only:
except:
- master
\ No newline at end of file
......@@ -9,8 +9,10 @@ channels:
- defaults
source:
url: ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
md5: 8ed76aafea9fef48f49d74b59025212d
- git_url: https://gitlab.pasteur.fr/aghozlan/shaman.git
git_rev: {{ environ.get('GIT_DESCRIBE_TAG', '') }}
#url: ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/AlienTrimmer_0.4.0.tar.gz
#md5: 8ed76aafea9fef48f49d74b59025212d
requirements:
host:
......
......@@ -70,6 +70,8 @@
import java.io.*;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class AlienTrimmer {
......@@ -336,6 +338,14 @@ public class AlienTrimmer {
System.exit(0);
}
}
String finfilename = finfile.toString();
String rinfilename = rinfile.toString();
String foutfilename = foutfile.toString();
String routfilename = routfile.toString();
String finext = finfilename.substring(finfilename.lastIndexOf(".") + 1, finfilename.length());
String rinext = rinfilename.substring(rinfilename.lastIndexOf(".") + 1, rinfilename.length());
String foutext = foutfilename.substring(foutfilename.lastIndexOf(".") + 1, foutfilename.length());
String routext = routfilename.substring(routfilename.lastIndexOf(".") + 1, routfilename.length());
//### testing default variables ##############################################################################
if ( finfile.toString().equals("no.file") ) { System.out.println(" no input file"); System.exit(1); }
if ( foutfile.toString().equals("no.file") ) foutfile = new File(finfile + ".at.fq");
......@@ -346,14 +356,13 @@ public class AlienTrimmer {
}
if ( mismatch < B0 ) { mismatch = k; ++mismatch; mismatch /= B2; }
//### detecting Phred encoding ##############################################################################
if ( finfile.toString().equals(".gz") ){
try (GZIPInputStream in = new GZIPInputStream(new FileInputStream(input))){
}
phred = 0; fin = new BufferedReader(new FileReader(finfile));
phred = 0; //fin = new BufferedReader(new FileReader(finfile));
if(finext.equals("gz") || finext.equals("gzip")) fin = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(finfile))));
else fin = new BufferedReader(new FileReader(finfile));
cpt = B0;
while ( phred == 0 ) {
//## reading fastq ##
try { line = fin.readLine().trim(); } catch ( NullPointerException e ) { fin.close(); break; }
try { line = fin.readLine().trim(); } catch ( NullPointerException e ) { fin.close(); break; }
switch ( ++cpt ) { case B1: case B2: case B3: continue; case B4: fqsc = line; cpt = B0; break; }
i = (short) fqsc.length(); while ( --i >= 0 ) { if ( fqsc.charAt(i) <= '9' ) { phred = 33; break; } if ( fqsc.charAt(i) >= 'L' ) { phred = 64; break; } }
}
......@@ -509,22 +518,24 @@ public class AlienTrimmer {
//####################
//## trimming reads ##
//####################
fin = new BufferedReader(new FileReader(finfile)); fout = new BufferedWriter(new FileWriter(foutfile));
if(finext.equals("gz") || finext.equals("gzip")) fin = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(finfile))));
else fin = new BufferedReader(new FileReader(finfile));
if(foutext.equals("gz") || foutext.equals("gzip")) fout = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(foutfile))));
else fout = new BufferedWriter(new FileWriter(foutfile));
pcpt = 0; ftcpt = 0; frcpt = 0;
cpt = B0; score = new byte[51];
while ( true ) {
//## reading fastq ##
try { line = fin.readLine().trim(); } catch ( NullPointerException e ) { fin.close(); break; }
switch ( ++cpt ) { case B1: fid1 = line; continue; case B2: fseq = line; continue; case B3: fid2 = line; continue; case B4: fqsc = line; cpt = B0; break; }
//## displaying info ##
while ( true ) {
//## reading fastq ##
try { line = fin.readLine().trim(); } catch ( NullPointerException e ) { fin.close(); break; }
switch ( ++cpt ) { case B1: fid1 = line; continue; case B2: fseq = line; continue; case B3: fid2 = line; continue; case B4: fqsc = line; cpt = B0; break; }
//## displaying info ##
if ( ++pcpt % 1000000 == 0 )
System.out.println("[" + String.format(Locale.US, "%02d", new Long((cur=(System.currentTimeMillis()-t)/1000)/60))
+ ":" + (line="0"+(cur%60)).substring(line.length()-2) + "]"
+ String.format(Locale.US, "%,12d", new Integer(pcpt)) + " reads processed:"
+ String.format(Locale.US, "%,12d", new Integer(ftcpt)) + " trimmed"
+ String.format(Locale.US, "%,12d", new Integer(frcpt)) + " removed");
//## matching k-mers... ##
if ( (lgt=(short)fseq.length()) < minLgt ) { ++frcpt; continue; } // too small read
if ( lgt >= score.length ) { score = new byte[++lgt]; --lgt; } else Arrays.fill(score, B0);
......@@ -552,12 +563,12 @@ public class AlienTrimmer {
case 'G': case 'g': iptn |= B2; lptn |= B4; nptn |= B4; break;
case 'T': case 't': iptn |= B3; lptn |= B8; nptn |= B8; break;
default: n = k; nptn |= B15;
}
}
switch ( (((--n) < B0) ? B0 : B1) ) {
case B0:
if ( bsfkmer.get((iptn &= imsk)) ) {
b = (score[i]=(++b)); score[i] += bq; --score[++j];
scount += B2; start0 = ( start0 < B0 ) ? ((short)(B2*i)) : start0;
b = (score[i]=(++b)); score[i] += bq; --score[++j];
scount += B2; start0 = ( start0 < B0 ) ? ((short)(B2*i)) : start0;
//start = ( /*(i-start <= k2) &&*/ (j >= start0) && (scount+k2 >= j) ) ? j : ++start; --start;
start = ( (j >= start0) && (scount+k2 >= j) )
? (( start < B0 ) ? j : (( i-start <= mismatch ) ? j : ++start))
......@@ -580,7 +591,7 @@ public class AlienTrimmer {
}
break;
}
b = (score[i]=((b < B0) ? B0 : b)); ++j;
b = (score[i]=((b < B0) ? B0 : b)); ++j;
if ( (score[i] += bq) == B0 ) continue;
scount += B2; start0 = ( start0 < B0 ) ? ((short)(B2*i)) : start0;
start = ( (start < i) && ((x=i)-start <= mismatch) && ((++x) >= start0) && (scount > x) ) ? i : start; end = i;
......@@ -607,6 +618,7 @@ public class AlienTrimmer {
}
b = (score[i]=((b < B0) ? B0 : b)); ++j;
if ( (score[i] += bq) == B0 ) continue;
//Very strange
scount += B2; start0 = ( start0 < B0 ) ? ((short)(B2*i)) : start0;
start = ( (start < i) && ((x=i)-start <= mismatch) && ((++x) >= start0) && (scount > x) ) ? i : start; end = i;
}
......@@ -681,14 +693,15 @@ public class AlienTrimmer {
o = B_1; while ( ++o < lgt ) System.out.print( (( (b=score[o]) == B0 ) ? " " : ( b < B10 ) ? b : ( b == B10 ) ? "0" : (""+(char)(54+b))) ); System.out.println("");
o = B_1; while ( ++o <= start ) System.out.print(">"); --o; while ( ++o < end ) System.out.print(" "); --o; while ( ++o < lgt ) System.out.print("<");
System.out.println(""); System.out.println("");
}
}
fout.close();
System.out.println("[" + String.format(Locale.US, "%02d", new Long((cur=(System.currentTimeMillis()-t)/1000)/60))
+ ":" + (line="0"+(cur%60)).substring(line.length()-2) + "]"
+ String.format(Locale.US, "%,12d", new Integer(pcpt)) + " reads processed:"
+ String.format(Locale.US, "%,12d", new Integer(ftcpt)) + " trimmed"
+ String.format(Locale.US, "%,12d", new Integer(frcpt)) + " removed");
}
}
......@@ -961,12 +974,20 @@ public class AlienTrimmer {
//################################
//## trimming reads (fwd & rev) ##
//################################
fin = new BufferedReader(new FileReader(finfile)); fout = new BufferedWriter(new FileWriter(foutfile));
rin = new BufferedReader(new FileReader(rinfile)); rout = new BufferedWriter(new FileWriter(routfile));
// Open the forward/reverse inputs and outputs, transparently (de)compressing with gzip
// when the file extension is "gz" or "gzip". The extension strings hold only the text
// after the last '.', so they never contain a dot themselves: comparing against ".gzip"
// could never match and would silently treat *.gzip files as plain text.
if ( finext.equals("gz") || finext.equals("gzip") ) fin = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(finfile))));
else fin = new BufferedReader(new FileReader(finfile));
if ( foutext.equals("gz") || foutext.equals("gzip") ) fout = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(foutfile))));
else fout = new BufferedWriter(new FileWriter(foutfile));
if ( rinext.equals("gz") || rinext.equals("gzip") ) rin = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(rinfile))));
else rin = new BufferedReader(new FileReader(rinfile));
if ( routext.equals("gz") || routext.equals("gzip") ) rout = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(routfile))));
else rout = new BufferedWriter(new FileWriter(routfile));
//fin = new BufferedReader(new FileReader(finfile)); fout = new BufferedWriter(new FileWriter(foutfile));
//rin = new BufferedReader(new FileReader(rinfile)); rout = new BufferedWriter(new FileWriter(routfile));
sout = new BufferedWriter(new FileWriter(soutfile));
pcpt = 0; ftcpt = 0; rtcpt = 0; rtcpt = 0; rrcpt = 0;
cpt = B0; score = new byte[51];
while ( true ) {
while ( true ) {
//## reading fastq ##
try { line = fin.readLine().trim(); } catch ( NullPointerException e ) { fin.close(); rin.close(); break; }
switch ( ++cpt ) { case B1: fid1 = line; break; case B2: fseq = line.toUpperCase(); break; case B3: fid2 = line; break; case B4: fqsc = line; break; }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment