From 9d746b0d319c1b3f23c7c9c6d68f49d191dae63b Mon Sep 17 00:00:00 2001
From: Veronique Legrand <vlegrand@pasteur.fr>
Date: Wed, 26 Mar 2025 16:38:58 +0100
Subject: [PATCH] work in progress: testing the impact of underlying buffer
 size when reading gzip fastq files

---
 phagetermvirome/IData_handling.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/phagetermvirome/IData_handling.py b/phagetermvirome/IData_handling.py
index dc7d31a..b75abe5 100755
--- a/phagetermvirome/IData_handling.py
+++ b/phagetermvirome/IData_handling.py
@@ -286,14 +286,15 @@ class ReadGetter:
 def totReads(filin):
     """Verify and retrieve the number of reads in the fastq file before alignment"""
     start_t=time.perf_counter_ns()
+    bufsize=102400
     if filin.endswith('.gz'):
         print(io.DEFAULT_BUFFER_SIZE)
         filein = gzip.open(filin, 'rb')
     else:
         filein = open(filin, 'r')
-
     line = 0
-    while filein.readline():
+    with io.BufferedReader(filein, buffer_size=bufsize) as buffered_f:
+    #while filein.readline():
         line += 1
     seq = float(round(line / 4))
     filein.close()
-- 
GitLab