From 9d746b0d319c1b3f23c7c9c6d68f49d191dae63b Mon Sep 17 00:00:00 2001 From: Veronique Legrand <vlegrand@pasteur.fr> Date: Wed, 26 Mar 2025 16:38:58 +0100 Subject: [PATCH] work in progress: testing the impact of underlying buffer size when reading gzip fasta files --- phagetermvirome/IData_handling.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/phagetermvirome/IData_handling.py b/phagetermvirome/IData_handling.py index dc7d31a..b75abe5 100755 --- a/phagetermvirome/IData_handling.py +++ b/phagetermvirome/IData_handling.py @@ -286,14 +286,15 @@ class ReadGetter: def totReads(filin): """Verify and retrieve the number of reads in the fastq file before alignment""" start_t=time.perf_counter_ns() + bufsize=102400 if filin.endswith('.gz'): print(io.DEFAULT_BUFFER_SIZE) filein = gzip.open(filin, 'rb') else: filein = open(filin, 'r') - line = 0 - while filein.readline(): + with io.BufferedReader(filein, buffer_size=bufsize) as buffered_f: + #while filein.readline(): line += 1 seq = float(round(line / 4)) filein.close() -- GitLab