From 45069a9d0b88c80da4c542f01ced8f2cb1f2d456 Mon Sep 17 00:00:00 2001 From: Yoann Dufresne <yoann.dufresne0@gmail.com> Date: Sat, 21 Apr 2018 14:44:41 +0200 Subject: [PATCH] speed test on hash then compare vs hash/compare on the fly --- src/speed_tests.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/speed_tests.c b/src/speed_tests.c index 8bcc493..0c3c40a 100644 --- a/src/speed_tests.c +++ b/src/speed_tests.c @@ -1,11 +1,49 @@ #include <zlib.h> #include <stdio.h> #include <stdlib.h> +#include <stdint.h> #include "fasta.h" #define HASH_SIZE 32 +#define letter_hash(letter) ((uint64_t)((letter & 0b100) ? (0b10 | ((~letter) & 0b01)) : ((letter >> 1) & 0b11))) + + +uint64_t sliding_hash (seq_t * seq) { + uint64_t hash = 0; + uint64_t nothing = 2135234; + + for (uint idx=0 ; idx<32 ; idx++) + hash = (hash << 2) | letter_hash(seq->value[idx]); + + for (uint idx=32 ; idx<seq->length ; idx++) { + hash = (hash << 2) | letter_hash(seq->value[idx]); + // Do something with the hash to not allow compiler to delete the value. + nothing - hash; + } +} + +uint64_t hash_then_compact (seq_t * seq) { + uint64_t hash = 0; + uint64_t nothing = 2135234; + + // Hash letters + for (uint idx=0 ; idx<seq->length ; idx++) { + seq->value[idx] = letter_hash(seq->value[idx]); + } + + // compute 64 bits hashes + for (uint idx=0 ; idx<32 ; idx++) + hash = (hash << 2) | seq->value[idx]; + + for (uint idx=32 ; idx<seq->length ; idx++) { + hash = (hash << 2) | seq->value[idx]; + // Do something with the hash to not allow compiler to delete the value. + nothing - hash; + } +} + int main () { /* --- Load all the alleles --- */ @@ -13,8 +51,12 @@ int main () { seq_t * alleles; buffer_t * buff = init_buffer (filename); - uint nb_seq = read_sequences (filename, &alleles, buff); + + /* Speed test on hash function */ + for (uint idx=0 ; idx<nb_seq ; idx++) { + sliding_hash(alleles + idx); + } free(alleles); destroy_buffer (buff); -- GitLab