diff --git a/src/speed_tests.c b/src/speed_tests.c
index 8bcc4933000091dc97e0ed6f2ebf59c628cc12b0..0c3c40a8884ba4676f959f35bf1bf73e8ce3f650 100644
--- a/src/speed_tests.c
+++ b/src/speed_tests.c
@@ -1,11 +1,103 @@
 #include <zlib.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 
 #include "fasta.h"
 
 #define HASH_SIZE 32
 
+/*
+ * Sink for benchmark results. Storing each checksum in a volatile object
+ * forces the compiler to really compute the hashes, even when the caller
+ * ignores the returned value.
+ */
+static volatile uint64_t benchmark_sink;
+
+/*
+ * Map one sequence byte to a 2-bit code, looking only at its three low bits.
+ * For ASCII nucleotides: A/a -> 0, C/c -> 1, G/g -> 2, T/t -> 3.
+ * Any other byte also yields a value in 0..3; nothing is validated.
+ */
+static inline uint64_t letter_hash (int letter) {
+    if (letter & 0x4) {
+        return (uint64_t)(0x2 | (~letter & 0x1));
+    }
+    return (uint64_t)((letter >> 1) & 0x3);
+}
+
+/*
+ * Benchmark: build the 2-bit-per-letter hash of every HASH_SIZE-letter
+ * window of seq, hashing each letter on the fly.
+ *
+ * seq: sequence to scan; seq->value[0 .. seq->length - 1] is read.
+ * Returns the XOR of all window hashes, or 0 when the sequence is shorter
+ * than HASH_SIZE and therefore holds no complete window.
+ */
+uint64_t sliding_hash (seq_t * seq) {
+    uint64_t hash = 0;
+    uint64_t checksum = 0;
+
+    if (seq->length < HASH_SIZE) {
+        return 0;
+    }
+
+    /* Fill the first window. */
+    for (uint idx=0 ; idx<HASH_SIZE ; idx++) {
+        hash = (hash << 2) | letter_hash(seq->value[idx]);
+    }
+    checksum ^= hash;
+
+    /* With HASH_SIZE == 32 the 64-bit word is full, so each shift drops
+     * the oldest letter of the window. */
+    for (uint idx=HASH_SIZE ; idx<seq->length ; idx++) {
+        hash = (hash << 2) | letter_hash(seq->value[idx]);
+        checksum ^= hash;
+    }
+
+    benchmark_sink = checksum;
+    return checksum;
+}
+
+/*
+ * Benchmark variant: first overwrite every letter of seq with its 2-bit
+ * code, then build the window hashes from the converted values.
+ *
+ * seq: sequence to scan. seq->value is modified in place (it ends up
+ * holding codes in 0..3), so the original letters are lost and calling
+ * this twice on the same sequence would not reproduce the same hashes.
+ * Returns the XOR of all window hashes, or 0 when the sequence is shorter
+ * than HASH_SIZE; the in-place conversion is done in both cases.
+ */
+uint64_t hash_then_compact (seq_t * seq) {
+    uint64_t hash = 0;
+    uint64_t checksum = 0;
+
+    /* Convert the letters in place. */
+    for (uint idx=0 ; idx<seq->length ; idx++) {
+        seq->value[idx] = letter_hash(seq->value[idx]);
+    }
+
+    if (seq->length < HASH_SIZE) {
+        return 0;
+    }
+
+    /* Fill the first window. */
+    for (uint idx=0 ; idx<HASH_SIZE ; idx++) {
+        hash = (hash << 2) | (uint64_t)seq->value[idx];
+    }
+    checksum ^= hash;
+
+    /* Slide the window; each shift drops the oldest letter. */
+    for (uint idx=HASH_SIZE ; idx<seq->length ; idx++) {
+        hash = (hash << 2) | (uint64_t)seq->value[idx];
+        checksum ^= hash;
+    }
+
+    benchmark_sink = checksum;
+    return checksum;
+}
+
 int main () {
 
     /* --- Load all the alleles --- */
@@ -13,8 +105,12 @@ int main () {
     seq_t * alleles;
     buffer_t * buff = init_buffer (filename);
 
-
     uint nb_seq = read_sequences (filename, &alleles, buff);
+
+    /* Speed test on hash function */
+    for (uint idx=0 ; idx<nb_seq ; idx++) {
+        sliding_hash(alleles + idx);
+    }
 
     free(alleles);
     destroy_buffer (buff);