def get_kmer_occurences(seq, kmer_len):
    """
    Count every k-mer (substring of length kmer_len) found in seq.

    :param seq: the sequence to scan.
    :type seq: string
    :param kmer_len: the length of the k-mers to extract.
    :type kmer_len: integer
    :return: a list of tuples, each tuple contains a k-mer present in seq
             and its number of occurrences. An empty list is returned
             when seq is shorter than kmer_len.
    :rtype: list of (string, integer)
    """
    # Counter dictionary (keys will be kmers, values will be counts)
    kmers = {}
    # Index of the last position where a full-length kmer can start
    stop = len(seq) - kmer_len
    for i in range(stop + 1):
        # Extract kmer starting at position i
        kmer = seq[i: i + kmer_len]
        # Increment the count for kmer
        # or create the entry the first time this kmer is seen
        # using default value 0, plus 1
        kmers[kmer] = kmers.get(kmer, 0) + 1
    # pairs (kmer, count)
    # In Python 3 dict.items() is a view, not a list: materialize it so the
    # result really is the list promised above (indexable, sortable).
    return list(kmers.items())