Skip to content
Snippets Groups Projects
Select Git revision
  • 6c6f3f0ae40ec7e53a9a754ada052f94dc686e1d
  • main default protected
  • v0.5.1
  • v0.5
  • v0.4.3.1
  • v0.4.2
  • v0.4.1
  • v0.4
  • v0.3
9 results

business_process.py

Blame
  • Bryan Brancotte's avatar
    Bryan BRANCOTTE authored
    6c6f3f0a
    History
    business_process.py 2.99 KiB
    import re
    from typing import Optional, List
    
    from django.contrib.postgres.lookups import Unaccent
    from django.contrib.postgres.search import TrigramSimilarity
    from django.db import connection
    from django.db.models import Exists, OuterRef, Case, When, Value, F
    from django.db.transaction import atomic
    
    from autocomplete_multi_models import utils, models
    
    # Separator used by split_string(): matches any single character that is
    # neither a word character (\w) nor a digit (\d).
    _pattern = re.compile("[^\\w\\d]")

    # Module-level tuning values, initialised from the defaults declared in utils.
    # Minimum length a word must have to be stored by add_text_to_index().
    _AUTOCOMPLETE_MIN_LENGTH = utils.DEFAULT_AUTOCOMPLETE_MIN_LENGTH
    # Fallback similarity threshold for get_closest_matching_words().
    _AUTOCOMPLETE_MIN_SIMILARITY = utils.DEFAULT_AUTOCOMPLETE_MIN_SIMILARITY
    # Fallback maximum number of results for get_closest_matching_words().
    _AUTOCOMPLETE_LIMIT = utils.DEFAULT_AUTOCOMPLETE_LIMIT
    
    
    def get_setting_from_storage(key, default):
        """Return the value of setting *key* from this storage-less stub.

        Nothing is actually stored: the result is ``True`` when *key* is
        ``utils.REBUILD_NEEDED`` and ``False`` for any other key.
        The *default* argument is accepted but not used.
        """
        is_rebuild_flag = key == utils.REBUILD_NEEDED
        return is_rebuild_flag
    
    
    def set_setting_in_storage(key, value):
        """Accept a setting *key*/*value* pair and discard it.

        This implementation persists nothing and always returns ``None``.
        """
        return None
    
    
    def split_string(value):
        """Split *value* on every character matched by the module-level ``_pattern``.

        Returns the list of fragments found between separators; consecutive
        separators yield empty strings in the result.
        """
        fragments = re.split(_pattern, value)
        return fragments
    
    
    @atomic
    def rebuild_index():
        """Drop every indexed word and rebuild the index from the indexed models.

        The whole operation is wrapped in ``atomic``. After all instances have
        been re-indexed, duplicated words are removed with clean_duplicate().
        """
        models.IndexedWord.objects.all().delete()
        indexed_fields = utils.get_indexed_fields()
        for model_cls, names in indexed_fields.items():
            # Only load the columns that are going to be indexed.
            instances = model_cls.objects.only(*names)
            for obj in instances:
                add_instance_to_index(obj, names)
        clean_duplicate()
    
    
    def clean_duplicate():
        """Delete indexed words that are duplicated, ignoring case.

        A row is deleted when another row holds the same word (case-insensitive
        comparison) with a greater primary key, so for each group of equal words
        only the row with the highest primary key remains.
        """
        # Correlated subquery: rows sharing the outer row's word but with a higher pk.
        newer_twins = models.IndexedWord.objects.filter(
            word__iexact=OuterRef('word'),
            pk__gt=OuterRef('pk'),
        )
        flagged = models.IndexedWord.objects.annotate(is_duplicate=Exists(newer_twins))
        flagged.filter(is_duplicate=True).delete()
    
    
    def add_instance_to_index(instance, field_names: List[str]):
        """Index the content of each attribute of *instance* listed in *field_names*.

        Every attribute value is read with ``getattr`` and handed to
        add_text_to_index().
        """
        for name in field_names:
            text = getattr(instance, name)
            add_text_to_index(text)
    
    
    def add_text_to_index(value: str):
        """Split *value* into words and store the eligible ones as IndexedWord rows.

        Nothing happens when *value* is ``None`` or the empty string. A word is
        kept only if its length lies between ``_AUTOCOMPLETE_MIN_LENGTH`` and 64
        (both inclusive) and it is not made solely of decimal characters.
        All kept words are inserted with a single ``bulk_create`` call.
        """
        if value is None or value == '':
            return
        new_entries = [
            models.IndexedWord(word=token)
            for token in split_string(value)
            if _AUTOCOMPLETE_MIN_LENGTH <= len(token) <= 64 and not token.isdecimal()
        ]
        models.IndexedWord.objects.bulk_create(new_entries)
    
    
    def get_closest_matching_words(word: str, limit: Optional[int] = None, min_similarity: Optional[float] = None):
        """Return the indexed words closest to *word*, best match first.

        The searched word is first un-accented by PostgreSQL. Each indexed word is
        then scored as the trigram similarity between its un-accented form and the
        searched word, plus a bonus of 1.0 when its un-accented form matches the
        ``startswith`` lookup for the searched word.

        :param word: the text to look for.
        :param limit: maximum number of results; ``None`` falls back to
            ``_AUTOCOMPLETE_LIMIT``, a negative value returns all results.
        :param min_similarity: only scores strictly greater than this value are
            kept; ``None`` falls back to ``_AUTOCOMPLETE_MIN_SIMILARITY``.
        :return: a queryset of IndexedWord ordered by decreasing ``similarity``,
            sliced to the limit unless the limit is negative.
        """
        max_results = _AUTOCOMPLETE_LIMIT if limit is None else limit
        threshold = _AUTOCOMPLETE_MIN_SIMILARITY if min_similarity is None else min_similarity

        # Let postgres strip the accents so the searched word is normalised
        # exactly like the indexed words are below.
        with connection.cursor() as cursor:
            cursor.execute("SELECT UNACCENT(%s) as value", [word])
            row = cursor.fetchone()
        needle = row[0]

        # 1.0 when the un-accented indexed word starts with the searched word, else 0.0.
        prefix_bonus = Case(
            When(ac_word__startswith=needle, then=Value(1.0)),
            default=Value(0.0),
        )
        ranked = (
            models.IndexedWord.objects
            .annotate(ac_word=Unaccent('word'))
            .annotate(ac_word_s_tri=TrigramSimilarity('ac_word', needle))
            .annotate(ac_word_bonus=prefix_bonus)
            .annotate(similarity=F('ac_word_s_tri') + F('ac_word_bonus'))
            .filter(similarity__gt=threshold)
            .order_by('-similarity')
        )
        # A negative limit means "no limit".
        return ranked if max_results < 0 else ranked[:max_results]