# Select Git revision
# business_process.py
# Bryan BRANCOTTE authored
# business_process.py 2.99 KiB
import re
from typing import Optional, List
from django.contrib.postgres.lookups import Unaccent
from django.contrib.postgres.search import TrigramSimilarity
from django.db import connection
from django.db.models import Exists, OuterRef, Case, When, Value, F
from django.db.transaction import atomic
from autocomplete_multi_models import utils, models
# Separator used by split_string(): matches any single character that is
# neither a regex word character (\w) nor a digit (\d).
_pattern = re.compile("[^\\w\\d]")
# Module-level defaults, copied from autocomplete_multi_models.utils at import time.
# Words shorter than this are skipped by add_text_to_index().
_AUTOCOMPLETE_MIN_LENGTH = utils.DEFAULT_AUTOCOMPLETE_MIN_LENGTH
# Similarity threshold used by get_closest_matching_words() when none is given.
_AUTOCOMPLETE_MIN_SIMILARITY = utils.DEFAULT_AUTOCOMPLETE_MIN_SIMILARITY
# Result cap used by get_closest_matching_words() when none is given.
_AUTOCOMPLETE_LIMIT = utils.DEFAULT_AUTOCOMPLETE_LIMIT
def get_setting_from_storage(key, default):
    """Return the stored value for ``key``; this implementation stores nothing.

    The answer is ``True`` when ``key`` equals ``utils.REBUILD_NEEDED`` and
    ``False`` for any other key. ``default`` is accepted but never used.
    """
    is_rebuild_flag = key == utils.REBUILD_NEEDED
    return is_rebuild_flag
def set_setting_in_storage(key, value):
    """Accept a setting and discard it: nothing is persisted, ``None`` is returned."""
    return None
def split_string(value):
    """Split ``value`` on every character matched by the module's ``_pattern``.

    Consecutive separators produce empty strings in the returned list; they
    are not filtered out here.
    """
    pieces = re.split(_pattern, value)
    return pieces
@atomic
def rebuild_index():
    """Drop every indexed word and rebuild the index from the indexed models.

    Runs inside a single transaction (``@atomic``): all ``IndexedWord`` rows
    are deleted, then every instance of every model returned by
    ``utils.get_indexed_fields()`` is fed to ``add_instance_to_index``, and
    finally duplicates are removed with ``clean_duplicate``.
    """
    models.IndexedWord.objects.all().delete()
    for model, field_names in utils.get_indexed_fields().items():
        # Load only the indexed columns, and stream the rows with iterator()
        # so the queryset does not keep every instance of the table in its
        # result cache while the index is being rebuilt.
        for instance in model.objects.only(*field_names).iterator():
            add_instance_to_index(instance, field_names)
    # One pass once everything has been inserted is enough to drop duplicates.
    clean_duplicate()
def clean_duplicate():
    """Delete indexed words that have a case-insensitive twin with a greater pk.

    Among rows whose ``word`` compares equal with ``iexact``, only the one
    with the highest primary key is left in place.
    """
    # Correlated subquery: another row with the same word (ignoring case) and a larger pk.
    newer_twin = models.IndexedWord.objects.filter(
        word__iexact=OuterRef('word'),
        pk__gt=OuterRef('pk'),
    )
    flagged = models.IndexedWord.objects.annotate(is_duplicate=Exists(newer_twin))
    flagged.filter(is_duplicate=True).delete()
def add_instance_to_index(instance, field_names: List[str]):
    """Index the text held by each attribute of ``instance`` named in ``field_names``."""
    # The generator is lazy, so each attribute is read right before it is indexed.
    texts = (getattr(instance, name) for name in field_names)
    for text in texts:
        add_text_to_index(text)
def add_text_to_index(value: str):
    """Split ``value`` into words and bulk-insert the eligible ones as ``IndexedWord`` rows.

    ``None`` and the empty string are ignored. A word is skipped when it is
    shorter than ``_AUTOCOMPLETE_MIN_LENGTH``, longer than 64 characters, or
    made only of decimal characters. Duplicates are not filtered here.
    """
    if value is None or value == '':
        return

    def _is_indexable(token):
        # Keep tokens within the accepted length range that are not purely numeric.
        return _AUTOCOMPLETE_MIN_LENGTH <= len(token) <= 64 and not token.isdecimal()

    entries = [
        models.IndexedWord(word=token)
        for token in split_string(value)
        if _is_indexable(token)
    ]
    models.IndexedWord.objects.bulk_create(entries)
def get_closest_matching_words(word: str, limit: Optional[int] = None, min_similarity: Optional[float] = None):
    """Return the indexed words most similar to ``word``, best match first.

    The score of a row is the trigram similarity between the un-accented
    indexed word and the un-accented searched word, plus a bonus of 1.0 when
    the un-accented indexed word starts with the searched word.

    :param word: the searched word; accents are removed by the database.
    :param limit: maximum number of rows; ``None`` falls back to
        ``_AUTOCOMPLETE_LIMIT`` and a negative value returns every match.
    :param min_similarity: rows must score strictly above this value;
        ``None`` falls back to ``_AUTOCOMPLETE_MIN_SIMILARITY``.
    :return: a queryset of ``IndexedWord`` annotated with ``similarity``.
    """
    max_results = _AUTOCOMPLETE_LIMIT if limit is None else limit
    threshold = _AUTOCOMPLETE_MIN_SIMILARITY if min_similarity is None else min_similarity

    # Let postgres strip the accents so the searched word is normalised
    # exactly like the indexed words are below.
    with connection.cursor() as cursor:
        cursor.execute("SELECT UNACCENT(%s) as value", [word])
        row = cursor.fetchone()
    needle = row[0]

    # NOTE(review): ``startswith`` is a case-sensitive lookup — confirm that
    # this is the intended behaviour for the prefix bonus.
    prefix_bonus = Case(
        When(ac_word__startswith=needle, then=Value(1.0)),
        default=Value(0.0),
    )

    ranked = (
        models.IndexedWord.objects
        # compare against the un-accented form of the indexed word
        .annotate(ac_word=Unaccent('word'))
        # raw trigram similarity
        .annotate(ac_word_s_tri=TrigramSimilarity('ac_word', needle))
        # bonus when the indexed word begins with the searched word
        .annotate(ac_word_bonus=prefix_bonus)
        # final score = similarity + bonus
        .annotate(similarity=F('ac_word_s_tri') + F('ac_word_bonus'))
        .filter(similarity__gt=threshold)
        .order_by('-similarity')
    )

    # A negative limit means "no limit".
    return ranked if max_results < 0 else ranked[:max_results]