From b0672bec96f7b4ebd01870c04535599bb541ad62 Mon Sep 17 00:00:00 2001 From: Bryan Brancotte <bryan.brancotte@pasteur.fr> Date: Mon, 19 Dec 2022 15:03:46 +0100 Subject: [PATCH] Merge plural with singular form, ability to toggle this new feature --- autocomplete_multi_models/business_process.py | 25 ++++++++++++++++++- .../tests/test_business_process.py | 14 +++++++++++ autocomplete_multi_models/utils.py | 6 +++++ setup.cfg | 2 +- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/autocomplete_multi_models/business_process.py b/autocomplete_multi_models/business_process.py index a2aa29b..bec655a 100644 --- a/autocomplete_multi_models/business_process.py +++ b/autocomplete_multi_models/business_process.py @@ -5,7 +5,7 @@ from django.contrib.postgres.lookups import Unaccent from django.contrib.postgres.search import TrigramSimilarity from django.db import connection from django.db.models import Exists, OuterRef, Case, When, Value, F -from django.db.models.functions import Upper +from django.db.models.functions import Upper, Substr, Length from django.db.transaction import atomic from autocomplete_multi_models import utils, models @@ -15,6 +15,7 @@ _pattern = re.compile("[^\\w\\d]") _AUTOCOMPLETE_MIN_LENGTH = utils.DEFAULT_AUTOCOMPLETE_MIN_LENGTH _AUTOCOMPLETE_MIN_SIMILARITY = utils.DEFAULT_AUTOCOMPLETE_MIN_SIMILARITY _AUTOCOMPLETE_LIMIT = utils.DEFAULT_AUTOCOMPLETE_LIMIT +_SHOULD_MERGE_PLURAL_INTO_SINGULAR = utils.DEFAULT_SHOULD_MERGE_PLURAL_INTO_SINGULAR _CAN_BE_INDEXED_BY_AUTOCOMPLETE_FUNCTION_NAME = utils.DEFAULT_CAN_BE_INDEXED_BY_AUTOCOMPLETE_FUNCTION_NAME __in_mem_storage = { utils.AUTO_UPDATE_ENABLED: True, @@ -48,6 +49,7 @@ def rebuild_index(): _add_instance_to_index(instance, field_names, objects, cursor) models.IndexedWord.objects.bulk_create(objects.values()) _purge_banned_words() + _merge_plural_into_singular() # def clean_duplicate(): @@ -109,6 +111,7 @@ def _update_in_index(objects: dict): objects_to_create.append(o) 
models.IndexedWord.objects.bulk_create(objects_to_create) _purge_banned_words() + _merge_plural_into_singular() def _purge_banned_words(): @@ -117,6 +120,26 @@ def _purge_banned_words(): ).filter(banned=True).delete() +def _merge_plural_into_singular(): + if not _SHOULD_MERGE_PLURAL_INTO_SINGULAR: + return + for w in ( + models.IndexedWord.objects.filter(word__endswith="s") + .annotate( + has_singular=Exists( + models.IndexedWord.objects.filter( + word__iexact=Substr(OuterRef('word'), 1, Length(OuterRef('word')) - Value(1)) + ) + ) + ) + .filter(has_singular=True) + ): + # The same word with a different case is not allowed, so this works because there is only one singular word. + # If uniqueness is not ensured, every case-insensitive match of the singular gets the plural's occurrence added. + models.IndexedWord.objects.filter(word__iexact=w.word[:-1]).update(occurrence=F('occurrence') + w.occurrence) + w.delete() + + def _add_text_to_index(value: str, objects: list, cursor): if value is None or value == '': return diff --git a/autocomplete_multi_models/tests/test_business_process.py b/autocomplete_multi_models/tests/test_business_process.py index ef1dbab..cf044ba 100644 --- a/autocomplete_multi_models/tests/test_business_process.py +++ b/autocomplete_multi_models/tests/test_business_process.py @@ -162,6 +162,20 @@ class NeedRebuildDefaultBehaviorTestCase(test_helpers.ChangeAutoCompleteSettings self.assertEqual(models.IndexedWord.objects.count(), 0) +@override_settings(SHOULD_MERGE_PLURAL_INTO_SINGULAR=True) +class MergePlural1(test_helpers.ChangeAutoCompleteSettingsTestCase): + def test_it(self): + business_process.add_text_to_index("Gene genes gene Genes") + self.assertEqual(models.IndexedWord.objects.count(), 1) + + +@override_settings(SHOULD_MERGE_PLURAL_INTO_SINGULAR=False) +class MergePlural2(test_helpers.ChangeAutoCompleteSettingsTestCase): + def test_it(self): + business_process.add_text_to_index("Gene genes gene genes") + self.assertEqual(models.IndexedWord.objects.count(), 2) + + @override_settings( AUTOCOMPLETE_PERSISTENT_VARIABLE_GETTER_SETTER=( 
"autocomplete_multi_models.tests.settings_storage_file_based.get_fcn", diff --git a/autocomplete_multi_models/utils.py b/autocomplete_multi_models/utils.py index e01972e..6aaaec9 100644 --- a/autocomplete_multi_models/utils.py +++ b/autocomplete_multi_models/utils.py @@ -5,6 +5,7 @@ from django.apps import apps DEFAULT_AUTOCOMPLETE_MIN_LENGTH = 4 DEFAULT_AUTOCOMPLETE_MIN_SIMILARITY = 0.3 DEFAULT_AUTOCOMPLETE_LIMIT = 10 +DEFAULT_SHOULD_MERGE_PLURAL_INTO_SINGULAR = False REBUILD_NEEDED = "is_autocomplete_multi_models_rebuild_needed" DEFAULT_CAN_BE_INDEXED_BY_AUTOCOMPLETE_FUNCTION_NAME = "can_be_indexed_by_autocomplete" AUTO_UPDATE_ENABLED = "is_autocomplete_auto_update_on_save_enabled" @@ -35,6 +36,11 @@ def init_from_settings(): 'AUTOCOMPLETE_LIMIT', DEFAULT_AUTOCOMPLETE_LIMIT, ) + business_process._SHOULD_MERGE_PLURAL_INTO_SINGULAR = getattr( + settings, + 'SHOULD_MERGE_PLURAL_INTO_SINGULAR', + DEFAULT_SHOULD_MERGE_PLURAL_INTO_SINGULAR, + ) business_process._CAN_BE_INDEXED_BY_AUTOCOMPLETE_FUNCTION_NAME = getattr( settings, 'CAN_BE_INDEXED_BY_AUTOCOMPLETE_FUNCTION_NAME', diff --git a/setup.cfg b/setup.cfg index b146079..18eec64 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = autocomplete-multi-models -version = 0.4.3.1 +version = 0.5 description = An app that index fields across multiple models, and expose an api to query for word similar to the query. long_description = file: README.md author = Bryan Brancotte -- GitLab