From a010d6d70dc64e54893289d770aeb8e7bb693815 Mon Sep 17 00:00:00 2001 From: Bryan Brancotte <bryan.brancotte@pasteur.fr> Date: Tue, 4 Oct 2022 15:02:06 +0200 Subject: [PATCH] ability to ban word from index --- autocomplete_multi_models/admin.py | 21 ++ autocomplete_multi_models/banned_words | 224 ++++++++++++++++++ autocomplete_multi_models/business_process.py | 9 + .../migrations/0003_bannedwordfromindex.py | 41 ++++ autocomplete_multi_models/models.py | 28 +++ setup.cfg | 2 +- 6 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 autocomplete_multi_models/banned_words create mode 100644 autocomplete_multi_models/migrations/0003_bannedwordfromindex.py diff --git a/autocomplete_multi_models/admin.py b/autocomplete_multi_models/admin.py index f868571..ba2fde5 100644 --- a/autocomplete_multi_models/admin.py +++ b/autocomplete_multi_models/admin.py @@ -1,4 +1,5 @@ from django.contrib import admin +from django.db import connection # Register your models here. from autocomplete_multi_models import models @@ -8,3 +9,23 @@ from autocomplete_multi_models import models class IndexedWordAdmin(admin.ModelAdmin): list_display = ("word",) search_fields = ("word",) + actions = [ + "ban_from_index", + ] + + def ban_from_index(self, request, queryset): + to_add = [] + with connection.cursor() as cursor: + for o in queryset: + cursor.execute("SELECT UPPER(UNACCENT(%s)) as value", [o.word]) + word = cursor.fetchone()[0] + if not models.BannedWordFromIndex.objects.filter(word=word).exists(): + to_add.append(models.BannedWordFromIndex(word=word)) + o.delete() + models.BannedWordFromIndex.objects.bulk_create(to_add) + + +@admin.register(models.BannedWordFromIndex) +class BannedWordFromIndexAdmin(admin.ModelAdmin): + list_display = ("word",) + search_fields = ("word",) diff --git a/autocomplete_multi_models/banned_words b/autocomplete_multi_models/banned_words new file mode 100644 index 0000000..f0369cf --- /dev/null +++ b/autocomplete_multi_models/banned_words @@ -0,0 +1,224 @@ +a +about +above +after +again +against +aim +all +also +am +an +and +any +are +aren't +arial +as +at +be +because +been +before +being +below +between +both +but +by +can +cannot +can't +cell +com +could +couldn't +dans +de +des +did +didn't +do +does +doesn't +doing +don't +down +du +during +each +else +est +et +ever +few +font +for +from +further +get +had +hadn't +has +hasn't +have +haven't +having +he +he'd +he'll +hence +her +here +here's +hers +herself +he's +him +himself +his +how +however +how's +http +i +i'd +if +i'll +i'm +in +into +is +isn't +it +it's +its +itself +i've +just +k +la +le +les +let's +like +me +more +most +mustn't +my +myself +nbsp +no +Non +nor +not +of +off +on +once +only +or +other +otherwise +ought +our +ours +ourselves +out +over +own +pour +project +r +same +sans +serif +shall +shan't +she +she'd +she'll +she's +should +shouldn't +since +size +so +some +span +strong +style +such +Sur +than +that +that's +the +their +theirs +them +themselves +then +there +therefore +there's +these +they +they'd +they'll +they're +they've +this +those +through +thus +to +tohama +too +two +type +under +une +until +up +used +using +very +was +wasn't +we +we'd +we'll +we're +were +weren't +we've +what +what's +when +when's +where +where's +which +while +who +whom +who's +why +why's +will +with +won't +would +wouldn't +www +you +you'd +you'll +your +you're +yours +yourself +yourselves +you've \ No newline at end of file diff --git a/autocomplete_multi_models/business_process.py b/autocomplete_multi_models/business_process.py index 5cb7b00..aa00784 100644 --- a/autocomplete_multi_models/business_process.py +++ b/autocomplete_multi_models/business_process.py @@ -5,6 +5,7 @@ from django.contrib.postgres.lookups import Unaccent from django.contrib.postgres.search import TrigramSimilarity from django.db import connection from django.db.models import Exists, OuterRef, Case, When, Value, F +from django.db.models.functions import Upper from django.db.transaction import atomic from autocomplete_multi_models import utils, models @@ -46,6 +47,7 @@ def rebuild_index(): for instance in model.objects.only(*field_names): _add_instance_to_index(instance, field_names, objects, cursor) models.IndexedWord.objects.bulk_create(objects.values()) + _purge_banned_words() # def clean_duplicate(): @@ -106,6 +108,13 @@ def _update_in_index(objects: dict): if changed == 0: objects_to_create.append(o) models.IndexedWord.objects.bulk_create(objects_to_create) + _purge_banned_words() + + +def _purge_banned_words(): + models.IndexedWord.objects.annotate( + banned=Exists(models.BannedWordFromIndex.objects.filter(word=Upper(Unaccent(OuterRef('word'))))) + ).filter(banned=True).delete() def _add_text_to_index(value: str, objects: list, cursor): diff --git a/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py b/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py new file mode 100644 index 0000000..5678788 --- /dev/null +++ b/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py @@ -0,0 +1,41 @@ +# Generated by Django 3.2.9 on 2022-10-04 12:19 +import os + +from django.contrib.postgres.lookups import Unaccent +from django.db import migrations, models +from django.db.models import F, Value +from django.db.models.functions import Upper + +FILE = os.path.dirname(__file__) + + +def migration_code(apps, schema_editor): + BannedWordFromIndex = apps.get_model("autocomplete_multi_models", "BannedWordFromIndex") + BannedWordFromIndex.objects.bulk_create( + [ + BannedWordFromIndex(word=w) + for w in map(str.strip, open(os.path.join(FILE, '..', 'banned_words')).readlines()) + # if not BannedWordFromIndex.objects.filter(word=Unaccent(Upper(Value(w)))).exists() + ] + ) + BannedWordFromIndex.objects.update(word=Unaccent(Upper(F('word')))) + + +class Migration(migrations.Migration): + dependencies = [ + ('autocomplete_multi_models', '0002_add_count'), + ] + + operations = [ + migrations.CreateModel( + name='BannedWordFromIndex', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('word', models.CharField(db_index=True, max_length=64, unique=True)), + ], + options={ + 'verbose_name_plural': 'Banned words from index', + }, + ), + migrations.RunPython(migration_code, reverse_code=migrations.RunPython.noop), + ] diff --git a/autocomplete_multi_models/models.py b/autocomplete_multi_models/models.py index 727f95c..0d85152 100644 --- a/autocomplete_multi_models/models.py +++ b/autocomplete_multi_models/models.py @@ -1,5 +1,10 @@ import django.contrib.postgres.indexes import django.db.models +from django.contrib.postgres.lookups import Unaccent +from django.db.models import F +from django.db.models.functions import Upper +from django.db.models.signals import post_save +from django.dispatch import receiver # Create your models here. @@ -20,3 +25,26 @@ class IndexedWord(django.db.models.Model): def __str__(self): return self.word + + +class BannedWordFromIndex(django.db.models.Model): + class Meta: + verbose_name_plural = "Banned words from index" + + indexes = [ + django.contrib.postgres.indexes.GinIndex(fields=['word']), + ] + word = django.db.models.CharField( + max_length=64, + db_index=True, + unique=True, + null=False, + ) + + def __str__(self): + return self.word + + +@receiver(post_save, sender=BannedWordFromIndex) +def flush_live_settings_in_cache(instance, *args, **kwargs): + BannedWordFromIndex.objects.filter(id=instance.id).update(word=Unaccent(Upper(F('word')))) diff --git a/setup.cfg b/setup.cfg index 786e0ba..4153645 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = autocomplete-multi-models -version = 0.3 +version = 0.4 description = An app that index fields across multiple models, and expose an api to query for word similar to the query. long_description = file: README.md author = Bryan Brancotte -- GitLab