From a010d6d70dc64e54893289d770aeb8e7bb693815 Mon Sep 17 00:00:00 2001
From: Bryan Brancotte <bryan.brancotte@pasteur.fr>
Date: Tue, 4 Oct 2022 15:02:06 +0200
Subject: [PATCH] ability to ban word from index

---
 autocomplete_multi_models/admin.py            |  21 ++
 autocomplete_multi_models/banned_words        | 224 ++++++++++++++++++
 autocomplete_multi_models/business_process.py |   9 +
 .../migrations/0003_bannedwordfromindex.py    |  41 ++++
 autocomplete_multi_models/models.py           |  28 +++
 setup.cfg                                     |   2 +-
 6 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 autocomplete_multi_models/banned_words
 create mode 100644 autocomplete_multi_models/migrations/0003_bannedwordfromindex.py

diff --git a/autocomplete_multi_models/admin.py b/autocomplete_multi_models/admin.py
index f868571..ba2fde5 100644
--- a/autocomplete_multi_models/admin.py
+++ b/autocomplete_multi_models/admin.py
@@ -1,4 +1,5 @@
 from django.contrib import admin
+from django.db import connection
 
 # Register your models here.
 from autocomplete_multi_models import models
@@ -8,3 +9,23 @@ from autocomplete_multi_models import models
 class IndexedWordAdmin(admin.ModelAdmin):
     list_display = ("word",)
     search_fields = ("word",)
+    actions = [
+        "ban_from_index",
+    ]
+
+    def ban_from_index(self, request, queryset):
+        """Ban the selected words, upper-cased and unaccented, and delete them from the index."""
+        words = set()  # a set: several selected words may normalize to the same banned word
+        with connection.cursor() as cursor:
+            for o in queryset:
+                cursor.execute("SELECT UPPER(UNACCENT(%s)) as value", [o.word])
+                words.add(cursor.fetchone()[0])
+                o.delete()
+        to_add = [models.BannedWordFromIndex(word=w) for w in sorted(words)]
+        models.BannedWordFromIndex.objects.bulk_create(to_add, ignore_conflicts=True)  # skip already banned
+
+
+@admin.register(models.BannedWordFromIndex)
+class BannedWordFromIndexAdmin(admin.ModelAdmin):
+    """Admin of the banned words: shows the word column and searches on it."""
+    list_display = search_fields = ("word",)
diff --git a/autocomplete_multi_models/banned_words b/autocomplete_multi_models/banned_words
new file mode 100644
index 0000000..f0369cf
--- /dev/null
+++ b/autocomplete_multi_models/banned_words
@@ -0,0 +1,224 @@
+a
+about
+above
+after
+again
+against
+aim
+all
+also
+am
+an
+and
+any
+are
+aren't
+arial
+as
+at
+be
+because
+been
+before
+being
+below
+between
+both
+but
+by
+can
+cannot
+can't
+cell
+com
+could
+couldn't
+dans
+de
+des
+did
+didn't
+do
+does
+doesn't
+doing
+don't
+down
+du
+during
+each
+else
+est
+et
+ever
+few
+font
+for
+from
+further
+get
+had
+hadn't
+has
+hasn't
+have
+haven't
+having
+he
+he'd
+he'll
+hence
+her
+here
+here's
+hers
+herself
+he's
+him
+himself
+his
+how
+however
+how's
+http
+i
+i'd
+if
+i'll
+i'm
+in
+into
+is
+isn't
+it
+it's
+its
+itself
+i've
+just
+k
+la
+le
+les
+let's
+like
+me
+more
+most
+mustn't
+my
+myself
+nbsp
+no
+Non
+nor
+not
+of
+off
+on
+once
+only
+or
+other
+otherwise
+ought
+our
+ours
+ourselves
+out
+over
+own
+pour
+project
+r
+same
+sans
+serif
+shall
+shan't
+she
+she'd
+she'll
+she's
+should
+shouldn't
+since
+size
+so
+some
+span
+strong
+style
+such
+Sur
+than
+that
+that's
+the
+their
+theirs
+them
+themselves
+then
+there
+therefore
+there's
+these
+they
+they'd
+they'll
+they're
+they've
+this
+those
+through
+thus
+to
+tohama
+too
+two
+type
+under
+une
+until
+up
+used
+using
+very
+was
+wasn't
+we
+we'd
+we'll
+we're
+were
+weren't
+we've
+what
+what's
+when
+when's
+where
+where's
+which
+while
+who
+whom
+who's
+why
+why's
+will
+with
+won't
+would
+wouldn't
+www
+you
+you'd
+you'll
+your
+you're
+yours
+yourself
+yourselves
+you've
\ No newline at end of file
diff --git a/autocomplete_multi_models/business_process.py b/autocomplete_multi_models/business_process.py
index 5cb7b00..aa00784 100644
--- a/autocomplete_multi_models/business_process.py
+++ b/autocomplete_multi_models/business_process.py
@@ -5,6 +5,7 @@ from django.contrib.postgres.lookups import Unaccent
 from django.contrib.postgres.search import TrigramSimilarity
 from django.db import connection
 from django.db.models import Exists, OuterRef, Case, When, Value, F
+from django.db.models.functions import Upper
 from django.db.transaction import atomic
 
 from autocomplete_multi_models import utils, models
@@ -46,6 +47,7 @@ def rebuild_index():
             for instance in model.objects.only(*field_names):
                 _add_instance_to_index(instance, field_names, objects, cursor)
     models.IndexedWord.objects.bulk_create(objects.values())
+    _purge_banned_words()
 
 
 # def clean_duplicate():
@@ -106,6 +108,13 @@ def _update_in_index(objects: dict):
         if changed == 0:
             objects_to_create.append(o)
     models.IndexedWord.objects.bulk_create(objects_to_create)
+    _purge_banned_words()
+
+
+def _purge_banned_words():
+    """Delete every indexed word whose upper-cased, unaccented form is a banned word."""
+    is_banned = Exists(models.BannedWordFromIndex.objects.filter(word=Upper(Unaccent(OuterRef('word')))))
+    models.IndexedWord.objects.annotate(banned=is_banned).filter(banned=True).delete()
 
 
 def _add_text_to_index(value: str, objects: list, cursor):
diff --git a/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py b/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py
new file mode 100644
index 0000000..5678788
--- /dev/null
+++ b/autocomplete_multi_models/migrations/0003_bannedwordfromindex.py
@@ -0,0 +1,41 @@
+# Generated by Django 3.2.9 on 2022-10-04 12:19
+import os
+
+from django.contrib.postgres.lookups import Unaccent
+from django.db import migrations, models
+from django.db.models import F, Value
+from django.db.models.functions import Upper
+
+FILE = os.path.dirname(__file__)
+
+
+def migration_code(apps, schema_editor):
+    """Seed the banned words from the bundled ``banned_words`` file, then normalize them in database."""
+    BannedWordFromIndex = apps.get_model("autocomplete_multi_models", "BannedWordFromIndex")
+    # The context manager closes the file; the explicit encoding avoids depending on the platform locale.
+    with open(os.path.join(FILE, '..', 'banned_words'), encoding='utf-8') as banned_words:
+        # Skip blank lines so that an empty string never ends up as a banned word.
+        words = [w for w in map(str.strip, banned_words) if w]
+    BannedWordFromIndex.objects.bulk_create([BannedWordFromIndex(word=w) for w in words])
+    # Upper-casing and unaccenting are done by the database, UNACCENT being a PostgreSQL function.
+    BannedWordFromIndex.objects.update(word=Unaccent(Upper(F('word'))))
+
+
+class Migration(migrations.Migration):
+    """Create the ``BannedWordFromIndex`` table, then seed it by running ``migration_code``."""
+
+    dependencies = [('autocomplete_multi_models', '0002_add_count')]
+
+    operations = [
+        migrations.CreateModel(
+            name='BannedWordFromIndex',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('word', models.CharField(db_index=True, max_length=64, unique=True)),
+            ],
+            options={'verbose_name_plural': 'Banned words from index'},
+        ),
+        # No reverse code is needed for the data step: un-applying this migration reverses
+        # CreateModel, which drops the table together with the rows inserted by migration_code.
+        migrations.RunPython(migration_code, reverse_code=migrations.RunPython.noop),
+    ]
diff --git a/autocomplete_multi_models/models.py b/autocomplete_multi_models/models.py
index 727f95c..0d85152 100644
--- a/autocomplete_multi_models/models.py
+++ b/autocomplete_multi_models/models.py
@@ -1,5 +1,10 @@
 import django.contrib.postgres.indexes
 import django.db.models
+from django.contrib.postgres.lookups import Unaccent
+from django.db.models import F
+from django.db.models.functions import Upper
+from django.db.models.signals import post_save
+from django.dispatch import receiver
 
 
 # Create your models here.
@@ -20,3 +25,26 @@ class IndexedWord(django.db.models.Model):
 
     def __str__(self):
         return self.word
+
+
+class BannedWordFromIndex(django.db.models.Model):
+    """A word kept out of the index; expected in its upper-cased, unaccented form."""
+
+    class Meta:
+        verbose_name_plural = "Banned words from index"
+
+    # NOTE(review): this attribute sits outside Meta, so Django does not read it as Meta.indexes and no
+    # GIN index is created (migration 0003 declares none); moving it into Meta needs a new migration.
+    indexes = [
+        django.contrib.postgres.indexes.GinIndex(fields=['word']),
+    ]
+    # Declared unique: the same word cannot be banned twice.
+    word = django.db.models.CharField(max_length=64, db_index=True, unique=True, null=False)
+
+    def __str__(self):
+        return self.word
+
+
+@receiver(post_save, sender=BannedWordFromIndex)
+def flush_live_settings_in_cache(instance, *args, **kwargs):  # despite its name: normalizes the saved word
+    BannedWordFromIndex.objects.filter(id=instance.id).update(word=Unaccent(Upper(F('word'))))  # in database only
diff --git a/setup.cfg b/setup.cfg
index 786e0ba..4153645 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = autocomplete-multi-models
-version = 0.3
+version = 0.4
 description = An app that index fields across multiple models, and expose an api to query for word similar to the query.
 long_description = file: README.md
 author = Bryan Brancotte
-- 
GitLab