From f7a3254529b07ff951472618d9499f0772a48138 Mon Sep 17 00:00:00 2001
From: Danny Avila <messagedaniel@protonmail.com>
Date: Thu, 27 Mar 2025 10:59:55 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=92=BE=20feat:=20Implement=20LRU=20cachin?=
 =?UTF-8?q?g=20for=20query=20embedding=20&=20bump=20deps.=20(#132)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 💾 feat: Implement LRU caching for query embedding

* 🔧 fix: Update cryptography package version to 44.0.1 in requirements files, remove starlette pin from requirements.txt

* 🔧 fix: Update FastAPI version to 0.115.12 in requirements files
---
 app/routes/document_routes.py | 31 ++++++++++++++++++++++++++-----
 requirements.lite.txt         |  4 ++--
 requirements.txt              |  7 +++----
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/app/routes/document_routes.py b/app/routes/document_routes.py
index 29e58dd..3a0c44b 100644
--- a/app/routes/document_routes.py
+++ b/app/routes/document_routes.py
@@ -6,14 +6,30 @@ import aiofiles
 import aiofiles.os
 from shutil import copyfileobj
 from typing import List, Iterable
-from fastapi import APIRouter, Request, UploadFile, HTTPException, File, Form, Body, Query, status
+from fastapi import (
+    APIRouter,
+    Request,
+    UploadFile,
+    HTTPException,
+    File,
+    Form,
+    Body,
+    Query,
+    status,
+)
 from langchain_core.documents import Document
 from langchain_core.runnables import run_in_executor
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from functools import lru_cache
 
 from app.config import logger, vector_store, RAG_UPLOAD_DIR, CHUNK_SIZE, CHUNK_OVERLAP
 from app.constants import ERROR_MESSAGES
-from app.models import StoreDocument, QueryRequestBody, DocumentResponse, QueryMultipleBody
+from app.models import (
+    StoreDocument,
+    QueryRequestBody,
+    DocumentResponse,
+    QueryMultipleBody,
+)
 from app.services.vector_store.async_pg_vector import AsyncPgVector
 from app.utils.document_loader import get_loader, clean_text, process_documents
 from app.utils.health import is_health_ok
@@ -135,6 +151,12 @@ async def delete_documents(document_ids: List[str] = Body(...)):
         raise HTTPException(status_code=500, detail=str(e))
 
 
+# Memoize query embeddings per exact query string (LRU, up to 128 entries) to skip repeat embed_query calls
+@lru_cache(maxsize=128)
+def get_cached_query_embedding(query: str):
+    return vector_store.embedding_function.embed_query(query)
+
+
 @router.post("/query")
 async def query_embeddings_by_file_id(
     body: QueryRequestBody,
@@ -150,7 +172,7 @@ async def query_embeddings_by_file_id(
     authorized_documents = []
 
     try:
-        embedding = vector_store.embedding_function.embed_query(body.query)
+        embedding = get_cached_query_embedding(body.query)
 
         if isinstance(vector_store, AsyncPgVector):
             documents = await run_in_executor(
@@ -543,7 +565,7 @@ async def embed_file_upload(
 async def query_embeddings_by_file_ids(body: QueryMultipleBody):
     try:
         # Get the embedding of the query text
-        embedding = vector_store.embedding_function.embed_query(body.query)
+        embedding = get_cached_query_embedding(body.query)
 
         # Perform similarity search with the query embedding and filter by the file_ids in metadata
         if isinstance(vector_store, AsyncPgVector):
@@ -582,4 +604,3 @@ async def query_embeddings_by_file_ids(body: QueryMultipleBody):
             traceback.format_exc(),
         )
         raise HTTPException(status_code=500, detail=str(e))
-
diff --git a/requirements.lite.txt b/requirements.lite.txt
index 4e9f4d3..5e0d13b 100644
--- a/requirements.lite.txt
+++ b/requirements.lite.txt
@@ -4,7 +4,7 @@ langchain-openai==0.2.11
 langchain-core==0.3.25 
 sqlalchemy==2.0.28
 python-dotenv==1.0.1
-fastapi==0.110.0
+fastapi==0.115.12
 psycopg2-binary==2.9.9
 pgvector==0.2.5
 uvicorn==0.28.0
@@ -24,7 +24,7 @@ rapidocr-onnxruntime==1.3.24
 opencv-python-headless==4.9.0.80
 pymongo==4.6.3
 langchain-mongodb==0.2.0
-cryptography==43.0.1
+cryptography==44.0.1
 python-magic==0.4.27
 python-pptx==0.6.23
 xlrd==2.0.1
diff --git a/requirements.txt b/requirements.txt
index 4047311..ef964e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ langchain_text_splitters==0.3.3
 boto3==1.34.144
 sqlalchemy==2.0.28
 python-dotenv==1.0.1
-fastapi==0.110.0
+fastapi==0.115.12
 psycopg2-binary==2.9.9
 pgvector==0.2.5
 uvicorn==0.28.0
@@ -30,9 +30,8 @@ pymongo==4.6.3
 langchain-mongodb==0.2.0
 langchain-ollama==0.2.0
 langchain-huggingface==0.1.0
-cryptography==43.0.1
+cryptography==44.0.1
 python-magic==0.4.27
 python-pptx==0.6.23
 xlrd==2.0.1
-pydantic==2.9.2
-starlette==0.36.3
\ No newline at end of file
+pydantic==2.9.2
\ No newline at end of file
-- 
GitLab