From f7a3254529b07ff951472618d9499f0772a48138 Mon Sep 17 00:00:00 2001 From: Danny Avila <messagedaniel@protonmail.com> Date: Thu, 27 Mar 2025 10:59:55 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=BE=20feat:=20Implement=20LRU=20cachin?= =?UTF-8?q?g=20for=20query=20embedding=20&=20bump=20deps.=20(#132)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 💾 feat: Implement LRU caching for query embedding * 🔧 fix: Update cryptography package version to 44.0.1 in requirements files, remove starlette * 🔧 fix: Update FastAPI version to 0.115.12 in requirements files --- app/routes/document_routes.py | 31 ++++++++++++++++++++++++++----- requirements.lite.txt | 4 ++-- requirements.txt | 7 +++---- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/app/routes/document_routes.py b/app/routes/document_routes.py index 29e58dd..3a0c44b 100644 --- a/app/routes/document_routes.py +++ b/app/routes/document_routes.py @@ -6,14 +6,30 @@ import aiofiles import aiofiles.os from shutil import copyfileobj from typing import List, Iterable -from fastapi import APIRouter, Request, UploadFile, HTTPException, File, Form, Body, Query, status +from fastapi import ( + APIRouter, + Request, + UploadFile, + HTTPException, + File, + Form, + Body, + Query, + status, +) from langchain_core.documents import Document from langchain_core.runnables import run_in_executor from langchain_text_splitters import RecursiveCharacterTextSplitter +from functools import lru_cache from app.config import logger, vector_store, RAG_UPLOAD_DIR, CHUNK_SIZE, CHUNK_OVERLAP from app.constants import ERROR_MESSAGES -from app.models import StoreDocument, QueryRequestBody, DocumentResponse, QueryMultipleBody +from app.models import ( + StoreDocument, + QueryRequestBody, + DocumentResponse, + QueryMultipleBody, +) from app.services.vector_store.async_pg_vector import AsyncPgVector from app.utils.document_loader import get_loader, clean_text, process_documents from app.utils.health import is_health_ok @@ -135,6 +151,12 @@ async def delete_documents(document_ids: List[str] = Body(...)): raise HTTPException(status_code=500, detail=str(e)) +# Cache the embedding function with LRU cache +@lru_cache(maxsize=128) +def get_cached_query_embedding(query: str): + return vector_store.embedding_function.embed_query(query) + + @router.post("/query") async def query_embeddings_by_file_id( body: QueryRequestBody, @@ -150,7 +172,7 @@ async def query_embeddings_by_file_id( authorized_documents = [] try: - embedding = vector_store.embedding_function.embed_query(body.query) + embedding = get_cached_query_embedding(body.query) if isinstance(vector_store, AsyncPgVector): documents = await run_in_executor( @@ -543,7 +565,7 @@ async def embed_file_upload( async def query_embeddings_by_file_ids(body: QueryMultipleBody): try: # Get the embedding of the query text - embedding = vector_store.embedding_function.embed_query(body.query) + embedding = get_cached_query_embedding(body.query) # Perform similarity search with the query embedding and filter by the file_ids in metadata if isinstance(vector_store, AsyncPgVector): @@ -582,4 +604,3 @@ async def query_embeddings_by_file_ids(body: QueryMultipleBody): traceback.format_exc(), ) raise HTTPException(status_code=500, detail=str(e)) - diff --git a/requirements.lite.txt b/requirements.lite.txt index 4e9f4d3..5e0d13b 100644 --- a/requirements.lite.txt +++ b/requirements.lite.txt @@ -4,7 +4,7 @@ langchain-openai==0.2.11 langchain-core==0.3.25 sqlalchemy==2.0.28 python-dotenv==1.0.1 -fastapi==0.110.0 +fastapi==0.115.12 psycopg2-binary==2.9.9 pgvector==0.2.5 uvicorn==0.28.0 @@ -24,7 +24,7 @@ rapidocr-onnxruntime==1.3.24 opencv-python-headless==4.9.0.80 pymongo==4.6.3 langchain-mongodb==0.2.0 -cryptography==43.0.1 +cryptography==44.0.1 python-magic==0.4.27 python-pptx==0.6.23 xlrd==2.0.1 diff --git a/requirements.txt b/requirements.txt index 4047311..ef964e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ langchain_text_splitters==0.3.3 boto3==1.34.144 sqlalchemy==2.0.28 python-dotenv==1.0.1 -fastapi==0.110.0 +fastapi==0.115.12 psycopg2-binary==2.9.9 pgvector==0.2.5 uvicorn==0.28.0 @@ -30,9 +30,8 @@ pymongo==4.6.3 langchain-mongodb==0.2.0 langchain-ollama==0.2.0 langchain-huggingface==0.1.0 -cryptography==43.0.1 +cryptography==44.0.1 python-magic==0.4.27 python-pptx==0.6.23 xlrd==2.0.1 -pydantic==2.9.2 -starlette==0.36.3 \ No newline at end of file +pydantic==2.9.2 \ No newline at end of file -- GitLab