diff --git a/app/routes/document_routes.py b/app/routes/document_routes.py index 29e58dd34e10914d23503c299e879c9ff9195690..3a0c44be9f18c374c72b512d5f1dfe674d11e3f2 100644 --- a/app/routes/document_routes.py +++ b/app/routes/document_routes.py @@ -6,14 +6,30 @@ import aiofiles import aiofiles.os from shutil import copyfileobj from typing import List, Iterable -from fastapi import APIRouter, Request, UploadFile, HTTPException, File, Form, Body, Query, status +from fastapi import ( + APIRouter, + Request, + UploadFile, + HTTPException, + File, + Form, + Body, + Query, + status, +) from langchain_core.documents import Document from langchain_core.runnables import run_in_executor from langchain_text_splitters import RecursiveCharacterTextSplitter +from functools import lru_cache from app.config import logger, vector_store, RAG_UPLOAD_DIR, CHUNK_SIZE, CHUNK_OVERLAP from app.constants import ERROR_MESSAGES -from app.models import StoreDocument, QueryRequestBody, DocumentResponse, QueryMultipleBody +from app.models import ( + StoreDocument, + QueryRequestBody, + DocumentResponse, + QueryMultipleBody, +) from app.services.vector_store.async_pg_vector import AsyncPgVector from app.utils.document_loader import get_loader, clean_text, process_documents from app.utils.health import is_health_ok @@ -135,6 +151,12 @@ async def delete_documents(document_ids: List[str] = Body(...)): raise HTTPException(status_code=500, detail=str(e)) +# Cache query embeddings per exact query string (LRU, up to 128 entries); a cache hit returns the same object to every caller, so treat the result as read-only +@lru_cache(maxsize=128) +def get_cached_query_embedding(query: str): + return vector_store.embedding_function.embed_query(query) + + @router.post("/query") async def query_embeddings_by_file_id( body: QueryRequestBody, @@ -150,7 +172,7 @@ async def query_embeddings_by_file_id( authorized_documents = [] try: - embedding = vector_store.embedding_function.embed_query(body.query) + embedding = get_cached_query_embedding(body.query) if isinstance(vector_store, AsyncPgVector): documents = await run_in_executor( 
@@ -543,7 +565,7 @@ async def embed_file_upload( async def query_embeddings_by_file_ids(body: QueryMultipleBody): try: # Get the embedding of the query text - embedding = vector_store.embedding_function.embed_query(body.query) + embedding = get_cached_query_embedding(body.query) # Perform similarity search with the query embedding and filter by the file_ids in metadata if isinstance(vector_store, AsyncPgVector): @@ -582,4 +604,3 @@ async def query_embeddings_by_file_ids(body: QueryMultipleBody): traceback.format_exc(), ) raise HTTPException(status_code=500, detail=str(e)) - diff --git a/requirements.lite.txt b/requirements.lite.txt index 4e9f4d3455d3464032e30c0651f7237f71fa4970..5e0d13b6d8fa1fe7b5786eb8129f49b9f9929d4a 100644 --- a/requirements.lite.txt +++ b/requirements.lite.txt @@ -4,7 +4,7 @@ langchain-openai==0.2.11 langchain-core==0.3.25 sqlalchemy==2.0.28 python-dotenv==1.0.1 -fastapi==0.110.0 +fastapi==0.115.12 psycopg2-binary==2.9.9 pgvector==0.2.5 uvicorn==0.28.0 @@ -24,7 +24,7 @@ rapidocr-onnxruntime==1.3.24 opencv-python-headless==4.9.0.80 pymongo==4.6.3 langchain-mongodb==0.2.0 -cryptography==43.0.1 +cryptography==44.0.1 python-magic==0.4.27 python-pptx==0.6.23 xlrd==2.0.1 diff --git a/requirements.txt b/requirements.txt index 4047311eb1c5be53886ac09b61216637ce9cfbb7..ef964e0212e6808d8a5d031a64d97b29315f1fb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ langchain_text_splitters==0.3.3 boto3==1.34.144 sqlalchemy==2.0.28 python-dotenv==1.0.1 -fastapi==0.110.0 +fastapi==0.115.12 psycopg2-binary==2.9.9 pgvector==0.2.5 uvicorn==0.28.0 @@ -30,9 +30,8 @@ pymongo==4.6.3 langchain-mongodb==0.2.0 langchain-ollama==0.2.0 langchain-huggingface==0.1.0 -cryptography==43.0.1 +cryptography==44.0.1 python-magic==0.4.27 python-pptx==0.6.23 xlrd==2.0.1 -pydantic==2.9.2 -starlette==0.36.3 \ No newline at end of file +pydantic==2.9.2 \ No newline at end of file