diff --git a/.gitignore b/.gitignore index 815cb55..81bdac1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ venv/ .env -.pytest_cache/ \ No newline at end of file +.pytest_cache/ +.vscode \ No newline at end of file diff --git a/helper_functions.py b/helper_functions.py index d51d202..8302213 100644 --- a/helper_functions.py +++ b/helper_functions.py @@ -200,7 +200,7 @@ def groq_pdf(pdf,model): model_name=model ) text = "".join(page.extract_text() for page in PdfReader(pdf).pages) - text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000) + text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) chunks = text_splitter.split_text(text) vectorstore = FAISS.from_texts(chunks, embedding=google_embedding) retriever = vectorstore.as_retriever()