-
Notifications
You must be signed in to change notification settings - Fork 1
/
.env
34 lines (32 loc) · 1.3 KB
/
.env
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# common to both Python programs (ingest.py and query.py)
PERSIST_DIRECTORY=db
COLLECTION_NAME=langchain
EMBEDDINGS_MODEL_NAME=intfloat/multilingual-e5-large
#ingest.py
SOURCE_DIRECTORY=source_documents
APPEND_DIRECTORY=append_documents
TEXT_SPLITTER=SpacyTextSplitter
TEXT_SPLITTER_PARAMETERS={'chunk_size':1000,'chunk_overlap':0,'pipeline':'de_core_news_lg'}
#query.py
# tuned Mistral is better: download model from https://huggingface.co/TheBloke/em_german_leo_mistral-GGUF
# MODEL_PATH=./models/em_german_leo_mistral.Q5_K_M.gguf
# yet another multilingual alternative is Mistral "tiny" v0.2: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF
MODEL_PATH=/Users/andi/Projects/models.llama.cpp/mistral-instruct-7b-quantized-gguf/mistral-instruct-7b-q5k-small.gguf
# Mistral 7B Instruct v0.2 has a 32k-token context limit
MODEL_N_CTX=24576
MODEL_TEMP=0.05
MAX_TOKENS=-2
# 3 items below need edits for different hosts/models
# number of CPU threads to use when running on CPU - 4 for Apple M2
MODEL_THREADS=4
# number of GPU-layers, depends on model (see model-card), 99 is a good default
MODEL_GPU=99
# prompt-evaluation speed in tokens/s: turn on "print details" below and copy the prompt-eval-time tokens/s value here
MODEL_PROMPT_PER_S=70
# context settings
MAX_CONTEXT_CHUNKS=6
MAX_CONTEXT_DISTANCE=0.454
MAX_RESORT_DISTANCE=0.08
# print details for debugging
HIDE_SOURCE=False
HIDE_SOURCE_DETAILS=True