forked from microsoft/kernel-memory
-
Notifications
You must be signed in to change notification settings - Fork 0
/
file9-settings.json
116 lines (116 loc) · 5.29 KB
/
file9-settings.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
"Logging": {
"LogLevel": {
"Default": "Warning",
// Examples: how to handle logs differently by class
// "Microsoft.KernelMemory.Handlers.TextExtractionHandler": "Information",
// "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information",
// "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information",
// "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information",
// "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information",
// "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information",
"Microsoft.AspNetCore": "Warning"
}
},
"KernelMemory": {
"Services": {
"AzureOpenAIText": {
// "APIKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
"Deployment": "",
// The max number of tokens supported by the deployed model
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 16384,
// "ChatCompletion" or "TextCompletion"
"APIType": "ChatCompletion",
"MaxRetries": 10
},
"AzureOpenAIEmbedding": {
// "APIKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
"Deployment": "",
// The max number of tokens supported by the deployed model
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191
},
"OpenAI": {
// Name of the model used to generate text (text completion or chat completion)
"TextModel": "gpt-3.5-turbo-16k",
// The max number of tokens supported by the text model.
"TextModelMaxTokenTotal": 16384,
// Type of text generation. By default, the type is autodetected from the model name.
// Possible values: "Auto", "TextCompletion", "Chat"
"TextGenerationType": "Auto",
// Name of the model used to generate text embeddings
"EmbeddingModel": "text-embedding-ada-002",
// The max number of tokens supported by the embedding model
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
"EmbeddingModelMaxTokenTotal": 8191,
// OpenAI API Key
"APIKey": "",
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs)
"OrgId": "",
// Endpoint to use. By default the system uses 'https://api.openai.com/v1'.
// Change this to use proxies or services compatible with the OpenAI HTTP protocol, such as LM Studio.
"Endpoint": "",
// How many times to retry in case of throttling
"MaxRetries": 10
},
"LlamaSharp": {
// Path to the model file, e.g. "llama-2-7b-chat.Q6_K.gguf"
"ModelPath": "",
// Max number of tokens supported by the model
"MaxTokenTotal": 4096
// Optional parameters
// "GpuLayerCount": 32,
// "Seed": 1337,
},
"AzureAIDocIntel": {
// "APIKey" or "AzureIdentity".
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
// Required when Auth == APIKey
"APIKey": "",
"Endpoint": ""
},
"AzureAISearch": {
// "APIKey" or "AzureIdentity". For other options see <AzureAISearchConfig>.
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
"Endpoint": "https://<...>",
"APIKey": ""
}
},
"Retrieval": {
"SearchClient": {
// Maximum number of tokens accepted by the LLM used to generate answers.
// The number includes the tokens used for the answer, e.g. when using
// GPT4-32k, set this number to 32768.
// If the value is not set or less than one, SearchClient will use the
// max amount of tokens supported by the model in use.
"MaxAskPromptSize": -1,
// Maximum number of relevant sources to consider when generating an answer.
// The value is also used as the max number of results returned by SearchAsync
// when passing a limit less than or equal to zero.
"MaxMatchesCount": 100,
// How many tokens to reserve for the answer generated by the LLM.
// E.g. if the LLM supports max 4000 tokens, and AnswerTokens is 300, then
// the prompt sent to the LLM will contain at most 3700 tokens, composed of
// prompt + question + grounding information retrieved from memory.
"AnswerTokens": 300,
// Text to return when the LLM cannot produce an answer.
"EmptyAnswer": "INFO NOT FOUND"
}
}
}
}