Merged
2 changes: 0 additions & 2 deletions .github/workflows/gh-pages.yml
@@ -15,8 +15,6 @@ jobs:
GH_PAGES: 1
DEBUG: 1
GRAPHRAG_API_KEY: ${{ secrets.GRAPHRAG_API_KEY }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}

steps:
- uses: actions/checkout@v4
2 changes: 0 additions & 2 deletions .github/workflows/python-notebook-tests.yml
@@ -38,8 +38,6 @@ jobs:
env:
DEBUG: 1
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_NOTEBOOK_KEY }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}

runs-on: ${{ matrix.os }}
steps:
12 changes: 0 additions & 12 deletions .github/workflows/python-smoke-tests.yml
@@ -37,20 +37,8 @@ jobs:
fail-fast: false # Continue running all jobs even if one fails
env:
DEBUG: 1
- GRAPHRAG_LLM_TYPE: "azure_openai_chat"
- GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
- GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
- GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
- GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}
- # We have Windows + Linux runners in 3.10, so we need to divide the rate limits by 2
- GRAPHRAG_LLM_TPM: 200_000 # 400_000 / 2
- GRAPHRAG_LLM_RPM: 1_000 # 2_000 / 2
- GRAPHRAG_EMBEDDING_TPM: 225_000 # 450_000 / 2
- GRAPHRAG_EMBEDDING_RPM: 1_000 # 2_000 / 2
# Azure AI Search config
AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20251001224059977938.json
@@ -0,0 +1,4 @@
+ {
+ "type": "patch",
+ "description": "Fix Azure auth scope issue with LiteLLM."
+ }
5 changes: 0 additions & 5 deletions DEVELOPING.md
@@ -119,8 +119,3 @@ and then in your bashrc, add
Make sure you have python3.10-dev installed or more generally `python<version>-dev`

`sudo apt-get install python3.10-dev`
-
- ### LLM call constantly exceeds TPM, RPM or time limits
-
- `GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
- to reduce concurrency. Please refer to the [Configuration Documents](https://microsoft.github.io/graphrag/config/overview/)
5 changes: 0 additions & 5 deletions docs/developing.md
@@ -77,8 +77,3 @@ Make sure llvm-9 and llvm-9-dev are installed:
and then in your bashrc, add

`export LLVM_CONFIG=/usr/bin/llvm-config-9`
-
- ### LLM call constantly exceeds TPM, RPM or time limits
-
- `GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
- to reduce concurrency. Please refer to the [Configuration Documents](config/overview.md)
10 changes: 1 addition & 9 deletions docs/prompt_tuning/auto_prompt_tuning.md
@@ -79,15 +79,7 @@ After that, it uses one of the following selection methods to pick a sample to w

## Modify Env Vars

- After running auto tuning, you should modify the following environment variables (or config variables) to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.
-
- - `GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE` = "prompts/entity_extraction.txt"
-
- - `GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE` = "prompts/community_report.txt"
-
- - `GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE` = "prompts/summarize_descriptions.txt"
-
- or in your yaml config file:
+ After running auto tuning, you should modify the following config variables to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.

```yaml
entity_extraction:
2 changes: 1 addition & 1 deletion graphrag/config/models/graph_rag_config.py
@@ -107,7 +107,7 @@ def _validate_retry_services(self) -> None:

_ = retry_factory.create(
strategy=model.retry_strategy,
- max_attempts=model.max_retries,
+ max_retries=model.max_retries,
max_retry_wait=model.max_retry_wait,
)

75 changes: 36 additions & 39 deletions graphrag/index/validate_config.py
@@ -15,42 +15,39 @@


def validate_config_names(parameters: GraphRagConfig) -> None:
"""Validate config file for LLM deployment name typos."""
# Validate Chat LLM configs
# TODO: Replace default_chat_model with a way to select the model
default_llm_settings = parameters.get_language_model_config("default_chat_model")

llm = ModelManager().register_chat(
name="test-llm",
model_type=default_llm_settings.type,
config=default_llm_settings,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)

try:
asyncio.run(llm.achat("This is an LLM connectivity test. Say Hello World"))
logger.info("LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"LLM configuration error detected. Exiting...\n{e}") # noqa
sys.exit(1)

# Validate Embeddings LLM configs
embedding_llm_settings = parameters.get_language_model_config(
parameters.embed_text.model_id
)

embed_llm = ModelManager().register_embedding(
name="test-embed-llm",
model_type=embedding_llm_settings.type,
config=embedding_llm_settings,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)

try:
asyncio.run(embed_llm.aembed_batch(["This is an LLM Embedding Test String"]))
logger.info("Embedding LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"Embedding LLM configuration error detected. Exiting...\n{e}") # noqa
sys.exit(1)
"""Validate config file for model deployment name typos, by running a quick test message for each."""
for id, config in parameters.models.items():
if config.type in ["chat", "azure_openai", "openai"]:
llm = ModelManager().register_chat(
name="test-llm",
model_type=config.type,
config=config,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)
try:
asyncio.run(
llm.achat("This is an LLM connectivity test. Say Hello World")
)
logger.info("LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"LLM configuration error detected.\n{e}") # noqa
print(f"Failed to validate language model ({id}) params", e) # noqa: T201
sys.exit(1)
elif config.type in ["embedding", "azure_openai_embedding", "openai_embedding"]:
embed_llm = ModelManager().register_embedding(
name="test-embed-llm",
model_type=config.type,
config=config,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)
try:
asyncio.run(
embed_llm.aembed_batch(["This is an LLM Embedding Test String"])
)
logger.info("Embedding LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"Embedding configuration error detected.\n{e}") # noqa
print(f"Failed to validate embedding model ({id}) params", e) # noqa: T201
sys.exit(1)
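Taken together, the new `validate_config_names` replaces the single hard-coded "default_chat_model" probe with a loop over every configured model, sending one tiny test request per chat or embedding entry. A minimal self-contained sketch of that probe-each-model pattern (the `ProbeResult` and `probe_models` names below are illustrative, not part of GraphRAG):

```python
import asyncio
from dataclasses import dataclass
from typing import Any, Awaitable, Callable

CHAT_TYPES = {"chat", "azure_openai", "openai"}
EMBEDDING_TYPES = {"embedding", "azure_openai_embedding", "openai_embedding"}


@dataclass
class ProbeResult:  # hypothetical helper, not a GraphRAG type
    model_id: str
    ok: bool
    error: str | None = None


def probe_models(
    models: dict[str, Any],
    make_chat: Callable[[Any], Callable[[str], Awaitable[Any]]],
    make_embed: Callable[[Any], Callable[[list[str]], Awaitable[Any]]],
) -> list[ProbeResult]:
    """Send one tiny request per configured model and collect pass/fail."""
    results: list[ProbeResult] = []
    for model_id, config in models.items():
        try:
            if config.type in CHAT_TYPES:
                asyncio.run(make_chat(config)("connectivity test"))
            elif config.type in EMBEDDING_TYPES:
                asyncio.run(make_embed(config)(["connectivity test"]))
            results.append(ProbeResult(model_id, ok=True))
        except Exception as e:  # noqa: BLE001 - record failure, keep probing
            results.append(ProbeResult(model_id, ok=False, error=str(e)))
    return results
```

Unlike this sketch, the PR's version exits on the first failure via `sys.exit(1)`, which suits its role as a pre-flight check for the CLI.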
3 changes: 2 additions & 1 deletion graphrag/language_model/providers/litellm/chat_model.py
@@ -86,9 +86,10 @@ def _create_base_completions(
msg = "Azure Managed Identity authentication is only supported for Azure models."
raise ValueError(msg)

base_args["azure_scope"] = base_args.pop("audience")
base_args["azure_ad_token_provider"] = get_bearer_token_provider(
DefaultAzureCredential(),
- COGNITIVE_SERVICES_AUDIENCE,
+ model_config.audience or COGNITIVE_SERVICES_AUDIENCE,
)

def _base_completion(**kwargs: Any) -> ModelResponse | CustomStreamWrapper:
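This change is what the changelog entry refers to: LiteLLM expects the token scope in `azure_scope`, so the config's `audience` is popped into that argument, and the same value now seeds the bearer-token provider instead of the hard-coded default. The embedding provider below gets the identical fix. A sketch of the underlying `azure-identity` pattern, assuming the standard Cognitive Services scope string for the `COGNITIVE_SERVICES_AUDIENCE` constant:

```python
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Assumed value; graphrag's COGNITIVE_SERVICES_AUDIENCE constant should
# resolve to this standard Cognitive Services scope.
DEFAULT_SCOPE = "https://cognitiveservices.azure.com/.default"

audience: str | None = None  # stand-in for model_config.audience

# A zero-argument callable that returns a fresh AAD bearer token each call;
# LiteLLM invokes it per request when passed as azure_ad_token_provider.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    audience or DEFAULT_SCOPE,
)

# Requires a signed-in Azure identity (CLI login, managed identity, etc.):
# print(token_provider())
```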
@@ -72,9 +72,10 @@ def _create_base_embeddings(
msg = "Azure Managed Identity authentication is only supported for Azure models."
raise ValueError(msg)

base_args["azure_scope"] = base_args.pop("audience")
base_args["azure_ad_token_provider"] = get_bearer_token_provider(
DefaultAzureCredential(),
- COGNITIVE_SERVICES_AUDIENCE,
+ model_config.audience or COGNITIVE_SERVICES_AUDIENCE,
)

def _base_embedding(**kwargs: Any) -> EmbeddingResponse:
@@ -39,7 +39,7 @@ def with_retries(
retry_factory = RetryFactory()
retry_service = retry_factory.create(
strategy=model_config.retry_strategy,
- max_attempts=model_config.max_retries,
+ max_retries=model_config.max_retries,
max_retry_wait=model_config.max_retry_wait,
)

@@ -21,20 +21,20 @@ class ExponentialRetry(Retry):
def __init__(
self,
*,
- max_attempts: int = 5,
+ max_retries: int = 5,
base_delay: float = 2.0,
jitter: bool = True,
**kwargs: Any,
):
- if max_attempts <= 0:
- msg = "max_attempts must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

if base_delay <= 1.0:
msg = "base_delay must be greater than 1.0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ self._max_retries = max_retries
self._base_delay = base_delay
self._jitter = jitter

@@ -46,15 +46,15 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay *= self._base_delay
logger.exception(
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
time.sleep(delay + (self._jitter * random.uniform(0, 1))) # noqa: S311

@@ -70,14 +70,14 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay *= self._base_delay
logger.exception(
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
await asyncio.sleep(delay + (self._jitter * random.uniform(0, 1))) # noqa: S311
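The `ExponentialRetry` rename is mechanical (`max_attempts` becomes `max_retries` everywhere), and the backoff behavior is unchanged: the delay is multiplied by `base_delay` before each sleep, with up to one second of uniform jitter. A sketch of the resulting schedule, assuming the loop's starting delay is 1.0 (the initializer is outside the visible hunk):

```python
import random


def exponential_delays(
    max_retries: int = 5, base_delay: float = 2.0, jitter: bool = True
) -> list[float]:
    """Delays ExponentialRetry would sleep through before giving up."""
    delays, delay = [], 1.0  # assumed starting delay of 1.0
    for _ in range(max_retries):
        delay *= base_delay  # multiply before each sleep, as in the class
        delays.append(delay + (jitter * random.uniform(0, 1)))
    return delays


print(exponential_delays(jitter=False))  # [2.0, 4.0, 8.0, 16.0, 32.0]
```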
@@ -21,16 +21,20 @@ def __init__(
self,
*,
max_retry_wait: float,
- max_attempts: int = 5,
+ max_retries: int = 5,
**kwargs: Any,
):
- if max_attempts <= 0 or max_retry_wait <= 0:
- msg = "max_attempts and max_retry_wait must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ if max_retry_wait <= 0:
+ msg = "max_retry_wait must be greater than 0."
+ raise ValueError(msg)
+
+ self._max_retries = max_retries
self._max_retry_wait = max_retry_wait
- self._increment = max_retry_wait / max_attempts
+ self._increment = max_retry_wait / max_retries

def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
"""Retry a synchronous function."""
@@ -40,15 +44,15 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay += self._increment
logger.exception(
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
time.sleep(delay)

@@ -64,14 +68,14 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay += self._increment
logger.exception(
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
await asyncio.sleep(delay)
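`IncrementalWaitRetry` gets the same rename plus split validation messages; its schedule spreads `max_retry_wait` evenly, since the increment is `max_retry_wait / max_retries` and each failure adds one increment, so the final sleep equals `max_retry_wait`. A sketch of the arithmetic, assuming the delay accumulator starts at 0.0 (the initializer is not in the visible hunk):

```python
def incremental_delays(max_retry_wait: float, max_retries: int = 5) -> list[float]:
    """Delays IncrementalWaitRetry would sleep through before giving up."""
    increment = max_retry_wait / max_retries
    delays, delay = [], 0.0  # assumed starting delay of 0.0
    for _ in range(max_retries):
        delay += increment  # add one increment per failed attempt
        delays.append(delay)
    return delays


print(incremental_delays(10.0))  # [2.0, 4.0, 6.0, 8.0, 10.0]
```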
@@ -18,14 +18,14 @@ class NativeRetry(Retry):
def __init__(
self,
*,
- max_attempts: int = 5,
+ max_retries: int = 5,
**kwargs: Any,
):
- if max_attempts <= 0:
- msg = "max_attempts must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ self._max_retries = max_retries

def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
"""Retry a synchronous function."""
@@ -34,14 +34,14 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
logger.exception(
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)

async def aretry(
@@ -55,12 +55,12 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
logger.exception(
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)