Merged
2 changes: 0 additions & 2 deletions .github/workflows/gh-pages.yml
@@ -15,8 +15,6 @@ jobs:
GH_PAGES: 1
DEBUG: 1
GRAPHRAG_API_KEY: ${{ secrets.GRAPHRAG_API_KEY }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}

steps:
- uses: actions/checkout@v4
2 changes: 0 additions & 2 deletions .github/workflows/python-notebook-tests.yml
@@ -38,8 +38,6 @@ jobs:
env:
DEBUG: 1
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_NOTEBOOK_KEY }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}

runs-on: ${{ matrix.os }}
steps:
12 changes: 0 additions & 12 deletions .github/workflows/python-smoke-tests.yml
@@ -37,20 +37,8 @@ jobs:
fail-fast: false # Continue running all jobs even if one fails
env:
DEBUG: 1
- GRAPHRAG_LLM_TYPE: "azure_openai_chat"
- GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
- GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
- GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
- GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
- GRAPHRAG_LLM_MODEL: ${{ secrets.GRAPHRAG_LLM_MODEL }}
- GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.GRAPHRAG_EMBEDDING_MODEL }}
- # We have Windows + Linux runners in 3.10, so we need to divide the rate limits by 2
- GRAPHRAG_LLM_TPM: 200_000 # 400_000 / 2
- GRAPHRAG_LLM_RPM: 1_000 # 2_000 / 2
- GRAPHRAG_EMBEDDING_TPM: 225_000 # 450_000 / 2
- GRAPHRAG_EMBEDDING_RPM: 1_000 # 2_000 / 2
# Azure AI Search config
AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20251001224059977938.json
@@ -0,0 +1,4 @@
+ {
+ "type": "patch",
+ "description": "Fix Azure auth scope issue with LiteLLM."
+ }
5 changes: 0 additions & 5 deletions DEVELOPING.md
@@ -119,8 +119,3 @@ and then in your bashrc, add
Make sure you have python3.10-dev installed or more generally `python<version>-dev`

`sudo apt-get install python3.10-dev`
-
- ### LLM call constantly exceeds TPM, RPM or time limits
-
- `GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
- to reduce concurrency. Please refer to the [Configuration Documents](https://microsoft.github.io/graphrag/config/overview/)
5 changes: 0 additions & 5 deletions docs/developing.md
@@ -77,8 +77,3 @@ Make sure llvm-9 and llvm-9-dev are installed:
and then in your bashrc, add

`export LLVM_CONFIG=/usr/bin/llvm-config-9`
-
- ### LLM call constantly exceeds TPM, RPM or time limits
-
- `GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
- to reduce concurrency. Please refer to the [Configuration Documents](config/overview.md)
10 changes: 1 addition & 9 deletions docs/prompt_tuning/auto_prompt_tuning.md
@@ -79,15 +79,7 @@ After that, it uses one of the following selection methods to pick a sample to w

## Modify Env Vars

- After running auto tuning, you should modify the following environment variables (or config variables) to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.
-
- - `GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE` = "prompts/entity_extraction.txt"
-
- - `GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE` = "prompts/community_report.txt"
-
- - `GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE` = "prompts/summarize_descriptions.txt"
-
- or in your yaml config file:
+ After running auto tuning, you should modify the following config variables to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.

```yaml
entity_extraction:
2 changes: 1 addition & 1 deletion graphrag/config/models/graph_rag_config.py
@@ -107,7 +107,7 @@ def _validate_retry_services(self) -> None:

_ = retry_factory.create(
strategy=model.retry_strategy,
- max_attempts=model.max_retries,
+ max_retries=model.max_retries,
max_retry_wait=model.max_retry_wait,
)

75 changes: 36 additions & 39 deletions graphrag/index/validate_config.py
@@ -15,42 +15,39 @@


def validate_config_names(parameters: GraphRagConfig) -> None:
"""Validate config file for LLM deployment name typos."""
# Validate Chat LLM configs
# TODO: Replace default_chat_model with a way to select the model
default_llm_settings = parameters.get_language_model_config("default_chat_model")

llm = ModelManager().register_chat(
name="test-llm",
model_type=default_llm_settings.type,
config=default_llm_settings,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)

try:
asyncio.run(llm.achat("This is an LLM connectivity test. Say Hello World"))
logger.info("LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"LLM configuration error detected. Exiting...\n{e}") # noqa
sys.exit(1)

# Validate Embeddings LLM configs
embedding_llm_settings = parameters.get_language_model_config(
parameters.embed_text.model_id
)

embed_llm = ModelManager().register_embedding(
name="test-embed-llm",
model_type=embedding_llm_settings.type,
config=embedding_llm_settings,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)

try:
asyncio.run(embed_llm.aembed_batch(["This is an LLM Embedding Test String"]))
logger.info("Embedding LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"Embedding LLM configuration error detected. Exiting...\n{e}") # noqa
sys.exit(1)
"""Validate config file for model deployment name typos, by running a quick test message for each."""
for id, config in parameters.models.items():
if config.type in ["chat", "azure_openai", "openai"]:
llm = ModelManager().register_chat(
name="test-llm",
model_type=config.type,
config=config,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)
try:
asyncio.run(
llm.achat("This is an LLM connectivity test. Say Hello World")
)
logger.info("LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"LLM configuration error detected.\n{e}") # noqa
print(f"Failed to validate language model ({id}) params", e) # noqa: T201
sys.exit(1)
elif config.type in ["embedding", "azure_openai_embedding", "openai_embedding"]:
embed_llm = ModelManager().register_embedding(
name="test-embed-llm",
model_type=config.type,
config=config,
callbacks=NoopWorkflowCallbacks(),
cache=None,
)
try:
asyncio.run(
embed_llm.aembed_batch(["This is an LLM Embedding Test String"])
)
logger.info("Embedding LLM Config Params Validated")
except Exception as e: # noqa: BLE001
logger.error(f"Embedding configuration error detected.\n{e}") # noqa
print(f"Failed to validate embedding model ({id}) params", e) # noqa: T201
sys.exit(1)
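Taken together, the new `validate_config_names` replaces the single hard-coded "default_chat_model" probe with a loop over every configured model, sending one tiny test request per chat or embedding entry. A minimal self-contained sketch of that probe-each-model pattern (the `ProbeResult` and `probe_models` names below are illustrative, not part of GraphRAG):

```python
import asyncio
from dataclasses import dataclass
from typing import Any, Awaitable, Callable

CHAT_TYPES = {"chat", "azure_openai", "openai"}
EMBEDDING_TYPES = {"embedding", "azure_openai_embedding", "openai_embedding"}


@dataclass
class ProbeResult:  # hypothetical helper, not a GraphRAG type
    model_id: str
    ok: bool
    error: str | None = None


def probe_models(
    models: dict[str, Any],
    make_chat: Callable[[Any], Callable[[str], Awaitable[Any]]],
    make_embed: Callable[[Any], Callable[[list[str]], Awaitable[Any]]],
) -> list[ProbeResult]:
    """Send one tiny request per configured model and collect pass/fail."""
    results: list[ProbeResult] = []
    for model_id, config in models.items():
        try:
            if config.type in CHAT_TYPES:
                asyncio.run(make_chat(config)("connectivity test"))
            elif config.type in EMBEDDING_TYPES:
                asyncio.run(make_embed(config)(["connectivity test"]))
            results.append(ProbeResult(model_id, ok=True))
        except Exception as e:  # noqa: BLE001 - record failure, keep probing
            results.append(ProbeResult(model_id, ok=False, error=str(e)))
    return results
```

Unlike this sketch, the PR's version exits on the first failure via `sys.exit(1)`, which suits its role as a pre-flight check for the CLI.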
3 changes: 2 additions & 1 deletion graphrag/language_model/providers/litellm/chat_model.py
@@ -86,9 +86,10 @@ def _create_base_completions(
msg = "Azure Managed Identity authentication is only supported for Azure models."
raise ValueError(msg)

base_args["azure_scope"] = base_args.pop("audience")
base_args["azure_ad_token_provider"] = get_bearer_token_provider(
DefaultAzureCredential(),
- COGNITIVE_SERVICES_AUDIENCE,
+ model_config.audience or COGNITIVE_SERVICES_AUDIENCE,
)

def _base_completion(**kwargs: Any) -> ModelResponse | CustomStreamWrapper:
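This change is what the changelog entry refers to: LiteLLM expects the token scope in `azure_scope`, so the config's `audience` is popped into that argument, and the same value now seeds the bearer-token provider instead of the hard-coded default. The embedding provider below gets the identical fix. A sketch of the underlying `azure-identity` pattern, assuming the standard Cognitive Services scope string for the `COGNITIVE_SERVICES_AUDIENCE` constant:

```python
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Assumed value; graphrag's COGNITIVE_SERVICES_AUDIENCE constant should
# resolve to this standard Cognitive Services scope.
DEFAULT_SCOPE = "https://cognitiveservices.azure.com/.default"

audience: str | None = None  # stand-in for model_config.audience

# A zero-argument callable that returns a fresh AAD bearer token each call;
# LiteLLM invokes it per request when passed as azure_ad_token_provider.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    audience or DEFAULT_SCOPE,
)

# Requires a signed-in Azure identity (CLI login, managed identity, etc.):
# print(token_provider())
```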
@@ -72,9 +72,10 @@ def _create_base_embeddings(
msg = "Azure Managed Identity authentication is only supported for Azure models."
raise ValueError(msg)

base_args["azure_scope"] = base_args.pop("audience")
base_args["azure_ad_token_provider"] = get_bearer_token_provider(
DefaultAzureCredential(),
- COGNITIVE_SERVICES_AUDIENCE,
+ model_config.audience or COGNITIVE_SERVICES_AUDIENCE,
)

def _base_embedding(**kwargs: Any) -> EmbeddingResponse:
@@ -39,7 +39,7 @@ def with_retries(
retry_factory = RetryFactory()
retry_service = retry_factory.create(
strategy=model_config.retry_strategy,
- max_attempts=model_config.max_retries,
+ max_retries=model_config.max_retries,
max_retry_wait=model_config.max_retry_wait,
)

@@ -21,20 +21,20 @@ class ExponentialRetry(Retry):
def __init__(
self,
*,
- max_attempts: int = 5,
+ max_retries: int = 5,
base_delay: float = 2.0,
jitter: bool = True,
**kwargs: Any,
):
- if max_attempts <= 0:
- msg = "max_attempts must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

if base_delay <= 1.0:
msg = "base_delay must be greater than 1.0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ self._max_retries = max_retries
self._base_delay = base_delay
self._jitter = jitter

@@ -46,15 +46,15 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay *= self._base_delay
logger.exception(
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
time.sleep(delay + (self._jitter * random.uniform(0, 1))) # noqa: S311

@@ -70,14 +70,14 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay *= self._base_delay
logger.exception(
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"ExponentialRetry: Request failed, retrying, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
await asyncio.sleep(delay + (self._jitter * random.uniform(0, 1))) # noqa: S311
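The `ExponentialRetry` rename is mechanical (`max_attempts` becomes `max_retries` everywhere), and the backoff behavior is unchanged: the delay is multiplied by `base_delay` before each sleep, with up to one second of uniform jitter. A sketch of the resulting schedule, assuming the loop's starting delay is 1.0 (the initializer is outside the visible hunk):

```python
import random


def exponential_delays(
    max_retries: int = 5, base_delay: float = 2.0, jitter: bool = True
) -> list[float]:
    """Delays ExponentialRetry would sleep through before giving up."""
    delays, delay = [], 1.0  # assumed starting delay of 1.0
    for _ in range(max_retries):
        delay *= base_delay  # multiply before each sleep, as in the class
        delays.append(delay + (jitter * random.uniform(0, 1)))
    return delays


print(exponential_delays(jitter=False))  # [2.0, 4.0, 8.0, 16.0, 32.0]
```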
@@ -21,16 +21,20 @@ def __init__(
self,
*,
max_retry_wait: float,
- max_attempts: int = 5,
+ max_retries: int = 5,
**kwargs: Any,
):
- if max_attempts <= 0 or max_retry_wait <= 0:
- msg = "max_attempts and max_retry_wait must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ if max_retry_wait <= 0:
+ msg = "max_retry_wait must be greater than 0."
+ raise ValueError(msg)
+
+ self._max_retries = max_retries
self._max_retry_wait = max_retry_wait
- self._increment = max_retry_wait / max_attempts
+ self._increment = max_retry_wait / max_retries

def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
"""Retry a synchronous function."""
@@ -40,15 +44,15 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay += self._increment
logger.exception(
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
time.sleep(delay)

@@ -64,14 +68,14 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
delay += self._increment
logger.exception(
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"IncrementalWaitRetry: Request failed, retrying after incremental delay, retries={retries}, delay={delay}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
await asyncio.sleep(delay)
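`IncrementalWaitRetry` gets the same rename plus split validation messages; its schedule spreads `max_retry_wait` evenly, since the increment is `max_retry_wait / max_retries` and each failure adds one increment, so the final sleep equals `max_retry_wait`. A sketch of the arithmetic, assuming the delay accumulator starts at 0.0 (the initializer is not in the visible hunk):

```python
def incremental_delays(max_retry_wait: float, max_retries: int = 5) -> list[float]:
    """Delays IncrementalWaitRetry would sleep through before giving up."""
    increment = max_retry_wait / max_retries
    delays, delay = [], 0.0  # assumed starting delay of 0.0
    for _ in range(max_retries):
        delay += increment  # add one increment per failed attempt
        delays.append(delay)
    return delays


print(incremental_delays(10.0))  # [2.0, 4.0, 6.0, 8.0, 10.0]
```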
@@ -18,14 +18,14 @@ class NativeRetry(Retry):
def __init__(
self,
*,
- max_attempts: int = 5,
+ max_retries: int = 5,
**kwargs: Any,
):
- if max_attempts <= 0:
- msg = "max_attempts must be greater than 0."
+ if max_retries <= 0:
+ msg = "max_retries must be greater than 0."
raise ValueError(msg)

- self._max_attempts = max_attempts
+ self._max_retries = max_retries

def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
"""Retry a synchronous function."""
@@ -34,14 +34,14 @@ def retry(self, func: Callable[..., Any], **kwargs: Any) -> Any:
try:
return func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
logger.exception(
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)

async def aretry(
@@ -55,12 +55,12 @@ async def aretry(
try:
return await func(**kwargs)
except Exception as e:
- if retries >= self._max_attempts:
+ if retries >= self._max_retries:
logger.exception(
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Max retries exceeded, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)
raise
retries += 1
logger.exception(
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_attempts}, exception={e}", # noqa: G004, TRY401
f"NativeRetry: Request failed, immediately retrying, retries={retries}, max_retries={self._max_retries}, exception={e}", # noqa: G004, TRY401
)