4 | 4 | # This source code is licensed under the terms described in the LICENSE file in |
5 | 5 | # the root directory of this source tree. |
6 | 6 |
7 | | -import json |
8 | | -from collections.abc import AsyncIterator |
| 7 | +from collections.abc import AsyncIterator, Iterable |
9 | 8 | from typing import Any |
10 | 9 |
11 | | -from botocore.client import BaseClient |
| 10 | +from openai import AuthenticationError, BadRequestError, NotFoundError |
12 | 11 |
13 | 12 | from llama_stack.apis.inference import ( |
14 | | - ChatCompletionRequest, |
15 | | - Inference, |
16 | | - OpenAIEmbeddingsResponse, |
17 | | -) |
18 | | -from llama_stack.apis.inference.inference import ( |
| 13 | + Model, |
19 | 14 | OpenAIChatCompletion, |
20 | 15 | OpenAIChatCompletionChunk, |
21 | | - OpenAICompletion, |
22 | 16 | OpenAIMessageParam, |
23 | 17 | OpenAIResponseFormatParam, |
24 | 18 | ) |
25 | | -from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig |
26 | | -from llama_stack.providers.utils.bedrock.client import create_bedrock_client |
27 | | -from llama_stack.providers.utils.inference.model_registry import ( |
28 | | - ModelRegistryHelper, |
29 | | -) |
30 | | -from llama_stack.providers.utils.inference.openai_compat import ( |
31 | | - get_sampling_strategy_options, |
32 | | -) |
33 | | -from llama_stack.providers.utils.inference.prompt_adapter import ( |
34 | | - chat_completion_request_to_prompt, |
35 | | -) |
36 | | - |
37 | | -from .models import MODEL_ENTRIES |
38 | | - |
39 | | -REGION_PREFIX_MAP = { |
40 | | - "us": "us.", |
41 | | - "eu": "eu.", |
42 | | - "ap": "ap.", |
43 | | -} |
44 | | - |
45 | | - |
46 | | -def _get_region_prefix(region: str | None) -> str: |
47 | | - # AWS requires region prefixes for inference profiles |
48 | | - if region is None: |
49 | | - return "us." # default to US when we don't know |
50 | | - |
51 | | - # Handle case insensitive region matching |
52 | | - region_lower = region.lower() |
53 | | - for prefix in REGION_PREFIX_MAP: |
54 | | - if region_lower.startswith(f"{prefix}-"): |
55 | | - return REGION_PREFIX_MAP[prefix] |
| 19 | +from llama_stack.log import get_logger |
| 20 | +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params |
| 21 | +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin |
| 22 | +from llama_stack.providers.utils.telemetry.tracing import get_current_span |
56 | 23 |
57 | | - # Fallback to US for anything we don't recognize |
58 | | - return "us." |
| 24 | +from .config import BedrockConfig |
59 | 25 |
| 26 | +logger = get_logger(name=__name__, category="inference::bedrock") |
60 | 27 |
61 | | -def _to_inference_profile_id(model_id: str, region: str = None) -> str: |
62 | | - # Return ARNs unchanged |
63 | | - if model_id.startswith("arn:"): |
64 | | - return model_id |
65 | 28 |
66 | | - # Return inference profile IDs that already have regional prefixes |
67 | | - if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()): |
68 | | - return model_id |
| 29 | +class BedrockInferenceAdapter(OpenAIMixin): |
| 30 | + """ |
| 31 | + Adapter for AWS Bedrock's OpenAI-compatible API endpoints. |
69 | 32 |
70 | | - # Default to US East when no region is provided |
71 | | - if region is None: |
72 | | - region = "us-east-1" |
| 33 | + Supports Llama models across regions and GPT-OSS models (us-west-2 only). |
73 | 34 |
74 | | - return _get_region_prefix(region) + model_id |
| 35 | + Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models |
| 36 | + for dynamic model discovery. Models must be pre-registered in the config. |
| 37 | + """ |
75 | 38 |
| 39 | + config: BedrockConfig |
| 40 | + provider_data_api_key_field: str = "aws_bedrock_api_key" |
76 | 41 |
77 | | -class BedrockInferenceAdapter( |
78 | | - ModelRegistryHelper, |
79 | | - Inference, |
80 | | -): |
81 | | - def __init__(self, config: BedrockConfig) -> None: |
82 | | - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) |
83 | | - self._config = config |
84 | | - self._client = None |
| 42 | + def get_api_key(self) -> str: |
| 43 | + """Get API key for OpenAI client.""" |
| 44 | + if not self.config.api_key: |
| 45 | + raise ValueError( |
| 46 | + "API key is not set. Please provide a valid API key in the " |
| 47 | + "provider config or via AWS_BEDROCK_API_KEY environment variable." |
| 48 | + ) |
| 49 | + return self.config.api_key |
85 | 50 |
86 | | - @property |
87 | | - def client(self) -> BaseClient: |
88 | | - if self._client is None: |
89 | | - self._client = create_bedrock_client(self._config) |
90 | | - return self._client |
| 51 | + def get_base_url(self) -> str: |
| 52 | + """Get base URL for OpenAI client.""" |
| 53 | + return f"https://bedrock-runtime.{self.config.region_name}.amazonaws.com/openai/v1" |
91 | 54 |
92 | | - async def initialize(self) -> None: |
93 | | - pass |
| 55 | + async def list_provider_model_ids(self) -> Iterable[str]: |
| 56 | + """ |
| 57 | + Bedrock's OpenAI-compatible endpoint does not support the /v1/models endpoint. |
| 58 | +        Returns an empty list because models must be pre-registered in the config.
| 59 | + """ |
| 60 | + return [] |
94 | 61 |
95 | | - async def shutdown(self) -> None: |
96 | | - if self._client is not None: |
97 | | - self._client.close() |
98 | | - |
99 | | - async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict: |
100 | | - bedrock_model = request.model |
101 | | - |
102 | | - sampling_params = request.sampling_params |
103 | | - options = get_sampling_strategy_options(sampling_params) |
104 | | - |
105 | | - if sampling_params.max_tokens: |
106 | | - options["max_gen_len"] = sampling_params.max_tokens |
107 | | - if sampling_params.repetition_penalty > 0: |
108 | | - options["repetition_penalty"] = sampling_params.repetition_penalty |
109 | | - |
110 | | - prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model)) |
111 | | - |
112 | | - # Convert foundation model ID to inference profile ID |
113 | | - region_name = self.client.meta.region_name |
114 | | - inference_profile_id = _to_inference_profile_id(bedrock_model, region_name) |
115 | | - |
116 | | - return { |
117 | | - "modelId": inference_profile_id, |
118 | | - "body": json.dumps( |
119 | | - { |
120 | | - "prompt": prompt, |
121 | | - **options, |
122 | | - } |
123 | | - ), |
124 | | - } |
125 | | - |
126 | | - async def openai_embeddings( |
127 | | - self, |
128 | | - model: str, |
129 | | - input: str | list[str], |
130 | | - encoding_format: str | None = "float", |
131 | | - dimensions: int | None = None, |
132 | | - user: str | None = None, |
133 | | - ) -> OpenAIEmbeddingsResponse: |
134 | | - raise NotImplementedError() |
| 62 | + async def register_model(self, model: Model) -> Model: |
| 63 | + """ |
| 64 | + Register a model with the Bedrock provider. |
135 | 65 |
136 | | - async def openai_completion( |
137 | | - self, |
138 | | - # Standard OpenAI completion parameters |
139 | | - model: str, |
140 | | - prompt: str | list[str] | list[int] | list[list[int]], |
141 | | - best_of: int | None = None, |
142 | | - echo: bool | None = None, |
143 | | - frequency_penalty: float | None = None, |
144 | | - logit_bias: dict[str, float] | None = None, |
145 | | - logprobs: bool | None = None, |
146 | | - max_tokens: int | None = None, |
147 | | - n: int | None = None, |
148 | | - presence_penalty: float | None = None, |
149 | | - seed: int | None = None, |
150 | | - stop: str | list[str] | None = None, |
151 | | - stream: bool | None = None, |
152 | | - stream_options: dict[str, Any] | None = None, |
153 | | - temperature: float | None = None, |
154 | | - top_p: float | None = None, |
155 | | - user: str | None = None, |
156 | | - # vLLM-specific parameters |
157 | | - guided_choice: list[str] | None = None, |
158 | | - prompt_logprobs: int | None = None, |
159 | | - # for fill-in-the-middle type completion |
160 | | - suffix: str | None = None, |
161 | | - ) -> OpenAICompletion: |
162 | | - raise NotImplementedError("OpenAI completion not supported by the Bedrock provider") |
| 66 | + Bedrock doesn't support dynamic model listing via /v1/models, so we skip |
| 67 | + the availability check and accept all models registered in the config. |
| 68 | + """ |
| 69 | + return model |
163 | 70 |
164 | 71 | async def openai_chat_completion( |
165 | 72 | self, |
@@ -187,4 +94,67 @@ async def openai_chat_completion( |
187 | 94 | top_p: float | None = None, |
188 | 95 | user: str | None = None, |
189 | 96 | ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: |
190 | | - raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider") |
| 97 | + """Override to add Bedrock-specific model ID handling and streaming options.""" |
| 98 | + # Get the provider model ID from model store |
| 99 | + model_obj = await self.model_store.get_model(model) # type: ignore[attr-defined] |
| 100 | + provider_model_id: str = model_obj.provider_resource_id or model |
| 101 | + |
| 102 | + # Bedrock OpenAI-compatible endpoint expects base model IDs (e.g. "openai.gpt-oss-20b-1:0"). |
| 103 | + # Cross-region inference profile IDs prefixed with "us." are not recognized by the endpoint. |
| 104 | + # Normalize to base model ID, then try both base and prefixed forms for compatibility. |
| 105 | + base_model_id = provider_model_id[3:] if provider_model_id.startswith("us.") else provider_model_id |
| 106 | + candidate_models = [base_model_id, f"us.{base_model_id}"] |
| 107 | + |
| 108 | + # Enable streaming usage metrics when telemetry is active |
| 109 | + if stream and get_current_span() is not None: |
| 110 | + if stream_options is None: |
| 111 | + stream_options = {"include_usage": True} |
| 112 | + elif "include_usage" not in stream_options: |
| 113 | + stream_options = {**stream_options, "include_usage": True} |
| 114 | + |
| 115 | + # Try candidate model IDs with retry logic |
| 116 | + last_error: Exception | None = None |
| 117 | + for candidate in candidate_models: |
| 118 | + try: |
| 119 | + logger.debug(f"Attempting request with model ID: {candidate}") |
| 120 | + # Call OpenAI client directly with the candidate model ID |
| 121 | + # We can't use super().openai_chat_completion() because it would |
| 122 | +                # call self._get_provider_model_id(), which looks up the model again
| 123 | + params = await prepare_openai_completion_params( |
| 124 | + model=candidate, |
| 125 | + messages=messages, |
| 126 | + frequency_penalty=frequency_penalty, |
| 127 | + function_call=function_call, |
| 128 | + functions=functions, |
| 129 | + logit_bias=logit_bias, |
| 130 | + logprobs=logprobs, |
| 131 | + max_completion_tokens=max_completion_tokens, |
| 132 | + max_tokens=max_tokens, |
| 133 | + n=n, |
| 134 | + parallel_tool_calls=parallel_tool_calls, |
| 135 | + presence_penalty=presence_penalty, |
| 136 | + response_format=response_format, |
| 137 | + seed=seed, |
| 138 | + stop=stop, |
| 139 | + stream=stream, |
| 140 | + stream_options=stream_options, |
| 141 | + temperature=temperature, |
| 142 | + tool_choice=tool_choice, |
| 143 | + tools=tools, |
| 144 | + top_logprobs=top_logprobs, |
| 145 | + top_p=top_p, |
| 146 | + user=user, |
| 147 | + ) |
| 148 | + resp = await self.client.chat.completions.create(**params) |
| 149 | + return await self._maybe_overwrite_id(resp, stream) # type: ignore[no-any-return] |
| 150 | + except AuthenticationError as e: |
| 151 | + # Authentication errors - no retry with different model IDs |
| 152 | + raise ValueError(f"Authentication failed: {str(e)}") from e |
| 153 | + except (NotFoundError, BadRequestError) as e: |
| 154 | + logger.debug(f"Model ID {candidate} failed: {e}, trying next candidate") |
| 155 | + last_error = e |
| 156 | + continue |
| 157 | + |
| 158 | + if last_error: |
| 159 | + raise last_error |
| 160 | + raise RuntimeError("Bedrock chat completion failed") |
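
For context, the refactored adapter points an OpenAI client at Bedrock's OpenAI-compatible runtime endpoint. Below is a minimal sketch of the equivalent direct call with the `openai` SDK; the region, credential environment variable, and model ID are illustrative assumptions drawn from the comments in this diff, not values the adapter hard-codes.

```python
# Minimal sketch (not part of this change): calling Bedrock's OpenAI-compatible
# endpoint directly with the openai SDK, using the same base URL shape that
# BedrockInferenceAdapter.get_base_url() builds. Region, credential env var,
# and model ID are illustrative assumptions.
import os

from openai import OpenAI

region = "us-west-2"  # per the class docstring, GPT-OSS models are served here only
client = OpenAI(
    base_url=f"https://bedrock-runtime.{region}.amazonaws.com/openai/v1",
    api_key=os.environ["AWS_BEDROCK_API_KEY"],
)

response = client.chat.completions.create(
    model="openai.gpt-oss-20b-1:0",  # base model ID, without the "us." inference-profile prefix
    messages=[{"role": "user", "content": "Say hello from Bedrock."}],
)
print(response.choices[0].message.content)
```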
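
The model ID fallback in `openai_chat_completion` reduces to one normalization step plus a two-element candidate list, with the base ID tried first because the endpoint does not recognize the cross-region `us.` form. A self-contained sketch of that logic, using an illustrative model ID:

```python
# Sketch of the candidate-ID fallback used in openai_chat_completion: strip a
# cross-region "us." inference-profile prefix, then try the base ID first and
# the prefixed ID second.
def candidate_model_ids(provider_model_id: str) -> list[str]:
    base = provider_model_id.removeprefix("us.")
    return [base, f"us.{base}"]


# Example with an illustrative Bedrock model ID:
assert candidate_model_ids("us.meta.llama3-1-8b-instruct-v1:0") == [
    "meta.llama3-1-8b-instruct-v1:0",
    "us.meta.llama3-1-8b-instruct-v1:0",
]
```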