Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions src/together/cli/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def endpoints(ctx: click.Context) -> None:
type=int,
help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
)
@click.option(
"--user-specified-avzone",
help="User-specified availability zone (e.g., us-central-4b)",
)
@click.option(
"--wait",
is_flag=True,
Expand All @@ -152,6 +156,7 @@ def create(
no_speculative_decoding: bool,
no_auto_start: bool,
inactive_timeout: int | None,
user_specified_avzone: str | None,
wait: bool,
) -> None:
"""Create a new dedicated inference endpoint."""
Expand All @@ -177,6 +182,7 @@ def create(
disable_speculative_decoding=no_speculative_decoding,
state="STOPPED" if no_auto_start else "STARTED",
inactive_timeout=inactive_timeout,
user_specified_avzone=user_specified_avzone,
)
except InvalidRequestError as e:
print_api_error(e)
Expand All @@ -203,6 +209,8 @@ def create(
click.echo(" Auto-start: disabled", err=True)
if inactive_timeout is not None:
click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
if user_specified_avzone:
click.echo(f" Availability zone: {user_specified_avzone}", err=True)

click.echo(f"Endpoint created successfully, id: {response.id}", err=True)

Expand Down Expand Up @@ -432,3 +440,23 @@ def update(

click.echo("Successfully updated endpoint", err=True)
click.echo(endpoint_id)

@endpoints.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_obj
@handle_api_errors
def avzones(client: Together, json: bool) -> None:
    """List all available availability zones.

    Human-readable mode prints a header to stderr and one zone per line, in
    sorted order, to stdout. With ``--json`` a JSON object of the form
    ``{"avzones": [...]}`` is always written to stdout, even when no zones
    are returned, so that consumers parsing stdout never receive empty input.

    Args:
        client: API client whose ``endpoints.list_avzones()`` supplies the zones.
        json: When true, print the zones as JSON instead of plain text.
    """
    zones = client.endpoints.list_avzones()

    if not zones:
        # Diagnostic goes to stderr so it never pollutes machine-readable stdout.
        click.echo("No availability zones found", err=True)

    if json:
        # The ``json`` flag parameter shadows the stdlib module inside this
        # function, hence the aliased local import.
        import json as json_lib

        click.echo(json_lib.dumps({"avzones": zones or []}, indent=2))
    elif zones:
        click.echo("Available zones:", err=True)
        for zone in sorted(zones):
            click.echo(f" {zone}")
59 changes: 59 additions & 0 deletions src/together/resources/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def create(
disable_speculative_decoding: bool = True,
state: Literal["STARTED", "STOPPED"] = "STARTED",
inactive_timeout: Optional[int] = None,
user_specified_avzone: Optional[str] = None,
) -> DedicatedEndpoint:
"""
Create a new dedicated endpoint.
Expand All @@ -74,6 +75,7 @@ def create(
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to True.
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
user_specified_avzone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).

Returns:
DedicatedEndpoint: Object containing endpoint information
Expand All @@ -100,6 +102,9 @@ def create(
if inactive_timeout is not None:
data["inactive_timeout"] = inactive_timeout

if user_specified_avzone is not None:
data["user_specified_avzone"] = user_specified_avzone

response, _, _ = requestor.request(
options=TogetherRequest(
method="POST",
Expand Down Expand Up @@ -257,6 +262,31 @@ def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]

return [HardwareWithStatus(**item) for item in response.data["data"]]

def list_avzones(self) -> List[str]:
    """
    Fetch the availability zones reported by the API.

    Sends a GET request to ``clusters/avzones`` and returns the list stored
    under the ``avzones`` key of the response payload.

    Returns:
        List[str]: Availability zones exactly as returned by the API
    """
    api = api_requestor.APIRequestor(client=self._client)
    avzones_request = TogetherRequest(method="GET", url="clusters/avzones")

    result, _, _ = api.request(options=avzones_request, stream=False)

    # Narrow the response step by step; a malformed payload trips an assert.
    assert isinstance(result, TogetherResponse)
    assert isinstance(result.data, dict)
    zones = result.data["avzones"]
    assert isinstance(zones, list)

    return zones


class AsyncEndpoints:
def __init__(self, client: TogetherClient) -> None:
Expand Down Expand Up @@ -308,6 +338,7 @@ async def create(
disable_speculative_decoding: bool = True,
state: Literal["STARTED", "STOPPED"] = "STARTED",
inactive_timeout: Optional[int] = None,
user_specified_avzone: Optional[str] = None,
) -> DedicatedEndpoint:
"""
Create a new dedicated endpoint.
Expand Down Expand Up @@ -348,6 +379,9 @@ async def create(
if inactive_timeout is not None:
data["inactive_timeout"] = inactive_timeout

if user_specified_avzone is not None:
data["user_specified_avzone"] = user_specified_avzone

response, _, _ = await requestor.arequest(
options=TogetherRequest(
method="POST",
Expand Down Expand Up @@ -506,3 +540,28 @@ async def list_hardware(
assert isinstance(response.data["data"], list)

return [HardwareWithStatus(**item) for item in response.data["data"]]

async def list_avzones(self) -> List[str]:
    """
    Fetch the availability zones reported by the API (async variant).

    Awaits a GET request to ``clusters/avzones`` and returns the list stored
    under the ``avzones`` key of the response payload.

    Returns:
        List[str]: Availability zones exactly as returned by the API
    """
    api = api_requestor.APIRequestor(client=self._client)
    avzones_request = TogetherRequest(method="GET", url="clusters/avzones")

    result, _, _ = await api.arequest(options=avzones_request, stream=False)

    # Narrow the response step by step; a malformed payload trips an assert.
    assert isinstance(result, TogetherResponse)
    assert isinstance(result.data, dict)
    zones = result.data["avzones"]
    assert isinstance(zones, list)

    return zones
Loading