From af5d28612200ae7d78651a7008ec43c59e192d80 Mon Sep 17 00:00:00 2001 From: mitali401 Date: Sat, 18 Oct 2025 14:10:55 -0700 Subject: [PATCH 1/2] add list avzone and deploy de in avzone --- src/together/cli/api/endpoints.py | 28 ++++++++++++++ src/together/resources/endpoints.py | 59 +++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/src/together/cli/api/endpoints.py b/src/together/cli/api/endpoints.py index 3b1da3f4..28c7194f 100644 --- a/src/together/cli/api/endpoints.py +++ b/src/together/cli/api/endpoints.py @@ -132,6 +132,10 @@ def endpoints(ctx: click.Context) -> None: type=int, help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.", ) +@click.option( + "--user-specified-avzone", + help="User-specified availability zone (e.g., us-central-4b)", +) @click.option( "--wait", is_flag=True, @@ -152,6 +156,7 @@ def create( no_speculative_decoding: bool, no_auto_start: bool, inactive_timeout: int | None, + user_specified_avzone: str | None, wait: bool, ) -> None: """Create a new dedicated inference endpoint.""" @@ -177,6 +182,7 @@ def create( disable_speculative_decoding=no_speculative_decoding, state="STOPPED" if no_auto_start else "STARTED", inactive_timeout=inactive_timeout, + user_specified_avzone=user_specified_avzone, ) except InvalidRequestError as e: print_api_error(e) @@ -203,6 +209,8 @@ def create( click.echo(" Auto-start: disabled", err=True) if inactive_timeout is not None: click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True) + if user_specified_avzone: + click.echo(f" Availability zone: {user_specified_avzone}", err=True) click.echo(f"Endpoint created successfully, id: {response.id}", err=True) @@ -432,3 +440,23 @@ def update( click.echo("Successfully updated endpoint", err=True) click.echo(endpoint_id) + +@endpoints.command() +@click.option("--json", is_flag=True, help="Print output in JSON format") +@click.pass_obj +@handle_api_errors +def avzones(client: Together, json: bool) -> None: + """List all available availability zones.""" + avzones = client.endpoints.list_avzones() + + if not avzones: + click.echo("No availability zones found", err=True) + return + + if json: + import json as json_lib + click.echo(json_lib.dumps({"avzones": avzones}, indent=2)) + else: + click.echo("Available zones:", err=True) + for avzone in sorted(avzones): + click.echo(f" {avzone}") \ No newline at end of file diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py index e40238ad..0bbba8ed 100644 --- a/src/together/resources/endpoints.py +++ b/src/together/resources/endpoints.py @@ -60,6 +60,7 @@ def create( disable_speculative_decoding: bool = True, state: Literal["STARTED", "STOPPED"] = "STARTED", inactive_timeout: Optional[int] = None, + user_specified_avzone: Optional[str] = None, ) -> DedicatedEndpoint: """ Create a new dedicated endpoint. @@ -74,6 +75,7 @@ def create( disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False. state (str, optional): The desired state of the endpoint. Defaults to "STARTED". inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. + user_specified_avzone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b). Returns: DedicatedEndpoint: Object containing endpoint information @@ -100,6 +102,9 @@ def create( if inactive_timeout is not None: data["inactive_timeout"] = inactive_timeout + if user_specified_avzone is not None: + data["user_specified_avzone"] = user_specified_avzone + response, _, _ = requestor.request( options=TogetherRequest( method="POST", @@ -257,6 +262,31 @@ def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus] return [HardwareWithStatus(**item) for item in response.data["data"]] + def list_avzones(self) -> List[str]: + """ + List all available availability zones. + + Returns: + List[str]: List of unique availability zones + """ + requestor = api_requestor.APIRequestor( + client=self._client, + ) + + response, _, _ = requestor.request( + options=TogetherRequest( + method="GET", + url="clusters/avzones", + ), + stream=False, + ) + + assert isinstance(response, TogetherResponse) + assert isinstance(response.data, dict) + assert isinstance(response.data["avzones"], list) + + return response.data["avzones"] + class AsyncEndpoints: def __init__(self, client: TogetherClient) -> None: @@ -308,6 +338,7 @@ async def create( disable_speculative_decoding: bool = True, state: Literal["STARTED", "STOPPED"] = "STARTED", inactive_timeout: Optional[int] = None, + user_specified_avzone: Optional[str] = None, ) -> DedicatedEndpoint: """ Create a new dedicated endpoint. @@ -348,6 +379,9 @@ async def create( if inactive_timeout is not None: data["inactive_timeout"] = inactive_timeout + if user_specified_avzone is not None: + data["user_specified_avzone"] = user_specified_avzone + response, _, _ = await requestor.arequest( options=TogetherRequest( method="POST", @@ -506,3 +540,28 @@ async def list_hardware( assert isinstance(response.data["data"], list) return [HardwareWithStatus(**item) for item in response.data["data"]] + + async def list_avzones(self) -> List[str]: + """ + List all available availability zones. + + Returns: + List[str]: List of unique availability zones + """ + requestor = api_requestor.APIRequestor( + client=self._client, + ) + + response, _, _ = await requestor.arequest( + options=TogetherRequest( + method="GET", + url="clusters/avzones", + ), + stream=False, + ) + + assert isinstance(response, TogetherResponse) + assert isinstance(response.data, dict) + assert isinstance(response.data["avzones"], list) + + return response.data["avzones"] \ No newline at end of file From f0ef2071aeb4b067a7c19d306a899973e14dcef0 Mon Sep 17 00:00:00 2001 From: Nikitha Suryadevara Date: Thu, 30 Oct 2025 21:08:14 -0700 Subject: [PATCH 2/2] added create endpoint with availability zone and list availability zones. still need to return availability zone per endpoint. --- src/together/cli/api/endpoints.py | 26 ++++++++++++++------------ src/together/resources/endpoints.py | 22 +++++++++++----------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/together/cli/api/endpoints.py b/src/together/cli/api/endpoints.py index 954f0b71..271f864b 100644 --- a/src/together/cli/api/endpoints.py +++ b/src/together/cli/api/endpoints.py @@ -133,8 +133,8 @@ def endpoints(ctx: click.Context) -> None: help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.", ) @click.option( - "--user-specified-avzone", - help="User-specified availability zone (e.g., us-central-4b)", + "--availability-zone", + help="Start endpoint in specified availability zone (e.g., us-central-4b)", ) @click.option( "--wait", @@ -156,7 +156,7 @@ def create( no_speculative_decoding: bool, no_auto_start: bool, inactive_timeout: int | None, - user_specified_avzone: str | None, + availability_zone: str | None, wait: bool, ) -> None: """Create a new dedicated inference endpoint.""" @@ -182,7 +182,7 @@ def create( disable_speculative_decoding=no_speculative_decoding, state="STOPPED" if no_auto_start else "STARTED", inactive_timeout=inactive_timeout, - user_specified_avzone=user_specified_avzone, + availability_zone=availability_zone, ) except InvalidRequestError as e: print_api_error(e) @@ -209,8 +209,8 @@ def create( click.echo(" Auto-start: disabled", err=True) if inactive_timeout is not None: click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True) - if user_specified_avzone: - click.echo(f" Availability zone: {user_specified_avzone}", err=True) + if availability_zone: + click.echo(f" Availability zone: {availability_zone}", err=True) click.echo(f"Endpoint created successfully, id: {response.id}", err=True) @@ -458,22 +458,24 @@ def update( click.echo("Successfully updated endpoint", err=True) click.echo(endpoint_id) + @endpoints.command() @click.option("--json", is_flag=True, help="Print output in JSON format") @click.pass_obj @handle_api_errors -def avzones(client: Together, json: bool) -> None: - """List all available availability zones.""" +def availability_zones(client: Together, json: bool) -> None: + """List all availability zones.""" avzones = client.endpoints.list_avzones() - + if not avzones: click.echo("No availability zones found", err=True) return - + if json: import json as json_lib + click.echo(json_lib.dumps({"avzones": avzones}, indent=2)) else: click.echo("Available zones:", err=True) - for avzone in sorted(avzones): - click.echo(f" {avzone}") \ No newline at end of file + for availability_zone in sorted(avzones): + click.echo(f" {availability_zone}") diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py index 60f5f749..ba84c010 100644 --- a/src/together/resources/endpoints.py +++ b/src/together/resources/endpoints.py @@ -76,7 +76,7 @@ def create( disable_speculative_decoding: bool = True, state: Literal["STARTED", "STOPPED"] = "STARTED", inactive_timeout: Optional[int] = None, - user_specified_avzone: Optional[str] = None, + availability_zone: Optional[str] = None, ) -> DedicatedEndpoint: """ Create a new dedicated endpoint. @@ -91,7 +91,7 @@ def create( disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False. state (str, optional): The desired state of the endpoint. Defaults to "STARTED". inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. - user_specified_avzone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b). + availability_zone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b). Returns: DedicatedEndpoint: Object containing endpoint information @@ -118,8 +118,8 @@ def create( if inactive_timeout is not None: data["inactive_timeout"] = inactive_timeout - if user_specified_avzone is not None: - data["user_specified_avzone"] = user_specified_avzone + if availability_zone is not None: + data["availability_zone"] = availability_zone response, _, _ = requestor.request( options=TogetherRequest( @@ -292,7 +292,7 @@ def list_avzones(self) -> List[str]: response, _, _ = requestor.request( options=TogetherRequest( method="GET", - url="clusters/avzones", + url="clusters/availability-zones", ), stream=False, ) @@ -370,7 +370,7 @@ async def create( disable_speculative_decoding: bool = True, state: Literal["STARTED", "STOPPED"] = "STARTED", inactive_timeout: Optional[int] = None, - user_specified_avzone: Optional[str] = None, + availability_zone: Optional[str] = None, ) -> DedicatedEndpoint: """ Create a new dedicated endpoint. @@ -411,8 +411,8 @@ async def create( if inactive_timeout is not None: data["inactive_timeout"] = inactive_timeout - if user_specified_avzone is not None: - data["user_specified_avzone"] = user_specified_avzone + if availability_zone is not None: + data["availability_zone"] = availability_zone response, _, _ = await requestor.arequest( options=TogetherRequest( @@ -575,7 +575,7 @@ async def list_hardware( async def list_avzones(self) -> List[str]: """ - List all available availability zones. + List all availability zones. Returns: List[str]: List of unique availability zones @@ -587,7 +587,7 @@ async def list_avzones(self) -> List[str]: response, _, _ = await requestor.arequest( options=TogetherRequest( method="GET", - url="clusters/avzones", + url="clusters/availability-zones", ), stream=False, ) @@ -596,4 +596,4 @@ async def list_avzones(self) -> List[str]: assert isinstance(response.data, dict) assert isinstance(response.data["avzones"], list) - return response.data["avzones"] \ No newline at end of file + return response.data["avzones"]