diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml deleted file mode 100644 index 923abae..0000000 --- a/.github/workflows/create-releases.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Create releases -on: - schedule: - - cron: '0 5 * * *' # every day at 5am UTC - push: - branches: - - main - -jobs: - release: - name: release - if: github.ref == 'refs/heads/main' && github.repository == 'WriterColab/sdk.python' - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: stainless-api/trigger-release-please@v1 - id: release - with: - repo: ${{ github.event.repository.full_name }} - stainless-api-key: ${{ secrets.STAINLESS_API_KEY }} - - - name: Install Rye - if: ${{ steps.release.outputs.releases_created }} - run: | - curl -sSf https://rye.astral.sh/get | bash - echo "$HOME/.rye/shims" >> $GITHUB_PATH - env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" - - - name: Publish to PyPI - if: ${{ steps.release.outputs.releases_created }} - run: | - bash ./bin/publish-pypi - env: - PYPI_TOKEN: ${{ secrets.WRITER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.github/workflows/handle-release-pr-title-edit.yml b/.github/workflows/handle-release-pr-title-edit.yml deleted file mode 100644 index e267b5c..0000000 --- a/.github/workflows/handle-release-pr-title-edit.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Handle release PR title edits -on: - pull_request: - types: - - edited - - unlabeled - -jobs: - update_pr_content: - name: Update pull request content - if: | - ((github.event.action == 'edited' && github.event.changes.title.from != github.event.pull_request.title) || - (github.event.action == 'unlabeled' && github.event.label.name == 'autorelease: custom version')) && - startsWith(github.event.pull_request.head.ref, 'release-please--') && - github.event.pull_request.state == 'open' && - github.event.sender.login != 'stainless-bot' && - github.event.sender.login != 'stainless-app' && - github.repository == 'WriterColab/sdk.python' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: stainless-api/trigger-release-please@v1 - with: - repo: ${{ github.event.repository.full_name }} - stainless-api-key: ${{ secrets.STAINLESS_API_KEY }} diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 9a1a325..b850d24 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -1,9 +1,13 @@ -# workflow for re-running publishing to PyPI in case it fails for some reason -# you can run this workflow by navigating to https://www.github.com/WriterColab/sdk.python/actions/workflows/publish-pypi.yml +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to PyPI in case it failed for some reason. 
+# You can run this workflow by navigating to https://www.github.com/writerai/writer-python/actions/workflows/publish-pypi.yml name: Publish PyPI on: workflow_dispatch: + release: + types: [published] + jobs: publish: name: publish diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 839b250..33a5d61 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -7,7 +7,7 @@ jobs: release_doctor: name: release doctor runs-on: ubuntu-latest - if: github.repository == 'WriterColab/sdk.python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + if: github.repository == 'writerai/writer-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') steps: - uses: actions/checkout@v4 @@ -16,5 +16,4 @@ jobs: run: | bash ./bin/check-release-environment env: - STAINLESS_API_KEY: ${{ secrets.STAINLESS_API_KEY }} PYPI_TOKEN: ${{ secrets.WRITER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json index aaf968a..3d2ac0b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.3" + ".": "0.1.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 701fc27..0b0545e 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 3 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/writerai%2Fwriter-e5ad2fb12fbda084403c1696af9dbe7eeb5f0025134473dea7632339d4d7d00b.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/writerai%2Fwriter-387e688cfbf5098041d47c9c918c15d4978f98768b4daf901267aea8affc0a30.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index e030ce0..370e268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## 0.1.0 (2024-06-05) + +Full Changelog: [v0.1.0-alpha.3...v0.1.0](https://github.com/writerai/writer-python/compare/v0.1.0-alpha.3...v0.1.0) + +### Features + +* **api:** update via SDK Studio ([fe993a2](https://github.com/writerai/writer-python/commit/fe993a27d2d71e3d9f989a1900c2b413a26f6954)) +* **api:** update via SDK Studio ([#10](https://github.com/writerai/writer-python/issues/10)) ([6ebe199](https://github.com/writerai/writer-python/commit/6ebe199e0b1043ed345ab6b3434d2ee116365737)) +* **api:** update via SDK Studio ([#12](https://github.com/writerai/writer-python/issues/12)) ([e668e94](https://github.com/writerai/writer-python/commit/e668e9498dab939754e4dbd351f8f29ece18b622)) +* **api:** update via SDK Studio ([#13](https://github.com/writerai/writer-python/issues/13)) ([aa08d76](https://github.com/writerai/writer-python/commit/aa08d76ac7a9dd0d8a40032bce986c23f8655130)) +* **api:** update via SDK Studio ([#14](https://github.com/writerai/writer-python/issues/14)) ([87ce38f](https://github.com/writerai/writer-python/commit/87ce38fa40ead36136a674bfc8c330c270fb8d5e)) +* **api:** update via SDK Studio ([#3](https://github.com/writerai/writer-python/issues/3)) ([3c3ae55](https://github.com/writerai/writer-python/commit/3c3ae55e3da824e8722514d589f9ccbeec62b1bb)) +* **api:** update via SDK Studio ([#4](https://github.com/writerai/writer-python/issues/4)) ([2523970](https://github.com/writerai/writer-python/commit/252397069fc7012de4328fa4f874ea674d507af1)) + + +### Chores + +* go live 
([#2](https://github.com/writerai/writer-python/issues/2)) ([f812fc4](https://github.com/writerai/writer-python/commit/f812fc4f9c9137907bd790477651dc8425bac0e0)) + ## 0.1.0-alpha.3 (2024-05-24) Full Changelog: [v0.1.0-alpha.2...v0.1.0-alpha.3](https://github.com/WriterColab/sdk.python/compare/v0.1.0-alpha.2...v0.1.0-alpha.3) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c91e213..d41926b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,7 @@ If you’d like to use the repository from source, you can either install from g To install via git: ```bash -pip install git+ssh://git@github.com/WriterColab/sdk.python.git +pip install git+ssh://git@github.com/writerai/writer-python.git ``` Alternatively, you can build from source and install the wheel file: @@ -117,7 +117,7 @@ the changes aren't made through the automated pipeline, you may want to make rel ### Publish with a GitHub workflow -You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/WriterColab/sdk.python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/writerai/writer-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. ### Publish manually diff --git a/README.md b/README.md index b8f8dd8..f703582 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated with [Stainless](https://www.stainlessapi.com/). ## Documentation -The REST API documentation can be found [on dev.writer.com](https://dev.writer.com/docs/quickstart). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found [on dev.writer.com](https://dev.writer.com/api-guides/introduction). The full API of this library can be found in [api.md](api.md). ## Installation @@ -35,7 +35,7 @@ client = Writer( chat = client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -68,7 +68,7 @@ async def main() -> None: chat = await client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -144,7 +144,7 @@ try: client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -195,7 +195,7 @@ client = Writer( client.with_options(max_retries=5).chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -226,7 +226,7 @@ client = Writer( client.with_options(timeout=5.0).chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -272,7 +272,7 @@ from writerai import Writer client = Writer() response = client.chat.with_raw_response.chat( messages=[{ - "content": "Hello!", + "content": "string", "role": "user", }], model="palmyra-x-32k", @@ -283,9 +283,9 @@ chat = response.parse() # get the object that `chat.chat()` would have returned print(chat.id) ``` -These methods return an [`APIResponse`](https://github.com/WriterColab/sdk.python/tree/main/src/writerai/_response.py) object. +These methods return an [`APIResponse`](https://github.com/writerai/writer-python/tree/main/src/writerai/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/WriterColab/sdk.python/tree/main/src/writerai/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. 
+The async client returns an [`AsyncAPIResponse`](https://github.com/writerai/writer-python/tree/main/src/writerai/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -297,7 +297,7 @@ To stream the response body, use `.with_streaming_response` instead, which requi with client.chat.with_streaming_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -381,7 +381,7 @@ This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) con We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/WriterColab/sdk.python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/writerai/writer-python/issues) with questions, bugs, or suggestions. ## Requirements diff --git a/bin/check-release-environment b/bin/check-release-environment index e504240..360bcdd 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -3,10 +3,6 @@ warnings=() errors=() -if [ -z "${STAINLESS_API_KEY}" ]; then - errors+=("The STAINLESS_API_KEY secret has not been set. Please contact Stainless for an API key & set it in your organization secrets on GitHub.") -fi - if [ -z "${PYPI_TOKEN}" ]; then warnings+=("The WRITER_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi diff --git a/pyproject.toml b/pyproject.toml index 5e57313..24ad29f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "writer-sdk" -version = "0.1.0-alpha.3" +version = "0.1.0" description = "The official Python library for the writer API" dynamic = ["readme"] license = "Apache-2.0" @@ -39,8 +39,8 @@ classifiers = [ [project.urls] -Homepage = "https://github.com/WriterColab/sdk.python" -Repository = "https://github.com/WriterColab/sdk.python" +Homepage = "https://github.com/writerai/writer-python" +Repository = "https://github.com/writerai/writer-python" @@ -108,7 +108,7 @@ path = "README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] # replace relative links with absolute links pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' -replacement = '[\1](https://github.com/WriterColab/sdk.python/tree/main/\g<2>)' +replacement = '[\1](https://github.com/writerai/writer-python/tree/main/\g<2>)' [tool.black] line-length = 120 diff --git a/src/writerai/_version.py b/src/writerai/_version.py index a25555d..200da27 100644 --- a/src/writerai/_version.py +++ b/src/writerai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "writerai" -__version__ = "0.1.0-alpha.3" # x-release-please-version +__version__ = "0.1.0" # x-release-please-version diff --git a/src/writerai/resources/chat.py b/src/writerai/resources/chat.py index 742b950..c2f21f1 100644 --- a/src/writerai/resources/chat.py +++ b/src/writerai/resources/chat.py @@ -61,9 +61,40 @@ def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Chat: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. 
The chat model is + always `palmyra-x-002-32k` for conversational use. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + temperature: Controls the randomness or creativity of the model's responses. A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -94,9 +125,40 @@ def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[ChatStreamingData]: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. The chat model is + always `palmyra-x-002-32k` for conversational use. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + temperature: Controls the randomness or creativity of the model's responses. A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. 
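The parameters documented above map one-to-one onto keyword arguments of `client.chat.chat()`. A minimal sketch of the streaming overload, assuming `WRITER_API_KEY` is set in the environment and that iterating the returned `Stream` yields `ChatStreamingData` chunks (their field layout is not part of this diff, so only the raw chunks are printed):

```python
from writerai import Writer

# Assumes WRITER_API_KEY is set in the environment; otherwise pass api_key=... explicitly.
client = Writer()

# stream=True selects the Stream[ChatStreamingData] overload documented above.
stream = client.chat.chat(
    messages=[{"content": "Write a haiku about APIs.", "role": "user"}],
    model="palmyra-x-002-32k",
    stream=True,
    max_tokens=150,
    temperature=0.7,
    top_p=0.9,
)

# The exact shape of each ChatStreamingData chunk is not shown in this diff,
# so the chunks are printed as-is rather than unpacked.
for chunk in stream:
    print(chunk)

# The tests release the underlying HTTP response explicitly; do the same here.
stream.response.close()
```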
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -127,9 +189,40 @@ def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Chat | Stream[ChatStreamingData]: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. The chat model is + always `palmyra-x-002-32k` for conversational use. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + temperature: Controls the randomness or creativity of the model's responses. A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -212,9 +305,40 @@ async def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Chat: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. The chat model is + always `palmyra-x-002-32k` for conversational use. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + temperature: Controls the randomness or creativity of the model's responses. 
A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -245,9 +369,40 @@ async def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[ChatStreamingData]: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. The chat model is + always `palmyra-x-002-32k` for conversational use. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + temperature: Controls the randomness or creativity of the model's responses. A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -278,9 +433,40 @@ async def chat( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Chat | AsyncStream[ChatStreamingData]: """ - Create chat completion + Chat completion Args: + messages: An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. + + model: Specifies the model to be used for generating responses. The chat model is + always `palmyra-x-002-32k` for conversational use. + + stream: Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + + max_tokens: Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. 
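The same arguments apply to the asynchronous client. A short sketch using `AsyncWriter`, again assuming `WRITER_API_KEY` is set in the environment; the `chat.choices[0].message.content` access follows the `Chat`, `Choice`, and `ChoiceMessage` models added later in this diff:

```python
import asyncio

from writerai import AsyncWriter

client = AsyncWriter()  # assumes WRITER_API_KEY is set in the environment


async def main() -> None:
    chat = await client.chat.chat(
        messages=[{"content": "Summarize the benefits of streaming APIs.", "role": "user"}],
        model="palmyra-x-002-32k",
        max_tokens=150,
    )
    # Chat.choices is a list of Choice objects; each carries a ChoiceMessage.
    print(chat.choices[0].message.content)


asyncio.run(main())
```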
+ + n: Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + + stop: A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + + temperature: Controls the randomness or creativity of the model's responses. A higher + temperature results in more varied and less predictable text, while a lower + temperature produces more deterministic and conservative outputs. + + top_p: Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/writerai/resources/completions.py b/src/writerai/resources/completions.py index 19c6c30..9e773db 100644 --- a/src/writerai/resources/completions.py +++ b/src/writerai/resources/completions.py @@ -62,9 +62,36 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + best_of: Specifies the number of completions to generate and return the best one. Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -96,9 +123,36 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[StreamingData]: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + best_of: Specifies the number of completions to generate and return the best one. 
Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -130,9 +184,36 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | Stream[StreamingData]: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + best_of: Specifies the number of completions to generate and return the best one. Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -218,9 +299,36 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + best_of: Specifies the number of completions to generate and return the best one. Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. 
This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -252,9 +360,36 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[StreamingData]: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + best_of: Specifies the number of completions to generate and return the best one. Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -286,9 +421,36 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | AsyncStream[StreamingData]: """ - Create completion using LLM model + Text generation Args: + model: The identifier of the model to be used for processing the request. + + prompt: The input text that the model will process to generate a response. + + stream: Determines whether the model's output should be streamed. If true, the output is + generated and sent incrementally, which can be useful for real-time + applications. + + best_of: Specifies the number of completions to generate and return the best one. Useful + for generating multiple outputs and choosing the best based on some criteria. + + max_tokens: The maximum number of tokens that the model can generate in the response. + + random_seed: A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + + stop: Specifies stopping conditions for the model's output generation. 
This can be an + array of strings or a single string that the model will look for as a signal to + stop generating further tokens. + + temperature: Controls the randomness of the model's outputs. Higher values lead to more + random outputs, while lower values make the model more deterministic. + + top_p: Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/writerai/resources/models.py b/src/writerai/resources/models.py index 906b3f6..bcf0d35 100644 --- a/src/writerai/resources/models.py +++ b/src/writerai/resources/models.py @@ -40,7 +40,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """List the available models""" + """List models""" return self._get( "/v1/models", options=make_request_options( @@ -69,7 +69,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """List the available models""" + """List models""" return await self._get( "/v1/models", options=make_request_options( diff --git a/src/writerai/types/chat.py b/src/writerai/types/chat.py index a1782ea..3ff233f 100644 --- a/src/writerai/types/chat.py +++ b/src/writerai/types/chat.py @@ -10,21 +10,52 @@ class ChoiceMessage(BaseModel): content: str + """The text content produced by the model. + + This field contains the actual output generated, reflecting the model's response + to the input query or command. + """ role: Literal["user", "assistant", "system"] + """ + Specifies the role associated with the content, indicating whether the message + is from the 'assistant' or another defined role, helping to contextualize the + output within the interaction flow. + """ class Choice(BaseModel): finish_reason: Literal["stop", "length", "content_filter"] + """Describes the condition under which the model ceased generating content. + + Common reasons include 'length' (reached the maximum output size), 'stop' + (encountered a stop sequence), or 'content_filter' (harmful content filtered + out). + """ message: ChoiceMessage class Chat(BaseModel): id: str + """A globally unique identifier (UUID) for the response generated by the API. + + This ID can be used to reference the specific operation or transaction within + the system for tracking or debugging purposes. + """ choices: List[Choice] + """ + An array of objects representing the different outcomes or results produced by + the model based on the input provided. + """ created: int + """The Unix timestamp (in seconds) when the response was created. + + This timestamp can be used to verify the timing of the response relative to + other events or operations. + """ model: str + """Identifies the specific model used to generate the response.""" diff --git a/src/writerai/types/chat_chat_params.py b/src/writerai/types/chat_chat_params.py index 986b58d..f257ed8 100644 --- a/src/writerai/types/chat_chat_params.py +++ b/src/writerai/types/chat_chat_params.py @@ -10,18 +10,52 @@ class ChatChatParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] + """ + An array of message objects that form the conversation history or context for + the model to respond to. The array must contain at least one message. 
+ """ model: Required[str] + """Specifies the model to be used for generating responses. + + The chat model is always `palmyra-x-002-32k` for conversational use. + """ max_tokens: int + """ + Defines the maximum number of tokens (words and characters) that the model can + generate in the response. The default value is set to 16, but it can be adjusted + to allow for longer or shorter responses as needed. + """ n: int + """ + Specifies the number of completions (responses) to generate from the model in a + single request. This parameter allows multiple responses to be generated, + offering a variety of potential replies from which to choose. + """ stop: Union[List[str], str] + """ + A token or sequence of tokens that, when generated, will cause the model to stop + producing further content. This can be a single token or an array of tokens, + acting as a signal to end the output. + """ temperature: float + """Controls the randomness or creativity of the model's responses. + + A higher temperature results in more varied and less predictable text, while a + lower temperature produces more deterministic and conservative outputs. + """ top_p: float + """ + Sets the threshold for "nucleus sampling," a technique to focus the model's + token generation on the most likely subset of tokens. Only tokens with + cumulative probability above this threshold are considered, controlling the + trade-off between creativity and coherence. + """ class Message(TypedDict, total=False): @@ -34,10 +68,20 @@ class Message(TypedDict, total=False): class ChatChatParamsNonStreaming(ChatChatParamsBase): stream: Literal[False] + """ + Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + """ class ChatChatParamsStreaming(ChatChatParamsBase): stream: Required[Literal[True]] + """ + Indicates whether the response should be streamed incrementally as it is + generated or only returned once fully complete. Streaming can be useful for + providing real-time feedback in interactive applications. + """ ChatChatParams = Union[ChatChatParamsNonStreaming, ChatChatParamsStreaming] diff --git a/src/writerai/types/completion.py b/src/writerai/types/completion.py index 20adc28..3631622 100644 --- a/src/writerai/types/completion.py +++ b/src/writerai/types/completion.py @@ -9,25 +9,51 @@ class ChoiceLogProbsTopLogProb(BaseModel): additional_properties: Optional[float] = None + """For any additional_properties properties in the top_log_probs object""" class ChoiceLogProbs(BaseModel): text_offset: Optional[List[int]] = None + """ + Positional indices of each token within the original input text, useful for + analysis and mapping. + """ token_log_probs: Optional[List[float]] = None + """ + Log probabilities for each token, indicating the likelihood of each token's + occurrence. + """ tokens: Optional[List[str]] = None + """An array of tokens that comprise the generated text.""" top_log_probs: Optional[List[ChoiceLogProbsTopLogProb]] = None + """ + An array of mappings for each token to its top log probabilities, showing + detailed prediction probabilities. + """ class Choice(BaseModel): text: str + """ + The generated text output from the model, which forms the main content of the + response. 
+ """ log_probs: Optional[ChoiceLogProbs] = None class Completion(BaseModel): choices: List[Choice] + """ + A list of choices generated by the model, each containing the text of the + completion and associated metadata such as log probabilities. + """ model: Optional[str] = None + """ + The identifier of the model that was used to generate the responses in the + 'choices' array. + """ diff --git a/src/writerai/types/completion_create_params.py b/src/writerai/types/completion_create_params.py index 60c8504..6436f60 100644 --- a/src/writerai/types/completion_create_params.py +++ b/src/writerai/types/completion_create_params.py @@ -10,28 +10,65 @@ class CompletionCreateParamsBase(TypedDict, total=False): model: Required[str] + """The identifier of the model to be used for processing the request.""" prompt: Required[str] + """The input text that the model will process to generate a response.""" best_of: int + """Specifies the number of completions to generate and return the best one. + + Useful for generating multiple outputs and choosing the best based on some + criteria. + """ max_tokens: int + """The maximum number of tokens that the model can generate in the response.""" random_seed: int + """ + A seed used to initialize the random number generator for the model, ensuring + reproducibility of the output when the same inputs are provided. + """ stop: Union[List[str], str] + """Specifies stopping conditions for the model's output generation. + + This can be an array of strings or a single string that the model will look for + as a signal to stop generating further tokens. + """ temperature: float + """Controls the randomness of the model's outputs. + + Higher values lead to more random outputs, while lower values make the model + more deterministic. + """ top_p: float + """ + Used to control the nucleus sampling, where only the most probable tokens with a + cumulative probability of top_p are considered for sampling, providing a way to + fine-tune the randomness of predictions. + """ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] + """Determines whether the model's output should be streamed. + + If true, the output is generated and sent incrementally, which can be useful for + real-time applications. + """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] + """Determines whether the model's output should be streamed. + + If true, the output is generated and sent incrementally, which can be useful for + real-time applications. 
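For the completions endpoint, the parameters above translate directly into a `client.completions.create()` call. The values below mirror the updated test fixtures further down in this diff, and `completion.choices[0].text` follows the `Completion` and `Choice` models defined above; treat it as an illustrative sketch rather than a canonical recipe:

```python
from writerai import Writer

client = Writer()  # assumes WRITER_API_KEY is set in the environment

completion = client.completions.create(
    model="palmyra-x-002-instruct",
    prompt="Write me an SEO article about...",
    max_tokens=150,
    temperature=0.7,
    top_p=0.9,
    random_seed=42,  # makes the output reproducible for identical inputs
    stop=["."],      # stop generating at the first period
)

# Each Choice carries the generated text for one completion.
print(completion.choices[0].text)
```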
+ """ CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/writerai/types/model_list_response.py b/src/writerai/types/model_list_response.py index 5ff56b6..db27d54 100644 --- a/src/writerai/types/model_list_response.py +++ b/src/writerai/types/model_list_response.py @@ -9,9 +9,12 @@ class Model(BaseModel): id: str + """The ID of the particular LLM that you want to use""" name: str + """The name of the particular LLM that you want to use""" class ModelListResponse(BaseModel): models: List[Model] + """The identifier of the model to be used for processing the request.""" diff --git a/tests/api_resources/test_chat.py b/tests/api_resources/test_chat.py index e6d7424..deaa8fc 100644 --- a/tests/api_resources/test_chat.py +++ b/tests/api_resources/test_chat.py @@ -22,11 +22,11 @@ def test_method_chat_overload_1(self, client: Writer) -> None: chat = client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) assert_matches_type(Chat, chat, path=["response"]) @@ -35,12 +35,12 @@ def test_method_chat_with_all_params_overload_1(self, client: Writer) -> None: chat = client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", "name": "string", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", max_tokens=0, n=0, stop=["string", "string", "string"], @@ -55,11 +55,11 @@ def test_raw_response_chat_overload_1(self, client: Writer) -> None: response = client.chat.with_raw_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) assert response.is_closed is True @@ -72,11 +72,11 @@ def test_streaming_response_chat_overload_1(self, client: Writer) -> None: with client.chat.with_streaming_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -91,11 +91,11 @@ def test_method_chat_overload_2(self, client: Writer) -> None: chat_stream = client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) chat_stream.response.close() @@ -105,12 +105,12 @@ def test_method_chat_with_all_params_overload_2(self, client: Writer) -> None: chat_stream = client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", "name": "string", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, max_tokens=0, n=0, @@ -125,11 +125,11 @@ def test_raw_response_chat_overload_2(self, client: Writer) -> None: response = client.chat.with_raw_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) @@ -142,11 +142,11 @@ def test_streaming_response_chat_overload_2(self, client: Writer) -> None: with client.chat.with_streaming_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) as response: assert not response.is_closed @@ -166,11 +166,11 @@ async def test_method_chat_overload_1(self, async_client: AsyncWriter) -> None: 
chat = await async_client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) assert_matches_type(Chat, chat, path=["response"]) @@ -179,12 +179,12 @@ async def test_method_chat_with_all_params_overload_1(self, async_client: AsyncW chat = await async_client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", "name": "string", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", max_tokens=0, n=0, stop=["string", "string", "string"], @@ -199,11 +199,11 @@ async def test_raw_response_chat_overload_1(self, async_client: AsyncWriter) -> response = await async_client.chat.with_raw_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) assert response.is_closed is True @@ -216,11 +216,11 @@ async def test_streaming_response_chat_overload_1(self, async_client: AsyncWrite async with async_client.chat.with_streaming_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -235,11 +235,11 @@ async def test_method_chat_overload_2(self, async_client: AsyncWriter) -> None: chat_stream = await async_client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) await chat_stream.response.aclose() @@ -249,12 +249,12 @@ async def test_method_chat_with_all_params_overload_2(self, async_client: AsyncW chat_stream = await async_client.chat.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", "name": "string", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, max_tokens=0, n=0, @@ -269,11 +269,11 @@ async def test_raw_response_chat_overload_2(self, async_client: AsyncWriter) -> response = await async_client.chat.with_raw_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) @@ -286,11 +286,11 @@ async def test_streaming_response_chat_overload_2(self, async_client: AsyncWrite async with async_client.chat.with_streaming_response.chat( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], - model="palmyra-x-chat-v2-32k", + model="palmyra-x-002-32k", stream=True, ) as response: assert not response.is_closed diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index d3b3a48..4db055f 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -20,31 +20,31 @@ class TestCompletions: @parametrize def test_method_create_overload_1(self, client: Writer) -> None: completion = client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) assert_matches_type(Completion, completion, path=["response"]) @parametrize def test_method_create_with_all_params_overload_1(self, client: Writer) -> None: completion = client.completions.create( - model="string", - prompt="string", - best_of=0, - max_tokens=0, - random_seed=0, - stop=["string", "string", "string"], + 
model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", + best_of=1, + max_tokens=150, + random_seed=42, + stop=["."], stream=False, - temperature=0, - top_p=0, + temperature=0.7, + top_p=0.9, ) assert_matches_type(Completion, completion, path=["response"]) @parametrize def test_raw_response_create_overload_1(self, client: Writer) -> None: response = client.completions.with_raw_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) assert response.is_closed is True @@ -55,8 +55,8 @@ def test_raw_response_create_overload_1(self, client: Writer) -> None: @parametrize def test_streaming_response_create_overload_1(self, client: Writer) -> None: with client.completions.with_streaming_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -69,8 +69,8 @@ def test_streaming_response_create_overload_1(self, client: Writer) -> None: @parametrize def test_method_create_overload_2(self, client: Writer) -> None: completion_stream = client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) completion_stream.response.close() @@ -78,23 +78,23 @@ def test_method_create_overload_2(self, client: Writer) -> None: @parametrize def test_method_create_with_all_params_overload_2(self, client: Writer) -> None: completion_stream = client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, - best_of=0, - max_tokens=0, - random_seed=0, - stop=["string", "string", "string"], - temperature=0, - top_p=0, + best_of=1, + max_tokens=150, + random_seed=42, + stop=["."], + temperature=0.7, + top_p=0.9, ) completion_stream.response.close() @parametrize def test_raw_response_create_overload_2(self, client: Writer) -> None: response = client.completions.with_raw_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) @@ -105,8 +105,8 @@ def test_raw_response_create_overload_2(self, client: Writer) -> None: @parametrize def test_streaming_response_create_overload_2(self, client: Writer) -> None: with client.completions.with_streaming_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) as response: assert not response.is_closed @@ -124,31 +124,31 @@ class TestAsyncCompletions: @parametrize async def test_method_create_overload_1(self, async_client: AsyncWriter) -> None: completion = await async_client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) assert_matches_type(Completion, completion, path=["response"]) @parametrize async def test_method_create_with_all_params_overload_1(self, async_client: AsyncWriter) -> None: completion = await async_client.completions.create( - model="string", - prompt="string", - best_of=0, - max_tokens=0, - random_seed=0, - stop=["string", "string", "string"], + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", + best_of=1, + max_tokens=150, + random_seed=42, + stop=["."], stream=False, - temperature=0, - top_p=0, 
+ temperature=0.7, + top_p=0.9, ) assert_matches_type(Completion, completion, path=["response"]) @parametrize async def test_raw_response_create_overload_1(self, async_client: AsyncWriter) -> None: response = await async_client.completions.with_raw_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) assert response.is_closed is True @@ -159,8 +159,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncWriter) - @parametrize async def test_streaming_response_create_overload_1(self, async_client: AsyncWriter) -> None: async with async_client.completions.with_streaming_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -173,8 +173,8 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncWri @parametrize async def test_method_create_overload_2(self, async_client: AsyncWriter) -> None: completion_stream = await async_client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) await completion_stream.response.aclose() @@ -182,23 +182,23 @@ async def test_method_create_overload_2(self, async_client: AsyncWriter) -> None @parametrize async def test_method_create_with_all_params_overload_2(self, async_client: AsyncWriter) -> None: completion_stream = await async_client.completions.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, - best_of=0, - max_tokens=0, - random_seed=0, - stop=["string", "string", "string"], - temperature=0, - top_p=0, + best_of=1, + max_tokens=150, + random_seed=42, + stop=["."], + temperature=0.7, + top_p=0.9, ) await completion_stream.response.aclose() @parametrize async def test_raw_response_create_overload_2(self, async_client: AsyncWriter) -> None: response = await async_client.completions.with_raw_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) @@ -209,8 +209,8 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncWriter) - @parametrize async def test_streaming_response_create_overload_2(self, async_client: AsyncWriter) -> None: async with async_client.completions.with_streaming_response.create( - model="string", - prompt="string", + model="palmyra-x-002-instruct", + prompt="Write me an SEO article about...", stream=True, ) as response: assert not response.is_closed diff --git a/tests/test_client.py b/tests/test_client.py index 829fd50..4d863c8 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -724,7 +724,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No dict( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -750,7 +750,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non dict( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -1455,7 +1455,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) dict( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ], @@ -1481,7 +1481,7 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: 
MockRouter) dict( messages=[ { - "content": "Hello!", + "content": "string", "role": "user", } ],
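The retry and timeout tests above exercise the per-request configuration covered in the README portion of this diff. A brief sketch of that pattern, with an illustrative message and the assumption that `WRITER_API_KEY` is set in the environment:

```python
from writerai import Writer

client = Writer()  # assumes WRITER_API_KEY is set in the environment

# Override retry behaviour for a single request, as shown in the README;
# client.with_options(timeout=5.0) works the same way for request timeouts.
chat = client.with_options(max_retries=5).chat.chat(
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    model="palmyra-x-002-32k",
)
print(chat.id)
```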