From 09194dd5b4f3b3f1345899084fd7a91b629c01fa Mon Sep 17 00:00:00 2001 From: derek-thomas Date: Wed, 5 Jun 2024 15:23:13 +0400 Subject: [PATCH 1/4] Updating HW information --- ...ic_embedding_tei_inference_endpoints.ipynb | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb index 6ec9a2c9..19b5da8d 100644 --- a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb +++ b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb @@ -113,18 +113,16 @@ "id": "1e680f3d-4900-46cc-8b49-bb6ba3e27e2b", "metadata": {}, "source": [ - "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. Here they are in table form:\n", + "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. Check the [original documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for GPU and alternative accelerators for information on\n", + "- Provider\n", + "- Instance Type\n", + "- Instance Size\n", + "- Hourly rate\n", + "- GPUs\n", + "- Memory\n", + "- Architecture\n", "\n", - "| GPU | instanceType | instanceSize | vRAM |\n", - "|---------------------|----------------|--------------|-------|\n", - "| 1x Nvidia Tesla T4 | g4dn.xlarge | small | 16GB |\n", - "| 4x Nvidia Tesla T4 | g4dn.12xlarge | large | 64GB |\n", - "| 1x Nvidia A10G | g5.2xlarge | medium | 24GB |\n", - "| 4x Nvidia A10G | g5.12xlarge | xxlarge | 96GB |\n", - "| 1x Nvidia A100* | p4de | xlarge | 80GB |\n", - "| 2x Nvidia A100* | p4de | 2xlarge | 160GB |\n", - "\n", - "\\*Note that for A100s you might get a note to email us to get access." + "\\*Note that for some architectures you might get a note to email us to get access." 
] }, { From 1153f082f07581af163076d21c295402f4faa999 Mon Sep 17 00:00:00 2001 From: derek-thomas Date: Wed, 5 Jun 2024 15:26:47 +0400 Subject: [PATCH 2/4] Updating HW info in code --- .../en/automatic_embedding_tei_inference_endpoints.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb index 19b5da8d..c7c258e9 100644 --- a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb +++ b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb @@ -137,8 +137,8 @@ "# GPU Choice\n", "VENDOR=\"aws\"\n", "REGION=\"us-east-1\"\n", - "INSTANCE_SIZE=\"medium\"\n", - "INSTANCE_TYPE=\"g5.2xlarge\"" + "INSTANCE_SIZE=\"x1\"\n", + "INSTANCE_TYPE=\"nvidia-a10g\"" ] }, { From 665c196bc15feb4f5d3eac3755dd2230b54cd0b5 Mon Sep 17 00:00:00 2001 From: derek-thomas Date: Mon, 10 Jun 2024 21:20:52 +0400 Subject: [PATCH 3/4] Adding the current GPUs and accelerators --- ...ic_embedding_tei_inference_endpoints.ipynb | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb index c7c258e9..23a287b5 100644 --- a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb +++ b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb @@ -113,16 +113,35 @@ "id": "1e680f3d-4900-46cc-8b49-bb6ba3e27e2b", "metadata": {}, "source": [ - "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. 
Check the [original documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for GPU and alternative accelerators for information on\n", - "- Provider\n", - "- Instance Type\n", - "- Instance Size\n", - "- Hourly rate\n", - "- GPUs\n", - "- Memory\n", - "- Architecture\n", + "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. Check the [original documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for GPU and alternative accelerators for information.\n", "\n", - "\\*Note that for some architectures you might get a note to email us to get access." + "\\*Note that for some architectures you might get a note to email us to get access.\n", + "\n", + "| Provider | Instance Type | Instance Size | Hourly rate | GPUs | Memory | Architecture |\n", + "|:--------:|:-------------:|:-------------:|:-----------:|:----:|:------:|:---------------:|\n", + "| aws | nvidia-a10g | x1 | $1 | 1 | 24GB | NVIDIA A10G |\n", + "| aws | nvidia-t4 | x1 | $0.5 | 1 | 14GB | NVIDIA T4 |\n", + "| aws | nvidia-t4 | x4 | $3 | 4 | 56GB | NVIDIA T4 |\n", + "| gcp | nvidia-l4 | x1 | $0.8 | 1 | 24GB | NVIDIA L4 |\n", + "| gcp | nvidia-l4 | x4 | $3.8 | 4 | 96GB | NVIDIA L4 |\n", + "| aws | nvidia-a100 | x1 | $4 | 1 | 80GB | NVIDIA A100 |\n", + "| aws | nvidia-a10g | x4 | $5 | 4 | 96GB | NVIDIA A10G |\n", + "| aws | nvidia-a100 | x2 | $8 | 2 | 160GB | NVIDIA A100 |\n", + "| aws | nvidia-a100 | x4 | $16 | 4 | 320GB | NVIDIA A100 |\n", + "| aws | nvidia-a100 | x8 | $32 | 8 | 640GB | NVIDIA A100 |\n", + "| gcp | nvidia-t4 | x1 | $0.5 | 1 | 16GB | NVIDIA T4 |\n", + "| gcp | nvidia-l4 | x1 | $1 | 1 | 24GB | NVIDIA L4 |\n", + "| gcp | nvidia-l4 | x4 | $5 | 4 | 96GB | NVIDIA L4 |\n", + "| gcp | nvidia-a100 | x1 | $6 | 1 | 80 GB | NVIDIA A100 |\n", + "| gcp | nvidia-a100 | x2 | $12 | 2 | 160 GB | NVIDIA A100 |\n", + "| gcp | nvidia-a100 | x4 | $24 | 4 | 320 GB | NVIDIA A100 
|\n", + "| gcp | nvidia-a100 | x8 | $48 | 8 | 640 GB | NVIDIA A100 |\n", + "| gcp | nvidia-h100 | x1 | $12.5 | 1 | 80 GB | NVIDIA H100 |\n", + "| gcp | nvidia-h100 | x2 | $25 | 2 | 160 GB | NVIDIA H100 |\n", + "| gcp | nvidia-h100 | x4 | $50 | 4 | 320 GB | NVIDIA H100 |\n", + "| gcp | nvidia-h100 | x8 | $100 | 8 | 640 GB | NVIDIA H100 |\n", + "| aws | inf2 | x1 | $0.75 | 1 | 32GB | AWS Inferentia2 |\n", + "| aws | inf2 | x12 | $12 | 12 | 384GB | AWS Inferentia2 |" ] }, { From c4be0943102f99fb6b1cd6303248e35b78813143 Mon Sep 17 00:00:00 2001 From: derek-thomas Date: Thu, 13 Jun 2024 08:14:55 +0400 Subject: [PATCH 4/4] Adding tip syntax and simple wording --- .../en/automatic_embedding_tei_inference_endpoints.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb index 23a287b5..f70f1903 100644 --- a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb +++ b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb @@ -113,9 +113,10 @@ "id": "1e680f3d-4900-46cc-8b49-bb6ba3e27e2b", "metadata": {}, "source": [ - "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. Check the [original documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for GPU and alternative accelerators for information.\n", + "Inference Endpoints offers a number of GPUs that you can choose from. 
Check the [documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for information on GPUs and alternative accelerators.\n", "\n", - "\*Note that for some architectures you might get a note to email us to get access.\n", + "> [!TIP]\n", + "> You may need to email us for access to some architectures.\n", "\n", "| Provider | Instance Type | Instance Size | Hourly rate | GPUs | Memory | Architecture |\n", "|:--------:|:-------------:|:-------------:|:-----------:|:----:|:------:|:---------------:|\n",