diff --git a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb
index 6ec9a2c..f70f190 100644
--- a/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb
+++ b/notebooks/en/automatic_embedding_tei_inference_endpoints.ipynb
@@ -113,18 +113,34 @@
    "id": "1e680f3d-4900-46cc-8b49-bb6ba3e27e2b",
    "metadata": {},
    "source": [
-    "Hugging Face offers a number of GPUs that you can choose from a number of GPUs that you can choose in Inference Endpoints. Here they are in table form:\n",
+    "Inference Endpoints offers a number of GPUs that you can choose from. Check the [documentation](https://huggingface.co/docs/inference-endpoints/en/pricing#gpu-instances) for information on GPUs and alternative accelerators.\n",
     "\n",
-    "| GPU | instanceType | instanceSize | vRAM |\n",
-    "|---------------------|----------------|--------------|-------|\n",
-    "| 1x Nvidia Tesla T4 | g4dn.xlarge | small | 16GB |\n",
-    "| 4x Nvidia Tesla T4 | g4dn.12xlarge | large | 64GB |\n",
-    "| 1x Nvidia A10G | g5.2xlarge | medium | 24GB |\n",
-    "| 4x Nvidia A10G | g5.12xlarge | xxlarge | 96GB |\n",
-    "| 1x Nvidia A100* | p4de | xlarge | 80GB |\n",
-    "| 2x Nvidia A100* | p4de | 2xlarge | 160GB |\n",
+    "> [!TIP]\n",
+    "> You may need to email us for access to some architectures.\n",
     "\n",
-    "\\*Note that for A100s you might get a note to email us to get access."
+    "| Provider | Instance Type | Instance Size | Hourly rate | GPUs | Memory | Architecture |\n",
+    "|:--------:|:-------------:|:-------------:|:-----------:|:----:|:------:|:---------------:|\n",
+    "| aws      | nvidia-a10g   | x1            | $1          | 1    | 24GB   | NVIDIA A10G     |\n",
+    "| aws      | nvidia-t4     | x1            | $0.5        | 1    | 14GB   | NVIDIA T4       |\n",
+    "| aws      | nvidia-t4     | x4            | $3          | 4    | 56GB   | NVIDIA T4       |\n",
+    "| aws      | nvidia-a100   | x1            | $4          | 1    | 80GB   | NVIDIA A100     |\n",
+    "| aws      | nvidia-a10g   | x4            | $5          | 4    | 96GB   | NVIDIA A10G     |\n",
+    "| aws      | nvidia-a100   | x2            | $8          | 2    | 160GB  | NVIDIA A100     |\n",
+    "| aws      | nvidia-a100   | x4            | $16         | 4    | 320GB  | NVIDIA A100     |\n",
+    "| aws      | nvidia-a100   | x8            | $32         | 8    | 640GB  | NVIDIA A100     |\n",
+    "| gcp      | nvidia-t4     | x1            | $0.5        | 1    | 16GB   | NVIDIA T4       |\n",
+    "| gcp      | nvidia-l4     | x1            | $1          | 1    | 24GB   | NVIDIA L4       |\n",
+    "| gcp      | nvidia-l4     | x4            | $5          | 4    | 96GB   | NVIDIA L4       |\n",
+    "| gcp      | nvidia-a100   | x1            | $6          | 1    | 80GB   | NVIDIA A100     |\n",
+    "| gcp      | nvidia-a100   | x2            | $12         | 2    | 160GB  | NVIDIA A100     |\n",
+    "| gcp      | nvidia-a100   | x4            | $24         | 4    | 320GB  | NVIDIA A100     |\n",
+    "| gcp      | nvidia-a100   | x8            | $48         | 8    | 640GB  | NVIDIA A100     |\n",
+    "| gcp      | nvidia-h100   | x1            | $12.5       | 1    | 80GB   | NVIDIA H100     |\n",
+    "| gcp      | nvidia-h100   | x2            | $25         | 2    | 160GB  | NVIDIA H100     |\n",
+    "| gcp      | nvidia-h100   | x4            | $50         | 4    | 320GB  | NVIDIA H100     |\n",
+    "| gcp      | nvidia-h100   | x8            | $100        | 8    | 640GB  | NVIDIA H100     |\n",
+    "| aws      | inf2          | x1            | $0.75       | 1    | 32GB   | AWS Inferentia2 |\n",
+    "| aws      | inf2          | x12           | $12         | 12   | 384GB  | AWS Inferentia2 |"
    ]
   },
   {
@@ -139,8 +155,8 @@
     "# GPU Choice\n",
     "VENDOR=\"aws\"\n",
     "REGION=\"us-east-1\"\n",
-    "INSTANCE_SIZE=\"medium\"\n",
-    "INSTANCE_TYPE=\"g5.2xlarge\""
+    "INSTANCE_SIZE=\"x1\"\n",
+    "INSTANCE_TYPE=\"nvidia-a10g\""
    ]
   },
   {
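
The `INSTANCE_SIZE`/`INSTANCE_TYPE` pair set in the second hunk is what the notebook later hands to `huggingface_hub.create_inference_endpoint`, so the table columns map directly onto API arguments. Below is a minimal sketch of that mapping, assuming a recent `huggingface_hub` release and a token with Inference Endpoints permissions; the endpoint name and model repository are illustrative placeholders, not values from this diff:

```python
# Minimal sketch: where VENDOR / REGION / INSTANCE_TYPE / INSTANCE_SIZE land.
# Assumes `pip install huggingface_hub` and a logged-in token with endpoint
# permissions; the endpoint name and repository below are hypothetical.
from huggingface_hub import create_inference_endpoint

endpoint = create_inference_endpoint(
    "embeddings-demo",                   # hypothetical endpoint name
    repository="BAAI/bge-base-en-v1.5",  # hypothetical embedding model
    framework="pytorch",
    task="sentence-embeddings",
    accelerator="gpu",
    vendor="aws",                 # the Provider column
    region="us-east-1",
    instance_type="nvidia-a10g",  # the Instance Type column
    instance_size="x1",           # the Instance Size column
)
endpoint.wait()      # block until the endpoint reports it is running
print(endpoint.url)  # base URL for embedding requests
```

Scaling up is then just a swap of table values, e.g. `instance_type="nvidia-a100"`, `instance_size="x2"` for the 2x A100 option at $8/hour.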