# configs/sagemaker.yml

# General configuration applicable to the entire app (not tied to any single
# instance entry).
general:
  # Name of this configuration.
  # NOTE(review): presumably used to label the run - confirm against the consumer.
  name: sagemaker
# Each instance entry follows one of the two formats below.
#   instance_id: {instance_id_here}  # if instance_id is used, you need to bring your own private key
#   private_key_fname: key_pair/fmbench_orchestrator_1-us-east-1
#                   OR
# - instance_type: {instance_name_here} 
#   region: {region_here}
#   ami_id: {ami_id_here}
#   device_name: /dev/sda1
#   ebs_del_on_termination: True | False
#   ebs_Iops: 16000
#   ebs_VolumeSize: {Volume_Size_Here}
#   ebs_VolumeType: {Volume_type_Here}
#   # Optional (the 3 fields below). Defaults to none. You can use either the Reservation ID, the ARN, or both.
#   CapacityReservationPreference: open | none 
#   CapacityReservationId: {The ID of the Capacity Reservation in which to run the instance.}
#   CapacityReservationResourceGroupArn: {The ARN of the Capacity Reservation resource group in which to run the instance.}

### REQUIRED WITH ABOVE:
#   startup_script: startup_scripts/gpu_ubuntu_startup.txt
#   post_startup_script: post_startup_scripts/fmbench.txt
#   fmbench_config: https://raw.githubusercontent.com/dheerajoruganty/multi-deploy-ec2/refs/heads/main/configs/config-ec2-llama3-8b.yml

### OPTIONAL:
#   fmbench_llm_tokenizer_fpath: fmbench_llm_utils/tokenizer.json
#   fmbench_llm_config_fpath: fmbench_llm_utils/config.json
#   fmbench_tokenizer_remote_dir: /tmp/fmbench-read/llama3_tokenizer/
#   # Timeout period in Seconds before a run is stopped
#   fmbench_complete_timeout: 1200

## US-EAST-1 Mapping:
# Neuron AMI : ami-05d498302130f9036
# DeepLearning AMI AL2 : ami-07f302d2a74e2b584
# AL2 AMI, CPU bench : ami-0e54eba7c51c234f6

# The entries below are a list of dicts, as there might be 2 instances with the same AMI.

# Shared EC2 launch settings. The `&ec2_settings` anchor lets an instance entry
# pull these keys in with `<<: *ec2_settings` (shallow merge; keys set directly
# on the instance entry take precedence over the merged ones).
#
# `{{name}}` placeholders are quoted so that the template itself is valid YAML
# and the substituted values are always read as strings. The placeholder text
# is intentionally left unchanged (no added spaces).
# NOTE(review): assumes placeholders are substituted as text before the file is
# parsed - confirm against the loader.
defaults: &ec2_settings
  region: "{{region}}"
  # AMI to launch, filled in from the `gpu` placeholder.
  ami_id: "{{gpu}}"
  device_name: /dev/sda1
  ebs_del_on_termination: true
  ebs_Iops: 16000
  ebs_VolumeSize: 250
  ebs_VolumeType: gp3
  startup_script: startup_scripts/ubuntu_startup.txt
  post_startup_script: post_startup_scripts/fmbench.txt
  # Parameters handed to the post-startup script.
  post_startup_script_params:
    write_bucket: "{{write_bucket}}"
  # Timeout period in seconds before a run is stopped
  fmbench_complete_timeout: 10000

# Instances to run, as a list of mappings. Each entry merges the shared
# `ec2_settings` anchor and then adds its own keys; keys written directly on
# the entry override merged ones.
instances:
- instance_type: m7a.xlarge
  <<: *ec2_settings
  # List of config files for this instance. The placeholder is quoted so the
  # template stays valid YAML and the substituted value is read as a string.
  fmbench_config:
  - "{{config_file}}"