Skip to content

Commit

Permalink
feat: Add support for spot instance in Lingjun environment
Browse files: browse the repository at this point in the history
  • Loading branch information
luoyy82 committed Jul 17, 2024
1 parent 067f0f5 commit 3f582a2
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pai/api/training_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def create(
compute_resource = CreateTrainingJobRequestComputeResource(
ecs_count=instance_count,
ecs_spec=instance_type,
- use_spot_instance=bool(spot_spec),
+ # use_spot_instance=bool(spot_spec),
spot_spec=spot_spec,
)
elif instance_spec:
Expand Down
10 changes: 10 additions & 0 deletions pai/job/_training_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,16 @@ def _submit(
spot_spec["SpotDiscountLimit"] = self.spot_spec.spot_discount_limit
else:
spot_spec = None

# user vpc
if self.user_vpc_config:
user_vpc_config = {
"VpcId": self.user_vpc_config.vpc_id,
"SecurityGroupId": self.user_vpc_config.security_group_id,
}
else:
user_vpc_config = None

training_job_id = session.training_job_api.create(
instance_count=instance_count,
instance_spec=instance_spec.model_dump() if instance_spec else None,
Expand Down

0 comments on commit 3f582a2

Please sign in to comment.