Skip to content

Commit

Permalink
Merge pull request #6597 from ljosyula/trnInf
Browse files Browse the repository at this point in the history
Revert "Adding support for inf2 and trn1n instances"
  • Loading branch information
cPu1 committed May 10, 2023
2 parents c0bf524 + 253aad3 commit e96652d
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 63 deletions.
24 changes: 8 additions & 16 deletions integration/tests/inferentia/inferentia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"context"
"os"
"testing"
"time"

"k8s.io/client-go/kubernetes"

Expand All @@ -30,22 +29,21 @@ var (
params *tests.Params
clusterWithNeuronPlugin string
clusterWithoutPlugin string
selectedNodeType string
)

func init() {
// Call testing.Init() prior to tests.NewParams(), as otherwise -test.* will not be recognised. See also: https://golang.org/doc/go1.13#testing
testing.Init()
params = tests.NewParams("inf")
params = tests.NewParams("inf1")
defaultCluster = params.ClusterName
noInstallCluster = params.NewClusterName("inf-no-plugin")
noInstallCluster = params.NewClusterName("inf1-no-plugin")
}

func TestInferentia(t *testing.T) {
testutils.RegisterAndRun(t)
}

const initNG = "inf-ng-0"
const initNG = "inf1-ng-0"

var _ = BeforeSuite(func() {
params.KubeconfigTemp = false
Expand All @@ -59,12 +57,6 @@ var _ = BeforeSuite(func() {
clusterWithoutPlugin = noInstallCluster
clusterWithNeuronPlugin = defaultCluster

var selectedNodeType = "inf1.xlarge"
currentDay := time.Now().Day()
if currentDay%2 == 0 {
selectedNodeType = "inf2.xlarge"
}

if !params.SkipCreate {
cmd := params.EksctlCreateCmd.WithArgs(
"cluster",
Expand All @@ -75,7 +67,7 @@ var _ = BeforeSuite(func() {
"--nodegroup-name", initNG,
"--node-labels", "ng-name="+initNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "inf1.xlarge",
"--version", params.Version,
"--zones", "us-west-2a,us-west-2c,us-west-2d",
"--kubeconfig", params.KubeconfigPath,
Expand All @@ -90,7 +82,7 @@ var _ = BeforeSuite(func() {
"--nodegroup-name", initNG,
"--node-labels", "ng-name="+initNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "inf1.xlarge",
"--version", params.Version,
"--zones", "us-west-2a,us-west-2c,us-west-2d",
"--kubeconfig", params.KubeconfigPath,
Expand All @@ -101,10 +93,10 @@ var _ = BeforeSuite(func() {

var _ = Describe("(Integration) Inferentia nodes", func() {
const (
newNG = "inf-ng-1"
newNG = "inf1-ng-1"
)

Context("cluster with inf nodes", func() {
Context("cluster with inf1 nodes", func() {
Context("by default", func() {
BeforeEach(func() {
cmd := params.EksctlUtilsCmd.WithArgs(
Expand Down Expand Up @@ -157,7 +149,7 @@ var _ = Describe("(Integration) Inferentia nodes", func() {
"--tags", "alpha.eksctl.io/description=eksctl integration test",
"--node-labels", "ng-name="+newNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "inf1.xlarge",
"--version", params.Version,
)
Expect(cmd).To(RunSuccessfully())
Expand Down
23 changes: 8 additions & 15 deletions integration/tests/trainium/trainium_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,21 @@ var (
clusterWithoutPlugin string
nodeZones string
clusterZones string
selectedNodeType string
)

func init() {
// Call testing.Init() prior to tests.NewParams(), as otherwise -test.* will not be recognised. See also: https://golang.org/doc/go1.13#testing
testing.Init()
params = tests.NewParams("trn")
params = tests.NewParams("trn1")
defaultCluster = params.ClusterName
noInstallCluster = params.NewClusterName("trn-no-plugin")
noInstallCluster = params.NewClusterName("trn1-no-plugin")
}

func TestTrainium(t *testing.T) {
testutils.RegisterAndRun(t)
}

const initNG = "trn-ng-0"
const initNG = "trn1-ng-0"

var _ = BeforeSuite(func() {
params.KubeconfigTemp = false
Expand All @@ -72,12 +71,6 @@ var _ = BeforeSuite(func() {
ec2API := ec2.NewFromConfig(cfg)
nodeZones, clusterZones = getAvailabilityZones(ctx, ec2API)

var selectedNodeType = "trn1.2xlarge"
currentDay := time.Now().Day()
if currentDay%2 == 0 {
selectedNodeType = "trn1n.32xlarge"
}

cmd := params.EksctlCreateCmd.WithArgs(
"cluster",
"--verbose", "4",
Expand All @@ -88,7 +81,7 @@ var _ = BeforeSuite(func() {
"--nodegroup-name", initNG,
"--node-labels", "ng-name="+initNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "trn1.2xlarge",
"--node-zones", nodeZones,
"--version", params.Version,
"--kubeconfig", params.KubeconfigPath,
Expand All @@ -104,7 +97,7 @@ var _ = BeforeSuite(func() {
"--nodegroup-name", initNG,
"--node-labels", "ng-name="+initNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "trn1.2xlarge",
"--node-zones", nodeZones,
"--version", params.Version,
"--kubeconfig", params.KubeconfigPath,
Expand All @@ -115,10 +108,10 @@ var _ = BeforeSuite(func() {

var _ = Describe("(Integration) Trainium nodes", func() {
const (
newNG = "trn-ng-1"
newNG = "trn1-ng-1"
)

Context("cluster with trn nodes", func() {
Context("cluster with trn1 nodes", func() {
Context("by default", func() {
BeforeEach(func() {
cmd := params.EksctlUtilsCmd.WithArgs(
Expand Down Expand Up @@ -172,7 +165,7 @@ var _ = Describe("(Integration) Trainium nodes", func() {
"--tags", "alpha.eksctl.io/description=eksctl integration test",
"--node-labels", "ng-name="+newNG,
"--nodes", "1",
"--node-type", selectedNodeType,
"--node-type", "trn1.2xlarge",
"--node-zones", nodeZones,
"--version", params.Version,
)
Expand Down
30 changes: 0 additions & 30 deletions pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,24 +53,12 @@ var _ = Describe("GPU instance support", func() {
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
}),
Entry("Bottlerocket", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "inf2.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
}),
Entry("Bottlerocket", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
}),
Entry("Bottlerocket", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "trn1n.32xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
}),
)

DescribeTable("unmanaged nodegroups", func(e gpuInstanceEntry) {
Expand All @@ -92,18 +80,10 @@ var _ = Describe("GPU instance support", func() {
gpuInstanceType: "inf1.xlarge",
amiFamily: api.NodeImageFamilyAmazonLinux2,
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "inf2.xlarge",
amiFamily: api.NodeImageFamilyAmazonLinux2,
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "trn1.2xlarge",
amiFamily: api.NodeImageFamilyAmazonLinux2,
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "trn1n.32xlarge",
amiFamily: api.NodeImageFamilyAmazonLinux2,
}),
Entry("AMI unset", gpuInstanceEntry{
gpuInstanceType: "g4dn.xlarge",
}),
Expand All @@ -116,21 +96,11 @@ var _ = Describe("GPU instance support", func() {
gpuInstanceType: "inf1.xlarge",
expectUnsupportedErr: true,
}),
Entry("Bottlerocket infra", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "inf2.xlarge",
expectUnsupportedErr: true,
}),
Entry("Bottlerocket infra", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
}),
Entry("Bottlerocket infra", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "trn1n.32xlarge",
expectUnsupportedErr: true,
}),
Entry("Bottlerocket nvidia", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyBottlerocket,
gpuInstanceType: "g4dn.xlarge",
Expand Down
4 changes: 2 additions & 2 deletions pkg/utils/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ func IsNvidiaInstanceType(instanceType string) bool {

// IsInferentiaInstanceType returns true if the instance type requires AWS Neuron
func IsInferentiaInstanceType(instanceType string) bool {
return strings.HasPrefix(instanceType, "inf1") || strings.HasPrefix(instanceType, "inf2")
return strings.HasPrefix(instanceType, "inf1")
}

// IsTrainiumInstanceType returns true if the instance type requires AWS Neuron
func IsTrainiumInstanceType(instanceType string) bool {
return strings.HasPrefix(instanceType, "trn1n") || strings.HasPrefix(instanceType, "trn1")
return strings.HasPrefix(instanceType, "trn1")
}

// GetSmallestInstanceType returns the smallest instance type in instanceTypes.
Expand Down

0 comments on commit e96652d

Please sign in to comment.