Skip to content

Commit

Permalink
Fallback to regular if cog base image can't be determined and flag is…
Browse files Browse the repository at this point in the history
… not explicitly set (#1872)

* Add base-image to gitignore

* This is a common artefact that we should prevent
from being checked in.

* Use a bool pointer as the cog base image flag

* Bool pointer allows for a tri-state, true, false
and not set.
* Treat the value as true by default when the
pointer is not set.
* Use the IsUsingCogBaseImage function rather than
direct access to the pointer to resolve the
boolean value.
* If the flag isn’t explicitly set, warn the user
and fall back to building without a base image.

* Add integration test for torch 1.13.0 fallback

* Create a project with torch 1.13.0
* Do not explicitly specify using base images
* Specify the schema explicitly so that it is not
inferred, because inference fails on non-GPU machines
* Determine that the project builds successfully

* Fix build CLI entry

* Fix use of flag changed to determine explicit flag

* Add a new function that determines whether the
flag has been changed and outputs a bool pointer.
* Add tests for explicitly set both in the true
and false cases.
  • Loading branch information
8W9aG authored Aug 14, 2024
1 parent 2726dc7 commit 6028963
Show file tree
Hide file tree
Showing 14 changed files with 679 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ pkg/dockerfile/embed/*.whl
docs/README.md
docs/CONTRIBUTING.md
venv
base-image
23 changes: 14 additions & 9 deletions pkg/cli/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ var buildUseCudaBaseImage string
var buildDockerfileFile string
var buildUseCogBaseImage bool

const useCogBaseImageFlagKey = "use-cog-base-image"

func newBuildCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "build",
Expand Down Expand Up @@ -63,13 +65,7 @@ func buildCommand(cmd *cobra.Command, args []string) error {
return err
}

if err := image.Build(cfg, projectDir, imageName, buildSecrets, buildNoCache, buildSeparateWeights, buildUseCudaBaseImage, buildProgressOutput, buildSchemaFile, buildDockerfileFile, buildUseCogBaseImage); err != nil {
if buildUseCogBaseImage && cmd.Flags().Changed("use-cog-base-image") {
console.Infof("Build failed with Cog base image enabled by default. " +
"If you want to build without using pre-built base images, " +
"try `cog build --use-cog-base-image=false`.")
}

if err := image.Build(cfg, projectDir, imageName, buildSecrets, buildNoCache, buildSeparateWeights, buildUseCudaBaseImage, buildProgressOutput, buildSchemaFile, buildDockerfileFile, DetermineUseCogBaseImage(cmd)); err != nil {
return err
}

Expand Down Expand Up @@ -116,7 +112,7 @@ func addDockerfileFlag(cmd *cobra.Command) {
}

func addUseCogBaseImageFlag(cmd *cobra.Command) {
cmd.Flags().BoolVar(&buildUseCogBaseImage, "use-cog-base-image", true, "Use pre-built Cog base image for faster cold boots")
cmd.Flags().BoolVar(&buildUseCogBaseImage, useCogBaseImageFlagKey, true, "Use pre-built Cog base image for faster cold boots")
}

func addBuildTimestampFlag(cmd *cobra.Command) {
Expand All @@ -125,7 +121,7 @@ func addBuildTimestampFlag(cmd *cobra.Command) {
}

func checkMutuallyExclusiveFlags(cmd *cobra.Command, args []string) error {
flags := []string{"use-cog-base-image", "use-cuda-base-image", "dockerfile"}
flags := []string{useCogBaseImageFlagKey, "use-cuda-base-image", "dockerfile"}
var flagsSet []string
for _, flag := range flags {
if cmd.Flag(flag).Changed {
Expand All @@ -137,3 +133,12 @@ func checkMutuallyExclusiveFlags(cmd *cobra.Command, args []string) error {
}
return nil
}

// DetermineUseCogBaseImage converts the use-cog-base-image flag into a
// tri-state value. It returns nil when the flag was not changed on cmd's
// flag set, and otherwise a pointer to a copy of the parsed flag value.
func DetermineUseCogBaseImage(cmd *cobra.Command) *bool {
	if cmd.Flags().Changed(useCogBaseImageFlagKey) {
		// Copy the package-level value so callers never alias the flag variable.
		explicit := buildUseCogBaseImage
		return &explicit
	}
	return nil
}
5 changes: 4 additions & 1 deletion pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ func cmdDockerfile(cmd *cobra.Command, args []string) error {
}()

generator.SetUseCudaBaseImage(buildUseCudaBaseImage)
generator.SetUseCogBaseImage(buildUseCogBaseImage)
useCogBaseImage := DetermineUseCogBaseImage(cmd)
if useCogBaseImage != nil {
generator.SetUseCogBaseImage(*useCogBaseImage)
}

if buildSeparateWeights {
if imageName == "" {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/predict.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func cmdPredict(cmd *cobra.Command, args []string) error {
return err
}

if imageName, err = image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, buildUseCogBaseImage, buildProgressOutput); err != nil {
if imageName, err = image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput); err != nil {
return err
}

Expand Down
7 changes: 1 addition & 6 deletions pkg/cli/push.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,7 @@ func push(cmd *cobra.Command, args []string) error {
}
}

if err := image.Build(cfg, projectDir, imageName, buildSecrets, buildNoCache, buildSeparateWeights, buildUseCudaBaseImage, buildProgressOutput, buildSchemaFile, buildDockerfileFile, buildUseCogBaseImage); err != nil {
if buildUseCogBaseImage && cmd.Flags().Changed("use-cog-base-image") {
console.Infof("Push failed with Cog base image enabled by default. " +
"If you want to push without using pre-built base images, " +
"try `cog push --use-cog-base-image=false`.")
}
if err := image.Build(cfg, projectDir, imageName, buildSecrets, buildNoCache, buildSeparateWeights, buildUseCudaBaseImage, buildProgressOutput, buildSchemaFile, buildDockerfileFile, DetermineUseCogBaseImage(cmd)); err != nil {

return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func run(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
imageName, err := image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, buildUseCogBaseImage, buildProgressOutput)
imageName, err := image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/train.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func cmdTrain(cmd *cobra.Command, args []string) error {
return err
}

if imageName, err = image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, buildUseCogBaseImage, buildProgressOutput); err != nil {
if imageName, err = image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput); err != nil {
return err
}

Expand Down
87 changes: 51 additions & 36 deletions pkg/dockerfile/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ type Generator struct {
GOARCH string

useCudaBaseImage bool
useCogBaseImage bool
useCogBaseImage *bool

// absolute path to tmpDir, a directory that will be cleaned up
tmpDir string
Expand Down Expand Up @@ -93,7 +93,7 @@ func NewGenerator(config *config.Config, dir string) (*Generator, error) {
relativeTmpDir: relativeTmpDir,
fileWalker: filepath.Walk,
useCudaBaseImage: true,
useCogBaseImage: false,
useCogBaseImage: nil,
}, nil
}

Expand All @@ -103,11 +103,16 @@ func (g *Generator) SetUseCudaBaseImage(argumentValue string) {
}

func (g *Generator) SetUseCogBaseImage(useCogBaseImage bool) {
g.useCogBaseImage = useCogBaseImage
g.useCogBaseImage = new(bool)
*g.useCogBaseImage = useCogBaseImage
}

func (g *Generator) IsUsingCogBaseImage() bool {
return g.useCogBaseImage
useCogBaseImage := g.useCogBaseImage
if useCogBaseImage != nil {
return *useCogBaseImage
}
return true
}

func (g *Generator) generateInitialSteps() (string, error) {
Expand All @@ -128,7 +133,7 @@ func (g *Generator) generateInitialSteps() (string, error) {
return "", err
}

if g.useCogBaseImage {
if g.IsUsingCogBaseImage() {
pipInstalls, err := g.pipInstalls()
if err != nil {
return "", err
Expand Down Expand Up @@ -264,37 +269,14 @@ func (g *Generator) Cleanup() error {
}

func (g *Generator) BaseImage() (string, error) {
if g.useCogBaseImage {
var changed bool
var err error

cudaVersion := g.Config.Build.CUDA

pythonVersion := g.Config.Build.PythonVersion
pythonVersion, changed, err = stripPatchVersion(pythonVersion)
if err != nil {
return "", err
}
if changed {
console.Warnf("Stripping patch version from Python version %s to %s", g.Config.Build.PythonVersion, pythonVersion)
}

torchVersion, _ := g.Config.TorchVersion()
torchVersion, changed, err = stripPatchVersion(torchVersion)
if err != nil {
return "", err
if g.IsUsingCogBaseImage() {
baseImage, err := g.determineBaseImageName()
if err == nil || g.useCogBaseImage != nil {
return baseImage, err
}
if changed {
console.Warnf("Stripping patch version from Torch version %s to %s", g.Config.Build.PythonVersion, pythonVersion)
}

// validate that the base image configuration exists
imageGenerator, err := NewBaseImageGenerator(cudaVersion, pythonVersion, torchVersion)
if err != nil {
return "", err
console.Warnf("Could not find a suitable base image, continuing without base image support (%v).", err)
}
baseImage := BaseImageName(imageGenerator.cudaVersion, imageGenerator.pythonVersion, imageGenerator.torchVersion)
return baseImage, nil
}

if g.Config.Build.GPU && g.useCudaBaseImage {
Expand Down Expand Up @@ -336,7 +318,7 @@ func (g *Generator) aptInstalls() (string, error) {
return "", nil
}

if g.useCogBaseImage {
if g.IsUsingCogBaseImage() {
packages = slices.FilterString(packages, func(pkg string) bool {
return !slices.ContainsString(baseImageSystemPackages, pkg)
})
Expand All @@ -348,7 +330,7 @@ func (g *Generator) aptInstalls() (string, error) {
}

func (g *Generator) installPython() (string, error) {
if g.Config.Build.GPU && g.useCudaBaseImage && !g.useCogBaseImage {
if g.Config.Build.GPU && g.useCudaBaseImage && !g.IsUsingCogBaseImage() {
return g.installPythonCUDA()
}
return "", nil
Expand Down Expand Up @@ -487,7 +469,7 @@ func (g *Generator) copyPipPackagesFromInstallStage() string {
// return "COPY --from=deps --link /dep COPY --from=deps /src"
// ...except it's actually /root/.pyenv/versions/3.8.17/lib/python3.8/site-packages
py := g.Config.Build.PythonVersion
if g.Config.Build.GPU && (g.useCudaBaseImage || g.useCogBaseImage) {
if g.Config.Build.GPU && (g.useCudaBaseImage || g.IsUsingCogBaseImage()) {
// this requires buildkit!
// we should check for buildkit and otherwise revert to symlinks or copying into /src
// we mount to avoid copying, which avoids having two copies in this layer
Expand Down Expand Up @@ -596,6 +578,39 @@ func (g *Generator) GenerateWeightsManifest() (*weights.Manifest, error) {
return m, nil
}

// determineBaseImageName resolves the name of the Cog base image that matches
// this generator's CUDA, Python and Torch configuration.
//
// The Python and Torch versions are passed through stripPatchVersion before
// the lookup, and a warning is logged whenever that changes the version
// string. An error is returned if either version cannot be stripped or if
// NewBaseImageGenerator rejects the resulting combination.
func (g *Generator) determineBaseImageName() (string, error) {
	cudaVersion := g.Config.Build.CUDA

	configuredPython := g.Config.Build.PythonVersion
	pythonVersion, changed, err := stripPatchVersion(configuredPython)
	if err != nil {
		return "", err
	}
	if changed {
		console.Warnf("Stripping patch version from Python version %s to %s", configuredPython, pythonVersion)
	}

	// The second result of TorchVersion is intentionally discarded, as before;
	// an empty version string passes through stripPatchVersion unchanged.
	configuredTorch, _ := g.Config.TorchVersion()
	torchVersion, changed, err := stripPatchVersion(configuredTorch)
	if err != nil {
		return "", err
	}
	if changed {
		// Report the Torch versions here; the previous message mistakenly
		// printed the Python versions.
		console.Warnf("Stripping patch version from Torch version %s to %s", configuredTorch, torchVersion)
	}

	// validate that the base image configuration exists
	imageGenerator, err := NewBaseImageGenerator(cudaVersion, pythonVersion, torchVersion)
	if err != nil {
		return "", err
	}
	return BaseImageName(imageGenerator.cudaVersion, imageGenerator.pythonVersion, imageGenerator.torchVersion), nil
}

func stripPatchVersion(versionString string) (string, bool, error) {
if versionString == "" {
return "", false, nil
Expand Down
8 changes: 8 additions & 0 deletions pkg/dockerfile/generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ predict: predict.py:Predictor

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)

Expand Down Expand Up @@ -123,6 +124,7 @@ predict: predict.py:Predictor
require.NoError(t, conf.ValidateAndComplete(""))
gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)

Expand Down Expand Up @@ -167,6 +169,7 @@ predict: predict.py:Predictor

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)

Expand Down Expand Up @@ -218,6 +221,7 @@ predict: predict.py:Predictor

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)

Expand Down Expand Up @@ -268,6 +272,7 @@ build:

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)

Expand Down Expand Up @@ -303,6 +308,7 @@ build:

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
_, actual, _, err := gen.GenerateModelBaseWithSeparateWeights("r8.im/replicate/cog-test")
require.NoError(t, err)
fmt.Println(actual)
Expand Down Expand Up @@ -356,6 +362,7 @@ predict: predict.py:Predictor

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)

gen.fileWalker = func(root string, walkFn filepath.WalkFunc) error {
for _, path := range []string{"checkpoints/large-a", "models/large-b", "root-large"} {
Expand Down Expand Up @@ -453,6 +460,7 @@ predict: predict.py:Predictor

gen, err := NewGenerator(conf, tmpDir)
require.NoError(t, err)
gen.SetUseCogBaseImage(false)
actual, err := gen.GenerateDockerfileWithoutSeparateWeights()
require.NoError(t, err)

Expand Down
12 changes: 8 additions & 4 deletions pkg/image/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const bundledSchemaPy = ".cog/schema.py"
// Build a Cog model from a config
//
// This is separated out from docker.Build(), so that can be as close as possible to the behavior of 'docker build'.
func Build(cfg *config.Config, dir, imageName string, secrets []string, noCache, separateWeights bool, useCudaBaseImage string, progressOutput string, schemaFile string, dockerfileFile string, useCogBaseImage bool) error {
func Build(cfg *config.Config, dir, imageName string, secrets []string, noCache, separateWeights bool, useCudaBaseImage string, progressOutput string, schemaFile string, dockerfileFile string, useCogBaseImage *bool) error {
console.Infof("Building Docker image from environment in cog.yaml as %s...", imageName)

// remove bundled schema files that may be left from previous builds
Expand Down Expand Up @@ -56,7 +56,9 @@ func Build(cfg *config.Config, dir, imageName string, secrets []string, noCache,
}
}()
generator.SetUseCudaBaseImage(useCudaBaseImage)
generator.SetUseCogBaseImage(useCogBaseImage)
if useCogBaseImage != nil {
generator.SetUseCogBaseImage(*useCogBaseImage)
}

if generator.IsUsingCogBaseImage() {
cogBaseImageName, err = generator.BaseImage()
Expand Down Expand Up @@ -222,7 +224,7 @@ func Build(cfg *config.Config, dir, imageName string, secrets []string, noCache,
return nil
}

func BuildBase(cfg *config.Config, dir string, useCudaBaseImage string, useCogBaseImage bool, progressOutput string) (string, error) {
func BuildBase(cfg *config.Config, dir string, useCudaBaseImage string, useCogBaseImage *bool, progressOutput string) (string, error) {
// TODO: better image management so we don't eat up disk space
// https://github.com/replicate/cog/issues/80
imageName := config.BaseDockerImageName(dir)
Expand All @@ -239,7 +241,9 @@ func BuildBase(cfg *config.Config, dir string, useCudaBaseImage string, useCogBa
}()

generator.SetUseCudaBaseImage(useCudaBaseImage)
generator.SetUseCogBaseImage(useCogBaseImage)
if useCogBaseImage != nil {
generator.SetUseCogBaseImage(*useCogBaseImage)
}

dockerfileContents, err := generator.GenerateModelBase()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
build:
gpu: true
python_version: "3.9"
python_packages:
- "torch==1.13.0"
predict: "predict.py:Predictor"
Loading

0 comments on commit 6028963

Please sign in to comment.