Skip to content

Commit 71ef005

Browse files
fix : handle deletions for both namespace id vs namespace name folders (#623)
* fix : handle deletions for both namespace id vs namespace name folders (#621) * fix : handle deletions for both namespace id vs namespace name folders * fix: test cases fix for job deletion bug on namespace directory name change * fix: delete old namespace folder if empty after the delete api too * fix: don't expose dag dir deletion code * fix: prime scheduler adapt new method definitions Co-authored-by: Yash Bhardwaj <[email protected]>
1 parent eb50224 commit 71ef005

File tree

9 files changed

+111
-100
lines changed

9 files changed

+111
-100
lines changed

ext/scheduler/airflow2/airflow.go

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,12 @@ func (s *scheduler) DeployJobs(ctx context.Context, namespace models.NamespaceSp
112112
}
113113

114114
// deleteDirectoryIfEmpty removes the jobs folder if it exists and is empty
115-
func deleteDirectoryIfEmpty(ctx context.Context, jobsDir string, bucket Bucket) error {
115+
func deleteDirectoryIfEmpty(ctx context.Context, nsDirectoryIdentifier string, bucket Bucket) error {
116116
spanCtx, span := startChildSpan(ctx, "deleteDirectoryIfEmpty")
117117
span.End()
118118

119+
jobsDir := PathForJobDirectory(JobsDir, nsDirectoryIdentifier)
120+
119121
it := bucket.List(&blob.ListOptions{
120122
Prefix: jobsDir,
121123
})
@@ -157,20 +159,10 @@ func (s *scheduler) compileAndUpload(ctx context.Context, namespace models.Names
157159
return deployFailure
158160
}
159161
}
160-
err = deleteDirectoryIfEmpty(ctx, PathForJobDirectory(JobsDir, namespace.ID.String()), bucket)
161-
if err != nil {
162-
if gcerrors.Code(err) != gcerrors.NotFound {
163-
deployFailure := models.JobDeploymentFailure{
164-
JobName: currentJobSpec.Name,
165-
Message: "failed to cleanup old dags folder " + err.Error(),
166-
}
167-
return deployFailure
168-
}
169-
}
170162
return nil
171163
}
172164

173-
func (s *scheduler) DeleteJobs(ctx context.Context, namespace models.NamespaceSpec, jobNames []string, progressObserver progress.Observer) error {
165+
func (s *scheduler) DeleteJobs(ctx context.Context, nsDirectoryIdentifier string, namespace models.NamespaceSpec, jobNames []string, progressObserver progress.Observer) error {
174166
spanCtx, span := startChildSpan(ctx, "DeleteJobs")
175167
defer span.End()
176168

@@ -182,7 +174,7 @@ func (s *scheduler) DeleteJobs(ctx context.Context, namespace models.NamespaceSp
182174
if strings.TrimSpace(jobName) == "" {
183175
return ErrEmptyJobName
184176
}
185-
blobKey := PathFromJobName(JobsDir, namespace.ID.String(), jobName, JobsExtension)
177+
blobKey := PathFromJobName(JobsDir, nsDirectoryIdentifier, jobName, JobsExtension)
186178
if err := bucket.Delete(spanCtx, blobKey); err != nil {
187179
// ignore missing files
188180
if gcerrors.Code(err) != gcerrors.NotFound {
@@ -193,11 +185,17 @@ func (s *scheduler) DeleteJobs(ctx context.Context, namespace models.NamespaceSp
193185
Name: jobName,
194186
})
195187
}
188+
err = deleteDirectoryIfEmpty(ctx, nsDirectoryIdentifier, bucket)
189+
if err != nil {
190+
if gcerrors.Code(err) != gcerrors.NotFound {
191+
return err
192+
}
193+
}
196194
return nil
197195
}
198196

199197
// TODO: listing jobs should not go through the scheduler; it should read from the db, as it has nothing to do with the scheduler.
200-
func (s *scheduler) ListJobs(ctx context.Context, namespace models.NamespaceSpec, opts models.SchedulerListOptions) ([]models.Job, error) {
198+
func (s *scheduler) ListJobs(ctx context.Context, nsDirectoryIdentifier string, namespace models.NamespaceSpec, opts models.SchedulerListOptions) ([]models.Job, error) {
201199
spanCtx, span := startChildSpan(ctx, "ListJobs")
202200
defer span.End()
203201

@@ -207,11 +205,10 @@ func (s *scheduler) ListJobs(ctx context.Context, namespace models.NamespaceSpec
207205
}
208206
defer bucket.Close()
209207

210-
namespaceID := namespace.ID.String()
211208
var jobs []models.Job
212209
// get all items under namespace directory
213210
it := bucket.List(&blob.ListOptions{
214-
Prefix: PathForJobDirectory(JobsDir, namespaceID),
211+
Prefix: PathForJobDirectory(JobsDir, nsDirectoryIdentifier),
215212
})
216213
for {
217214
obj, err := it.Next(spanCtx)
@@ -233,7 +230,7 @@ func (s *scheduler) ListJobs(ctx context.Context, namespace models.NamespaceSpec
233230
return jobs, nil
234231
}
235232
for idx, job := range jobs {
236-
jobs[idx].Contents, err = bucket.ReadAll(spanCtx, PathFromJobName(JobsDir, namespaceID, job.Name, JobsExtension))
233+
jobs[idx].Contents, err = bucket.ReadAll(spanCtx, PathFromJobName(JobsDir, nsDirectoryIdentifier, job.Name, JobsExtension))
237234
if err != nil {
238235
return nil, err
239236
}

ext/scheduler/airflow2/airflow_test.go

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,6 @@ func TestAirflow2(t *testing.T) {
134134
mockBucket.On("WriteAll", mock.Anything, "dags/__lib.py", airflow2.SharedLib, (*blob.WriterOptions)(nil)).Return(nil)
135135
mockBucket.On("WriteAll", ctx, fmt.Sprintf("dags/%s/%s.py", ns.Name, jobSpecs[0].Name), []byte("job-1-compiled"), (*blob.WriterOptions)(nil)).Return(nil)
136136
mockBucket.On("Delete", ctx, fmt.Sprintf("dags/%s/%s.py", ns.ID.String(), jobSpecs[0].Name)).Return(nil)
137-
mockBucket.On("Delete", ctx, fmt.Sprintf("dags/%s", ns.ID.String())).Return(nil)
138-
mockBucket.On("List", &blob.ListOptions{
139-
Prefix: fmt.Sprintf("dags/%s", ns.ID.String()),
140-
}).Return(&blob.ListIterator{})
141137

142138
expectedDeployDetail := models.JobDeploymentDetail{
143139
SuccessCount: 1,
@@ -211,23 +207,27 @@ func TestAirflow2(t *testing.T) {
211207
})
212208
t.Run("DeleteJobs", func(t *testing.T) {
213209
t.Run("should successfully delete jobs from blob buckets", func(t *testing.T) {
214-
jobKey := fmt.Sprintf("dags/%s/%s.py", nsUUID, jobSpecs[0].Name)
210+
jobKey := fmt.Sprintf("dags/%s/%s.py", ns.Name, jobSpecs[0].Name)
215211

216212
inMemBlob := memblob.OpenBucket(nil)
217213
_ = inMemBlob.WriteAll(ctx, jobKey, []byte("hello"), nil)
218214

219215
mockBucket := &MockedBucket{
220216
bucket: inMemBlob,
221217
}
222-
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s/%s.py", nsUUID, jobSpecs[0].Name)).Return(nil)
218+
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s/%s.py", ns.Name, jobSpecs[0].Name)).Return(nil)
219+
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s", ns.Name)).Return(nil)
220+
mockBucket.On("List", &blob.ListOptions{
221+
Prefix: fmt.Sprintf("dags/%s", ns.Name),
222+
}).Return(&blob.ListIterator{})
223223
defer mockBucket.AssertExpectations(t)
224224

225225
mockBucketFac := new(MockedBucketFactory)
226226
mockBucketFac.On("New", mock.Anything, proj).Return(mockBucket, nil)
227227
defer mockBucketFac.AssertExpectations(t)
228228

229229
air := airflow2.NewScheduler(mockBucketFac, nil, nil)
230-
err := air.DeleteJobs(ctx, ns, []string{"job-1"}, nil)
230+
err := air.DeleteJobs(ctx, ns.Name, ns, []string{"job-1"}, nil)
231231
assert.Nil(t, err)
232232

233233
jobStillExist, err := inMemBlob.Exists(ctx, jobKey)
@@ -239,15 +239,19 @@ func TestAirflow2(t *testing.T) {
239239
mockBucket := &MockedBucket{
240240
bucket: inMemBlob,
241241
}
242-
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s/%s.py", nsUUID, jobSpecs[0].Name)).Return(nil)
242+
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s/%s.py", ns.Name, jobSpecs[0].Name)).Return(nil)
243+
mockBucket.On("Delete", mock.Anything, fmt.Sprintf("dags/%s", ns.Name)).Return(nil)
244+
mockBucket.On("List", &blob.ListOptions{
245+
Prefix: fmt.Sprintf("dags/%s", ns.Name),
246+
}).Return(&blob.ListIterator{})
243247
defer mockBucket.AssertExpectations(t)
244248

245249
mockBucketFac := new(MockedBucketFactory)
246250
mockBucketFac.On("New", mock.Anything, proj).Return(mockBucket, nil)
247251
defer mockBucketFac.AssertExpectations(t)
248252

249253
air := airflow2.NewScheduler(mockBucketFac, nil, nil)
250-
err := air.DeleteJobs(ctx, ns, []string{"job-1"}, nil)
254+
err := air.DeleteJobs(ctx, ns.Name, ns, []string{"job-1"}, nil)
251255
assert.Nil(t, err)
252256
})
253257
})
@@ -257,11 +261,11 @@ func TestAirflow2(t *testing.T) {
257261
mockBucket := &MockedBucket{
258262
bucket: inMemBlob,
259263
}
260-
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()), "file1.py"), []byte("test1"), nil)
261-
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()), "file2.py"), []byte("test2"), nil)
264+
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name), "file1.py"), []byte("test1"), nil)
265+
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name), "file2.py"), []byte("test2"), nil)
262266
_ = inMemBlob.WriteAll(ctx, "bar.py", []byte("test3"), nil)
263267
mockBucket.On("List", &blob.ListOptions{
264-
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()),
268+
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name),
265269
})
266270
defer mockBucket.AssertExpectations(t)
267271

@@ -270,7 +274,7 @@ func TestAirflow2(t *testing.T) {
270274
defer mockBucketFac.AssertExpectations(t)
271275

272276
air := airflow2.NewScheduler(mockBucketFac, nil, nil)
273-
respJobs, err := air.ListJobs(ctx, ns, models.SchedulerListOptions{OnlyName: true})
277+
respJobs, err := air.ListJobs(ctx, ns.Name, ns, models.SchedulerListOptions{OnlyName: true})
274278
assert.Nil(t, err)
275279
assert.Equal(t, 2, len(respJobs))
276280
})
@@ -279,11 +283,11 @@ func TestAirflow2(t *testing.T) {
279283
mockBucket := &MockedBucket{
280284
bucket: inMemBlob,
281285
}
282-
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()), "file1.py"), []byte("test1"), nil)
283-
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()), "file2.json"), []byte("test2"), nil)
286+
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name), "file1.py"), []byte("test1"), nil)
287+
_ = inMemBlob.WriteAll(ctx, filepath.Join(airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name), "file2.json"), []byte("test2"), nil)
284288
_ = inMemBlob.WriteAll(ctx, "bar.py", []byte("test3"), nil)
285289
mockBucket.On("List", &blob.ListOptions{
286-
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()),
290+
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name),
287291
})
288292
defer mockBucket.AssertExpectations(t)
289293

@@ -292,7 +296,7 @@ func TestAirflow2(t *testing.T) {
292296
defer mockBucketFac.AssertExpectations(t)
293297

294298
air := airflow2.NewScheduler(mockBucketFac, nil, nil)
295-
respJobs, err := air.ListJobs(ctx, ns, models.SchedulerListOptions{OnlyName: true})
299+
respJobs, err := air.ListJobs(ctx, ns.Name, ns, models.SchedulerListOptions{OnlyName: true})
296300
assert.Nil(t, err)
297301
assert.Equal(t, 1, len(respJobs))
298302
})
@@ -301,21 +305,21 @@ func TestAirflow2(t *testing.T) {
301305
mockBucket := &MockedBucket{
302306
bucket: inMemBlob,
303307
}
304-
_ = inMemBlob.WriteAll(ctx, airflow2.PathFromJobName(airflow2.JobsDir, ns.ID.String(), "file1", airflow2.JobsExtension), []byte("test1"), nil)
305-
_ = inMemBlob.WriteAll(ctx, airflow2.PathFromJobName(airflow2.JobsDir, ns.ID.String(), "file2", airflow2.JobsExtension), []byte("test2"), nil)
308+
_ = inMemBlob.WriteAll(ctx, airflow2.PathFromJobName(airflow2.JobsDir, ns.Name, "file1", airflow2.JobsExtension), []byte("test1"), nil)
309+
_ = inMemBlob.WriteAll(ctx, airflow2.PathFromJobName(airflow2.JobsDir, ns.Name, "file2", airflow2.JobsExtension), []byte("test2"), nil)
306310
mockBucket.On("List", &blob.ListOptions{
307-
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.ID.String()),
311+
Prefix: airflow2.PathForJobDirectory(airflow2.JobsDir, ns.Name),
308312
})
309-
mockBucket.On("ReadAll", mock.Anything, airflow2.PathFromJobName(airflow2.JobsDir, ns.ID.String(), "file1", airflow2.JobsExtension))
310-
mockBucket.On("ReadAll", mock.Anything, airflow2.PathFromJobName(airflow2.JobsDir, ns.ID.String(), "file2", airflow2.JobsExtension))
313+
mockBucket.On("ReadAll", mock.Anything, airflow2.PathFromJobName(airflow2.JobsDir, ns.Name, "file1", airflow2.JobsExtension))
314+
mockBucket.On("ReadAll", mock.Anything, airflow2.PathFromJobName(airflow2.JobsDir, ns.Name, "file2", airflow2.JobsExtension))
311315
defer mockBucket.AssertExpectations(t)
312316

313317
mockBucketFac := new(MockedBucketFactory)
314318
mockBucketFac.On("New", mock.Anything, proj).Return(mockBucket, nil)
315319
defer mockBucketFac.AssertExpectations(t)
316320

317321
air := airflow2.NewScheduler(mockBucketFac, nil, nil)
318-
respJobs, err := air.ListJobs(ctx, ns, models.SchedulerListOptions{})
322+
respJobs, err := air.ListJobs(ctx, ns.Name, ns, models.SchedulerListOptions{})
319323
assert.Nil(t, err)
320324
assert.Equal(t, 2, len(respJobs))
321325
})

ext/scheduler/prime/scheduler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ func (*Scheduler) VerifyJob(context.Context, models.NamespaceSpec, models.JobSpe
2323
return nil
2424
}
2525

26-
func (*Scheduler) ListJobs(context.Context, models.NamespaceSpec, models.SchedulerListOptions) ([]models.Job, error) {
26+
func (*Scheduler) ListJobs(context.Context, string, models.NamespaceSpec, models.SchedulerListOptions) ([]models.Job, error) {
2727
panic("implement me")
2828
}
2929

@@ -46,7 +46,7 @@ func (s *Scheduler) DeployJobs(ctx context.Context, namespace models.NamespaceSp
4646
return models.JobDeploymentDetail{}, nil
4747
}
4848

49-
func (*Scheduler) DeleteJobs(context.Context, models.NamespaceSpec, []string, progress.Observer) error {
49+
func (*Scheduler) DeleteJobs(context.Context, string, models.NamespaceSpec, []string, progress.Observer) error {
5050
return nil
5151
}
5252

job/deployer.go

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,27 +113,32 @@ func (d *deployer) completeJobDeployment(ctx context.Context, jobDeployment mode
113113
}
114114

115115
func (d *deployer) cleanPerNamespace(ctx context.Context, namespaceSpec models.NamespaceSpec, jobs []models.JobSpec) error {
116-
// get all stored job names
117-
schedulerJobs, err := d.batchScheduler.ListJobs(ctx, namespaceSpec, models.SchedulerListOptions{OnlyName: true})
118-
if err != nil {
119-
return err
120-
}
121-
var destJobNames []string
122-
for _, j := range schedulerJobs {
123-
destJobNames = append(destJobNames, j.Name)
124-
}
125-
126-
// filter what we need to keep/delete
127-
var sourceJobNames []string
128-
for _, jobSpec := range jobs {
129-
sourceJobNames = append(sourceJobNames, jobSpec.Name)
116+
namespaceIdentifiers := []string{
117+
namespaceSpec.ID.String(), // old, kept for folder cleanup, to be removed after complete migration of name space folder #cleaup
118+
namespaceSpec.Name,
130119
}
131-
jobsToDelete := setSubtract(destJobNames, sourceJobNames)
132-
jobsToDelete = jobDeletionFilter(jobsToDelete)
133-
if len(jobsToDelete) > 0 {
134-
if err := d.batchScheduler.DeleteJobs(ctx, namespaceSpec, jobsToDelete, nil); err != nil {
120+
for _, nsDirectoryIdentifier := range namespaceIdentifiers {
121+
// get all stored job names
122+
schedulerJobs, err := d.batchScheduler.ListJobs(ctx, nsDirectoryIdentifier, namespaceSpec, models.SchedulerListOptions{OnlyName: true})
123+
if err != nil {
135124
return err
136125
}
126+
var destJobNames []string
127+
for _, j := range schedulerJobs {
128+
destJobNames = append(destJobNames, j.Name)
129+
}
130+
131+
// filter what we need to keep/delete
132+
var sourceJobNames []string
133+
for _, jobSpec := range jobs {
134+
sourceJobNames = append(sourceJobNames, jobSpec.Name)
135+
}
136+
jobsToDelete := setSubtract(destJobNames, sourceJobNames)
137+
if len(jobsToDelete) > 0 {
138+
if err := d.batchScheduler.DeleteJobs(ctx, nsDirectoryIdentifier, namespaceSpec, jobsToDelete, nil); err != nil {
139+
return err
140+
}
141+
}
137142
}
138143
return nil
139144
}

0 commit comments

Comments
 (0)