Skip to content
This repository was archived by the owner on Oct 9, 2023. It is now read-only.

Commit 4d5c265

Browse files
Ketan Umarelyft-buildnotify-15
authored andcommitted
Propagate container error to the node (#39)
- in the case when a container exits as success, but writes an error file, we would exit as success. This change actually returns the correct error status
1 parent 2575c97 commit 4d5c265

File tree

4 files changed

+162
-28
lines changed

4 files changed

+162
-28
lines changed

go/tasks/v1/config.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ const configSectionKey = "plugins"
77
const logConfigSectionKey = "logs"
88

99
type Config struct {
10-
IsDebugMode bool `json:"debug-mode" pflag:",Enable debug mode"`
10+
IsDebugMode bool `json:"debug-mode" pflag:",Enable debug mode"`
1111
EnabledPlugins *[]string `json:"enabled-plugins,omitempty" pflag:",List of enabled plugins"`
1212
}
1313

@@ -35,4 +35,4 @@ func GetConfig() *Config {
3535

3636
func GetLogConfig() *LogConfig {
3737
return config.GetSection(logConfigSectionKey).(*LogConfig)
38-
}
38+
}

go/tasks/v1/flytek8s/plugin_executor.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ func (e *K8sTaskExecutor) CheckTaskStatus(ctx context.Context, taskCtx types.Tas
190190
return types.TaskStatusUndefined, nil, errors.Wrapf(errors.TaskEventRecordingFailed, err, "failed to record state transition [%v] -> [%v]", taskCtx.GetPhase(), status.Phase)
191191
}
192192
}
193-
return status, nil, err
193+
return finalStatus, nil, err
194194
}
195195

196196
func (e *K8sTaskExecutor) KillTask(ctx context.Context, taskCtx types.TaskContext) error {

go/tasks/v1/flytek8s/plugin_executor_test.go

Lines changed: 155 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@ package flytek8s_test
33
import (
44
"bytes"
55
"fmt"
6-
"github.com/lyft/flyteplugins/go/tasks/v1/errors"
76
"testing"
87
"time"
98

9+
"github.com/lyft/flyteplugins/go/tasks/v1/errors"
10+
1011
"github.com/lyft/flytestdlib/promutils"
1112

1213
"github.com/lyft/flyteidl/gen/pb-go/flyteidl/event"
@@ -152,24 +153,31 @@ func TestK8sTaskExecutor_StartTask(t *testing.T) {
152153

153154
func TestK8sTaskExecutor_CheckTaskStatus(t *testing.T) {
154155
ctx := context.TODO()
156+
c := flytek8s.InitializeFake()
155157

156-
evRecorder := &mocks2.EventRecorder{}
158+
t.Run("phaseChange", func(t *testing.T) {
157159

158-
tctx := getMockTaskContext()
159-
mockResourceHandler := &mocks.K8sResourceHandler{}
160-
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
161-
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
160+
evRecorder := &mocks2.EventRecorder{}
161+
tctx := getMockTaskContext()
162+
mockResourceHandler := &mocks.K8sResourceHandler{}
163+
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
164+
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
162165

163-
assert.NoError(t, k.Initialize(ctx, evRecorder, nil, func(ownerId string) {}, "x"))
164-
c := flytek8s.InitializeFake()
165-
testPod := &v1.Pod{}
166-
testPod.SetName(tctx.GetTaskExecutionID().GetGeneratedName())
167-
testPod.SetNamespace(tctx.GetNamespace())
168-
testPod.SetOwnerReferences([]v12.OwnerReference{tctx.GetOwnerReference()})
169-
_ = c.Delete(ctx, testPod)
170-
assert.NoError(t, c.Create(ctx, testPod))
166+
store, err := storage.NewDataStore(&storage.Config{
167+
Type: storage.TypeMemory,
168+
}, promutils.NewTestScope())
169+
assert.NoError(t, err)
171170

172-
t.Run("phaseChange", func(t *testing.T) {
171+
assert.NoError(t, k.Initialize(ctx, evRecorder, store, func(ownerId string) {}, "x"))
172+
testPod := &v1.Pod{}
173+
testPod.SetName(tctx.GetTaskExecutionID().GetGeneratedName())
174+
testPod.SetNamespace(tctx.GetNamespace())
175+
testPod.SetOwnerReferences([]v12.OwnerReference{tctx.GetOwnerReference()})
176+
_ = c.Delete(ctx, testPod)
177+
assert.NoError(t, c.Create(ctx, testPod))
178+
defer func() {
179+
_ = c.Delete(ctx, testPod)
180+
}()
173181

174182
info := &events.TaskEventInfo{}
175183
info.Logs = []*core.TaskLog{{Uri: "log1"}}
@@ -189,6 +197,28 @@ func TestK8sTaskExecutor_CheckTaskStatus(t *testing.T) {
189197

190198
t.Run("noChange", func(t *testing.T) {
191199

200+
evRecorder := &mocks2.EventRecorder{}
201+
tctx := getMockTaskContext()
202+
mockResourceHandler := &mocks.K8sResourceHandler{}
203+
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
204+
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
205+
206+
store, err := storage.NewDataStore(&storage.Config{
207+
Type: storage.TypeMemory,
208+
}, promutils.NewTestScope())
209+
assert.NoError(t, err)
210+
211+
assert.NoError(t, k.Initialize(ctx, evRecorder, store, func(ownerId string) {}, "x"))
212+
testPod := &v1.Pod{}
213+
testPod.SetName(tctx.GetTaskExecutionID().GetGeneratedName())
214+
testPod.SetNamespace(tctx.GetNamespace())
215+
testPod.SetOwnerReferences([]v12.OwnerReference{tctx.GetOwnerReference()})
216+
_ = c.Delete(ctx, testPod)
217+
assert.NoError(t, c.Create(ctx, testPod))
218+
defer func() {
219+
_ = c.Delete(ctx, testPod)
220+
}()
221+
192222
info := &events.TaskEventInfo{}
193223
info.Logs = []*core.TaskLog{{Uri: "log1"}}
194224
expectedOldPhase := types.TaskPhaseRunning
@@ -204,25 +234,128 @@ func TestK8sTaskExecutor_CheckTaskStatus(t *testing.T) {
204234

205235
t.Run("resourceNotFound", func(t *testing.T) {
206236

237+
evRecorder := &mocks2.EventRecorder{}
238+
tctx := getMockTaskContext()
239+
mockResourceHandler := &mocks.K8sResourceHandler{}
240+
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
241+
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
242+
243+
store, err := storage.NewDataStore(&storage.Config{
244+
Type: storage.TypeMemory,
245+
}, promutils.NewTestScope())
246+
assert.NoError(t, err)
247+
248+
assert.NoError(t, k.Initialize(ctx, evRecorder, store, func(ownerId string) {}, "x"))
249+
_ = flytek8s.InitializeFake()
250+
207251
info := &events.TaskEventInfo{}
208252
info.Logs = []*core.TaskLog{{Uri: "log1"}}
209253
expectedOldPhase := types.TaskPhaseRunning
210254
tctx.On("GetPhase").Return(expectedOldPhase)
211255

212-
// NOTE: We have deleted the object
213-
assert.NoError(t, c.Delete(ctx, testPod))
256+
evRecorder.On("RecordTaskEvent", mock.MatchedBy(func(c context.Context) bool { return true }),
257+
mock.MatchedBy(func(e *event.TaskExecutionEvent) bool { return true })).Return(nil)
258+
259+
s, state, err := k.CheckTaskStatus(ctx, tctx, nil)
260+
assert.Nil(t, state)
261+
assert.NoError(t, err)
262+
assert.Equal(t, types.TaskPhasePermanentFailure, s.Phase, "Expected failure got %s", s.Phase.String())
263+
})
264+
265+
t.Run("errorFileExit", func(t *testing.T) {
266+
267+
evRecorder := &mocks2.EventRecorder{}
268+
tctx := getMockTaskContext()
269+
mockResourceHandler := &mocks.K8sResourceHandler{}
270+
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
271+
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
272+
273+
store, err := storage.NewDataStore(&storage.Config{
274+
Type: storage.TypeMemory,
275+
}, promutils.NewTestScope())
276+
assert.NoError(t, err)
277+
278+
assert.NoError(t, k.Initialize(ctx, evRecorder, store, func(ownerId string) {}, "x"))
279+
testPod := &v1.Pod{}
280+
testPod.SetName(tctx.GetTaskExecutionID().GetGeneratedName())
281+
testPod.SetNamespace(tctx.GetNamespace())
282+
testPod.SetOwnerReferences([]v12.OwnerReference{tctx.GetOwnerReference()})
283+
284+
_ = c.Delete(ctx, testPod)
285+
assert.NoError(t, c.Create(ctx, testPod))
286+
defer func() {
287+
_ = c.Delete(ctx, testPod)
288+
}()
289+
290+
assert.NoError(t, store.WriteProtobuf(ctx, tctx.GetErrorFile(), storage.Options{}, &core.ErrorDocument{
291+
Error: &core.ContainerError{
292+
Kind: core.ContainerError_NON_RECOVERABLE,
293+
Code: "code",
294+
Message: "pleh",
295+
},
296+
}))
297+
298+
info := &events.TaskEventInfo{}
299+
info.Logs = []*core.TaskLog{{Uri: "log1"}}
300+
expectedOldPhase := types.TaskPhaseQueued
301+
tctx.On("GetPhase").Return(expectedOldPhase)
302+
mockResourceHandler.On("GetTaskStatus", mock.MatchedBy(func(o *v1.Pod) bool { return true })).Return(types.TaskStatusSucceeded, nil, nil)
303+
304+
evRecorder.On("RecordTaskEvent", mock.MatchedBy(func(c context.Context) bool { return true }),
305+
mock.MatchedBy(func(e *event.TaskExecutionEvent) bool { return true })).Return(nil)
306+
307+
s, state, err := k.CheckTaskStatus(ctx, tctx, nil)
308+
assert.Nil(t, state)
309+
assert.NoError(t, err)
310+
assert.Equal(t, types.TaskPhasePermanentFailure, s.Phase)
311+
})
312+
313+
t.Run("errorFileExitRecoverable", func(t *testing.T) {
314+
315+
evRecorder := &mocks2.EventRecorder{}
316+
tctx := getMockTaskContext()
317+
mockResourceHandler := &mocks.K8sResourceHandler{}
318+
k := flytek8s.NewK8sTaskExecutorForResource("x", &v1.Pod{}, mockResourceHandler, time.Second)
319+
mockResourceHandler.On("BuildIdentityResource", tctx).Return(&v1.Pod{}, nil)
320+
321+
store, err := storage.NewDataStore(&storage.Config{
322+
Type: storage.TypeMemory,
323+
}, promutils.NewTestScope())
324+
assert.NoError(t, err)
325+
326+
assert.NoError(t, k.Initialize(ctx, evRecorder, store, func(ownerId string) {}, "x"))
327+
testPod := &v1.Pod{}
328+
testPod.SetName(tctx.GetTaskExecutionID().GetGeneratedName())
329+
testPod.SetNamespace(tctx.GetNamespace())
330+
testPod.SetOwnerReferences([]v12.OwnerReference{tctx.GetOwnerReference()})
331+
332+
_ = c.Delete(ctx, testPod)
333+
assert.NoError(t, c.Create(ctx, testPod))
214334
defer func() {
215-
// Lets add it back in for other tests to work
216-
assert.NoError(t, c.Create(ctx, testPod))
335+
_ = c.Delete(ctx, testPod)
217336
}()
218337

338+
assert.NoError(t, store.WriteProtobuf(ctx, tctx.GetErrorFile(), storage.Options{}, &core.ErrorDocument{
339+
Error: &core.ContainerError{
340+
Kind: core.ContainerError_RECOVERABLE,
341+
Code: "code",
342+
Message: "pleh",
343+
},
344+
}))
345+
346+
info := &events.TaskEventInfo{}
347+
info.Logs = []*core.TaskLog{{Uri: "log1"}}
348+
expectedOldPhase := types.TaskPhaseQueued
349+
tctx.On("GetPhase").Return(expectedOldPhase)
350+
mockResourceHandler.On("GetTaskStatus", mock.MatchedBy(func(o *v1.Pod) bool { return true })).Return(types.TaskStatusSucceeded, nil, nil)
351+
219352
evRecorder.On("RecordTaskEvent", mock.MatchedBy(func(c context.Context) bool { return true }),
220353
mock.MatchedBy(func(e *event.TaskExecutionEvent) bool { return true })).Return(nil)
221354

222355
s, state, err := k.CheckTaskStatus(ctx, tctx, nil)
223356
assert.Nil(t, state)
224357
assert.NoError(t, err)
225-
assert.Equal(t, types.TaskPhasePermanentFailure, s.Phase, "Expected failure got %s", s.Phase.String())
358+
assert.Equal(t, types.TaskPhaseRetryableFailure, s.Phase)
226359
})
227360
}
228361

@@ -253,7 +386,7 @@ func TestK8sTaskExecutor_HandleTaskSuccess(t *testing.T) {
253386
Message: "y",
254387
},
255388
}
256-
store.WriteProtobuf(ctx, tctx.GetErrorFile(), storage.Options{}, msg)
389+
assert.NoError(t, store.WriteProtobuf(ctx, tctx.GetErrorFile(), storage.Options{}, msg))
257390
assert.NoError(t, k.Initialize(ctx, nil, store, func(ownerId string) {}, ""))
258391
s, err := k.HandleTaskSuccess(ctx, tctx)
259392
assert.NoError(t, err)
@@ -287,7 +420,7 @@ func TestK8sTaskExecutor_HandleTaskSuccess(t *testing.T) {
287420
store, err := storage.NewDataStore(&storage.Config{Type: storage.TypeMemory}, promutils.NewTestScope())
288421
assert.NoError(t, err)
289422
r := bytes.NewReader([]byte{'x'})
290-
store.WriteRaw(ctx, tctx.GetErrorFile(), r.Size(), storage.Options{}, r)
423+
assert.NoError(t, store.WriteRaw(ctx, tctx.GetErrorFile(), r.Size(), storage.Options{}, r))
291424
assert.NoError(t, k.Initialize(ctx, nil, store, func(ownerId string) {}, ""))
292425
s, err := k.HandleTaskSuccess(ctx, tctx)
293426
assert.Error(t, err)

go/tasks/v1/utils/template.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ package utils
33
import (
44
"context"
55
"fmt"
6-
"github.com/golang/protobuf/ptypes"
7-
"github.com/lyft/flyteidl/gen/pb-go/flyteidl/core"
8-
"github.com/lyft/flytestdlib/logger"
96
"reflect"
107
"regexp"
118
"strings"
9+
10+
"github.com/golang/protobuf/ptypes"
11+
"github.com/lyft/flyteidl/gen/pb-go/flyteidl/core"
12+
"github.com/lyft/flytestdlib/logger"
1213
)
1314

1415
var inputFileRegex = regexp.MustCompile(`(?i){{\s*[\.$]Input\s*}}`)

0 commit comments

Comments
 (0)