Skip to content

Commit 3c88993

Browse files
craig[bot] and dt committed
Merge #158350
158350: jobs: record reason in job status during for-cause pauses r=dt a=dt

When pausing for a specified reason, that reason should be shown as the secondary, informative status of the job aka its 'running_status'. This is done by setting the job's status using StatusStorage. Furthermore, a status set in pause-requested should be kept when the job moves to paused, unlike a status from running, as the status in pause-requested explains why the job moved to paused via the pause-request.

Release note (ops change): Jobs that are paused due to a specific reason, including jobs which pause themselves when encountering errors such as running out of disk space, now record that reason in their displayed status field of SHOW JOBS.

Epic: none.

Co-authored-by: David Taylor <davidt@davidt.io>
2 parents eff8531 + 7870515 commit 3c88993

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

pkg/jobs/job_info_storage_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -896,9 +896,15 @@ func TestJobPauseStateTransitionsRecorded(t *testing.T) {
896896
require.NoError(t, err)
897897
jobutils.WaitForJobToRun(t, sql, job.ID())
898898

899-
sql.Exec(t, "PAUSE JOB $1", job.ID())
899+
sql.Exec(t, "PAUSE JOB $1 WITH REASON = 'test pause reason'", job.ID())
900900
jobutils.WaitForJobToPause(t, sql, job.ID())
901901

902+
// Verify the running_status shows the pause reason via SHOW JOB.
903+
sql.CheckQueryResults(t,
904+
fmt.Sprintf("SELECT running_status FROM [SHOW JOB %d]", job.ID()),
905+
[][]string{{"pausing: test pause reason"}},
906+
)
907+
902908
sql.Exec(t, "RESUME JOB $1", job.ID())
903909
jobutils.WaitForJobToRun(t, sql, job.ID())
904910

pkg/jobs/update.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,12 @@ WHERE id = $1
243243
return err
244244
}
245245
// If we are changing state, we should clear out the status, unless
246-
// we are about to set it to something instead.
247-
if progress == nil || progress.StatusMessage == "" {
246+
// we are about to set it to something instead or if we are coming from a
247+
// pause requested state, in which case we already cleared it out once when
248+
// we entered the pause requested state and may have since set it to a pause
249+
// reason which we now want to preserve.
250+
noNewStatus := progress == nil || progress.StatusMessage == ""
251+
if noNewStatus && ju.md.State != StatePauseRequested {
248252
if err := j.StatusStorage().Clear(ctx, u.txn); err != nil {
249253
return err
250254
}
@@ -403,6 +407,11 @@ func (ju *JobUpdater) PauseRequestedWithFunc(
403407
return err
404408
}
405409
}
410+
if reason != "" {
411+
if err := StatusStorage(md.ID).Set(ctx, txn, fmt.Sprintf("pausing: %s", reason)); err != nil {
412+
return err
413+
}
414+
}
406415
ju.UpdateState(StatePauseRequested)
407416
md.Payload.PauseReason = reason
408417
ju.UpdatePayload(md.Payload)

0 commit comments

Comments
 (0)