fix(task): return empty string for unknown/empty Slurm states instead of defaulting to running

mapSlurmStateToTaskStatus previously defaulted to 'running' for empty
state arrays and unrecognized states. This was too aggressive: treating
an unknown state as actively running could cause incorrect status updates
when Slurm returns unexpected or empty state data.

Now empty/unknown states return an empty string (unrecognized states are
also logged as a warning), and refreshTaskStatus skips the update in that
case.
This commit is contained in:
dailz
2026-04-21 13:23:40 +08:00
parent 43329d2333
commit f13377ca7d
2 changed files with 17 additions and 15 deletions

View File

@@ -497,7 +497,7 @@ func uniqueInt64s(ids []int64) []int64 {
func (s *TaskService) mapSlurmStateToTaskStatus(slurmState []string) string {
if len(slurmState) == 0 {
return model.TaskStatusRunning
return ""
}
state := strings.ToUpper(slurmState[0])
@@ -511,7 +511,8 @@ func (s *TaskService) mapSlurmStateToTaskStatus(slurmState []string) string {
case "FAILED", "CANCELLED", "TIMEOUT", "NODE_FAIL", "OUT_OF_MEMORY", "PREEMPTED":
return model.TaskStatusFailed
default:
return model.TaskStatusRunning
s.logger.Warn("unrecognized slurm state, skipping update", zap.String("state", state))
return ""
}
}
@@ -542,15 +543,16 @@ func (s *TaskService) refreshTaskStatus(ctx context.Context, taskID int64) error
}
newStatus := s.mapSlurmStateToTaskStatus(jobResp.State)
if newStatus != task.Status {
s.logger.Info("updating task status from slurm",
zap.Int64("task_id", taskID),
zap.String("old_status", task.Status),
zap.String("new_status", newStatus),
)
return s.taskStore.UpdateStatus(ctx, taskID, newStatus, "")
if newStatus == "" || newStatus == task.Status {
return nil
}
return nil
s.logger.Info("updating task status from slurm",
zap.Int64("task_id", taskID),
zap.String("old_status", task.Status),
zap.String("new_status", newStatus),
)
return s.taskStore.UpdateStatus(ctx, taskID, newStatus, "")
}
func (s *TaskService) RefreshStaleTasks(ctx context.Context) error {

View File

@@ -97,7 +97,7 @@ func TestTaskService_MapSlurmState_AllStates(t *testing.T) {
{[]string{"OUT_OF_MEMORY"}, model.TaskStatusFailed},
{[]string{"PREEMPTED"}, model.TaskStatusFailed},
{[]string{"SPECIAL_EXIT"}, model.TaskStatusRunning},
{[]string{"unknown_state"}, model.TaskStatusRunning},
{[]string{"unknown_state"}, ""},
{[]string{"pending"}, model.TaskStatusQueued},
{[]string{"Running"}, model.TaskStatusRunning},
}
@@ -115,13 +115,13 @@ func TestTaskService_MapSlurmState_Empty(t *testing.T) {
defer env.close()
got := env.svc.mapSlurmStateToTaskStatus([]string{})
if got != model.TaskStatusRunning {
t.Errorf("mapSlurmStateToTaskStatus([]) = %q, want %q", got, model.TaskStatusRunning)
if got != "" {
t.Errorf("mapSlurmStateToTaskStatus([]) = %q, want empty string", got)
}
got = env.svc.mapSlurmStateToTaskStatus(nil)
if got != model.TaskStatusRunning {
t.Errorf("mapSlurmStateToTaskStatus(nil) = %q, want %q", got, model.TaskStatusRunning)
if got != "" {
t.Errorf("mapSlurmStateToTaskStatus(nil) = %q, want empty string", got)
}
}