fix(task): prevent duplicate Slurm job submission on backend restart
RecoverStuckTasks now skips tasks that already have a slurm_job_id, and ProcessTask adds a guard before the submitting step to prevent re-submission even if a task is incorrectly re-enqueued. Also deprecates the POST /api/v1/jobs/submit endpoint (replaced by POST /tasks) and comments out the related handlers and tests.
This commit is contained in:
@@ -22,29 +22,31 @@ func NewJobHandler(jobSvc *service.JobService, logger *zap.Logger) *JobHandler {
|
||||
return &JobHandler{jobSvc: jobSvc, logger: logger}
|
||||
}
|
||||
|
||||
// [Deprecated] SubmitJob has been replaced by POST /tasks.
|
||||
// The method body is kept in case a rollback is needed.
|
||||
// SubmitJob handles POST /api/v1/jobs/submit.
|
||||
func (h *JobHandler) SubmitJob(c *gin.Context) {
|
||||
var req model.SubmitJobRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
h.logger.Warn("bad request", zap.String("method", "SubmitJob"), zap.String("error", "invalid request body"))
|
||||
server.BadRequest(c, "invalid request body")
|
||||
return
|
||||
}
|
||||
if req.Script == "" {
|
||||
h.logger.Warn("bad request", zap.String("method", "SubmitJob"), zap.String("error", "script is required"))
|
||||
server.BadRequest(c, "script is required")
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := h.jobSvc.SubmitJob(c.Request.Context(), &req)
|
||||
if err != nil {
|
||||
h.logger.Error("handler error", zap.String("method", "SubmitJob"), zap.Int("status", http.StatusBadGateway), zap.Error(err))
|
||||
server.ErrorWithStatus(c, http.StatusBadGateway, "slurm error: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
server.Created(c, resp)
|
||||
}
|
||||
// func (h *JobHandler) SubmitJob(c *gin.Context) {
|
||||
// var req model.SubmitJobRequest
|
||||
// if err := c.ShouldBindJSON(&req); err != nil {
|
||||
// h.logger.Warn("bad request", zap.String("method", "SubmitJob"), zap.String("error", "invalid request body"))
|
||||
// server.BadRequest(c, "invalid request body")
|
||||
// return
|
||||
// }
|
||||
// if req.Script == "" {
|
||||
// h.logger.Warn("bad request", zap.String("method", "SubmitJob"), zap.String("error", "script is required"))
|
||||
// server.BadRequest(c, "script is required")
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// resp, err := h.jobSvc.SubmitJob(c.Request.Context(), &req)
|
||||
// if err != nil {
|
||||
// h.logger.Error("handler error", zap.String("method", "SubmitJob"), zap.Int("status", http.StatusBadGateway), zap.Error(err))
|
||||
// server.ErrorWithStatus(c, http.StatusBadGateway, "slurm error: "+err.Error())
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// server.Created(c, resp)
|
||||
// }
|
||||
|
||||
// GetJobs handles GET /api/v1/jobs with pagination.
|
||||
func (h *JobHandler) GetJobs(c *gin.Context) {
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
@@ -23,7 +21,7 @@ func setupJobRouter(h *JobHandler) *gin.Engine {
|
||||
v1 := r.Group("/api/v1")
|
||||
jobs := v1.Group("/jobs")
|
||||
{
|
||||
jobs.POST("/submit", h.SubmitJob)
|
||||
// jobs.POST("/submit", h.SubmitJob) // [Deprecated] replaced by POST /tasks
|
||||
jobs.GET("", h.GetJobs)
|
||||
jobs.GET("/history", h.GetJobHistory)
|
||||
jobs.GET("/:id", h.GetJob)
|
||||
@@ -61,6 +59,8 @@ func handlerLogs(logs *observer.ObservedLogs) []observer.LoggedEntry {
|
||||
return handler
|
||||
}
|
||||
|
||||
// [Deprecated] The SubmitJob tests are disabled; the endpoint has been replaced by POST /tasks.
|
||||
/*
|
||||
func TestSubmitJob_Success(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/slurm/v0.0.40/job/submit", func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -171,6 +171,9 @@ func TestSubmitJob_SlurmError(t *testing.T) {
|
||||
t.Fatalf("expected 502, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// --- Logging verification tests ---
|
||||
|
||||
func TestGetJobs_Success(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
@@ -462,6 +465,7 @@ func TestGetJobHistory_DefaultPagination(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
func TestSubmitJob_InvalidBody(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
srv, handler := setupJobHandler(mux)
|
||||
@@ -479,9 +483,11 @@ func TestSubmitJob_InvalidBody(t *testing.T) {
|
||||
t.Fatalf("expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// --- Logging verification tests ---
|
||||
|
||||
/*
|
||||
func TestSubmitJob_InvalidBody_LogsWarn(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
srv, handler, logs := setupJobHandlerWithObserver(mux)
|
||||
@@ -614,6 +620,7 @@ func TestSubmitJob_Success_NoHandlerLogs(t *testing.T) {
|
||||
t.Errorf("expected no handler log entries on success, got %d", len(hLogs))
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
func TestGetJobs_Error_LogsError(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
Reference in New Issue
Block a user