fix(task): prevent duplicate Slurm job submission on backend restart

RecoverStuckTasks now skips tasks that already have a slurm_job_id,
and ProcessTask adds a guard before the submitting step to prevent
re-submission even if a task is incorrectly re-enqueued.

Also deprecates POST /api/v1/jobs/submit endpoint (replaced by POST /tasks)
and comments out related handlers and tests.
This commit is contained in:
dailz
2026-04-21 10:57:38 +08:00
parent 4fd331ebd8
commit b90942de77
8 changed files with 61 additions and 35 deletions

View File

@@ -10,7 +10,7 @@ import (
)
type JobHandler interface {
SubmitJob(c *gin.Context)
// SubmitJob(c *gin.Context) // [已弃用] 已被 POST /tasks 取代
GetJobs(c *gin.Context)
GetJobHistory(c *gin.Context)
GetJob(c *gin.Context)
@@ -73,7 +73,7 @@ func NewRouter(jobH JobHandler, clusterH ClusterHandler, appH ApplicationHandler
v1 := r.Group("/api/v1")
jobs := v1.Group("/jobs")
jobs.POST("/submit", jobH.SubmitJob)
// jobs.POST("/submit", jobH.SubmitJob) // [已弃用] 已被 POST /tasks 取代
jobs.GET("", jobH.GetJobs)
jobs.GET("/history", jobH.GetJobHistory)
jobs.GET("/:id", jobH.GetJob)
@@ -144,7 +144,7 @@ func NewTestRouter() *gin.Engine {
func registerPlaceholderRoutes(v1 *gin.RouterGroup) {
jobs := v1.Group("/jobs")
jobs.POST("/submit", notImplemented)
// jobs.POST("/submit", notImplemented) // [已弃用] 已被 POST /tasks 取代
jobs.GET("", notImplemented)
jobs.GET("/history", notImplemented)
jobs.GET("/:id", notImplemented)

View File

@@ -17,7 +17,7 @@ func TestAllRoutesRegistered(t *testing.T) {
method string
path string
}{
{"POST", "/api/v1/jobs/submit"},
// {"POST", "/api/v1/jobs/submit"}, // [已弃用] 已被 POST /tasks 取代
{"GET", "/api/v1/jobs"},
{"GET", "/api/v1/jobs/history"},
{"GET", "/api/v1/jobs/:id"},