Files
hpc/internal/service/job_service_test.go
dailz d9ca9233b3 fix(service): correct CPU/memory mapping and add TRES/memory_used extraction
- Map CPUs to CpusPerTask (not MinimumCpus) for consistent SlurmDBD history

- Add Set:true to memory Uint64NoVal on submission

- Filter number=0 in mapUint64NoValToInt64 to avoid false zeros

- Extract peak memory from Steps.Tres.Requested.Max across all steps

- Add formatTresList, parseGresDetail, extractMemoryFromSteps helpers

- Update mapJobInfo and mapSlurmdbJob with new field mappings

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-20 17:10:19 +08:00

1222 lines
35 KiB
Go

package service
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"gcy_hpc_server/internal/model"
"gcy_hpc_server/internal/slurm"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest/observer"
)
// mockJobServer starts an httptest server that serves handler and returns a
// slurm client pointed at that server together with the function that shuts
// the server down.
//
// It panics when the client cannot be constructed, so a broken fixture fails
// loudly at setup time instead of handing a nil client to the test.
func mockJobServer(handler http.HandlerFunc) (*slurm.Client, func()) {
	srv := httptest.NewServer(handler)
	client, err := slurm.NewClient(srv.URL, srv.Client())
	if err != nil {
		srv.Close() // do not leak the listener when setup fails
		panic("mockJobServer: create slurm client: " + err.Error())
	}
	return client, srv.Close
}
// TestSubmitJob verifies that SubmitJob issues a POST to the v0.0.40
// job/submit endpoint carrying the requested script, and that the job ID in
// the server's response is returned to the caller.
func TestSubmitJob(t *testing.T) {
	jobID := int32(123)
	client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		if r.URL.Path != "/slurm/v0.0.40/job/submit" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		var body slurm.JobSubmitReq
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			// This handler runs on the server's goroutine, where t.Fatalf is
			// not allowed: record the failure and reject the request instead
			// of answering as if everything were fine.
			t.Errorf("decode body: %v", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if body.Job == nil || body.Job.Script == nil || *body.Job.Script != "#!/bin/bash\necho hello" {
			t.Errorf("unexpected script in request body")
		}
		resp := slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{
				JobID: &jobID,
			},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer cleanup()

	svc := NewJobService(client, zap.NewNop())
	resp, err := svc.SubmitJob(context.Background(), &model.SubmitJobRequest{
		Script:    "#!/bin/bash\necho hello",
		Partition: "normal",
		QOS:       "high",
		JobName:   "test-job",
		CPUs:      4,
		TimeLimit: "60",
	})
	if err != nil {
		t.Fatalf("SubmitJob: %v", err)
	}
	if resp.JobID != 123 {
		t.Errorf("expected JobID 123, got %d", resp.JobID)
	}
}
// TestSubmitJob_WithOptionalFields submits a request that only sets Script
// and asserts that the partition and cpus_per_task fields are absent from the
// job description received by the server.
func TestSubmitJob_WithOptionalFields(t *testing.T) {
	client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body slurm.JobSubmitReq
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			// The handler runs on the server's goroutine, so t.Fatalf must not
			// be used here; fail the test and reject the request instead.
			t.Errorf("decode body: %v", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if body.Job == nil {
			t.Error("job desc is nil")
			// Return early: the checks below dereference body.Job.
			http.Error(w, "job desc is nil", http.StatusBadRequest)
			return
		}
		if body.Job.Partition != nil {
			t.Error("expected partition nil for empty string")
		}
		if body.Job.CpusPerTask != nil {
			t.Error("expected cpus_per_task nil when CPUs=0")
		}
		jobID := int32(456)
		resp := slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{JobID: &jobID},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer cleanup()

	svc := NewJobService(client, zap.NewNop())
	resp, err := svc.SubmitJob(context.Background(), &model.SubmitJobRequest{
		Script: "echo hi",
	})
	if err != nil {
		t.Fatalf("SubmitJob: %v", err)
	}
	if resp.JobID != 456 {
		t.Errorf("expected JobID 456, got %d", resp.JobID)
	}
}
// TestSubmitJob_Error expects SubmitJob to return an error when the mock
// server answers every request with HTTP 500.
func TestSubmitJob_Error(t *testing.T) {
	failing := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"internal"}`))
	}
	cli, stop := mockJobServer(failing)
	defer stop()

	service := NewJobService(cli, zap.NewNop())
	req := &model.SubmitJobRequest{Script: "echo fail"}
	if _, err := service.SubmitJob(context.Background(), req); err == nil {
		t.Fatal("expected error, got nil")
	}
}
func TestGetJobs(t *testing.T) {
jobID := int32(100)
name := "my-job"
partition := "gpu"
ts := int64(1700000000)
nodes := "node01"
client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
t.Errorf("expected GET, got %s", r.Method)
}
resp := slurm.OpenapiJobInfoResp{
Jobs: slurm.JobInfoMsg{
{
JobID: &jobID,
Name: &name,
JobState: []string{"RUNNING"},
Partition: &partition,
SubmitTime: &slurm.Uint64NoVal{Number: &ts},
StartTime: &slurm.Uint64NoVal{Number: &ts},
EndTime: &slurm.Uint64NoVal{Number: &ts},
Nodes: &nodes,
},
},
}
json.NewEncoder(w).Encode(resp)
}))
defer cleanup()
svc := NewJobService(client, zap.NewNop())
result, err := svc.GetJobs(context.Background(), &model.JobListQuery{Page: 1, PageSize: 20})
if err != nil {
t.Fatalf("GetJobs: %v", err)
}
if result.Total != 1 {
t.Fatalf("expected total 1, got %d", result.Total)
}
if len(result.Jobs) != 1 {
t.Fatalf("expected 1 job, got %d", len(result.Jobs))
}
j := result.Jobs[0]
if j.JobID != 100 {
t.Errorf("expected JobID 100, got %d", j.JobID)
}
if j.Name != "my-job" {
t.Errorf("expected Name my-job, got %s", j.Name)
}
if len(j.State) != 1 || j.State[0] != "RUNNING" {
t.Errorf("expected State [RUNNING], got %v", j.State)
}
if j.Partition != "gpu" {
t.Errorf("expected Partition gpu, got %s", j.Partition)
}
if j.SubmitTime == nil || *j.SubmitTime != ts {
t.Errorf("expected SubmitTime %d, got %v", ts, j.SubmitTime)
}
if j.Nodes != "node01" {
t.Errorf("expected Nodes node01, got %s", j.Nodes)
}
if result.Page != 1 {
t.Errorf("expected Page 1, got %d", result.Page)
}
if result.PageSize != 20 {
t.Errorf("expected PageSize 20, got %d", result.PageSize)
}
}
// TestGetJob checks that GetJob returns the job the mock server reports for
// the requested ID.
func TestGetJob(t *testing.T) {
	id, jobName := int32(200), "single-job"
	cli, stop := mockJobServer(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(slurm.OpenapiJobInfoResp{
			Jobs: slurm.JobInfoMsg{
				{
					JobID: &id,
					Name:  &jobName,
				},
			},
		})
	})
	defer stop()

	got, err := NewJobService(cli, zap.NewNop()).GetJob(context.Background(), "200")
	switch {
	case err != nil:
		t.Fatalf("GetJob: %v", err)
	case got == nil:
		t.Fatal("expected job, got nil")
	case got.JobID != 200:
		t.Errorf("expected JobID 200, got %d", got.JobID)
	}
}
func TestGetJob_NotFound(t *testing.T) {
client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
resp := slurm.OpenapiJobInfoResp{Jobs: slurm.JobInfoMsg{}}
json.NewEncoder(w).Encode(resp)
}))
defer cleanup()
svc := NewJobService(client, zap.NewNop())
job, err := svc.GetJob(context.Background(), "999")
if err != nil {
t.Fatalf("GetJob: %v", err)
}
if job != nil {
t.Errorf("expected nil for not found, got %+v", job)
}
}
func TestCancelJob(t *testing.T) {
client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodDelete {
t.Errorf("expected DELETE, got %s", r.Method)
}
resp := slurm.OpenapiResp{}
json.NewEncoder(w).Encode(resp)
}))
defer cleanup()
svc := NewJobService(client, zap.NewNop())
err := svc.CancelJob(context.Background(), "300")
if err != nil {
t.Fatalf("CancelJob: %v", err)
}
}
// TestCancelJob_Error expects CancelJob to return an error when the server
// answers with HTTP 404 and a plain-text body.
func TestCancelJob_Error(t *testing.T) {
	missing := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
		_, _ = w.Write([]byte(`not found`))
	}
	cli, stop := mockJobServer(missing)
	defer stop()

	service := NewJobService(cli, zap.NewNop())
	if err := service.CancelJob(context.Background(), "999"); err == nil {
		t.Fatal("expected error, got nil")
	}
}
func TestGetJobHistory(t *testing.T) {
jobID1 := int32(10)
jobID2 := int32(20)
jobID3 := int32(30)
name1 := "hist-1"
name2 := "hist-2"
name3 := "hist-3"
submission1 := int64(1700000000)
submission2 := int64(1700001000)
submission3 := int64(1700002000)
partition := "normal"
client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
t.Errorf("expected GET, got %s", r.Method)
}
users := r.URL.Query().Get("users")
if users != "testuser" {
t.Errorf("expected users=testuser, got %s", users)
}
resp := slurm.OpenapiSlurmdbdJobsResp{
Jobs: slurm.JobList{
{
JobID: &jobID1,
Name: &name1,
Partition: &partition,
State: &slurm.JobState{Current: []string{"COMPLETED"}},
Time: &slurm.JobTime{Submission: &submission1},
},
{
JobID: &jobID2,
Name: &name2,
Partition: &partition,
State: &slurm.JobState{Current: []string{"FAILED"}},
Time: &slurm.JobTime{Submission: &submission2},
},
{
JobID: &jobID3,
Name: &name3,
Partition: &partition,
State: &slurm.JobState{Current: []string{"CANCELLED"}},
Time: &slurm.JobTime{Submission: &submission3},
},
},
}
json.NewEncoder(w).Encode(resp)
}))
defer cleanup()
svc := NewJobService(client, zap.NewNop())
result, err := svc.GetJobHistory(context.Background(), &model.JobHistoryQuery{
Users: "testuser",
Page: 1,
PageSize: 2,
})
if err != nil {
t.Fatalf("GetJobHistory: %v", err)
}
if result.Total != 3 {
t.Errorf("expected Total 3, got %d", result.Total)
}
if result.Page != 1 {
t.Errorf("expected Page 1, got %d", result.Page)
}
if result.PageSize != 2 {
t.Errorf("expected PageSize 2, got %d", result.PageSize)
}
if len(result.Jobs) != 2 {
t.Fatalf("expected 2 jobs on page 1, got %d", len(result.Jobs))
}
if result.Jobs[0].JobID != 10 {
t.Errorf("expected first job ID 10, got %d", result.Jobs[0].JobID)
}
if result.Jobs[1].JobID != 20 {
t.Errorf("expected second job ID 20, got %d", result.Jobs[1].JobID)
}
if len(result.Jobs[0].State) != 1 || result.Jobs[0].State[0] != "COMPLETED" {
t.Errorf("expected state [COMPLETED], got %v", result.Jobs[0].State)
}
}
// TestGetJobHistory_Page2 serves two history records and requests page 2 with
// a page size of 1, expecting only the second record while Total stays 2.
func TestGetJobHistory_Page2(t *testing.T) {
	idA, idB := int32(10), int32(20)
	nameA, nameB := "a", "b"

	cli, stop := mockJobServer(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(slurm.OpenapiSlurmdbdJobsResp{
			Jobs: slurm.JobList{
				{JobID: &idA, Name: &nameA},
				{JobID: &idB, Name: &nameB},
			},
		})
	})
	defer stop()

	query := &model.JobHistoryQuery{Page: 2, PageSize: 1}
	history, err := NewJobService(cli, zap.NewNop()).GetJobHistory(context.Background(), query)
	if err != nil {
		t.Fatalf("GetJobHistory: %v", err)
	}
	if history.Total != 2 {
		t.Errorf("expected Total 2, got %d", history.Total)
	}
	if n := len(history.Jobs); n != 1 {
		t.Fatalf("expected 1 job on page 2, got %d", n)
	}
	if only := history.Jobs[0]; only.JobID != 20 {
		t.Errorf("expected job ID 20, got %d", only.JobID)
	}
}
func TestGetJobHistory_DefaultPagination(t *testing.T) {
client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
resp := slurm.OpenapiSlurmdbdJobsResp{Jobs: slurm.JobList{}}
json.NewEncoder(w).Encode(resp)
}))
defer cleanup()
svc := NewJobService(client, zap.NewNop())
result, err := svc.GetJobHistory(context.Background(), &model.JobHistoryQuery{})
if err != nil {
t.Fatalf("GetJobHistory: %v", err)
}
if result.Page != 1 {
t.Errorf("expected default page 1, got %d", result.Page)
}
if result.PageSize != 20 {
t.Errorf("expected default pageSize 20, got %d", result.PageSize)
}
}
// TestGetJobHistory_QueryMapping checks that the filter fields of
// JobHistoryQuery arrive at the server as the URL query parameters account,
// partition, state, job_name, start_time and end_time.
func TestGetJobHistory_QueryMapping(t *testing.T) {
	// One row per query parameter the handler inspects, in assertion order.
	expectations := []struct {
		key     string
		want    string
		failFmt string
	}{
		{"account", "proj1", "expected account=proj1, got %s"},
		{"partition", "gpu", "expected partition=gpu, got %s"},
		{"state", "COMPLETED", "expected state=COMPLETED, got %s"},
		{"job_name", "myjob", "expected job_name=myjob, got %s"},
		{"start_time", "1700000000", "expected start_time=1700000000, got %s"},
		{"end_time", "1700099999", "expected end_time=1700099999, got %s"},
	}

	cli, stop := mockJobServer(func(w http.ResponseWriter, r *http.Request) {
		params := r.URL.Query()
		for _, e := range expectations {
			if got := params.Get(e.key); got != e.want {
				t.Errorf(e.failFmt, got)
			}
		}
		_ = json.NewEncoder(w).Encode(slurm.OpenapiSlurmdbdJobsResp{Jobs: slurm.JobList{}})
	})
	defer stop()

	query := &model.JobHistoryQuery{
		Users:     "testuser",
		Account:   "proj1",
		Partition: "gpu",
		State:     "COMPLETED",
		JobName:   "myjob",
		StartTime: "1700000000",
		EndTime:   "1700099999",
	}
	service := NewJobService(cli, zap.NewNop())
	if _, err := service.GetJobHistory(context.Background(), query); err != nil {
		t.Fatalf("GetJobHistory: %v", err)
	}
}
// TestGetJobHistory_Error expects GetJobHistory to return an error when the
// server answers with HTTP 500.
func TestGetJobHistory_Error(t *testing.T) {
	failing := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"db down"}`))
	}
	cli, stop := mockJobServer(failing)
	defer stop()

	service := NewJobService(cli, zap.NewNop())
	if _, err := service.GetJobHistory(context.Background(), &model.JobHistoryQuery{}); err == nil {
		t.Fatal("expected error, got nil")
	}
}
// TestMapJobInfo_ExitCode checks that mapJobInfo carries the return code of a
// JobInfo's ExitCode over to the mapped response's ExitCode.
func TestMapJobInfo_ExitCode(t *testing.T) {
	code := int64(2)
	exit := &slurm.ProcessExitCodeVerbose{
		ReturnCode: &slurm.Uint32NoVal{Number: &code},
	}

	mapped := mapJobInfo(&slurm.JobInfo{ExitCode: exit})
	if got := mapped.ExitCode; !(got != nil && *got == 2) {
		t.Errorf("expected exit code 2, got %v", got)
	}
}
func TestMapSlurmdbJob_NilFields(t *testing.T) {
j := &slurm.Job{}
resp := mapSlurmdbJob(j)
if resp.JobID != 0 {
t.Errorf("expected JobID 0, got %d", resp.JobID)
}
if resp.State != nil {
t.Errorf("expected nil State, got %v", resp.State)
}
if resp.SubmitTime != nil {
t.Errorf("expected nil SubmitTime, got %v", resp.SubmitTime)
}
}
// ---------------------------------------------------------------------------
// Structured logging tests using zaptest/observer
// ---------------------------------------------------------------------------
// newJobServiceWithObserver builds a JobService whose slurm client targets
// srv and whose logger writes into an in-memory observer core created at
// debug level. The recorded entries are returned alongside the service.
//
// It panics when the slurm client cannot be constructed, so a broken fixture
// is reported at setup time rather than as a nil client inside the service.
func newJobServiceWithObserver(srv *httptest.Server) (*JobService, *observer.ObservedLogs) {
	core, recorded := observer.New(zapcore.DebugLevel)
	l := zap.New(core)
	client, err := slurm.NewClient(srv.URL, srv.Client())
	if err != nil {
		panic("newJobServiceWithObserver: create slurm client: " + err.Error())
	}
	return NewJobService(client, l), recorded
}
// TestJobService_SubmitJob_SuccessLog checks that a successful submission
// records exactly three log entries and that the third is an info-level entry
// carrying the job_name and job_id fields.
func TestJobService_SubmitJob_SuccessLog(t *testing.T) {
	id := int32(789)
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{JobID: &id},
		})
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	req := &model.SubmitJobRequest{
		Script:  "echo hi",
		JobName: "log-test-job",
	}
	if _, err := service.SubmitJob(context.Background(), req); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.InfoLevel {
		t.Errorf("expected InfoLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if name := ctxFields["job_name"]; name != "log-test-job" {
		t.Errorf("expected job_name=log-test-job, got %v", name)
	}
	loggedID, present := ctxFields["job_id"]
	if !present {
		t.Fatal("expected job_id field in log entry")
	}
	switch loggedID {
	case int32(789), int64(789):
		// Either integer width is accepted for the logged ID.
	default:
		t.Errorf("expected job_id=789, got %v (%T)", loggedID, loggedID)
	}
}
// TestJobService_SubmitJob_ErrorLog checks that a failed submission records
// three log entries and that the third is an error-level entry with
// operation=submit and an error field.
func TestJobService_SubmitJob_ErrorLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"internal"}`))
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	req := &model.SubmitJobRequest{Script: "echo fail"}
	if _, err := service.SubmitJob(context.Background(), req); err == nil {
		t.Fatal("expected error, got nil")
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.ErrorLevel {
		t.Errorf("expected ErrorLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if op := ctxFields["operation"]; op != "submit" {
		t.Errorf("expected operation=submit, got %v", op)
	}
	if _, present := ctxFields["error"]; !present {
		t.Error("expected error field in log entry")
	}
}
// TestJobService_CancelJob_SuccessLog checks that a successful cancellation
// records three log entries and that the third is an info-level entry whose
// job_id field equals the cancelled ID.
func TestJobService_CancelJob_SuccessLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		_ = json.NewEncoder(w).Encode(slurm.OpenapiResp{})
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	if err := service.CancelJob(context.Background(), "555"); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.InfoLevel {
		t.Errorf("expected InfoLevel, got %v", last.Level)
	}
	if id := last.ContextMap()["job_id"]; id != "555" {
		t.Errorf("expected job_id=555, got %v", id)
	}
}
// TestJobService_CancelJob_ErrorLog checks that a failed cancellation records
// three log entries and that the third is an error-level entry with
// operation=cancel, the job ID, and an error field.
func TestJobService_CancelJob_ErrorLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
		_, _ = w.Write([]byte(`not found`))
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	if err := service.CancelJob(context.Background(), "999"); err == nil {
		t.Fatal("expected error, got nil")
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.ErrorLevel {
		t.Errorf("expected ErrorLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if op := ctxFields["operation"]; op != "cancel" {
		t.Errorf("expected operation=cancel, got %v", op)
	}
	if id := ctxFields["job_id"]; id != "999" {
		t.Errorf("expected job_id=999, got %v", id)
	}
	if _, present := ctxFields["error"]; !present {
		t.Error("expected error field in log entry")
	}
}
// TestJobService_GetJobs_ErrorLog checks that a failed job listing records
// three log entries and that the third is an error-level entry with
// operation=get_jobs and an error field.
func TestJobService_GetJobs_ErrorLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"down"}`))
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	query := &model.JobListQuery{Page: 1, PageSize: 20}
	if _, err := service.GetJobs(context.Background(), query); err == nil {
		t.Fatal("expected error, got nil")
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.ErrorLevel {
		t.Errorf("expected ErrorLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if op := ctxFields["operation"]; op != "get_jobs" {
		t.Errorf("expected operation=get_jobs, got %v", op)
	}
	if _, present := ctxFields["error"]; !present {
		t.Error("expected error field in log entry")
	}
}
// TestJobService_GetJob_ErrorLog checks that a failed job lookup records
// three log entries and that the third is an error-level entry with
// operation=get_job, the job ID, and an error field.
func TestJobService_GetJob_ErrorLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"down"}`))
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	if _, err := service.GetJob(context.Background(), "200"); err == nil {
		t.Fatal("expected error, got nil")
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.ErrorLevel {
		t.Errorf("expected ErrorLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if op := ctxFields["operation"]; op != "get_job" {
		t.Errorf("expected operation=get_job, got %v", op)
	}
	if id := ctxFields["job_id"]; id != "200" {
		t.Errorf("expected job_id=200, got %v", id)
	}
	if _, present := ctxFields["error"]; !present {
		t.Error("expected error field in log entry")
	}
}
// TestJobService_GetJobHistory_ErrorLog checks that a failed history query
// records three log entries and that the third is an error-level entry with
// operation=get_job_history and an error field.
func TestJobService_GetJobHistory_ErrorLog(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte(`{"error":"db down"}`))
	}))
	defer srv.Close()

	service, logs := newJobServiceWithObserver(srv)
	if _, err := service.GetJobHistory(context.Background(), &model.JobHistoryQuery{}); err == nil {
		t.Fatal("expected error, got nil")
	}

	all := logs.All()
	if n := len(all); n != 3 {
		t.Fatalf("expected 3 log entries, got %d", n)
	}
	last := all[2]
	if last.Level != zapcore.ErrorLevel {
		t.Errorf("expected ErrorLevel, got %v", last.Level)
	}

	ctxFields := last.ContextMap()
	if op := ctxFields["operation"]; op != "get_job_history" {
		t.Errorf("expected operation=get_job_history, got %v", op)
	}
	if _, present := ctxFields["error"]; !present {
		t.Error("expected error field in log entry")
	}
}
// ---------------------------------------------------------------------------
// Fallback to SlurmDBD history tests
// ---------------------------------------------------------------------------
// TestGetJob_FallbackToHistory_Found answers the live job endpoint with a 404
// error payload and the slurmdb endpoint with one completed job, and expects
// GetJob to return that history record.
func TestGetJob_FallbackToHistory_Found(t *testing.T) {
	id, jobName, ts := int32(198), "hist-job", int64(1700000000)

	const (
		livePath    = "/slurm/v0.0.40/job/198"
		historyPath = "/slurmdb/v0.0.40/job/198"
	)

	cli, stop := mockJobServer(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if r.URL.Path == livePath {
			// Live lookup: 404 with an "invalid job id" error payload.
			w.WriteHeader(http.StatusNotFound)
			_ = json.NewEncoder(w).Encode(map[string]interface{}{
				"errors": []map[string]interface{}{
					{
						"description":  "Unable to query JobId=198",
						"error_number": float64(2017),
						"error":        "Invalid job id specified",
						"source":       "_handle_job_get",
					},
				},
				"jobs": []interface{}{},
			})
			return
		}
		if r.URL.Path == historyPath {
			_ = json.NewEncoder(w).Encode(slurm.OpenapiSlurmdbdJobsResp{
				Jobs: slurm.JobList{
					{
						JobID: &id,
						Name:  &jobName,
						State: &slurm.JobState{Current: []string{"COMPLETED"}},
						Time:  &slurm.JobTime{Submission: &ts, Start: &ts, End: &ts},
					},
				},
			})
			return
		}
		w.WriteHeader(http.StatusNotFound)
	})
	defer stop()

	got, err := NewJobService(cli, zap.NewNop()).GetJob(context.Background(), "198")
	if err != nil {
		t.Fatalf("GetJob: %v", err)
	}
	if got == nil {
		t.Fatal("expected job, got nil")
	}
	if got.JobID != 198 {
		t.Errorf("expected JobID 198, got %d", got.JobID)
	}
	if got.Name != "hist-job" {
		t.Errorf("expected Name hist-job, got %s", got.Name)
	}
}
// TestGetJob_FallbackToHistory_NotFound answers every request with an empty
// HTTP 404 and expects GetJob to return a nil job without an error.
func TestGetJob_FallbackToHistory_NotFound(t *testing.T) {
	alwaysMissing := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusNotFound)
	}
	cli, stop := mockJobServer(alwaysMissing)
	defer stop()

	got, err := NewJobService(cli, zap.NewNop()).GetJob(context.Background(), "999")
	switch {
	case err != nil:
		t.Fatalf("GetJob: %v", err)
	case got != nil:
		t.Errorf("expected nil, got %+v", got)
	}
}
// TestGetJob_FallbackToHistory_HistoryError answers the live job endpoint
// with a 404 error payload and the slurmdb endpoint with HTTP 500, and
// expects GetJob to return a nil job plus an error mentioning
// "get job history".
func TestGetJob_FallbackToHistory_HistoryError(t *testing.T) {
	const (
		livePath    = "/slurm/v0.0.40/job/500"
		historyPath = "/slurmdb/v0.0.40/job/500"
	)

	cli, stop := mockJobServer(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if r.URL.Path == livePath {
			w.WriteHeader(http.StatusNotFound)
			_ = json.NewEncoder(w).Encode(map[string]interface{}{
				"errors": []map[string]interface{}{
					{
						"description":  "Unable to query JobId=500",
						"error_number": float64(2017),
						"error":        "Invalid job id specified",
						"source":       "_handle_job_get",
					},
				},
				"jobs": []interface{}{},
			})
			return
		}
		if r.URL.Path == historyPath {
			w.WriteHeader(http.StatusInternalServerError)
			_, _ = w.Write([]byte(`{"errors":[{"error":"db error"}]}`))
			return
		}
		w.WriteHeader(http.StatusNotFound)
	})
	defer stop()

	got, err := NewJobService(cli, zap.NewNop()).GetJob(context.Background(), "500")
	if err == nil {
		t.Fatal("expected error, got nil")
	}
	if got != nil {
		t.Errorf("expected nil job, got %+v", got)
	}
	if msg := err.Error(); !strings.Contains(msg, "get job history") {
		t.Errorf("expected error to contain 'get job history', got %s", msg)
	}
}
// ---------------------------------------------------------------------------
// New scheduling field mapping tests
// ---------------------------------------------------------------------------
// TestSubmitJob_AllSchedulingFields submits a request with every scheduling
// option populated and asserts, inside the mock server, that each option
// arrives in the corresponding field of the decoded slurm job description.
func TestSubmitJob_AllSchedulingFields(t *testing.T) {
	jobID := int32(999)
	// Prepare all 22 new scheduling field values
	var (
		memoryPerNode  = int64(4096)
		memoryPerCpu   = int64(1024)
		nodes          = "2"
		tasks          = int32(4)
		cpusPerTask    = int32(2)
		constraints    = "gpu&fast"
		reservation    = "resv01"
		account        = "proj-alpha"
		nice           = int32(100)
		mailType       = "BEGIN,END,FAIL"
		mailUser       = "admin@example.com"
		stdOut         = "/tmp/job_%j.out"
		stdErr         = "/tmp/job_%j.err"
		stdIn          = "/dev/null"
		reqNodes       = "node01,node02"
		exclNodes      = "node03,node04"
		beginTime      = int64(1700000000)
		deadline       = int64(1700099999)
		array          = "1-10"
		dependency     = "afterok:123"
		requeue        = true
		killOnNodeFail = true
	)
	client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body slurm.JobSubmitReq
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			// The handler runs on the server's goroutine, so t.Fatalf must not
			// be used here; fail the test and reject the request instead.
			t.Errorf("decode body: %v", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if body.Job == nil {
			t.Error("job desc is nil")
			// Return early: every check below dereferences body.Job.
			http.Error(w, "job desc is nil", http.StatusBadRequest)
			return
		}
		j := body.Job

		// --- Existing fields still work ---
		if j.Script == nil || *j.Script != "#!/bin/bash\necho test" {
			t.Errorf("Script mismatch: %v", j.Script)
		}
		if j.Partition == nil || *j.Partition != "normal" {
			t.Errorf("Partition mismatch: %v", j.Partition)
		}
		if j.Qos == nil || *j.Qos != "high" {
			t.Errorf("QOS mismatch: %v", j.Qos)
		}
		if j.Name == nil || *j.Name != "full-test" {
			t.Errorf("Name mismatch: %v", j.Name)
		}
		// CPUs=8 maps to CpusPerTask, then overridden by explicit CpusPerTask=2.
		// This single assertion also covers the CpusPerTask scheduling field,
		// so it is not repeated in the list below.
		if j.CpusPerTask == nil || *j.CpusPerTask != cpusPerTask {
			t.Errorf("CpusPerTask mismatch: got %v, want %d (explicit CpusPerTask overrides CPUs)", j.CpusPerTask, cpusPerTask)
		}
		if j.MinimumCpus != nil {
			t.Errorf("MinimumCpus should be nil, got %v", j.MinimumCpus)
		}

		// --- 22 new scheduling fields ---
		// MemoryPerNode → *Uint64NoVal
		if j.MemoryPerNode == nil || j.MemoryPerNode.Set == nil || !*j.MemoryPerNode.Set || j.MemoryPerNode.Number == nil || *j.MemoryPerNode.Number != memoryPerNode {
			t.Errorf("MemoryPerNode mismatch: %v", j.MemoryPerNode)
		}
		// MemoryPerCpu → *Uint64NoVal
		if j.MemoryPerCpu == nil || j.MemoryPerCpu.Set == nil || !*j.MemoryPerCpu.Set || j.MemoryPerCpu.Number == nil || *j.MemoryPerCpu.Number != memoryPerCpu {
			t.Errorf("MemoryPerCpu mismatch: %v", j.MemoryPerCpu)
		}
		// Nodes → *string
		if j.Nodes == nil || *j.Nodes != nodes {
			t.Errorf("Nodes mismatch: %v", j.Nodes)
		}
		// Tasks → *int32
		if j.Tasks == nil || *j.Tasks != tasks {
			t.Errorf("Tasks mismatch: got %v, want %d", j.Tasks, tasks)
		}
		// Constraints → *string
		if j.Constraints == nil || *j.Constraints != constraints {
			t.Errorf("Constraints mismatch: %v", j.Constraints)
		}
		// Reservation → *string
		if j.Reservation == nil || *j.Reservation != reservation {
			t.Errorf("Reservation mismatch: %v", j.Reservation)
		}
		// Account → *string
		if j.Account == nil || *j.Account != account {
			t.Errorf("Account mismatch: %v", j.Account)
		}
		// Nice → *int32
		if j.Nice == nil || *j.Nice != nice {
			t.Errorf("Nice mismatch: got %v, want %d", j.Nice, nice)
		}
		// MailType → []string (comma-split)
		if len(j.MailType) != 3 || j.MailType[0] != "BEGIN" || j.MailType[1] != "END" || j.MailType[2] != "FAIL" {
			t.Errorf("MailType mismatch: %v", j.MailType)
		}
		// MailUser → *string
		if j.MailUser == nil || *j.MailUser != mailUser {
			t.Errorf("MailUser mismatch: %v", j.MailUser)
		}
		// StandardOutput → *string
		if j.StandardOutput == nil || *j.StandardOutput != stdOut {
			t.Errorf("StandardOutput mismatch: %v", j.StandardOutput)
		}
		// StandardError → *string
		if j.StandardError == nil || *j.StandardError != stdErr {
			t.Errorf("StandardError mismatch: %v", j.StandardError)
		}
		// StandardInput → *string
		if j.StandardInput == nil || *j.StandardInput != stdIn {
			t.Errorf("StandardInput mismatch: %v", j.StandardInput)
		}
		// RequiredNodes → CSVString ([]string)
		if len(j.RequiredNodes) != 2 || j.RequiredNodes[0] != "node01" || j.RequiredNodes[1] != "node02" {
			t.Errorf("RequiredNodes mismatch: %v", j.RequiredNodes)
		}
		// ExcludedNodes → CSVString ([]string)
		if len(j.ExcludedNodes) != 2 || j.ExcludedNodes[0] != "node03" || j.ExcludedNodes[1] != "node04" {
			t.Errorf("ExcludedNodes mismatch: %v", j.ExcludedNodes)
		}
		// BeginTime → *Uint64NoVal
		if j.BeginTime == nil || j.BeginTime.Number == nil || *j.BeginTime.Number != beginTime {
			t.Errorf("BeginTime mismatch: %v", j.BeginTime)
		}
		// Deadline → *int64 (NO wrapper)
		if j.Deadline == nil || *j.Deadline != deadline {
			t.Errorf("Deadline mismatch: %v", j.Deadline)
		}
		// Array → *string
		if j.Array == nil || *j.Array != array {
			t.Errorf("Array mismatch: %v", j.Array)
		}
		// Dependency → *string
		if j.Dependency == nil || *j.Dependency != dependency {
			t.Errorf("Dependency mismatch: %v", j.Dependency)
		}
		// Requeue → *bool
		if j.Requeue == nil || *j.Requeue != requeue {
			t.Errorf("Requeue mismatch: %v", j.Requeue)
		}
		// KillOnNodeFail → *bool
		if j.KillOnNodeFail == nil || *j.KillOnNodeFail != killOnNodeFail {
			t.Errorf("KillOnNodeFail mismatch: %v", j.KillOnNodeFail)
		}

		resp := slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{JobID: &jobID},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer cleanup()

	svc := NewJobService(client, zap.NewNop())
	resp, err := svc.SubmitJob(context.Background(), &model.SubmitJobRequest{
		Script:         "#!/bin/bash\necho test",
		Partition:      "normal",
		QOS:            "high",
		JobName:        "full-test",
		CPUs:           8,
		TimeLimit:      "60",
		MemoryPerNode:  &memoryPerNode,
		MemoryPerCpu:   &memoryPerCpu,
		Nodes:          &nodes,
		Tasks:          &tasks,
		CpusPerTask:    &cpusPerTask,
		Constraints:    &constraints,
		Reservation:    &reservation,
		Account:        &account,
		Nice:           &nice,
		MailType:       &mailType,
		MailUser:       &mailUser,
		StandardOutput: &stdOut,
		StandardError:  &stdErr,
		StandardInput:  &stdIn,
		RequiredNodes:  &reqNodes,
		ExcludedNodes:  &exclNodes,
		BeginTime:      &beginTime,
		Deadline:       &deadline,
		Array:          &array,
		Dependency:     &dependency,
		Requeue:        &requeue,
		KillOnNodeFail: &killOnNodeFail,
	})
	if err != nil {
		t.Fatalf("SubmitJob: %v", err)
	}
	if resp.JobID != 999 {
		t.Errorf("expected JobID 999, got %d", resp.JobID)
	}
}
// TestSubmitJob_BackwardCompat submits a request that only sets Script and
// WorkDir and asserts, inside the mock server, that those two values arrive
// while every scheduling field of the job description stays nil or empty.
func TestSubmitJob_BackwardCompat(t *testing.T) {
	jobID := int32(555)
	client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body slurm.JobSubmitReq
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			// The handler runs on the server's goroutine, so t.Fatalf must not
			// be used here; fail the test and reject the request instead.
			t.Errorf("decode body: %v", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if body.Job == nil {
			t.Error("job desc is nil")
			// Return early: every check below dereferences body.Job.
			http.Error(w, "job desc is nil", http.StatusBadRequest)
			return
		}
		j := body.Job

		// Existing fields: Script and WorkDir should be set
		if j.Script == nil || *j.Script != "echo hi" {
			t.Errorf("Script mismatch: %v", j.Script)
		}
		if j.CurrentWorkingDirectory == nil || *j.CurrentWorkingDirectory != "/home/user" {
			t.Errorf("CurrentWorkingDirectory mismatch: %v", j.CurrentWorkingDirectory)
		}

		// All new scheduling fields should be nil/empty
		if j.MemoryPerNode != nil {
			t.Errorf("MemoryPerNode should be nil, got %v", j.MemoryPerNode)
		}
		if j.MemoryPerCpu != nil {
			t.Errorf("MemoryPerCpu should be nil, got %v", j.MemoryPerCpu)
		}
		if j.Nodes != nil {
			t.Errorf("Nodes should be nil, got %v", j.Nodes)
		}
		if j.Tasks != nil {
			t.Errorf("Tasks should be nil, got %v", j.Tasks)
		}
		if j.CpusPerTask != nil {
			t.Errorf("CpusPerTask should be nil, got %v", j.CpusPerTask)
		}
		if j.Constraints != nil {
			t.Errorf("Constraints should be nil, got %v", j.Constraints)
		}
		if j.Reservation != nil {
			t.Errorf("Reservation should be nil, got %v", j.Reservation)
		}
		if j.Account != nil {
			t.Errorf("Account should be nil, got %v", j.Account)
		}
		if j.Nice != nil {
			t.Errorf("Nice should be nil, got %v", j.Nice)
		}
		if len(j.MailType) != 0 {
			t.Errorf("MailType should be empty, got %v", j.MailType)
		}
		if j.MailUser != nil {
			t.Errorf("MailUser should be nil, got %v", j.MailUser)
		}
		if j.StandardOutput != nil {
			t.Errorf("StandardOutput should be nil, got %v", j.StandardOutput)
		}
		if j.StandardError != nil {
			t.Errorf("StandardError should be nil, got %v", j.StandardError)
		}
		if j.StandardInput != nil {
			t.Errorf("StandardInput should be nil, got %v", j.StandardInput)
		}
		if len(j.RequiredNodes) != 0 {
			t.Errorf("RequiredNodes should be empty, got %v", j.RequiredNodes)
		}
		if len(j.ExcludedNodes) != 0 {
			t.Errorf("ExcludedNodes should be empty, got %v", j.ExcludedNodes)
		}
		if j.BeginTime != nil {
			t.Errorf("BeginTime should be nil, got %v", j.BeginTime)
		}
		if j.Deadline != nil {
			t.Errorf("Deadline should be nil, got %v", j.Deadline)
		}
		if j.Array != nil {
			t.Errorf("Array should be nil, got %v", j.Array)
		}
		if j.Dependency != nil {
			t.Errorf("Dependency should be nil, got %v", j.Dependency)
		}
		if j.Requeue != nil {
			t.Errorf("Requeue should be nil, got %v", j.Requeue)
		}
		if j.KillOnNodeFail != nil {
			t.Errorf("KillOnNodeFail should be nil, got %v", j.KillOnNodeFail)
		}

		resp := slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{JobID: &jobID},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer cleanup()

	svc := NewJobService(client, zap.NewNop())
	resp, err := svc.SubmitJob(context.Background(), &model.SubmitJobRequest{
		Script:  "echo hi",
		WorkDir: "/home/user",
	})
	if err != nil {
		t.Fatalf("SubmitJob: %v", err)
	}
	if resp.JobID != 555 {
		t.Errorf("expected JobID 555, got %d", resp.JobID)
	}
}
// TestSubmitJob_MemoryBothSet submits a request with both MemoryPerNode and
// MemoryPerCpu populated and asserts that each arrives at the server with
// Set=true and its own number.
func TestSubmitJob_MemoryBothSet(t *testing.T) {
	jobID := int32(777)
	memoryPerNode := int64(4096)
	memoryPerCpu := int64(1024)
	client, cleanup := mockJobServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body slurm.JobSubmitReq
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			// The handler runs on the server's goroutine, so t.Fatalf must not
			// be used here; fail the test and reject the request instead.
			t.Errorf("decode body: %v", err)
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if body.Job == nil {
			t.Error("job desc is nil")
			// Return early: the checks below dereference body.Job.
			http.Error(w, "job desc is nil", http.StatusBadRequest)
			return
		}
		j := body.Job
		// Both memory fields should be mapped independently
		if j.MemoryPerNode == nil || j.MemoryPerNode.Set == nil || !*j.MemoryPerNode.Set || j.MemoryPerNode.Number == nil || *j.MemoryPerNode.Number != memoryPerNode {
			t.Errorf("MemoryPerNode mismatch: %v", j.MemoryPerNode)
		}
		if j.MemoryPerCpu == nil || j.MemoryPerCpu.Set == nil || !*j.MemoryPerCpu.Set || j.MemoryPerCpu.Number == nil || *j.MemoryPerCpu.Number != memoryPerCpu {
			t.Errorf("MemoryPerCpu mismatch: %v", j.MemoryPerCpu)
		}
		resp := slurm.OpenapiJobSubmitResponse{
			Result: &slurm.JobSubmitResponseMsg{JobID: &jobID},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer cleanup()

	svc := NewJobService(client, zap.NewNop())
	resp, err := svc.SubmitJob(context.Background(), &model.SubmitJobRequest{
		Script:        "echo mem",
		MemoryPerNode: &memoryPerNode,
		MemoryPerCpu:  &memoryPerCpu,
	})
	if err != nil {
		t.Fatalf("SubmitJob: %v", err)
	}
	if resp.JobID != 777 {
		t.Errorf("expected JobID 777, got %d", resp.JobID)
	}
}
// TestGetJob_FallbackToHistory_EmptyHistory answers the live job endpoint
// with a 404 error payload and the slurmdb endpoint with an empty job list,
// and expects GetJob to return a nil job without an error.
func TestGetJob_FallbackToHistory_EmptyHistory(t *testing.T) {
	const (
		livePath    = "/slurm/v0.0.40/job/777"
		historyPath = "/slurmdb/v0.0.40/job/777"
	)

	cli, stop := mockJobServer(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if r.URL.Path == livePath {
			w.WriteHeader(http.StatusNotFound)
			_ = json.NewEncoder(w).Encode(map[string]interface{}{
				"errors": []map[string]interface{}{
					{
						"description":  "Unable to query JobId=777",
						"error_number": float64(2017),
						"error":        "Invalid job id specified",
						"source":       "_handle_job_get",
					},
				},
				"jobs": []interface{}{},
			})
			return
		}
		if r.URL.Path == historyPath {
			_ = json.NewEncoder(w).Encode(slurm.OpenapiSlurmdbdJobsResp{Jobs: slurm.JobList{}})
			return
		}
		w.WriteHeader(http.StatusNotFound)
	})
	defer stop()

	got, err := NewJobService(cli, zap.NewNop()).GetJob(context.Background(), "777")
	switch {
	case err != nil:
		t.Fatalf("GetJob: %v", err)
	case got != nil:
		t.Errorf("expected nil, got %+v", got)
	}
}