Files
hpc/internal/testutil/mockslurm/server.go
dailz b9b2f0d9b4 feat(testutil): add MockSlurm, MockMinIO, TestEnv and 37 integration tests
- mockminio: in-memory ObjectStorage with all 11 methods, thread-safe, SHA256 ETag, Range support
- mockslurm: httptest server with 11 Slurm REST API endpoints, job eviction from active to history queue
- testenv: one-line test environment factory (SQLite + MockSlurm + MockMinIO + all stores/services/handlers + httptest server)
- integration tests: 37 tests covering Jobs(5), Cluster(5), App(6), Upload(5), File(4), Folder(4), Task(4), E2E(1)
- no external dependencies, no existing files modified
2026-04-16 13:23:27 +08:00

545 lines
15 KiB
Go

// Package mockslurm provides a complete HTTP mock server for the Slurm REST API.
// It supports all 11 endpoints (P0: 4 job + P1: 7 cluster/history) and includes
// job eviction from active to history queue on terminal states.
package mockslurm
import (
"encoding/json"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"sync"
"time"
"gcy_hpc_server/internal/slurm"
)
// MockJob represents a job tracked by the mock server.
// It is the mock's internal record for a single job; the conversion helpers
// in this file translate it into the slurm API response types.
type MockJob struct {
// JobID is the identifier handed out by the submit handler.
JobID int32
// Name is copied from the submitted job description, if provided.
Name string
State string // single state string for internal tracking
// Script is the batch script taken from the submit request, if provided.
Script string
// Partition is copied from the submitted job description, if provided.
Partition string
// WorkDir is the job's current working directory from the submit request.
WorkDir string
// SubmitTime is recorded when the submit handler creates the job.
SubmitTime time.Time
// StartTime is nil until SetJobState moves the job to RUNNING.
StartTime *time.Time
// EndTime is nil until SetJobState moves the job to a terminal state.
EndTime *time.Time
// ExitCode is nil until a terminal state is set; SetJobState stores 0 for
// COMPLETED and 1 for every other terminal state.
ExitCode *int32
}
// MockNode represents a node tracked by the mock server.
type MockNode struct {
// Name is the node name reported by the nodes endpoint and matched against
// the {name} path segment of the single-node endpoint.
Name string
}
// MockPartition represents a partition tracked by the mock server.
type MockPartition struct {
// Name is the partition name reported by the partitions endpoint and matched
// against the {name} path segment of the single-partition endpoint.
Name string
}
// MockSlurm is the mock Slurm API server controller.
// It owns the in-memory job queues served by the HTTP handlers and exposes
// controller methods so tests can drive job state transitions directly.
type MockSlurm struct {
// mu guards activeJobs, historyJobs and nextID.
mu sync.RWMutex
// activeJobs holds jobs that have not reached a terminal state, keyed by job ID.
activeJobs map[int32]*MockJob
// historyJobs holds jobs evicted from activeJobs by a terminal state, keyed by job ID.
historyJobs map[int32]*MockJob
// nodes is populated by NewMockSlurmServer; the handlers in this file read
// it without taking mu.
nodes []MockNode
// partitions is populated by NewMockSlurmServer; the handlers in this file
// read it without taking mu.
partitions []MockPartition
// nextID is the ID the next submitted job will receive (starts at 1).
nextID int32
// server is the httptest.Server created by NewMockSlurmServer.
server *httptest.Server
}
// NewMockSlurmServer builds a MockSlurm controller pre-populated with three
// nodes (node01..node03) and two partitions (normal, gpu), registers every
// mock endpoint on a fresh ServeMux and starts an httptest.Server serving it.
//
// It returns the running server together with the controller. The caller
// should Close the server when the test is done.
func NewMockSlurmServer() (*httptest.Server, *MockSlurm) {
	mock := &MockSlurm{
		activeJobs:  map[int32]*MockJob{},
		historyJobs: map[int32]*MockJob{},
		nodes:       []MockNode{{Name: "node01"}, {Name: "node02"}, {Name: "node03"}},
		partitions:  []MockPartition{{Name: "normal"}, {Name: "gpu"}},
		nextID:      1,
	}

	// ServeMux always prefers the most specific pattern, so the exact
	// "/job/submit" route wins over the "/job/" subtree regardless of the
	// order in which the two are registered.
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		// P0: job endpoints.
		{"/slurm/v0.0.40/job/submit", mock.handleJobSubmit},
		{"/slurm/v0.0.40/jobs", mock.handleGetJobs},
		{"/slurm/v0.0.40/job/", mock.handleJobByID}, // GET and DELETE /job/{id}
		// P1: cluster endpoints.
		{"/slurm/v0.0.40/nodes", mock.handleGetNodes},
		{"/slurm/v0.0.40/node/", mock.handleGetNode},
		{"/slurm/v0.0.40/partitions", mock.handleGetPartitions},
		{"/slurm/v0.0.40/partition/", mock.handleGetPartition},
		{"/slurm/v0.0.40/diag", mock.handleDiag},
		// P1: SlurmDB (history) endpoints.
		{"/slurmdb/v0.0.40/jobs", mock.handleSlurmdbJobs},
		{"/slurmdb/v0.0.40/job/", mock.handleSlurmdbJob},
	}

	mux := http.NewServeMux()
	for _, rt := range routes {
		mux.HandleFunc(rt.pattern, rt.handler)
	}

	mock.server = httptest.NewServer(mux)
	return mock.server, mock
}
// Server exposes the httptest.Server that NewMockSlurmServer started for this
// controller, e.g. to read its URL or to close it.
func (m *MockSlurm) Server() *httptest.Server {
	srv := m.server
	return srv
}
// ---------------------------------------------------------------------------
// Controller methods
// ---------------------------------------------------------------------------
// SetJobState moves the active job with the given ID into state.
//
//   - "PENDING" only updates the state; the job stays active.
//   - "RUNNING" updates the state and stamps StartTime; the job stays active.
//   - "COMPLETED", "FAILED", "CANCELLED" and "TIMEOUT" are terminal: the state
//     is updated, EndTime is stamped, ExitCode is set (0 for COMPLETED, 1
//     otherwise) and the job is moved from activeJobs to historyJobs.
//
// Unknown IDs (including jobs already in history) and unrecognised states
// are ignored.
func (m *MockSlurm) SetJobState(id int32, state string) {
	m.mu.Lock()
	defer m.mu.Unlock()

	job, active := m.activeJobs[id]
	if !active {
		return
	}
	stamp := time.Now()

	if state == "PENDING" {
		job.State = state
		return
	}
	if state == "RUNNING" {
		job.State = state
		job.StartTime = &stamp
		return
	}

	terminal := state == "COMPLETED" || state == "FAILED" ||
		state == "CANCELLED" || state == "TIMEOUT"
	if !terminal {
		return
	}

	var code int32 = 1
	if state == "COMPLETED" {
		code = 0
	}
	job.State, job.EndTime, job.ExitCode = state, &stamp, &code

	// Evict: terminal jobs are only visible through the history queue.
	m.historyJobs[id] = job
	delete(m.activeJobs, id)
}
// GetJobState looks the job up first in the active queue and then in the
// history queue and reports its current state string.
// It returns "" when no job with that ID is known.
func (m *MockSlurm) GetJobState(id int32) string {
	m.mu.RLock()
	defer m.mu.RUnlock()

	for _, queue := range []map[int32]*MockJob{m.activeJobs, m.historyJobs} {
		if job, found := queue[id]; found {
			return job.State
		}
	}
	return ""
}
// GetAllActiveJobs returns a new slice holding every job currently in the
// active queue. The slice is fresh, but its elements are the server's own
// *MockJob records. Order follows map iteration and is therefore unspecified.
func (m *MockSlurm) GetAllActiveJobs() []*MockJob {
	m.mu.RLock()
	defer m.mu.RUnlock()

	snapshot := make([]*MockJob, len(m.activeJobs))
	next := 0
	for id := range m.activeJobs {
		snapshot[next] = m.activeJobs[id]
		next++
	}
	return snapshot
}
// GetAllHistoryJobs returns a new slice holding every job in the history
// queue. The slice is fresh, but its elements are the server's own *MockJob
// records. Order follows map iteration and is therefore unspecified.
func (m *MockSlurm) GetAllHistoryJobs() []*MockJob {
	m.mu.RLock()
	defer m.mu.RUnlock()

	snapshot := make([]*MockJob, len(m.historyJobs))
	next := 0
	for id := range m.historyJobs {
		snapshot[next] = m.historyJobs[id]
		next++
	}
	return snapshot
}
// ---------------------------------------------------------------------------
// P0: Job Core Endpoints
// ---------------------------------------------------------------------------
// handleJobSubmit serves POST /slurm/v0.0.40/job/submit.
//
// It decodes a slurm.JobSubmitReq, registers a new PENDING job in the active
// queue under the next free job ID and answers with the submit response for
// that ID. Non-POST requests get 405; an undecodable body gets 400.
func (m *MockSlurm) handleJobSubmit(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}

	var req slurm.JobSubmitReq
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// Assemble everything that does not depend on shared state first.
	// State must be non-empty: per the original author's note the consumer's
	// state mapping (mapSlurmStateToTaskStatus) relies on it.
	job := &MockJob{State: "PENDING"}
	if req.Script != nil {
		job.Script = *req.Script
	}
	if desc := req.Job; desc != nil {
		if desc.Name != nil {
			job.Name = *desc.Name
		}
		if desc.Partition != nil {
			job.Partition = *desc.Partition
		}
		if desc.CurrentWorkingDirectory != nil {
			job.WorkDir = *desc.CurrentWorkingDirectory
		}
		// A script inside the job description overrides the top-level one.
		if desc.Script != nil {
			job.Script = *desc.Script
		}
	}

	m.mu.Lock()
	assigned := m.nextID
	m.nextID++
	job.JobID = assigned
	job.SubmitTime = time.Now()
	m.activeJobs[assigned] = job
	m.mu.Unlock()

	writeJSON(w, http.StatusOK, NewSubmitResponse(assigned))
}
// handleGetJobs serves GET /slurm/v0.0.40/jobs and lists every job in the
// active queue. The request method is not checked.
func (m *MockSlurm) handleGetJobs(w http.ResponseWriter, r *http.Request) {
	// Convert while holding the read lock so each job is snapshotted
	// consistently; the response is written after the lock is released.
	infos := func() []slurm.JobInfo {
		m.mu.RLock()
		defer m.mu.RUnlock()

		out := make([]slurm.JobInfo, 0, len(m.activeJobs))
		for _, job := range m.activeJobs {
			out = append(out, m.mockJobToJobInfo(job))
		}
		return out
	}()

	writeJSON(w, http.StatusOK, NewJobInfoResponse(infos))
}
// handleJobByID serves GET and DELETE on /slurm/v0.0.40/job/{id}.
//
// The job ID is the fifth "/"-separated element of the path (index 4, after
// the leading empty element). A missing or non-numeric ID yields 400; any
// method other than GET or DELETE yields 405.
func (m *MockSlurm) handleJobByID(w http.ResponseWriter, r *http.Request) {
	// "/slurm/v0.0.40/job/{id}" splits into "", "slurm", "v0.0.40", "job", id.
	parts := strings.Split(strings.TrimRight(r.URL.Path, "/"), "/")
	if len(parts) < 5 {
		m.writeError(w, http.StatusBadRequest, "missing job id")
		return
	}
	idPart := parts[4]

	// Defensive fallback: should a submit request ever be routed here,
	// hand it to the submit handler instead of rejecting it.
	if idPart == "submit" {
		m.handleJobSubmit(w, r)
		return
	}

	parsed, err := strconv.ParseInt(idPart, 10, 32)
	if err != nil {
		m.writeError(w, http.StatusBadRequest, "invalid job id")
		return
	}
	jobID := int32(parsed)

	if r.Method == http.MethodGet {
		m.handleGetJobByID(w, jobID)
		return
	}
	if r.Method == http.MethodDelete {
		m.handleDeleteJobByID(w, jobID)
		return
	}
	http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}
// handleGetJobByID writes the JobInfo response for a single active job, or a
// 404 OpenapiResp error when jobID is not in the active queue (jobs that have
// been evicted to history are therefore reported as not found here).
func (m *MockSlurm) handleGetJobByID(w http.ResponseWriter, jobID int32) {
	// Snapshot the job while still holding the read lock. SetJobState mutates
	// the same *MockJob under the write lock, so reading its fields after
	// RUnlock would be a data race. mockJobToJobInfo copies every value it
	// needs, so the resulting JobInfo is safe to use once the lock is released.
	m.mu.RLock()
	mj, ok := m.activeJobs[jobID]
	var ji slurm.JobInfo
	if ok {
		ji = m.mockJobToJobInfo(mj)
	}
	m.mu.RUnlock()

	if !ok {
		m.writeError(w, http.StatusNotFound, "job not found")
		return
	}

	resp := NewJobInfoResponse([]slurm.JobInfo{ji})
	writeJSON(w, http.StatusOK, resp)
}
// handleDeleteJobByID cancels the job by moving it to the CANCELLED state and
// always answers 200 with the delete response. SetJobState ignores IDs that
// are not in the active queue, so unknown IDs also get 200.
func (m *MockSlurm) handleDeleteJobByID(w http.ResponseWriter, jobID int32) {
	const cancelled = "CANCELLED"
	m.SetJobState(jobID, cancelled)
	writeJSON(w, http.StatusOK, NewDeleteResponse())
}
// ---------------------------------------------------------------------------
// P1: Cluster/History Endpoints
// ---------------------------------------------------------------------------
// handleGetNodes serves GET /slurm/v0.0.40/nodes and lists every configured
// node by name. The request method is not checked.
func (m *MockSlurm) handleGetNodes(w http.ResponseWriter, r *http.Request) {
	listed := make([]slurm.Node, 0, len(m.nodes))
	for idx := range m.nodes {
		listed = append(listed, slurm.Node{Name: slurm.Ptr(m.nodes[idx].Name)})
	}
	writeJSON(w, http.StatusOK, NewNodeResponse(listed))
}
// handleGetNode serves GET /slurm/v0.0.40/node/{name}.
//
// The node name is the fifth "/"-separated path element. A missing name
// yields 400 and an unknown name yields 404; otherwise the matching node is
// returned in a one-element node response.
func (m *MockSlurm) handleGetNode(w http.ResponseWriter, r *http.Request) {
	parts := strings.Split(strings.TrimRight(r.URL.Path, "/"), "/")
	if len(parts) < 5 {
		m.writeError(w, http.StatusBadRequest, "missing node name")
		return
	}
	wanted := parts[4]

	for idx := range m.nodes {
		if m.nodes[idx].Name != wanted {
			continue
		}
		match := slurm.Node{Name: slurm.Ptr(m.nodes[idx].Name)}
		writeJSON(w, http.StatusOK, NewNodeResponse([]slurm.Node{match}))
		return
	}

	m.writeError(w, http.StatusNotFound, "node not found")
}
// handleGetPartitions serves GET /slurm/v0.0.40/partitions and lists every
// configured partition by name. The request method is not checked.
func (m *MockSlurm) handleGetPartitions(w http.ResponseWriter, r *http.Request) {
	listed := make([]slurm.PartitionInfo, 0, len(m.partitions))
	for idx := range m.partitions {
		listed = append(listed, slurm.PartitionInfo{Name: slurm.Ptr(m.partitions[idx].Name)})
	}
	writeJSON(w, http.StatusOK, NewPartitionResponse(listed))
}
// handleGetPartition serves GET /slurm/v0.0.40/partition/{name}.
//
// The partition name is the fifth "/"-separated path element. A missing name
// yields 400 and an unknown name yields 404; otherwise the matching partition
// is returned in a one-element partition response.
func (m *MockSlurm) handleGetPartition(w http.ResponseWriter, r *http.Request) {
	parts := strings.Split(strings.TrimRight(r.URL.Path, "/"), "/")
	if len(parts) < 5 {
		m.writeError(w, http.StatusBadRequest, "missing partition name")
		return
	}
	wanted := parts[4]

	for idx := range m.partitions {
		if m.partitions[idx].Name != wanted {
			continue
		}
		match := slurm.PartitionInfo{Name: slurm.Ptr(m.partitions[idx].Name)}
		writeJSON(w, http.StatusOK, NewPartitionResponse([]slurm.PartitionInfo{match}))
		return
	}

	m.writeError(w, http.StatusNotFound, "partition not found")
}
// handleDiag serves GET /slurm/v0.0.40/diag with the canned diagnostics
// response built by NewDiagResponse. The request method is not checked.
func (m *MockSlurm) handleDiag(w http.ResponseWriter, r *http.Request) {
	writeJSON(w, http.StatusOK, NewDiagResponse())
}
// handleSlurmdbJobs serves GET /slurmdb/v0.0.40/jobs and lists jobs from the
// history queue, optionally narrowed by query parameters:
//
//   - job_name:   keep only jobs whose Name equals the value.
//   - start_time: Unix seconds; keep only jobs that have a StartTime at or
//     after it.
//   - end_time:   Unix seconds; keep only jobs that have an EndTime at or
//     before it.
//
// A start_time or end_time value that is not a valid integer is ignored, i.e.
// that filter is simply not applied.
func (m *MockSlurm) handleSlurmdbJobs(w http.ResponseWriter, r *http.Request) {
	// URL.Query re-parses the raw query string on every call, so parse it and
	// the numeric filters once up front instead of once per job.
	query := r.URL.Query()
	nameFilter := query.Get("job_name")

	var (
		startMin, endMax int64
		hasStart, hasEnd bool
	)
	if raw := query.Get("start_time"); raw != "" {
		if v, err := strconv.ParseInt(raw, 10, 64); err == nil {
			startMin, hasStart = v, true
		}
	}
	if raw := query.Get("end_time"); raw != "" {
		if v, err := strconv.ParseInt(raw, 10, 64); err == nil {
			endMax, hasEnd = v, true
		}
	}

	// Non-nil so that an empty result encodes as [] rather than null.
	jobs := make([]slurm.Job, 0)

	// Hold the read lock only while walking the map; the response is written
	// after it is released so a slow client cannot block state changes.
	m.mu.RLock()
	for _, mj := range m.historyJobs {
		if nameFilter != "" && mj.Name != nameFilter {
			continue
		}
		if hasStart && (mj.StartTime == nil || mj.StartTime.Unix() < startMin) {
			continue
		}
		if hasEnd && (mj.EndTime == nil || mj.EndTime.Unix() > endMax) {
			continue
		}
		jobs = append(jobs, m.mockJobToSlurmDBJob(mj))
	}
	m.mu.RUnlock()

	resp := NewJobHistoryResponse(jobs)
	writeJSON(w, http.StatusOK, resp)
}
// handleSlurmdbJob serves GET /slurmdb/v0.0.40/job/{id}.
//
// It answers with the same jobs-array wrapper used by the list endpoint,
// containing the single matching history job. A missing ID, a non-numeric ID
// and an ID not present in the history queue are all reported as 404.
func (m *MockSlurm) handleSlurmdbJob(w http.ResponseWriter, r *http.Request) {
	notFound := func() {
		m.writeError(w, http.StatusNotFound, "job not found")
	}

	parts := strings.Split(strings.TrimRight(r.URL.Path, "/"), "/")
	if len(parts) < 5 {
		notFound()
		return
	}
	parsed, err := strconv.ParseInt(parts[4], 10, 32)
	if err != nil {
		notFound()
		return
	}

	m.mu.RLock()
	record, found := m.historyJobs[int32(parsed)]
	m.mu.RUnlock()
	if !found {
		notFound()
		return
	}

	history := []slurm.Job{m.mockJobToSlurmDBJob(record)}
	writeJSON(w, http.StatusOK, NewJobHistoryResponse(history))
}
// ---------------------------------------------------------------------------
// Conversion helpers
// ---------------------------------------------------------------------------
// mockJobToJobInfo builds the active-endpoint representation of mj.
//
// The state is produced by buildActiveJobState (the flat form, e.g.
// ["RUNNING"]). Times are reported as Unix seconds. StartTime, EndTime and
// ExitCode are only populated when the corresponding MockJob field is set.
func (m *MockSlurm) mockJobToJobInfo(mj *MockJob) slurm.JobInfo {
	// unixVal wraps a Unix-seconds value in the API's number container.
	unixVal := func(sec int64) *slurm.Uint64NoVal {
		return &slurm.Uint64NoVal{Number: slurm.Ptr(sec)}
	}

	info := slurm.JobInfo{
		JobID:                   slurm.Ptr(mj.JobID),
		JobState:                buildActiveJobState(mj.State), // must be a non-empty []string
		Name:                    slurm.Ptr(mj.Name),
		Partition:               slurm.Ptr(mj.Partition),
		CurrentWorkingDirectory: slurm.Ptr(mj.WorkDir),
		SubmitTime:              unixVal(mj.SubmitTime.Unix()),
	}

	if started := mj.StartTime; started != nil {
		info.StartTime = unixVal(started.Unix())
	}
	if ended := mj.EndTime; ended != nil {
		info.EndTime = unixVal(ended.Unix())
	}
	if code := mj.ExitCode; code != nil {
		info.ExitCode = &slurm.ProcessExitCodeVerbose{
			ReturnCode: &slurm.Uint32NoVal{Number: slurm.Ptr(int64(*code))},
		}
	}
	return info
}
// mockJobToSlurmDBJob builds the SlurmDB (history) representation of mj.
//
// The state is produced by buildHistoryJobState (the nested form, e.g.
// {current: ["COMPLETED"], reason: ""}). Times are reported as Unix seconds.
// Start, End and ExitCode are only populated when the corresponding MockJob
// field is set.
func (m *MockSlurm) mockJobToSlurmDBJob(mj *MockJob) slurm.Job {
	// Assemble the time section first, then attach it to the job.
	times := &slurm.JobTime{Submission: slurm.Ptr(mj.SubmitTime.Unix())}
	if started := mj.StartTime; started != nil {
		times.Start = slurm.Ptr(started.Unix())
	}
	if ended := mj.EndTime; ended != nil {
		times.End = slurm.Ptr(ended.Unix())
	}

	record := slurm.Job{
		JobID:            slurm.Ptr(mj.JobID),
		Name:             slurm.Ptr(mj.Name),
		Partition:        slurm.Ptr(mj.Partition),
		WorkingDirectory: slurm.Ptr(mj.WorkDir),
		Script:           slurm.Ptr(mj.Script),
		State:            buildHistoryJobState(mj.State),
		Time:             times,
	}

	if code := mj.ExitCode; code != nil {
		record.ExitCode = &slurm.ProcessExitCodeVerbose{
			ReturnCode: &slurm.Uint32NoVal{Number: slurm.Ptr(int64(*code))},
		}
	}
	return record
}
// ---------------------------------------------------------------------------
// Response and error helpers
// ---------------------------------------------------------------------------
// writeJSON serialises v as JSON and writes it with the given status code and
// a Content-Type of application/json. The body is terminated by a newline.
//
// v is marshalled before any header is sent, so a value that cannot be
// encoded results in a 500 plain-text error instead of a success status with
// an empty body.
func writeJSON(w http.ResponseWriter, code int, v interface{}) {
	body, err := json.Marshal(v)
	if err != nil {
		http.Error(w, "failed to encode response: "+err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	// The trailing newline keeps the wire format identical to what
	// json.Encoder.Encode produces. A write error means the client has gone
	// away; the status is already committed, so there is nothing to recover.
	_, _ = w.Write(append(body, '\n'))
}
// writeError responds with statusCode and a slurm.OpenapiResp body that
// carries a fixed meta section (plugin openapi/v0.0.40, release 24.05.0), a
// single error entry holding message (error number 0) and an empty warnings
// list.
//
// Per the original author's note, clients rely on the error body being
// parseable as OpenapiResp (CheckResponse/IsNotFound), which is why a plain
// text error is not used here.
func (m *MockSlurm) writeError(w http.ResponseWriter, statusCode int, message string) {
	body := slurm.OpenapiResp{
		Meta: &slurm.OpenapiMeta{
			Plugin: &slurm.MetaPlugin{
				Type: slurm.Ptr("openapi/v0.0.40"),
				Name: slurm.Ptr(""),
			},
			Slurm: &slurm.MetaSlurm{
				Version: &slurm.MetaSlurmVersion{
					Major: slurm.Ptr("24"),
					Micro: slurm.Ptr("0"),
					Minor: slurm.Ptr("5"),
				},
				Release: slurm.Ptr("24.05.0"),
			},
		},
		Errors: slurm.OpenapiErrors{
			{
				ErrorNumber: slurm.Ptr(int32(0)),
				Error:       slurm.Ptr(message),
			},
		},
		Warnings: slurm.OpenapiWarnings{},
	}
	writeJSON(w, statusCode, body)
}