// Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)
// Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
package service
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"gcy_hpc_server/internal/slurm"
|
|
|
|
"go.uber.org/zap"
|
|
"go.uber.org/zap/zapcore"
|
|
"go.uber.org/zap/zaptest/observer"
|
|
)
|
|
|
|
func mockServer(handler http.HandlerFunc) (*slurm.Client, func()) {
|
|
srv := httptest.NewServer(handler)
|
|
client, _ := slurm.NewClient(srv.URL, srv.Client())
|
|
return client, srv.Close
|
|
}
|
|
|
|
func TestGetNodes(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/slurm/v0.0.40/nodes" {
|
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
resp := map[string]interface{}{
|
|
"nodes": []map[string]interface{}{
|
|
{
|
|
"name": "node1",
|
|
"state": []string{"IDLE"},
|
|
"cpus": 64,
|
|
"real_memory": 256000,
|
|
"alloc_memory": 0,
|
|
"architecture": "x86_64",
|
|
"operating_system": "Linux 5.15",
|
|
},
|
|
},
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(resp)
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
nodes, err := svc.GetNodes(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("GetNodes returned error: %v", err)
|
|
}
|
|
if len(nodes) != 1 {
|
|
t.Fatalf("expected 1 node, got %d", len(nodes))
|
|
}
|
|
n := nodes[0]
|
|
if n.Name != "node1" {
|
|
t.Errorf("expected name node1, got %s", n.Name)
|
|
}
|
|
if len(n.State) != 1 || n.State[0] != "IDLE" {
|
|
t.Errorf("expected state [IDLE], got %v", n.State)
|
|
}
|
|
if n.CPUs != 64 {
|
|
t.Errorf("expected 64 CPUs, got %d", n.CPUs)
|
|
}
|
|
if n.RealMemory != 256000 {
|
|
t.Errorf("expected real_memory 256000, got %d", n.RealMemory)
|
|
}
|
|
if n.Arch != "x86_64" {
|
|
t.Errorf("expected arch x86_64, got %s", n.Arch)
|
|
}
|
|
if n.OS != "Linux 5.15" {
|
|
t.Errorf("expected OS 'Linux 5.15', got %s", n.OS)
|
|
}
|
|
}
|
|
|
|
func TestGetNodes_Empty(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{})
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
nodes, err := svc.GetNodes(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("GetNodes returned error: %v", err)
|
|
}
|
|
if nodes != nil {
|
|
t.Errorf("expected nil for empty response, got %v", nodes)
|
|
}
|
|
}
|
|
|
|
func TestGetNode(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/slurm/v0.0.40/node/node1" {
|
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
resp := map[string]interface{}{
|
|
"nodes": []map[string]interface{}{
|
|
{"name": "node1", "state": []string{"ALLOCATED"}, "cpus": 32},
|
|
},
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(resp)
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
node, err := svc.GetNode(context.Background(), "node1")
|
|
if err != nil {
|
|
t.Fatalf("GetNode returned error: %v", err)
|
|
}
|
|
if node == nil {
|
|
t.Fatal("expected node, got nil")
|
|
}
|
|
if node.Name != "node1" {
|
|
t.Errorf("expected name node1, got %s", node.Name)
|
|
}
|
|
}
|
|
|
|
func TestGetNode_NotFound(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{})
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
node, err := svc.GetNode(context.Background(), "missing")
|
|
if err != nil {
|
|
t.Fatalf("GetNode returned error: %v", err)
|
|
}
|
|
if node != nil {
|
|
t.Errorf("expected nil for missing node, got %+v", node)
|
|
}
|
|
}
|
|
|
|
func TestGetPartitions(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/slurm/v0.0.40/partitions" {
|
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
resp := map[string]interface{}{
|
|
"partitions": []map[string]interface{}{
|
|
{
|
|
"name": "normal",
|
|
"partition": map[string]interface{}{
|
|
"state": []string{"UP"},
|
|
},
|
|
"nodes": map[string]interface{}{
|
|
"configured": "node[1-10]",
|
|
"total": 10,
|
|
},
|
|
"cpus": map[string]interface{}{
|
|
"total": 640,
|
|
},
|
|
"maximums": map[string]interface{}{
|
|
"time": map[string]interface{}{
|
|
"set": true,
|
|
"infinite": false,
|
|
"number": 86400,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(resp)
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
partitions, err := svc.GetPartitions(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("GetPartitions returned error: %v", err)
|
|
}
|
|
if len(partitions) != 1 {
|
|
t.Fatalf("expected 1 partition, got %d", len(partitions))
|
|
}
|
|
p := partitions[0]
|
|
if p.Name != "normal" {
|
|
t.Errorf("expected name normal, got %s", p.Name)
|
|
}
|
|
if len(p.State) != 1 || p.State[0] != "UP" {
|
|
t.Errorf("expected state [UP], got %v", p.State)
|
|
}
|
|
if p.Nodes != "node[1-10]" {
|
|
t.Errorf("expected nodes 'node[1-10]', got %s", p.Nodes)
|
|
}
|
|
if p.TotalCPUs != 640 {
|
|
t.Errorf("expected 640 total CPUs, got %d", p.TotalCPUs)
|
|
}
|
|
if p.TotalNodes != 10 {
|
|
t.Errorf("expected 10 total nodes, got %d", p.TotalNodes)
|
|
}
|
|
if p.MaxTime != "86400" {
|
|
t.Errorf("expected max_time '86400', got %s", p.MaxTime)
|
|
}
|
|
}
|
|
|
|
func TestGetPartitions_Empty(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{})
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
partitions, err := svc.GetPartitions(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("GetPartitions returned error: %v", err)
|
|
}
|
|
if partitions != nil {
|
|
t.Errorf("expected nil for empty response, got %v", partitions)
|
|
}
|
|
}
|
|
|
|
func TestGetPartition(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/slurm/v0.0.40/partition/gpu" {
|
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
resp := map[string]interface{}{
|
|
"partitions": []map[string]interface{}{
|
|
{
|
|
"name": "gpu",
|
|
"partition": map[string]interface{}{
|
|
"state": []string{"UP"},
|
|
},
|
|
"nodes": map[string]interface{}{
|
|
"configured": "gpu[1-4]",
|
|
"total": 4,
|
|
},
|
|
"maximums": map[string]interface{}{
|
|
"time": map[string]interface{}{
|
|
"set": true,
|
|
"infinite": true,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(resp)
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
part, err := svc.GetPartition(context.Background(), "gpu")
|
|
if err != nil {
|
|
t.Fatalf("GetPartition returned error: %v", err)
|
|
}
|
|
if part == nil {
|
|
t.Fatal("expected partition, got nil")
|
|
}
|
|
if part.Name != "gpu" {
|
|
t.Errorf("expected name gpu, got %s", part.Name)
|
|
}
|
|
if part.MaxTime != "UNLIMITED" {
|
|
t.Errorf("expected max_time UNLIMITED, got %s", part.MaxTime)
|
|
}
|
|
}
|
|
|
|
func TestGetPartition_NotFound(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{})
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
part, err := svc.GetPartition(context.Background(), "missing")
|
|
if err != nil {
|
|
t.Fatalf("GetPartition returned error: %v", err)
|
|
}
|
|
if part != nil {
|
|
t.Errorf("expected nil for missing partition, got %+v", part)
|
|
}
|
|
}
|
|
|
|
func TestGetDiag(t *testing.T) {
|
|
client, cleanup := mockServer(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/slurm/v0.0.40/diag" {
|
|
t.Errorf("unexpected path: %s", r.URL.Path)
|
|
}
|
|
resp := map[string]interface{}{
|
|
"statistics": map[string]interface{}{
|
|
"server_thread_count": 10,
|
|
"agent_queue_size": 5,
|
|
"jobs_submitted": 100,
|
|
"jobs_running": 20,
|
|
"schedule_queue_length": 3,
|
|
},
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(resp)
|
|
})
|
|
defer cleanup()
|
|
|
|
svc := NewClusterService(client, zap.NewNop())
|
|
diag, err := svc.GetDiag(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("GetDiag returned error: %v", err)
|
|
}
|
|
if diag == nil {
|
|
t.Fatal("expected diag response, got nil")
|
|
}
|
|
if diag.Statistics == nil {
|
|
t.Fatal("expected statistics, got nil")
|
|
}
|
|
if diag.Statistics.ServerThreadCount == nil || *diag.Statistics.ServerThreadCount != 10 {
|
|
t.Errorf("expected server_thread_count 10, got %v", diag.Statistics.ServerThreadCount)
|
|
}
|
|
}
|
|
|
|
func TestNewSlurmClient(t *testing.T) {
|
|
dir := t.TempDir()
|
|
keyPath := filepath.Join(dir, "jwt.key")
|
|
os.WriteFile(keyPath, make([]byte, 32), 0644)
|
|
|
|
client, err := NewSlurmClient("http://localhost:6820", "root", keyPath)
|
|
if err != nil {
|
|
t.Fatalf("NewSlurmClient returned error: %v", err)
|
|
}
|
|
if client == nil {
|
|
t.Fatal("expected client, got nil")
|
|
}
|
|
}
|
|
|
|
func newClusterServiceWithObserver(srv *httptest.Server) (*ClusterService, *observer.ObservedLogs) {
|
|
core, recorded := observer.New(zapcore.DebugLevel)
|
|
l := zap.New(core)
|
|
client, _ := slurm.NewClient(srv.URL, srv.Client())
|
|
return NewClusterService(client, l), recorded
|
|
}
|
|
|
|
// errorServer returns an httptest server that answers every request with
// HTTP 500 and a slurm-style JSON error body.
func errorServer() *httptest.Server {
	fail := func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte(`{"errors": [{"error": "internal server error"}]}`))
	}
	return httptest.NewServer(http.HandlerFunc(fail))
}
|
|
|
|
func TestClusterService_GetNodes_ErrorLogging(t *testing.T) {
|
|
srv := errorServer()
|
|
defer srv.Close()
|
|
|
|
svc, logs := newClusterServiceWithObserver(srv)
|
|
_, err := svc.GetNodes(context.Background())
|
|
if err == nil {
|
|
t.Fatal("expected error, got nil")
|
|
}
|
|
|
|
if logs.Len() != 3 {
|
|
t.Fatalf("expected 3 log entries, got %d", logs.Len())
|
|
}
|
|
entry := logs.All()[2]
|
|
if entry.Level != zapcore.ErrorLevel {
|
|
t.Errorf("expected ErrorLevel, got %v", entry.Level)
|
|
}
|
|
if len(entry.Context) == 0 {
|
|
t.Error("expected structured fields in log entry")
|
|
}
|
|
}
|
|
|
|
func TestClusterService_GetNode_ErrorLogging(t *testing.T) {
|
|
srv := errorServer()
|
|
defer srv.Close()
|
|
|
|
svc, logs := newClusterServiceWithObserver(srv)
|
|
_, err := svc.GetNode(context.Background(), "test-node")
|
|
if err == nil {
|
|
t.Fatal("expected error, got nil")
|
|
}
|
|
|
|
if logs.Len() != 3 {
|
|
t.Fatalf("expected 3 log entries, got %d", logs.Len())
|
|
}
|
|
entry := logs.All()[2]
|
|
if entry.Level != zapcore.ErrorLevel {
|
|
t.Errorf("expected ErrorLevel, got %v", entry.Level)
|
|
}
|
|
|
|
hasName := false
|
|
for _, f := range entry.Context {
|
|
if f.Key == "name" && f.String == "test-node" {
|
|
hasName = true
|
|
}
|
|
}
|
|
if !hasName {
|
|
t.Error("expected 'name' field with value 'test-node' in log entry")
|
|
}
|
|
}
|
|
|
|
func TestClusterService_GetPartitions_ErrorLogging(t *testing.T) {
|
|
srv := errorServer()
|
|
defer srv.Close()
|
|
|
|
svc, logs := newClusterServiceWithObserver(srv)
|
|
_, err := svc.GetPartitions(context.Background())
|
|
if err == nil {
|
|
t.Fatal("expected error, got nil")
|
|
}
|
|
|
|
if logs.Len() != 3 {
|
|
t.Fatalf("expected 3 log entries, got %d", logs.Len())
|
|
}
|
|
entry := logs.All()[2]
|
|
if entry.Level != zapcore.ErrorLevel {
|
|
t.Errorf("expected ErrorLevel, got %v", entry.Level)
|
|
}
|
|
if len(entry.Context) == 0 {
|
|
t.Error("expected structured fields in log entry")
|
|
}
|
|
}
|
|
|
|
func TestClusterService_GetPartition_ErrorLogging(t *testing.T) {
|
|
srv := errorServer()
|
|
defer srv.Close()
|
|
|
|
svc, logs := newClusterServiceWithObserver(srv)
|
|
_, err := svc.GetPartition(context.Background(), "test-partition")
|
|
if err == nil {
|
|
t.Fatal("expected error, got nil")
|
|
}
|
|
|
|
if logs.Len() != 3 {
|
|
t.Fatalf("expected 3 log entries, got %d", logs.Len())
|
|
}
|
|
entry := logs.All()[2]
|
|
if entry.Level != zapcore.ErrorLevel {
|
|
t.Errorf("expected ErrorLevel, got %v", entry.Level)
|
|
}
|
|
|
|
hasName := false
|
|
for _, f := range entry.Context {
|
|
if f.Key == "name" && f.String == "test-partition" {
|
|
hasName = true
|
|
}
|
|
}
|
|
if !hasName {
|
|
t.Error("expected 'name' field with value 'test-partition' in log entry")
|
|
}
|
|
}
|
|
|
|
func TestClusterService_GetDiag_ErrorLogging(t *testing.T) {
|
|
srv := errorServer()
|
|
defer srv.Close()
|
|
|
|
svc, logs := newClusterServiceWithObserver(srv)
|
|
_, err := svc.GetDiag(context.Background())
|
|
if err == nil {
|
|
t.Fatal("expected error, got nil")
|
|
}
|
|
|
|
if logs.Len() != 3 {
|
|
t.Fatalf("expected 3 log entries, got %d", logs.Len())
|
|
}
|
|
entry := logs.All()[2]
|
|
if entry.Level != zapcore.ErrorLevel {
|
|
t.Errorf("expected ErrorLevel, got %v", entry.Level)
|
|
}
|
|
if len(entry.Context) == 0 {
|
|
t.Error("expected structured fields in log entry")
|
|
}
|
|
}
|