From 824d9e816faf6ca8a3aeabd40d9f643acf6fbff5 Mon Sep 17 00:00:00 2001 From: dailz Date: Fri, 10 Apr 2026 11:12:51 +0800 Subject: [PATCH] feat(service): map additional Slurm SDK fields and fix ExitCode/Default bugs Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- internal/service/cluster_service.go | 113 ++++++++++++++++++++++++---- internal/service/job_service.go | 46 +++++++++++ 2 files changed, 146 insertions(+), 13 deletions(-) diff --git a/internal/service/cluster_service.go b/internal/service/cluster_service.go index 3cd9426..0fbd402 100644 --- a/internal/service/cluster_service.go +++ b/internal/service/cluster_service.go @@ -46,6 +46,27 @@ func uint32NoValString(v *slurm.Uint32NoVal) string { return "" } +func derefUint64NoValInt64(v *slurm.Uint64NoVal) *int64 { + if v != nil && v.Number != nil { + return v.Number + } + return nil +} + +func derefCSVString(cs *slurm.CSVString) string { + if cs == nil || len(*cs) == 0 { + return "" + } + result := "" + for i, s := range *cs { + if i > 0 { + result += "," + } + result += s + } + return result +} + type ClusterService struct { client *slurm.Client logger *zap.Logger @@ -227,20 +248,42 @@ func (s *ClusterService) GetDiag(ctx context.Context) (*slurm.OpenapiDiagResp, e func mapNode(n slurm.Node) model.NodeResponse { return model.NodeResponse{ - Name: derefStr(n.Name), - State: n.State, - CPUs: derefInt32(n.Cpus), - RealMemory: derefInt64(n.RealMemory), - AllocMem: derefInt64(n.AllocMemory), - Arch: derefStr(n.Architecture), - OS: derefStr(n.OperatingSystem), + Name: derefStr(n.Name), + State: n.State, + CPUs: derefInt32(n.Cpus), + AllocCpus: n.AllocCpus, + Cores: n.Cores, + Sockets: n.Sockets, + Threads: n.Threads, + RealMemory: derefInt64(n.RealMemory), + AllocMemory: derefInt64(n.AllocMemory), + FreeMem: derefUint64NoValInt64(n.FreeMem), + CpuLoad: n.CpuLoad, + Arch: derefStr(n.Architecture), + OS: derefStr(n.OperatingSystem), + Gres: derefStr(n.Gres), + GresUsed: derefStr(n.GresUsed), + Reason: derefStr(n.Reason), + ReasonSetByUser: derefStr(n.ReasonSetByUser), + Address: derefStr(n.Address), + Hostname: derefStr(n.Hostname), + Weight: n.Weight, + Features: derefCSVString(n.Features), + ActiveFeatures: derefCSVString(n.ActiveFeatures), } } func mapPartition(pi slurm.PartitionInfo) model.PartitionResponse { var state []string + var isDefault bool if pi.Partition != nil { state = pi.Partition.State + for _, s := range state { + if s == "DEFAULT" { + isDefault = true + break + } + } } var nodes string if pi.Nodes != nil { @@ -258,12 +301,56 @@ func mapPartition(pi slurm.PartitionInfo) model.PartitionResponse { if pi.Maximums != nil { maxTime = uint32NoValString(pi.Maximums.Time) } + var maxNodes *int32 + if pi.Maximums != nil { + maxNodes = mapUint32NoValToInt32(pi.Maximums.Nodes) + } + var maxCPUsPerNode *int32 + if pi.Maximums != nil { + maxCPUsPerNode = pi.Maximums.CpusPerNode + } + var minNodes *int32 + if pi.Minimums != nil { + minNodes = pi.Minimums.Nodes + } + var defaultTime string + if pi.Defaults != nil { + defaultTime = uint32NoValString(pi.Defaults.Time) + } + var graceTime *int32 = pi.GraceTime + var priority *int32 + if pi.Priority != nil { + priority = pi.Priority.JobFactor + } + var qosAllowed, qosDeny, qosAssigned string + if pi.QOS != nil { + qosAllowed = derefStr(pi.QOS.Allowed) + qosDeny = derefStr(pi.QOS.Deny) + qosAssigned = derefStr(pi.QOS.Assigned) + } + var accountsAllowed, accountsDeny string + if pi.Accounts != nil { + accountsAllowed = derefStr(pi.Accounts.Allowed) + accountsDeny = derefStr(pi.Accounts.Deny) + } return model.PartitionResponse{ - Name: derefStr(pi.Name), - State: state, - Nodes: nodes, - TotalCPUs: totalCPUs, - TotalNodes: totalNodes, - MaxTime: maxTime, + Name: derefStr(pi.Name), + State: state, + Default: isDefault, + Nodes: nodes, + TotalNodes: totalNodes, + TotalCPUs: totalCPUs, + MaxTime: maxTime, + MaxNodes: maxNodes, + MaxCPUsPerNode: maxCPUsPerNode, + MinNodes: minNodes, + DefaultTime: defaultTime, + GraceTime: graceTime, + Priority: priority, + QOSAllowed: qosAllowed, + QOSDeny: qosDeny, + QOSAssigned: qosAssigned, + AccountsAllowed: accountsAllowed, + AccountsDeny: accountsDeny, } } diff --git a/internal/service/job_service.go b/internal/service/job_service.go index fe5067f..63e9a4e 100644 --- a/internal/service/job_service.go +++ b/internal/service/job_service.go @@ -282,6 +282,14 @@ func strToPtrOrNil(s string) *string { return &s } +func mapUint32NoValToInt32(v *slurm.Uint32NoVal) *int32 { + if v != nil && v.Number != nil { + n := int32(*v.Number) + return &n + } + return nil +} + // mapJobInfo maps SDK JobInfo to API JobResponse. func mapJobInfo(ji *slurm.JobInfo) model.JobResponse { resp := model.JobResponse{} @@ -295,6 +303,17 @@ func mapJobInfo(ji *slurm.JobInfo) model.JobResponse { if ji.Partition != nil { resp.Partition = *ji.Partition } + resp.Account = derefStr(ji.Account) + resp.User = derefStr(ji.UserName) + resp.Cluster = derefStr(ji.Cluster) + resp.QOS = derefStr(ji.Qos) + resp.Priority = mapUint32NoValToInt32(ji.Priority) + resp.TimeLimit = uint32NoValString(ji.TimeLimit) + resp.StateReason = derefStr(ji.StateReason) + resp.Cpus = mapUint32NoValToInt32(ji.Cpus) + resp.Tasks = mapUint32NoValToInt32(ji.Tasks) + resp.NodeCount = mapUint32NoValToInt32(ji.NodeCount) + resp.BatchHost = derefStr(ji.BatchHost) if ji.SubmitTime != nil && ji.SubmitTime.Number != nil { resp.SubmitTime = ji.SubmitTime.Number } @@ -311,6 +330,13 @@ func mapJobInfo(ji *slurm.JobInfo) model.JobResponse { if ji.Nodes != nil { resp.Nodes = *ji.Nodes } + resp.StdOut = derefStr(ji.StandardOutput) + resp.StdErr = derefStr(ji.StandardError) + resp.StdIn = derefStr(ji.StandardInput) + resp.WorkDir = derefStr(ji.CurrentWorkingDirectory) + resp.Command = derefStr(ji.Command) + resp.ArrayJobID = mapUint32NoValToInt32(ji.ArrayJobID) + resp.ArrayTaskID = mapUint32NoValToInt32(ji.ArrayTaskID) return resp } @@ -325,11 +351,20 @@ func mapSlurmdbJob(j *slurm.Job) model.JobResponse { } if j.State != nil { resp.State = j.State.Current + resp.StateReason = derefStr(j.State.Reason) } if j.Partition != nil { resp.Partition = *j.Partition } + resp.Account = derefStr(j.Account) + if j.User != nil { + resp.User = *j.User + } + resp.Cluster = derefStr(j.Cluster) + resp.QOS = derefStr(j.Qos) + resp.Priority = mapUint32NoValToInt32(j.Priority) if j.Time != nil { + resp.TimeLimit = uint32NoValString(j.Time.Limit) if j.Time.Submission != nil { resp.SubmitTime = j.Time.Submission } @@ -340,8 +375,19 @@ func mapSlurmdbJob(j *slurm.Job) model.JobResponse { resp.EndTime = j.Time.End } } + if j.ExitCode != nil && j.ExitCode.ReturnCode != nil && j.ExitCode.ReturnCode.Number != nil { + code := int32(*j.ExitCode.ReturnCode.Number) + resp.ExitCode = &code + } if j.Nodes != nil { resp.Nodes = *j.Nodes } + if j.Required != nil { + resp.Cpus = j.Required.CPUs + } + if j.AllocationNodes != nil { + resp.NodeCount = j.AllocationNodes + } + resp.WorkDir = derefStr(j.WorkingDirectory) return resp }