Files
hpc/internal/service/cluster_service.go
dailz 4903f7d07f feat: 添加业务服务层和结构化日志
- JobService: 提交、查询、取消、历史记录,记录关键操作日志

- ClusterService: 节点、分区、诊断查询,记录错误日志

- NewSlurmClient: JWT 认证 HTTP 客户端工厂

- 所有构造函数接受 *zap.Logger 参数实现依赖注入

- 提交/取消成功记录 Info,API 错误记录 Error

- 完整 TDD 测试,使用 zaptest/observer 验证日志输出

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-10 08:39:46 +08:00

168 lines
4.0 KiB
Go

package service
import (
"context"
"fmt"
"strconv"
"gcy_hpc_server/internal/model"
"gcy_hpc_server/internal/slurm"
"go.uber.org/zap"
)
// derefStr returns the string s points to, or the empty string when s is nil.
func derefStr(s *string) string {
	if s != nil {
		return *s
	}
	return ""
}
// derefInt32 returns the int32 i points to, or 0 when i is nil.
func derefInt32(i *int32) int32 {
	var value int32
	if i != nil {
		value = *i
	}
	return value
}
// derefInt64 returns the int64 i points to, or 0 when i is nil.
func derefInt64(i *int64) int64 {
	var value int64
	if i != nil {
		value = *i
	}
	return value
}
// uint32NoValString renders a slurm.Uint32NoVal as text.
//
// The cases are checked in order:
//   - a nil v yields "";
//   - Infinite set and true yields "UNLIMITED";
//   - a non-nil Number yields its base-10 representation;
//   - anything else yields "".
func uint32NoValString(v *slurm.Uint32NoVal) string {
	switch {
	case v == nil:
		return ""
	case v.Infinite != nil && *v.Infinite:
		// Infinite takes precedence over any Number that may also be set.
		return "UNLIMITED"
	case v.Number != nil:
		return strconv.FormatInt(*v.Number, 10)
	default:
		return ""
	}
}
// ClusterService exposes node, partition and diagnostic queries backed by a
// Slurm client, logging every client failure before returning it.
type ClusterService struct {
	// client is the Slurm API client every query method calls.
	client *slurm.Client
	// logger receives an Error entry whenever a client call fails.
	logger *zap.Logger
}
// NewClusterService builds a ClusterService around the given Slurm client and
// logger.
//
// A nil logger is replaced with zap.NewNop(), so the error-logging calls made
// by the service methods never dereference a nil *zap.Logger. The client is
// stored as given and is not checked for nil.
func NewClusterService(client *slurm.Client, logger *zap.Logger) *ClusterService {
	if logger == nil {
		logger = zap.NewNop()
	}
	return &ClusterService{client: client, logger: logger}
}
// GetNodes queries the Slurm client for nodes and converts each entry of the
// response with mapNode.
//
// A client error is logged and returned wrapped with the "get nodes" prefix.
// When the response carries no node list (Nodes is nil) the result is a nil
// slice with a nil error; a present but empty list yields an empty, non-nil
// slice.
func (s *ClusterService) GetNodes(ctx context.Context) ([]model.NodeResponse, error) {
	resp, _, err := s.client.Nodes.GetNodes(ctx, nil)
	if err != nil {
		s.logger.Error("failed to get nodes", zap.Error(err))
		return nil, fmt.Errorf("get nodes: %w", err)
	}
	if resp.Nodes == nil {
		return nil, nil
	}

	nodes := *resp.Nodes
	out := make([]model.NodeResponse, len(nodes))
	for i := range nodes {
		out[i] = mapNode(nodes[i])
	}
	return out, nil
}
// GetNode queries the Slurm client for the node called name and converts the
// first entry of the response with mapNode.
//
// A client error is logged together with the node name and returned wrapped
// as "get node <name>". When the response has no node list, or the list is
// empty, both the result and the error are nil.
func (s *ClusterService) GetNode(ctx context.Context, name string) (*model.NodeResponse, error) {
	resp, _, err := s.client.Nodes.GetNode(ctx, name, nil)
	if err != nil {
		s.logger.Error("failed to get node", zap.String("name", name), zap.Error(err))
		return nil, fmt.Errorf("get node %s: %w", name, err)
	}

	if resp.Nodes != nil && len(*resp.Nodes) > 0 {
		// Only the first entry is used; any further entries are ignored.
		node := mapNode((*resp.Nodes)[0])
		return &node, nil
	}
	return nil, nil
}
// GetPartitions queries the Slurm client for partitions and converts each
// entry of the response with mapPartition.
//
// A client error is logged and returned wrapped with the "get partitions"
// prefix. When the response carries no partition list (Partitions is nil) the
// result is a nil slice with a nil error; a present but empty list yields an
// empty, non-nil slice.
func (s *ClusterService) GetPartitions(ctx context.Context) ([]model.PartitionResponse, error) {
	resp, _, err := s.client.Partitions.GetPartitions(ctx, nil)
	if err != nil {
		s.logger.Error("failed to get partitions", zap.Error(err))
		return nil, fmt.Errorf("get partitions: %w", err)
	}
	if resp.Partitions == nil {
		return nil, nil
	}

	partitions := *resp.Partitions
	out := make([]model.PartitionResponse, len(partitions))
	for i := range partitions {
		out[i] = mapPartition(partitions[i])
	}
	return out, nil
}
// GetPartition queries the Slurm client for the partition called name and
// converts the first entry of the response with mapPartition.
//
// A client error is logged together with the partition name and returned
// wrapped as "get partition <name>". When the response has no partition list,
// or the list is empty, both the result and the error are nil.
func (s *ClusterService) GetPartition(ctx context.Context, name string) (*model.PartitionResponse, error) {
	resp, _, err := s.client.Partitions.GetPartition(ctx, name, nil)
	if err != nil {
		s.logger.Error("failed to get partition", zap.String("name", name), zap.Error(err))
		return nil, fmt.Errorf("get partition %s: %w", name, err)
	}

	if resp.Partitions != nil && len(*resp.Partitions) > 0 {
		// Only the first entry is used; any further entries are ignored.
		partition := mapPartition((*resp.Partitions)[0])
		return &partition, nil
	}
	return nil, nil
}
// GetDiag returns the diagnostics response from the Slurm client unchanged.
//
// A client error is logged and returned wrapped with the "get diag" prefix;
// in that case the returned response is nil.
func (s *ClusterService) GetDiag(ctx context.Context) (*slurm.OpenapiDiagResp, error) {
	diag, _, err := s.client.Diag.GetDiag(ctx)
	if err == nil {
		return diag, nil
	}
	s.logger.Error("failed to get diag", zap.Error(err))
	return nil, fmt.Errorf("get diag: %w", err)
}
// mapNode converts a slurm.Node into a model.NodeResponse.
//
// State is copied as-is. The pointer-valued fields are dereferenced with the
// deref helpers, so a nil pointer becomes "" or 0 in the result.
func mapNode(n slurm.Node) model.NodeResponse {
	var out model.NodeResponse
	out.Name = derefStr(n.Name)
	out.State = n.State
	out.CPUs = derefInt32(n.Cpus)
	out.RealMemory = derefInt64(n.RealMemory)
	out.AllocMem = derefInt64(n.AllocMemory)
	out.Arch = derefStr(n.Architecture)
	out.OS = derefStr(n.OperatingSystem)
	return out
}
// mapPartition converts a slurm.PartitionInfo into a model.PartitionResponse.
//
// Each nested section (Partition, Nodes, CPUs, Maximums) is optional: when a
// section pointer is nil, the fields derived from it keep their zero value
// (nil state, "" or 0). MaxTime is rendered with uint32NoValString.
func mapPartition(pi slurm.PartitionInfo) model.PartitionResponse {
	var (
		state      []string
		nodes      string
		totalNodes int32
		totalCPUs  int32
		maxTime    string
	)

	if part := pi.Partition; part != nil {
		state = part.State
	}
	if ns := pi.Nodes; ns != nil {
		nodes = derefStr(ns.Configured)
		totalNodes = derefInt32(ns.Total)
	}
	if cpus := pi.CPUs; cpus != nil {
		totalCPUs = derefInt32(cpus.Total)
	}
	if limits := pi.Maximums; limits != nil {
		maxTime = uint32NoValString(limits.Time)
	}

	return model.PartitionResponse{
		Name:       derefStr(pi.Name),
		State:      state,
		Nodes:      nodes,
		TotalCPUs:  totalCPUs,
		TotalNodes: totalNodes,
		MaxTime:    maxTime,
	}
}