feat: 添加业务服务层和结构化日志
- JobService: 提交、查询、取消、历史记录,记录关键操作日志
- ClusterService: 节点、分区、诊断查询,记录错误日志
- NewSlurmClient: JWT 认证 HTTP 客户端工厂
- 所有构造函数接受 *zap.Logger 参数实现依赖注入
- 提交/取消成功记录 Info,API 错误记录 Error
- 完整 TDD 测试,使用 zaptest/observer 验证日志输出

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
167
internal/service/cluster_service.go
Normal file
167
internal/service/cluster_service.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"gcy_hpc_server/internal/model"
|
||||
"gcy_hpc_server/internal/slurm"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// derefStr returns the string that s points to, or the empty string when s
// is nil.
func derefStr(s *string) string {
	if s != nil {
		return *s
	}
	return ""
}
|
||||
|
||||
// derefInt32 returns the int32 that i points to, or 0 when i is nil.
func derefInt32(i *int32) int32 {
	var out int32
	if i != nil {
		out = *i
	}
	return out
}
|
||||
|
||||
// derefInt64 returns the int64 that i points to, or 0 when i is nil.
func derefInt64(i *int64) int64 {
	var out int64
	if i != nil {
		out = *i
	}
	return out
}
|
||||
|
||||
func uint32NoValString(v *slurm.Uint32NoVal) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
if v.Infinite != nil && *v.Infinite {
|
||||
return "UNLIMITED"
|
||||
}
|
||||
if v.Number != nil {
|
||||
return strconv.FormatInt(*v.Number, 10)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type ClusterService struct {
|
||||
client *slurm.Client
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
func NewClusterService(client *slurm.Client, logger *zap.Logger) *ClusterService {
|
||||
return &ClusterService{client: client, logger: logger}
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetNodes(ctx context.Context) ([]model.NodeResponse, error) {
|
||||
resp, _, err := s.client.Nodes.GetNodes(ctx, nil)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get nodes", zap.Error(err))
|
||||
return nil, fmt.Errorf("get nodes: %w", err)
|
||||
}
|
||||
if resp.Nodes == nil {
|
||||
return nil, nil
|
||||
}
|
||||
result := make([]model.NodeResponse, 0, len(*resp.Nodes))
|
||||
for _, n := range *resp.Nodes {
|
||||
result = append(result, mapNode(n))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetNode(ctx context.Context, name string) (*model.NodeResponse, error) {
|
||||
resp, _, err := s.client.Nodes.GetNode(ctx, name, nil)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get node", zap.String("name", name), zap.Error(err))
|
||||
return nil, fmt.Errorf("get node %s: %w", name, err)
|
||||
}
|
||||
if resp.Nodes == nil || len(*resp.Nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
n := (*resp.Nodes)[0]
|
||||
mapped := mapNode(n)
|
||||
return &mapped, nil
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetPartitions(ctx context.Context) ([]model.PartitionResponse, error) {
|
||||
resp, _, err := s.client.Partitions.GetPartitions(ctx, nil)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get partitions", zap.Error(err))
|
||||
return nil, fmt.Errorf("get partitions: %w", err)
|
||||
}
|
||||
if resp.Partitions == nil {
|
||||
return nil, nil
|
||||
}
|
||||
result := make([]model.PartitionResponse, 0, len(*resp.Partitions))
|
||||
for _, pi := range *resp.Partitions {
|
||||
result = append(result, mapPartition(pi))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetPartition(ctx context.Context, name string) (*model.PartitionResponse, error) {
|
||||
resp, _, err := s.client.Partitions.GetPartition(ctx, name, nil)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get partition", zap.String("name", name), zap.Error(err))
|
||||
return nil, fmt.Errorf("get partition %s: %w", name, err)
|
||||
}
|
||||
if resp.Partitions == nil || len(*resp.Partitions) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
p := (*resp.Partitions)[0]
|
||||
mapped := mapPartition(p)
|
||||
return &mapped, nil
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetDiag(ctx context.Context) (*slurm.OpenapiDiagResp, error) {
|
||||
resp, _, err := s.client.Diag.GetDiag(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("failed to get diag", zap.Error(err))
|
||||
return nil, fmt.Errorf("get diag: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func mapNode(n slurm.Node) model.NodeResponse {
|
||||
return model.NodeResponse{
|
||||
Name: derefStr(n.Name),
|
||||
State: n.State,
|
||||
CPUs: derefInt32(n.Cpus),
|
||||
RealMemory: derefInt64(n.RealMemory),
|
||||
AllocMem: derefInt64(n.AllocMemory),
|
||||
Arch: derefStr(n.Architecture),
|
||||
OS: derefStr(n.OperatingSystem),
|
||||
}
|
||||
}
|
||||
|
||||
func mapPartition(pi slurm.PartitionInfo) model.PartitionResponse {
|
||||
var state []string
|
||||
if pi.Partition != nil {
|
||||
state = pi.Partition.State
|
||||
}
|
||||
var nodes string
|
||||
if pi.Nodes != nil {
|
||||
nodes = derefStr(pi.Nodes.Configured)
|
||||
}
|
||||
var totalCPUs int32
|
||||
if pi.CPUs != nil {
|
||||
totalCPUs = derefInt32(pi.CPUs.Total)
|
||||
}
|
||||
var totalNodes int32
|
||||
if pi.Nodes != nil {
|
||||
totalNodes = derefInt32(pi.Nodes.Total)
|
||||
}
|
||||
var maxTime string
|
||||
if pi.Maximums != nil {
|
||||
maxTime = uint32NoValString(pi.Maximums.Time)
|
||||
}
|
||||
return model.PartitionResponse{
|
||||
Name: derefStr(pi.Name),
|
||||
State: state,
|
||||
Nodes: nodes,
|
||||
TotalCPUs: totalCPUs,
|
||||
TotalNodes: totalNodes,
|
||||
MaxTime: maxTime,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user