feat(service): add debug logging for Slurm API calls with request/response body and latency
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"gcy_hpc_server/internal/model"
|
||||
"gcy_hpc_server/internal/slurm"
|
||||
@@ -55,11 +56,30 @@ func NewClusterService(client *slurm.Client, logger *zap.Logger) *ClusterService
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetNodes(ctx context.Context) ([]model.NodeResponse, error) {
|
||||
s.logger.Debug("slurm API request",
|
||||
zap.String("operation", "GetNodes"),
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
resp, _, err := s.client.Nodes.GetNodes(ctx, nil)
|
||||
took := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
s.logger.Debug("slurm API error response",
|
||||
zap.String("operation", "GetNodes"),
|
||||
zap.Duration("took", took),
|
||||
zap.Error(err),
|
||||
)
|
||||
s.logger.Error("failed to get nodes", zap.Error(err))
|
||||
return nil, fmt.Errorf("get nodes: %w", err)
|
||||
}
|
||||
|
||||
s.logger.Debug("slurm API response",
|
||||
zap.String("operation", "GetNodes"),
|
||||
zap.Duration("took", took),
|
||||
zap.Any("body", resp),
|
||||
)
|
||||
|
||||
if resp.Nodes == nil {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -71,11 +91,33 @@ func (s *ClusterService) GetNodes(ctx context.Context) ([]model.NodeResponse, er
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetNode(ctx context.Context, name string) (*model.NodeResponse, error) {
|
||||
s.logger.Debug("slurm API request",
|
||||
zap.String("operation", "GetNode"),
|
||||
zap.String("node_name", name),
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
resp, _, err := s.client.Nodes.GetNode(ctx, name, nil)
|
||||
took := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
s.logger.Debug("slurm API error response",
|
||||
zap.String("operation", "GetNode"),
|
||||
zap.String("node_name", name),
|
||||
zap.Duration("took", took),
|
||||
zap.Error(err),
|
||||
)
|
||||
s.logger.Error("failed to get node", zap.String("name", name), zap.Error(err))
|
||||
return nil, fmt.Errorf("get node %s: %w", name, err)
|
||||
}
|
||||
|
||||
s.logger.Debug("slurm API response",
|
||||
zap.String("operation", "GetNode"),
|
||||
zap.String("node_name", name),
|
||||
zap.Duration("took", took),
|
||||
zap.Any("body", resp),
|
||||
)
|
||||
|
||||
if resp.Nodes == nil || len(*resp.Nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -85,11 +127,30 @@ func (s *ClusterService) GetNode(ctx context.Context, name string) (*model.NodeR
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetPartitions(ctx context.Context) ([]model.PartitionResponse, error) {
|
||||
s.logger.Debug("slurm API request",
|
||||
zap.String("operation", "GetPartitions"),
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
resp, _, err := s.client.Partitions.GetPartitions(ctx, nil)
|
||||
took := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
s.logger.Debug("slurm API error response",
|
||||
zap.String("operation", "GetPartitions"),
|
||||
zap.Duration("took", took),
|
||||
zap.Error(err),
|
||||
)
|
||||
s.logger.Error("failed to get partitions", zap.Error(err))
|
||||
return nil, fmt.Errorf("get partitions: %w", err)
|
||||
}
|
||||
|
||||
s.logger.Debug("slurm API response",
|
||||
zap.String("operation", "GetPartitions"),
|
||||
zap.Duration("took", took),
|
||||
zap.Any("body", resp),
|
||||
)
|
||||
|
||||
if resp.Partitions == nil {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -101,11 +162,33 @@ func (s *ClusterService) GetPartitions(ctx context.Context) ([]model.PartitionRe
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetPartition(ctx context.Context, name string) (*model.PartitionResponse, error) {
|
||||
s.logger.Debug("slurm API request",
|
||||
zap.String("operation", "GetPartition"),
|
||||
zap.String("partition_name", name),
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
resp, _, err := s.client.Partitions.GetPartition(ctx, name, nil)
|
||||
took := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
s.logger.Debug("slurm API error response",
|
||||
zap.String("operation", "GetPartition"),
|
||||
zap.String("partition_name", name),
|
||||
zap.Duration("took", took),
|
||||
zap.Error(err),
|
||||
)
|
||||
s.logger.Error("failed to get partition", zap.String("name", name), zap.Error(err))
|
||||
return nil, fmt.Errorf("get partition %s: %w", name, err)
|
||||
}
|
||||
|
||||
s.logger.Debug("slurm API response",
|
||||
zap.String("operation", "GetPartition"),
|
||||
zap.String("partition_name", name),
|
||||
zap.Duration("took", took),
|
||||
zap.Any("body", resp),
|
||||
)
|
||||
|
||||
if resp.Partitions == nil || len(*resp.Partitions) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -115,11 +198,30 @@ func (s *ClusterService) GetPartition(ctx context.Context, name string) (*model.
|
||||
}
|
||||
|
||||
func (s *ClusterService) GetDiag(ctx context.Context) (*slurm.OpenapiDiagResp, error) {
|
||||
s.logger.Debug("slurm API request",
|
||||
zap.String("operation", "GetDiag"),
|
||||
)
|
||||
|
||||
start := time.Now()
|
||||
resp, _, err := s.client.Diag.GetDiag(ctx)
|
||||
took := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
s.logger.Debug("slurm API error response",
|
||||
zap.String("operation", "GetDiag"),
|
||||
zap.Duration("took", took),
|
||||
zap.Error(err),
|
||||
)
|
||||
s.logger.Error("failed to get diag", zap.Error(err))
|
||||
return nil, fmt.Errorf("get diag: %w", err)
|
||||
}
|
||||
|
||||
s.logger.Debug("slurm API response",
|
||||
zap.String("operation", "GetDiag"),
|
||||
zap.Duration("took", took),
|
||||
zap.Any("body", resp),
|
||||
)
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user