Files
hpc/internal/service/application_service.go
dailz a65c8762af fix(service): add environment variables and fix work directory permissions for Slurm job submission
Slurm requires environment variables in job submission; without them it returns 'batch job cannot run without an environment'. Also chmod the entire directory path to 0777 to bypass umask, ensuring Slurm and compute node users can write.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-14 13:06:51 +08:00

204 lines
6.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"time"
"gcy_hpc_server/internal/model"
"gcy_hpc_server/internal/store"
"go.uber.org/zap"
)
// paramNameRegex matches a valid parameter name: an ASCII letter or
// underscore followed by any number of ASCII letters, digits, or underscores.
var paramNameRegex = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_]*$`)
// ApplicationService handles parameter validation, script rendering, and job
// submission for parameterized HPC applications.
type ApplicationService struct {
// store is the persistence layer used to list, create, fetch, update, and
// delete application definitions.
store *store.ApplicationStore
// jobSvc receives the rendered script when a job is submitted.
jobSvc *JobService
// workDirBase is the directory under which per-submission work directories
// are created. When empty, no work directory is created.
workDirBase string
// logger is the zap logger supplied at construction time.
logger *zap.Logger
}
// NewApplicationService wires an ApplicationService from its dependencies.
// The arguments are stored as given; nothing is validated or copied.
func NewApplicationService(store *store.ApplicationStore, jobSvc *JobService, workDirBase string, logger *zap.Logger) *ApplicationService {
	svc := &ApplicationService{
		store:       store,
		jobSvc:      jobSvc,
		workDirBase: workDirBase,
		logger:      logger,
	}
	return svc
}
// ValidateParams verifies values against the parameter schema.
//
// For every schema entry it checks that the name is a valid identifier, that
// required parameters are present, and that a supplied value is acceptable for
// the declared type (integer, boolean, or enum with a non-empty option list).
// File, directory, and string parameters accept any value. Entries in values
// that have no schema entry are ignored.
//
// All problems are collected and returned as one error whose message joins the
// individual findings with "; ". It returns nil when nothing is wrong.
func (s *ApplicationService) ValidateParams(params []model.ParameterSchema, values map[string]string) error {
	var problems []string
	report := func(format string, args ...interface{}) {
		problems = append(problems, fmt.Sprintf(format, args...))
	}

	for _, param := range params {
		if !paramNameRegex.MatchString(param.Name) {
			report("invalid parameter name %q: must match ^[A-Za-z_][A-Za-z0-9_]*$", param.Name)
			continue
		}

		supplied, present := values[param.Name]
		if !present {
			// Absent optional parameters need no further checks.
			if param.Required {
				report("required parameter %q is missing", param.Name)
			}
			continue
		}

		switch param.Type {
		case model.ParamTypeInteger:
			_, convErr := strconv.Atoi(supplied)
			if convErr != nil {
				report("parameter %q must be an integer, got %q", param.Name, supplied)
			}

		case model.ParamTypeBoolean:
			switch supplied {
			case "true", "false", "1", "0":
				// accepted spellings
			default:
				report("parameter %q must be a boolean (true/false/1/0), got %q", param.Name, supplied)
			}

		case model.ParamTypeEnum:
			// An enum without options places no restriction on the value.
			if len(param.Options) == 0 {
				break
			}
			allowed := false
			for i := 0; i < len(param.Options) && !allowed; i++ {
				allowed = supplied == param.Options[i]
			}
			if !allowed {
				report("parameter %q must be one of %v, got %q", param.Name, param.Options, supplied)
			}

		case model.ParamTypeFile, model.ParamTypeDirectory:
			// no value constraints

		case model.ParamTypeString:
			// no value constraints
		}
	}

	if len(problems) == 0 {
		return nil
	}
	return fmt.Errorf("parameter validation failed: %s", strings.Join(problems, "; "))
}
// RenderScript replaces $PARAM tokens in template with shell-quoted values.
//
// Only parameters declared in params are substituted. The value comes from
// values[name]; when the key is absent the schema default is used, and when
// that is empty too the token is left untouched. Every value is wrapped in
// single quotes, with embedded single quotes rewritten as '\'' so the shell
// sees it as one literal word.
//
// Substitution happens in a single left-to-right pass over the template. At
// each position the longest matching parameter name wins (e.g. $JOB_NAME is
// preferred over $JOB). Because text that has already been substituted is
// never scanned again, a user value that happens to contain "$OTHER_PARAM"
// stays literal. Re-expanding it would nest two quoted strings and break the
// quoting, which allows shell injection.
func (s *ApplicationService) RenderScript(template string, params []model.ParameterSchema, values map[string]string) string {
	// Work on a copy so the caller's slice order is not disturbed.
	sorted := make([]model.ParameterSchema, len(params))
	copy(sorted, params)
	sort.Slice(sorted, func(i, j int) bool {
		return len(sorted[i].Name) > len(sorted[j].Name)
	})

	// strings.Replacer compares old strings in argument order, so listing the
	// longest names first gives longest-match semantics at each position.
	pairs := make([]string, 0, 2*len(sorted))
	for _, p := range sorted {
		val, ok := values[p.Name]
		if !ok {
			if p.Default == "" {
				continue
			}
			val = p.Default
		}
		escaped := "'" + strings.ReplaceAll(val, "'", `'\''`) + "'"
		pairs = append(pairs, "$"+p.Name, escaped)
	}
	if len(pairs) == 0 {
		return template
	}
	return strings.NewReplacer(pairs...).Replace(template)
}
// ListApplications forwards the paging request to the store and returns its
// results unchanged: the page of applications, the int count reported by the
// store (presumably the total number of applications; confirm against the
// store), and any error.
func (s *ApplicationService) ListApplications(ctx context.Context, page, pageSize int) ([]model.Application, int, error) {
	apps, count, err := s.store.List(ctx, page, pageSize)
	return apps, count, err
}
// CreateApplication forwards the creation request to the store and returns
// the int64 it reports (presumably the new application's ID; confirm against
// the store) together with any error.
func (s *ApplicationService) CreateApplication(ctx context.Context, req *model.CreateApplicationRequest) (int64, error) {
	newID, err := s.store.Create(ctx, req)
	return newID, err
}
// GetApplication looks up a single application by ID through the store and
// returns the store's result and error unchanged.
func (s *ApplicationService) GetApplication(ctx context.Context, id int64) (*model.Application, error) {
	app, err := s.store.GetByID(ctx, id)
	return app, err
}
// UpdateApplication forwards the update request for the given application ID
// to the store and returns the store's error unchanged.
func (s *ApplicationService) UpdateApplication(ctx context.Context, id int64, req *model.UpdateApplicationRequest) error {
	err := s.store.Update(ctx, id, req)
	return err
}
// DeleteApplication asks the store to delete the application with the given
// ID and returns the store's error unchanged.
func (s *ApplicationService) DeleteApplication(ctx context.Context, id int64) error {
	err := s.store.Delete(ctx, id)
	return err
}
// SubmitFromApplication orchestrates the full submission flow:
//  1. Fetch the application by ID.
//  2. Parse its JSON parameter schema.
//  3. Validate the supplied parameter values.
//  4. Render the script template.
//  5. If a work directory base is configured, create a fresh
//     <base>/<application name>/<timestamp>_<random> directory and chmod the
//     path from that directory up to and including the base to 0777.
//  6. Submit the job via JobService.
//
// It returns an error when the application cannot be loaded or does not
// exist, when the schema cannot be parsed, when validation fails, when the
// work directory would fall outside the configured base, or when the work
// directory cannot be created. Chmod failures are logged but do not abort the
// submission.
func (s *ApplicationService) SubmitFromApplication(ctx context.Context, applicationID int64, values map[string]string) (*model.JobResponse, error) {
	app, err := s.store.GetByID(ctx, applicationID)
	if err != nil {
		return nil, fmt.Errorf("get application: %w", err)
	}
	if app == nil {
		return nil, fmt.Errorf("application %d not found", applicationID)
	}

	var params []model.ParameterSchema
	if len(app.Parameters) > 0 {
		if err := json.Unmarshal(app.Parameters, &params); err != nil {
			return nil, fmt.Errorf("parse parameters: %w", err)
		}
	}
	if err := s.ValidateParams(params, values); err != nil {
		return nil, err
	}
	rendered := s.RenderScript(app.ScriptTemplate, params, values)

	workDir := ""
	if s.workDirBase != "" {
		// filepath.Join returns a cleaned path, so the upward walk below must
		// compare against the cleaned base. Comparing against the raw setting
		// (e.g. "/data/work/" or "./work") would never match, and the walk
		// would spin forever at the filesystem root.
		base := filepath.Clean(s.workDirBase)
		safeName := sanitizeDirName(app.Name)
		subDir := time.Now().Format("20060102_150405") + "_" + randomSuffix(4)
		workDir = filepath.Join(base, safeName, subDir)

		// Refuse a directory outside the base (e.g. an application named
		// "..") before anything is created or chmod'ed.
		rel, err := filepath.Rel(base, workDir)
		if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return nil, fmt.Errorf("work directory %s escapes base directory %s", workDir, base)
		}

		if err := os.MkdirAll(workDir, 0777); err != nil {
			return nil, fmt.Errorf("create work directory %s: %w", workDir, err)
		}

		// MkdirAll's mode is filtered by the process umask, so chmod every
		// directory from workDir up to and including base explicitly to make
		// the whole path writable. This is best-effort: a failure is logged
		// and the submission continues.
		dir := workDir
		for {
			if err := os.Chmod(dir, 0777); err != nil && s.logger != nil {
				s.logger.Warn(fmt.Sprintf("chmod work directory %s: %v", dir, err))
			}
			parent := filepath.Dir(dir)
			// Stop at the base. The parent == dir check is a safety net that
			// guarantees termination at the filesystem root or ".".
			if dir == base || parent == dir {
				break
			}
			dir = parent
		}
	}

	req := &model.SubmitJobRequest{Script: rendered, WorkDir: workDir}
	return s.jobSvc.SubmitJob(ctx, req)
}
// sanitizeDirName turns name into a string that is safe to use as a single
// directory name component.
//
// Spaces, path separators ('/' and '\') and the characters : * ? " < > | are
// each replaced with an underscore. The special names "." and ".." contain
// none of those characters but would be resolved by filepath.Join as "current
// directory" and "parent directory", so they are replaced with underscores of
// the same length ("_" and "__"). An empty name is returned unchanged.
func sanitizeDirName(name string) string {
	replacer := strings.NewReplacer(" ", "_", "/", "_", "\\", "_", ":", "_", "*", "_", "?", "_", "\"", "_", "<", "_", ">", "_", "|", "_")
	safe := replacer.Replace(name)
	if safe == "." || safe == ".." {
		return strings.Repeat("_", len(safe))
	}
	return safe
}
// randomSuffix returns a string of n characters, each drawn from the
// lowercase ASCII letters and digits using the math/rand package-level
// generator. It is not suitable for security-sensitive identifiers.
func randomSuffix(n int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
	out := make([]byte, 0, n)
	for len(out) < n {
		out = append(out, alphabet[rand.Intn(len(alphabet))])
	}
	return string(out)
}