Slurm requires environment variables in job submission; without them it returns 'batch job cannot run without an environment'. Also chmod the entire directory path to 0777 to bypass umask, ensuring Slurm and compute node users can write. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
204 lines
6.1 KiB
Go
204 lines
6.1 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"math/rand"
|
||
"os"
|
||
"path/filepath"
|
||
"regexp"
|
||
"sort"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
|
||
"gcy_hpc_server/internal/model"
|
||
"gcy_hpc_server/internal/store"
|
||
|
||
"go.uber.org/zap"
|
||
)
|
||
|
||
// paramNameRegex matches identifier-style parameter names: a letter or
// underscore followed by any number of letters, digits, or underscores.
var paramNameRegex = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_]*$`)
|
||
|
||
// ApplicationService handles parameter validation, script rendering, and job
|
||
// submission for parameterized HPC applications.
|
||
type ApplicationService struct {
|
||
store *store.ApplicationStore
|
||
jobSvc *JobService
|
||
workDirBase string
|
||
logger *zap.Logger
|
||
}
|
||
|
||
func NewApplicationService(store *store.ApplicationStore, jobSvc *JobService, workDirBase string, logger *zap.Logger) *ApplicationService {
|
||
return &ApplicationService{store: store, jobSvc: jobSvc, workDirBase: workDirBase, logger: logger}
|
||
}
|
||
|
||
// ValidateParams checks that all required parameters are present and values match their types.
|
||
// Parameters not in the schema are silently ignored.
|
||
func (s *ApplicationService) ValidateParams(params []model.ParameterSchema, values map[string]string) error {
|
||
var errs []string
|
||
|
||
for _, p := range params {
|
||
if !paramNameRegex.MatchString(p.Name) {
|
||
errs = append(errs, fmt.Sprintf("invalid parameter name %q: must match ^[A-Za-z_][A-Za-z0-9_]*$", p.Name))
|
||
continue
|
||
}
|
||
|
||
val, ok := values[p.Name]
|
||
|
||
if p.Required && !ok {
|
||
errs = append(errs, fmt.Sprintf("required parameter %q is missing", p.Name))
|
||
continue
|
||
}
|
||
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
switch p.Type {
|
||
case model.ParamTypeInteger:
|
||
if _, err := strconv.Atoi(val); err != nil {
|
||
errs = append(errs, fmt.Sprintf("parameter %q must be an integer, got %q", p.Name, val))
|
||
}
|
||
case model.ParamTypeBoolean:
|
||
if val != "true" && val != "false" && val != "1" && val != "0" {
|
||
errs = append(errs, fmt.Sprintf("parameter %q must be a boolean (true/false/1/0), got %q", p.Name, val))
|
||
}
|
||
case model.ParamTypeEnum:
|
||
if len(p.Options) > 0 {
|
||
found := false
|
||
for _, opt := range p.Options {
|
||
if val == opt {
|
||
found = true
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
errs = append(errs, fmt.Sprintf("parameter %q must be one of %v, got %q", p.Name, p.Options, val))
|
||
}
|
||
}
|
||
case model.ParamTypeFile, model.ParamTypeDirectory:
|
||
case model.ParamTypeString:
|
||
}
|
||
}
|
||
|
||
if len(errs) > 0 {
|
||
return fmt.Errorf("parameter validation failed: %s", strings.Join(errs, "; "))
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// RenderScript replaces $PARAM tokens in the template with user-provided values.
|
||
// Only tokens defined in the schema are replaced. Replacement is done longest-name-first
|
||
// to avoid partial matches (e.g., $JOB_NAME before $JOB).
|
||
// All values are shell-escaped using single-quote wrapping.
|
||
func (s *ApplicationService) RenderScript(template string, params []model.ParameterSchema, values map[string]string) string {
|
||
sorted := make([]model.ParameterSchema, len(params))
|
||
copy(sorted, params)
|
||
sort.Slice(sorted, func(i, j int) bool {
|
||
return len(sorted[i].Name) > len(sorted[j].Name)
|
||
})
|
||
|
||
result := template
|
||
for _, p := range sorted {
|
||
val, ok := values[p.Name]
|
||
if !ok {
|
||
if p.Default != "" {
|
||
val = p.Default
|
||
} else {
|
||
continue
|
||
}
|
||
}
|
||
escaped := "'" + strings.ReplaceAll(val, "'", "'\\''") + "'"
|
||
result = strings.ReplaceAll(result, "$"+p.Name, escaped)
|
||
}
|
||
return result
|
||
}
|
||
|
||
// ListApplications delegates to the store.
|
||
func (s *ApplicationService) ListApplications(ctx context.Context, page, pageSize int) ([]model.Application, int, error) {
|
||
return s.store.List(ctx, page, pageSize)
|
||
}
|
||
|
||
// CreateApplication delegates to the store.
|
||
func (s *ApplicationService) CreateApplication(ctx context.Context, req *model.CreateApplicationRequest) (int64, error) {
|
||
return s.store.Create(ctx, req)
|
||
}
|
||
|
||
// GetApplication delegates to the store.
|
||
func (s *ApplicationService) GetApplication(ctx context.Context, id int64) (*model.Application, error) {
|
||
return s.store.GetByID(ctx, id)
|
||
}
|
||
|
||
// UpdateApplication delegates to the store.
|
||
func (s *ApplicationService) UpdateApplication(ctx context.Context, id int64, req *model.UpdateApplicationRequest) error {
|
||
return s.store.Update(ctx, id, req)
|
||
}
|
||
|
||
// DeleteApplication delegates to the store.
|
||
func (s *ApplicationService) DeleteApplication(ctx context.Context, id int64) error {
|
||
return s.store.Delete(ctx, id)
|
||
}
|
||
|
||
// SubmitFromApplication orchestrates the full submission flow:
|
||
// 1. Fetch application by ID
|
||
// 2. Parse parameters schema
|
||
// 3. Validate parameter values
|
||
// 4. Render script template
|
||
// 5. Submit job via JobService
|
||
func (s *ApplicationService) SubmitFromApplication(ctx context.Context, applicationID int64, values map[string]string) (*model.JobResponse, error) {
|
||
app, err := s.store.GetByID(ctx, applicationID)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("get application: %w", err)
|
||
}
|
||
if app == nil {
|
||
return nil, fmt.Errorf("application %d not found", applicationID)
|
||
}
|
||
|
||
var params []model.ParameterSchema
|
||
if len(app.Parameters) > 0 {
|
||
if err := json.Unmarshal(app.Parameters, ¶ms); err != nil {
|
||
return nil, fmt.Errorf("parse parameters: %w", err)
|
||
}
|
||
}
|
||
|
||
if err := s.ValidateParams(params, values); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
rendered := s.RenderScript(app.ScriptTemplate, params, values)
|
||
|
||
workDir := ""
|
||
if s.workDirBase != "" {
|
||
safeName := sanitizeDirName(app.Name)
|
||
subDir := time.Now().Format("20060102_150405") + "_" + randomSuffix(4)
|
||
workDir = filepath.Join(s.workDirBase, safeName, subDir)
|
||
if err := os.MkdirAll(workDir, 0777); err != nil {
|
||
return nil, fmt.Errorf("create work directory %s: %w", workDir, err)
|
||
}
|
||
// 绕过 umask,确保整条路径都有写权限
|
||
for dir := workDir; dir != s.workDirBase; dir = filepath.Dir(dir) {
|
||
os.Chmod(dir, 0777)
|
||
}
|
||
os.Chmod(s.workDirBase, 0777)
|
||
}
|
||
|
||
req := &model.SubmitJobRequest{Script: rendered, WorkDir: workDir}
|
||
return s.jobSvc.SubmitJob(ctx, req)
|
||
}
|
||
|
||
// sanitizeDirName turns an arbitrary name into a single safe path component.
//
// Spaces, both path separators, and the characters : * ? " < > | are each
// replaced with an underscore. Names that would come out empty or as the
// special entries "." or ".." are mapped to "_", so that joining the result
// onto a base directory always yields a real child of that directory.
func sanitizeDirName(name string) string {
	replacer := strings.NewReplacer(
		" ", "_",
		"/", "_",
		"\\", "_",
		":", "_",
		"*", "_",
		"?", "_",
		"\"", "_",
		"<", "_",
		">", "_",
		"|", "_",
	)
	cleaned := replacer.Replace(name)

	switch cleaned {
	case "", ".", "..":
		// Empty, current-dir, and parent-dir components are not usable as a
		// directory name and ".." would climb out of the base directory.
		return "_"
	}
	return cleaned
}
|
||
|
||
// randomSuffix returns a string of n characters drawn from lowercase ASCII
// letters and digits. It returns "" when n is zero or negative.
//
// The characters come from math/rand, so the result is not suitable for
// secrets.
func randomSuffix(n int) string {
	if n <= 0 {
		// make would panic on a negative length.
		return ""
	}

	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, n)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}
|