feat(service): add task defaults, job status, and cluster helpers

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
dailz
2026-04-20 10:38:41 +08:00
parent f894e870ed
commit 166ca3092c
3 changed files with 151 additions and 9 deletions

View File

@@ -122,13 +122,40 @@ func (s *TaskService) CreateTask(ctx context.Context, req *model.CreateTaskReque
// 8. Create task record
task := &model.Task{
TaskName: taskName,
AppID: app.ID,
AppName: app.Name,
Status: model.TaskStatusSubmitted,
Values: valuesJSON,
InputFileIDs: fileIDsJSON,
SubmittedAt: time.Now(),
TaskName: taskName,
AppID: app.ID,
AppName: app.Name,
Status: model.TaskStatusSubmitted,
Values: valuesJSON,
InputFileIDs: fileIDsJSON,
SubmittedAt: time.Now(),
Partition: derefStr(req.Partition),
Cpus: req.Cpus,
MemoryPerNode: req.MemoryPerNode,
MemoryPerCpu: req.MemoryPerCpu,
TimeLimit: req.TimeLimit,
QOS: req.QOS,
JobName: req.JobName,
Nodes: req.Nodes,
Tasks: req.Tasks,
CpusPerTask: req.CpusPerTask,
Constraints: req.Constraints,
Reservation: req.Reservation,
Account: req.Account,
Nice: req.Nice,
MailType: req.MailType,
MailUser: req.MailUser,
StandardOutput: req.StandardOutput,
StandardError: req.StandardError,
StandardInput: req.StandardInput,
RequiredNodes: req.RequiredNodes,
ExcludedNodes: req.ExcludedNodes,
BeginTime: req.BeginTime,
Deadline: req.Deadline,
Array: req.Array,
Dependency: req.Dependency,
Requeue: req.Requeue,
KillOnNodeFail: req.KillOnNodeFail,
}
taskID, err := s.taskStore.Create(ctx, task)
@@ -309,6 +336,17 @@ func (s *TaskService) ProcessTask(ctx context.Context, taskID int64) error {
}
}
// Inject default scheduling parameters (in memory only; not persisted to the database)
if task.TimeLimit == nil {
task.TimeLimit = int32Ptr(10080) // 168 小时
}
if task.StandardOutput == nil {
task.StandardOutput = strToPtrOrNil(filepath.Join(workDir, "slurm-%j.out"))
}
if task.StandardError == nil {
task.StandardError = strToPtrOrNil(filepath.Join(workDir, "slurm-%j.err"))
}
// 17. Render script
rendered := RenderScript(app.ScriptTemplate, params, values)
s.logger.Info("rendered script",
@@ -319,8 +357,35 @@ func (s *TaskService) ProcessTask(ctx context.Context, taskID int64) error {
// 18. Submit to Slurm
jobResp, err := s.jobSvc.SubmitJob(ctx, &model.SubmitJobRequest{
Script: rendered,
WorkDir: workDir,
Script: rendered,
WorkDir: workDir,
Partition: task.Partition,
CPUs: derefInt32(task.Cpus),
TimeLimit: derefInt32ToStr(task.TimeLimit),
QOS: derefStr(task.QOS),
JobName: derefStr(task.JobName),
MemoryPerNode: task.MemoryPerNode,
MemoryPerCpu: task.MemoryPerCpu,
Nodes: task.Nodes,
Tasks: task.Tasks,
CpusPerTask: task.CpusPerTask,
Constraints: task.Constraints,
Reservation: task.Reservation,
Account: task.Account,
Nice: task.Nice,
MailType: task.MailType,
MailUser: task.MailUser,
StandardOutput: task.StandardOutput,
StandardError: task.StandardError,
StandardInput: task.StandardInput,
RequiredNodes: task.RequiredNodes,
ExcludedNodes: task.ExcludedNodes,
BeginTime: task.BeginTime,
Deadline: task.Deadline,
Array: task.Array,
Dependency: task.Dependency,
Requeue: task.Requeue,
KillOnNodeFail: task.KillOnNodeFail,
})
if err != nil {
return fail(model.TaskStepSubmitting, fmt.Sprintf("submit job: %v", err))