package app

import (
	"context"
	"sync"
	"time"

	"go.uber.org/zap"
)

const (
	// defaultRefreshInterval replaces a non-positive refresh interval, which
	// time.NewTicker would otherwise reject with a panic.
	defaultRefreshInterval = time.Minute

	// stuckTaskRecoveryInterval is the fixed period between calls to
	// TaskPollable.RecoverStuckTasks.
	stuckTaskRecoveryInterval = 5 * time.Minute
)

// TaskPollable defines the interface for refreshing stale task statuses
// and recovering stuck tasks.
type TaskPollable interface {
	// RefreshStaleTasks refreshes stale task statuses; a returned error is
	// logged by TaskPoller and does not stop the polling loop.
	RefreshStaleTasks(ctx context.Context) error
	// RecoverStuckTasks recovers stuck tasks. It reports no error to the caller.
	RecoverStuckTasks(ctx context.Context)
}

// TaskPoller periodically polls Slurm for task status updates and recovers
// stuck tasks via TaskPollable.
type TaskPoller struct {
	taskSvc  TaskPollable       // service invoked on every tick
	interval time.Duration      // period between RefreshStaleTasks calls
	cancel   context.CancelFunc // cancels the goroutines of the current run; nil before Start
	wg       sync.WaitGroup     // tracks the background goroutines so Stop can wait for them
	logger   *zap.Logger        // receives refresh errors and configuration warnings
}

// NewTaskPoller creates a new TaskPoller with the given service, interval, and logger.
//
// A nil logger is replaced with a no-op logger so that logging from the
// background goroutines cannot dereference a nil pointer.
func NewTaskPoller(taskSvc TaskPollable, interval time.Duration, logger *zap.Logger) *TaskPoller {
	if logger == nil {
		logger = zap.NewNop()
	}
	return &TaskPoller{
		taskSvc:  taskSvc,
		interval: interval,
		logger:   logger,
	}
}

// Start launches background goroutines that periodically refresh stale tasks
// (every configured interval) and recover stuck tasks (every
// stuckTaskRecoveryInterval). The goroutines run until ctx is cancelled or
// Stop is called.
//
// Calling Start again cancels the goroutines of the previous run before
// launching new ones, so Stop can always terminate everything it waits for.
// Start and Stop are not synchronized with each other; call them from a
// single goroutine.
func (p *TaskPoller) Start(ctx context.Context) {
	// Without this, a second Start would overwrite p.cancel and leave the
	// first run's goroutines cancellable only through the parent context,
	// making Stop block in wg.Wait.
	if p.cancel != nil {
		p.cancel()
	}

	runCtx, cancel := context.WithCancel(ctx)
	p.cancel = cancel

	// time.NewTicker panics on a non-positive duration, and that panic would
	// happen inside a spawned goroutine where the caller cannot recover it.
	refreshEvery := p.interval
	if refreshEvery <= 0 {
		p.logger.Warn("non-positive task refresh interval, using default",
			zap.Duration("interval", p.interval),
			zap.Duration("default", defaultRefreshInterval))
		refreshEvery = defaultRefreshInterval
	}

	p.runEvery(runCtx, refreshEvery, func(ctx context.Context) {
		if err := p.taskSvc.RefreshStaleTasks(ctx); err != nil {
			p.logger.Error("failed to refresh stale tasks", zap.Error(err))
		}
	})

	p.runEvery(runCtx, stuckTaskRecoveryInterval, func(ctx context.Context) {
		p.taskSvc.RecoverStuckTasks(ctx)
	})
}

// runEvery starts a goroutine, tracked by p.wg, that calls fn on every tick of
// a ticker with the given period until ctx is done. every must be positive.
func (p *TaskPoller) runEvery(ctx context.Context, every time.Duration, fn func(ctx context.Context)) {
	p.wg.Add(1)
	go func() {
		defer p.wg.Done()

		ticker := time.NewTicker(every)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				fn(ctx)
			}
		}
	}()
}

// Stop cancels the background goroutines and waits for them to finish.
// It is a no-op wait when Start was never called.
func (p *TaskPoller) Stop() {
	if p.cancel != nil {
		p.cancel()
	}
	p.wg.Wait()
}