Files
hpc/internal/app/app.go
dailz c0176d7764 feat(app): wire file storage DI, cleanup worker, and integration tests
Add DI wiring with graceful MinIO fallback, background cleanup worker for expired sessions and leaked multipart uploads, and end-to-end integration tests.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-15 09:23:25 +08:00

204 lines
5.6 KiB
Go

package app
import (
"context"
"errors"
"fmt"
"net/http"
"os"
"os/signal"
"syscall"
"time"
"gcy_hpc_server/internal/config"
"gcy_hpc_server/internal/handler"
"gcy_hpc_server/internal/server"
"gcy_hpc_server/internal/service"
"gcy_hpc_server/internal/slurm"
"gcy_hpc_server/internal/storage"
"gcy_hpc_server/internal/store"
"go.uber.org/zap"
"gorm.io/gorm"
)
// App bundles the application's long-lived resources. It is created by
// NewApp and torn down by Close; Run drives its lifecycle.
type App struct {
	cfg    *config.Config // application configuration (read-only after NewApp)
	logger *zap.Logger
	db     *gorm.DB     // GORM handle; underlying *sql.DB is closed in Close
	server *http.Server // HTTP server; shut down gracefully in Close
	// cancelCleanup stops the background cleanup worker goroutine.
	// It is nil when file storage (MinIO) is disabled.
	cancelCleanup context.CancelFunc
}
// NewApp initializes all application dependencies — database, Slurm
// client, services, handlers, and router — and returns a ready-to-run App.
// On a partial failure it releases anything already acquired.
func NewApp(cfg *config.Config, logger *zap.Logger) (*App, error) {
	db, err := initDB(cfg, logger)
	if err != nil {
		return nil, err
	}

	client, err := initSlurmClient(cfg)
	if err != nil {
		// The database was already opened; release it before bailing out.
		closeDB(db)
		return nil, err
	}

	httpSrv, cancel := initHTTPServer(cfg, db, client, logger)

	app := &App{
		cfg:           cfg,
		logger:        logger,
		db:            db,
		server:        httpSrv,
		cancelCleanup: cancel,
	}
	return app, nil
}
// Run starts the HTTP server and blocks until a shutdown signal
// (SIGINT/SIGTERM) arrives or the server fails to listen. In both cases
// Close is invoked so resources are released even if the caller exits
// immediately afterwards. The first error encountered is returned.
func (a *App) Run() error {
	errCh := make(chan error, 1)
	go func() {
		a.logger.Info("starting server", zap.String("addr", a.server.Addr))
		// errors.Is (rather than ==) so a wrapped ErrServerClosed from a
		// custom listener path is still treated as a clean shutdown.
		if err := a.server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			errCh <- fmt.Errorf("server listen: %w", err)
		}
	}()

	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)

	select {
	case err := <-errCh:
		// Server crashed before receiving a signal — clean up resources before
		// returning, because the caller may call os.Exit and skip deferred Close().
		a.logger.Error("server exited unexpectedly", zap.Error(err))
		_ = a.Close()
		return err
	case sig := <-quit:
		a.logger.Info("received shutdown signal", zap.String("signal", sig.String()))
	}

	a.logger.Info("shutting down server...")
	return a.Close()
}
// Close cleans up all resources in dependency order: stop the background
// cleanup worker, gracefully shut down the HTTP server (5s grace period),
// then close the database connection. It is safe on a partially
// initialized App (nil fields are skipped) and returns every error
// encountered, joined.
func (a *App) Close() error {
	var errs []error

	if a.cancelCleanup != nil {
		a.cancelCleanup()
	}

	if a.server != nil {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		// errors.Is (rather than ==) so a wrapped ErrServerClosed is still
		// recognized as "already stopped" and not reported as a failure.
		if err := a.server.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
			errs = append(errs, fmt.Errorf("shutdown http server: %w", err))
		}
	}

	if a.db != nil {
		sqlDB, err := a.db.DB()
		if err != nil {
			errs = append(errs, fmt.Errorf("get underlying sql.DB: %w", err))
		} else if err := sqlDB.Close(); err != nil {
			errs = append(errs, fmt.Errorf("close database: %w", err))
		}
	}

	return errors.Join(errs...)
}
// ---------------------------------------------------------------------------
// Initialization helpers
// ---------------------------------------------------------------------------
// initDB opens the GORM database connection and runs schema migrations.
// If migrations fail, the freshly opened connection is closed before the
// error is returned so no handle leaks.
func initDB(cfg *config.Config, logger *zap.Logger) (*gorm.DB, error) {
	db, err := store.NewGormDB(cfg.MySQLDSN, logger, cfg.Log.GormLevel)
	if err != nil {
		return nil, fmt.Errorf("init database: %w", err)
	}

	if migErr := store.AutoMigrate(db); migErr != nil {
		closeDB(db)
		return nil, fmt.Errorf("run migrations: %w", migErr)
	}

	return db, nil
}
// closeDB best-effort closes the underlying *sql.DB of a GORM handle.
// A nil handle is a no-op; errors are deliberately ignored because this
// runs only on teardown/error paths where nothing can be done about them.
func closeDB(db *gorm.DB) {
	if db == nil {
		return
	}
	sqlDB, err := db.DB()
	if err != nil {
		return
	}
	_ = sqlDB.Close() // best-effort cleanup
}
// initSlurmClient builds the Slurm REST client from configuration
// (API URL, user name, and JWT key path).
func initSlurmClient(cfg *config.Config) (*slurm.Client, error) {
	c, err := service.NewSlurmClient(cfg.SlurmAPIURL, cfg.SlurmUserName, cfg.SlurmJWTKeyPath)
	if err != nil {
		return nil, fmt.Errorf("init slurm client: %w", err)
	}
	return c, nil
}
// initHTTPServer wires stores, services, and handlers, and returns the
// configured *http.Server plus a cancel function for the background
// cleanup worker. The cancel function is nil when file storage is
// disabled.
//
// MinIO initialization failure is non-fatal: the server starts with the
// file-storage handlers left nil so core job/cluster/application features
// remain available.
func initHTTPServer(cfg *config.Config, db *gorm.DB, slurmClient *slurm.Client, logger *zap.Logger) (*http.Server, context.CancelFunc) {
	jobSvc := service.NewJobService(slurmClient, logger)
	clusterSvc := service.NewClusterService(slurmClient, logger)
	jobH := handler.NewJobHandler(jobSvc, logger)
	clusterH := handler.NewClusterHandler(clusterSvc, logger)

	appStore := store.NewApplicationStore(db)
	appSvc := service.NewApplicationService(appStore, jobSvc, cfg.WorkDirBase, logger)
	appH := handler.NewApplicationHandler(appSvc, logger)

	// File storage initialization — optional; failure only disables the
	// related routes.
	minioClient, err := storage.NewMinioClient(cfg.Minio)
	if err != nil {
		logger.Warn("failed to initialize MinIO client, file storage disabled", zap.Error(err))
	}

	var (
		uploadH       *handler.UploadHandler
		fileH         *handler.FileHandler
		folderH       *handler.FolderHandler
		cancelCleanup context.CancelFunc
	)
	if minioClient != nil {
		blobStore := store.NewBlobStore(db)
		fileStore := store.NewFileStore(db)
		folderStore := store.NewFolderStore(db)
		uploadStore := store.NewUploadStore(db)
		uploadSvc := service.NewUploadService(minioClient, blobStore, fileStore, uploadStore, cfg.Minio, db, logger)
		folderSvc := service.NewFolderService(folderStore, fileStore, logger)
		fileSvc := service.NewFileService(minioClient, blobStore, fileStore, cfg.Minio.Bucket, db, logger)
		uploadH = handler.NewUploadHandler(uploadSvc, logger)
		fileH = handler.NewFileHandler(fileSvc, logger)
		folderH = handler.NewFolderHandler(folderSvc, logger)

		// Background worker reclaiming expired sessions and leaked
		// multipart uploads; stopped via cancelCleanup in App.Close.
		var cleanupCtx context.Context
		cleanupCtx, cancelCleanup = context.WithCancel(context.Background())
		go startCleanupWorker(cleanupCtx, uploadStore, minioClient, cfg.Minio.Bucket, logger)
	}

	// Single construction path for both modes — previously the router and
	// http.Server were built twice (once per branch), which risked the two
	// copies drifting apart.
	// NOTE(review): no Read/Write timeouts are set on the server; confirm
	// whether that is intentional for long-running uploads.
	router := server.NewRouter(jobH, clusterH, appH, uploadH, fileH, folderH, logger)
	return &http.Server{
		Addr:    ":" + cfg.ServerPort,
		Handler: router,
	}, cancelCleanup
}