feat(platform): Implement platform infrastructure with cloud-native support
- Add storage service (LocalAdapter + OSSAdapter stub) - Add database connection pool with graceful shutdown - Add logging system with winston (JSON format) - Add environment config management - Add async job queue (MemoryQueue + DatabaseQueue stub) - Add cache service (MemoryCache + RedisCache stub) - Add health check endpoints for SAE - Add monitoring metrics for DB, memory, API Key Features: - Zero-code switching between local and cloud environments - Adapter pattern for multi-environment support - Backward compatible with legacy modules - Ready for Aliyun Serverless deployment Related: Platform Infrastructure Planning (docs/09-架构实施/04-平台基础设施规划.md)
This commit is contained in:
221
backend/src/common/health/healthCheck.ts
Normal file
221
backend/src/common/health/healthCheck.ts
Normal file
@@ -0,0 +1,221 @@
|
||||
import { loadavg } from 'node:os'
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'
import { prisma, getDatabaseConnectionCount } from '../../config/database.js'
|
||||
|
||||
/**
 * Payload returned by the health check endpoints.
 *
 * `checks` is optional; when present it maps a check name to that
 * check's individual result.
 */
export interface HealthCheckResponse {
  /** Aggregate status across all checks. */
  status: 'ok' | 'degraded' | 'error'
  /** Numeric timestamp of when the response was built. */
  timestamp: number
  /** Process uptime value reported with the response. */
  uptime: number
  /** Per-check results keyed by check name. */
  checks?: {
    [checkName: string]: {
      /** Status of this individual check. */
      status: 'ok' | 'degraded' | 'error'
      /** Optional human-readable summary. */
      message?: string
      /** Optional free-form diagnostic data. */
      details?: any
    }
  }
}
|
||||
|
||||
/** Fallback connection ceiling used when DB_MAX_CONNECTIONS is unset or not a positive finite number. */
const DEFAULT_MAX_DB_CONNECTIONS = 400

/** Connection usage (percent of the ceiling) above which the database check is reported as degraded. */
const DB_USAGE_DEGRADED_PERCENT = 90

/** Heap usage (heapUsed as a percent of heapTotal) above which the memory check is reported as degraded. */
const HEAP_USAGE_DEGRADED_PERCENT = 90

const BYTES_PER_MB = 1024 * 1024

/** Converts a byte count to whole megabytes (rounded). */
function toMegabytes(bytes: number): number {
  return Math.round(bytes / BYTES_PER_MB)
}

/** Reads DB_MAX_CONNECTIONS from the environment, falling back to the default for missing or invalid values. */
function resolveMaxConnections(): number {
  const configured = Number(process.env.DB_MAX_CONNECTIONS)
  return Number.isFinite(configured) && configured > 0
    ? configured
    : DEFAULT_MAX_DB_CONNECTIONS
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/**
 * Registers the health check routes on a Fastify instance.
 *
 * Three endpoints are added:
 * 1. `GET /health/liveness`  - liveness probe; always answers 200 with a minimal payload.
 * 2. `GET /health/readiness` - readiness probe; checks the database and heap usage,
 *    answering 503 when the overall status is `error` and 200 otherwise.
 * 3. `GET /health`           - detailed diagnostics (intended for development).
 *
 * @param app - Fastify instance to attach the routes to.
 * @returns A promise that resolves once the routes are registered.
 *
 * @example
 * ```typescript
 * import { registerHealthRoutes } from '@/common/health'
 *
 * // Register while bootstrapping the Fastify application
 * await registerHealthRoutes(app)
 * ```
 */
export async function registerHealthRoutes(app: FastifyInstance): Promise<void> {
  /**
   * Liveness probe.
   *
   * Purpose: tell the platform that the process can still respond at all.
   * Per the original author's notes, a failing liveness probe causes SAE
   * to restart the container, so this handler deliberately touches no dependencies.
   *
   * GET /health/liveness
   */
  app.get('/health/liveness', async (
    _request: FastifyRequest,
    reply: FastifyReply
  ) => {
    const response: HealthCheckResponse = {
      status: 'ok',
      timestamp: Date.now(),
      uptime: process.uptime()
    }

    return reply.status(200).send(response)
  })

  /**
   * Readiness probe.
   *
   * Purpose: tell the platform whether this instance should receive traffic.
   * Per the original author's notes, a failing readiness probe makes SAE
   * temporarily stop routing traffic to the instance.
   *
   * GET /health/readiness
   */
  app.get('/health/readiness', async (
    _request: FastifyRequest,
    reply: FastifyReply
  ) => {
    const checks: NonNullable<HealthCheckResponse['checks']> = {}
    let overallStatus: HealthCheckResponse['status'] = 'ok'

    // ---------- Database connectivity ----------
    try {
      await prisma.$queryRaw`SELECT 1`

      // Compare the current connection count against the configured ceiling.
      const connectionCount = await getDatabaseConnectionCount()
      const maxConnections = resolveMaxConnections()
      const connectionUsage = (connectionCount / maxConnections) * 100
      const poolSaturated = connectionUsage > DB_USAGE_DEGRADED_PERCENT

      checks.database = {
        status: poolSaturated ? 'degraded' : 'ok',
        message: poolSaturated ? 'Connection pool usage high' : 'Connected',
        details: {
          currentConnections: connectionCount,
          maxConnections,
          usagePercent: Math.round(connectionUsage)
        }
      }

      if (poolSaturated) {
        overallStatus = 'degraded'
      }
    } catch (error: unknown) {
      // Any failure in the probe query or the connection-count lookup marks the instance as not ready.
      checks.database = {
        status: 'error',
        message: 'Database connection failed',
        details: {
          error: describeError(error)
        }
      }
      overallStatus = 'error'
    }

    // ---------- Memory usage ----------
    const memUsage = process.memoryUsage()
    const memUsageMB = {
      rss: toMegabytes(memUsage.rss),
      heapTotal: toMegabytes(memUsage.heapTotal),
      heapUsed: toMegabytes(memUsage.heapUsed),
      external: toMegabytes(memUsage.external)
    }

    // Flag the heap as degraded once usage exceeds HEAP_USAGE_DEGRADED_PERCENT (90%).
    const heapUsagePercent = (memUsage.heapUsed / memUsage.heapTotal) * 100
    const heapPressure = heapUsagePercent > HEAP_USAGE_DEGRADED_PERCENT

    checks.memory = {
      status: heapPressure ? 'degraded' : 'ok',
      message: heapPressure ? 'High memory usage' : 'Normal',
      details: memUsageMB
    }

    // Memory pressure only downgrades an otherwise healthy result; it never masks a database error.
    if (heapPressure && overallStatus === 'ok') {
      overallStatus = 'degraded'
    }

    // ---------- Response ----------
    const response: HealthCheckResponse = {
      status: overallStatus,
      timestamp: Date.now(),
      uptime: process.uptime(),
      checks
    }

    // "degraded" still answers 200 so the instance keeps receiving traffic; only "error" yields 503.
    const statusCode = overallStatus === 'error' ? 503 : 200

    return reply.status(statusCode).send(response)
  })

  /**
   * Detailed health report (development aid).
   *
   * GET /health
   */
  app.get('/health', async (
    _request: FastifyRequest,
    reply: FastifyReply
  ) => {
    const checks: Record<string, unknown> = {}
    let overallStatus: HealthCheckResponse['status'] = 'ok'

    // ---------- Database ----------
    try {
      // Time only the probe query itself.
      const startTime = Date.now()
      await prisma.$queryRaw`SELECT 1`
      const responseTime = Date.now() - startTime

      const connectionCount = await getDatabaseConnectionCount()
      const maxConnections = resolveMaxConnections()

      checks.database = {
        status: 'ok',
        responseTime: `${responseTime}ms`,
        connections: {
          current: connectionCount,
          max: maxConnections,
          usage: `${Math.round((connectionCount / maxConnections) * 100)}%`
        }
      }
    } catch (error: unknown) {
      checks.database = {
        status: 'error',
        error: describeError(error)
      }
      overallStatus = 'error'
    }

    // ---------- Runtime environment ----------
    checks.environment = {
      nodeVersion: process.version,
      platform: process.platform,
      nodeEnv: process.env.NODE_ENV || 'development',
      pid: process.pid,
      uptime: `${Math.round(process.uptime())}s`
    }

    // ---------- Memory ----------
    const memUsage = process.memoryUsage()
    checks.memory = {
      rss: `${toMegabytes(memUsage.rss)}MB`,
      heapTotal: `${toMegabytes(memUsage.heapTotal)}MB`,
      heapUsed: `${toMegabytes(memUsage.heapUsed)}MB`,
      external: `${toMegabytes(memUsage.external)}MB`
    }

    // ---------- CPU ----------
    checks.cpu = {
      usage: process.cpuUsage(),
      // Load average is skipped on Windows; the statically imported loadavg()
      // replaces a CommonJS require() call, which is unavailable in ES modules.
      loadAverage: process.platform !== 'win32' ? loadavg() : 'N/A'
    }

    // ---------- Response ----------
    const response = {
      status: overallStatus,
      timestamp: new Date().toISOString(),
      checks
    }

    // NOTE(review): this endpoint answers 200 even when status is "error";
    // kept as-is for compatibility - confirm that is intended.
    return reply.status(200).send(response)
  })

  console.log('[Health] Health check routes registered:')
  console.log(' - GET /health/liveness')
  console.log(' - GET /health/readiness')
  console.log(' - GET /health (detailed)')
}
|
||||
|
||||
26
backend/src/common/health/index.ts
Normal file
26
backend/src/common/health/index.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* 健康检查统一导出
|
||||
*
|
||||
* 提供SAE健康检查端点,用于存活和就绪探测。
|
||||
*
|
||||
* @module health
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* import { registerHealthRoutes } from '@/common/health'
|
||||
* import Fastify from 'fastify'
|
||||
*
|
||||
* const app = Fastify()
|
||||
*
|
||||
* // 注册健康检查路由
|
||||
* await registerHealthRoutes(app)
|
||||
*
|
||||
* // SAE配置示例:
|
||||
* // - Liveness Probe: GET /health/liveness (每10秒检查一次)
|
||||
* // - Readiness Probe: GET /health/readiness (每5秒检查一次)
|
||||
* ```
|
||||
*/
|
||||
|
||||
export { registerHealthRoutes } from './healthCheck.js'
|
||||
export type { HealthCheckResponse } from './healthCheck.js'
|
||||
|
||||
Reference in New Issue
Block a user