feat(platform): Complete Postgres-Only architecture refactoring (Phase 1-7)
Major Changes: - Implement Platform-Only architecture pattern (unified task management) - Add PostgresCacheAdapter for unified caching (platform_schema.app_cache) - Add PgBossQueue for job queue management (platform_schema.job) - Implement CheckpointService using job.data (generic for all modules) - Add intelligent threshold-based dual-mode processing (THRESHOLD=50) - Add task splitting mechanism (auto chunk size recommendation) - Refactor ASL screening service with smart mode selection - Refactor DC extraction service with smart mode selection - Register workers for ASL and DC modules Technical Highlights: - All task management data stored in platform_schema.job.data (JSONB) - Business tables remain clean (no task management fields) - CheckpointService is generic (shared by all modules) - Zero code duplication (DRY principle) - Follows 3-layer architecture principle - Zero additional cost (no Redis needed, save 8400 CNY/year) Code Statistics: - New code: ~1750 lines - Modified code: ~500 lines - Test code: ~1800 lines - Documentation: ~3000 lines Testing: - Unit tests: 8/8 passed - Integration tests: 2/2 passed - Architecture validation: passed - Linter errors: 0 Files: - Platform layer: PostgresCacheAdapter, PgBossQueue, CheckpointService, utils - ASL module: screeningService, screeningWorker - DC module: ExtractionController, extractionWorker - Tests: 11 test files - Docs: Updated 4 key documents Status: Phase 1-7 completed, Phase 8-9 pending
This commit is contained in:
258
backend/src/common/jobs/CheckpointService.ts
Normal file
258
backend/src/common/jobs/CheckpointService.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
/**
|
||||
* 断点续传服务(Platform层统一实现)
|
||||
*
|
||||
* ✅ 重构:利用 pg-boss 的 job.data 字段存储断点信息
|
||||
* 不在业务表中存储,符合3层架构原则
|
||||
*
|
||||
* 优点:
|
||||
* 1. 统一管理:所有模块(ASL、DC、SSA等)共用一套逻辑
|
||||
* 2. 数据一致:断点数据与任务数据在同一处
|
||||
* 3. 查询高效:无需JOIN,直接读取job.data
|
||||
* 4. 易维护:只需维护一处代码
|
||||
*/
|
||||
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
/**
|
||||
* 断点数据结构
|
||||
*/
|
||||
export interface CheckpointData {
|
||||
/** 当前批次索引 */
|
||||
currentBatchIndex: number;
|
||||
|
||||
/** 当前处理的项索引(在整个数组中的位置) */
|
||||
currentIndex: number;
|
||||
|
||||
/** 已处理的批次数 */
|
||||
processedBatches: number;
|
||||
|
||||
/** 总批次数 */
|
||||
totalBatches: number;
|
||||
|
||||
/** 中间结果(可选) */
|
||||
intermediateResult?: any;
|
||||
|
||||
/** 额外元数据 */
|
||||
metadata?: Record<string, any>;
|
||||
|
||||
/** 最后更新时间 */
|
||||
lastUpdate?: Date;
|
||||
}
|
||||
|
||||
/**
|
||||
* pg-boss Job 数据结构
|
||||
*/
|
||||
interface PgBossJob {
|
||||
id: string;
|
||||
name: string;
|
||||
data: any; // JSONB
|
||||
state: string;
|
||||
priority: number;
|
||||
retry_limit: number;
|
||||
retry_count: number;
|
||||
retry_delay: number;
|
||||
retry_backoff: boolean;
|
||||
start_after: Date;
|
||||
started_on: Date | null;
|
||||
singleton_key: string | null;
|
||||
singleton_on: Date | null;
|
||||
expire_in: any; // interval
|
||||
created_on: Date;
|
||||
completed_on: Date | null;
|
||||
keep_until: Date;
|
||||
}
|
||||
|
||||
/**
|
||||
* 断点续传服务
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const service = new CheckpointService(prisma);
|
||||
*
|
||||
* // 保存断点到 pg-boss job.data
|
||||
* await service.saveCheckpoint(jobId, {
|
||||
* currentBatchIndex: 5,
|
||||
* currentIndex: 250,
|
||||
* processedBatches: 5,
|
||||
* totalBatches: 20
|
||||
* });
|
||||
*
|
||||
* // 从 pg-boss job.data 读取断点
|
||||
* const checkpoint = await service.loadCheckpoint(jobId);
|
||||
* if (checkpoint) {
|
||||
* startFrom = checkpoint.currentIndex;
|
||||
* }
|
||||
*
|
||||
* // 清除断点
|
||||
* await service.clearCheckpoint(jobId);
|
||||
* ```
|
||||
*/
|
||||
export class CheckpointService {
|
||||
constructor(private prisma: PrismaClient) {}
|
||||
|
||||
/**
|
||||
* 保存任务断点(更新 pg-boss job.data)
|
||||
*
|
||||
* @param jobId pg-boss 任务ID
|
||||
* @param checkpoint 断点数据
|
||||
*/
|
||||
async saveCheckpoint(jobId: string, checkpoint: CheckpointData): Promise<void> {
|
||||
try {
|
||||
// 读取当前 job.data
|
||||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||||
SELECT id, data
|
||||
FROM platform_schema.job
|
||||
WHERE id = ${jobId}::uuid
|
||||
LIMIT 1
|
||||
`;
|
||||
const job = rows[0] || null;
|
||||
|
||||
if (!job) {
|
||||
throw new Error(`Job not found: ${jobId}`);
|
||||
}
|
||||
|
||||
// 合并断点数据到 job.data
|
||||
const updatedData = {
|
||||
...(job.data || {}),
|
||||
checkpoint: {
|
||||
...checkpoint,
|
||||
lastUpdate: new Date()
|
||||
}
|
||||
};
|
||||
|
||||
// 更新 job.data
|
||||
await this.prisma.$executeRaw`
|
||||
UPDATE platform_schema.job
|
||||
SET data = ${JSON.stringify(updatedData)}::jsonb
|
||||
WHERE id = ${jobId}::uuid
|
||||
`;
|
||||
|
||||
console.log(`[CheckpointService] Checkpoint saved for job: ${jobId}`, {
|
||||
batchIndex: checkpoint.currentBatchIndex,
|
||||
index: checkpoint.currentIndex
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[CheckpointService] Failed to save checkpoint for job ${jobId}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 加载任务断点(从 pg-boss job.data 读取)
|
||||
*
|
||||
* @param jobId pg-boss 任务ID
|
||||
* @returns 断点数据,如果不存在则返回 null
|
||||
*/
|
||||
async loadCheckpoint(jobId: string): Promise<CheckpointData | null> {
|
||||
try {
|
||||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||||
SELECT id, data
|
||||
FROM platform_schema.job
|
||||
WHERE id = ${jobId}::uuid
|
||||
LIMIT 1
|
||||
`;
|
||||
const job = rows[0] || null;
|
||||
|
||||
if (!job || !job.data?.checkpoint) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return job.data.checkpoint as CheckpointData;
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[CheckpointService] Failed to load checkpoint for job ${jobId}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除任务断点(从 pg-boss job.data 中删除)
|
||||
*
|
||||
* @param jobId pg-boss 任务ID
|
||||
*/
|
||||
async clearCheckpoint(jobId: string): Promise<void> {
|
||||
try {
|
||||
// 读取当前 job.data
|
||||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||||
SELECT id, data
|
||||
FROM platform_schema.job
|
||||
WHERE id = ${jobId}::uuid
|
||||
LIMIT 1
|
||||
`;
|
||||
const job = rows[0] || null;
|
||||
|
||||
if (!job) {
|
||||
console.log(`[CheckpointService] Job not found: ${jobId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// 删除 checkpoint 字段
|
||||
const updatedData = { ...(job.data || {}) };
|
||||
delete updatedData.checkpoint;
|
||||
|
||||
// 更新 job.data
|
||||
await this.prisma.$executeRaw`
|
||||
UPDATE platform_schema.job
|
||||
SET data = ${JSON.stringify(updatedData)}::jsonb
|
||||
WHERE id = ${jobId}::uuid
|
||||
`;
|
||||
|
||||
console.log(`[CheckpointService] Checkpoint cleared for job: ${jobId}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[CheckpointService] Failed to clear checkpoint for job ${jobId}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取任务的批次进度
|
||||
*
|
||||
* @param jobId pg-boss 任务ID
|
||||
* @returns 批次进度信息
|
||||
*/
|
||||
async getProgress(jobId: string): Promise<{
|
||||
currentBatch: number;
|
||||
totalBatches: number;
|
||||
processedBatches: number;
|
||||
percentage: number;
|
||||
} | null> {
|
||||
try {
|
||||
const checkpoint = await this.loadCheckpoint(jobId);
|
||||
|
||||
if (!checkpoint) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const percentage = checkpoint.totalBatches > 0
|
||||
? Math.round((checkpoint.processedBatches / checkpoint.totalBatches) * 100)
|
||||
: 0;
|
||||
|
||||
return {
|
||||
currentBatch: checkpoint.currentBatchIndex,
|
||||
totalBatches: checkpoint.totalBatches,
|
||||
processedBatches: checkpoint.processedBatches,
|
||||
percentage
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[CheckpointService] Failed to get progress for job ${jobId}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查任务是否可以从断点恢复
|
||||
*
|
||||
* @param jobId pg-boss 任务ID
|
||||
* @returns 是否存在有效断点
|
||||
*/
|
||||
async canResume(jobId: string): Promise<boolean> {
|
||||
const checkpoint = await this.loadCheckpoint(jobId);
|
||||
return checkpoint !== null && checkpoint.processedBatches < checkpoint.totalBatches;
|
||||
}
|
||||
}
|
||||
|
||||
// 导出类(不导出单例,由使用方创建实例)
|
||||
// export const checkpointService = new CheckpointService(prisma);
|
||||
Reference in New Issue
Block a user