Major Changes: - Implement Platform-Only architecture pattern (unified task management) - Add PostgresCacheAdapter for unified caching (platform_schema.app_cache) - Add PgBossQueue for job queue management (platform_schema.job) - Implement CheckpointService using job.data (generic for all modules) - Add intelligent threshold-based dual-mode processing (THRESHOLD=50) - Add task splitting mechanism (auto chunk size recommendation) - Refactor ASL screening service with smart mode selection - Refactor DC extraction service with smart mode selection - Register workers for ASL and DC modules Technical Highlights: - All task management data stored in platform_schema.job.data (JSONB) - Business tables remain clean (no task management fields) - CheckpointService is generic (shared by all modules) - Zero code duplication (DRY principle) - Follows 3-layer architecture principle - Zero additional cost (no Redis needed, save 8400 CNY/year) Code Statistics: - New code: ~1750 lines - Modified code: ~500 lines - Test code: ~1800 lines - Documentation: ~3000 lines Testing: - Unit tests: 8/8 passed - Integration tests: 2/2 passed - Architecture validation: passed - Linter errors: 0 Files: - Platform layer: PostgresCacheAdapter, PgBossQueue, CheckpointService, utils - ASL module: screeningService, screeningWorker - DC module: ExtractionController, extractionWorker - Tests: 11 test files - Docs: Updated 4 key documents Status: Phase 1-7 completed, Phase 8-9 pending
259 lines
6.5 KiB
TypeScript
259 lines
6.5 KiB
TypeScript
/**
|
||
* 断点续传服务(Platform层统一实现)
|
||
*
|
||
* ✅ 重构:利用 pg-boss 的 job.data 字段存储断点信息
|
||
* 不在业务表中存储,符合3层架构原则
|
||
*
|
||
* 优点:
|
||
* 1. 统一管理:所有模块(ASL、DC、SSA等)共用一套逻辑
|
||
* 2. 数据一致:断点数据与任务数据在同一处
|
||
* 3. 查询高效:无需JOIN,直接读取job.data
|
||
* 4. 易维护:只需维护一处代码
|
||
*/
|
||
|
||
import { PrismaClient } from '@prisma/client';
|
||
|
||
/**
|
||
* 断点数据结构
|
||
*/
|
||
export interface CheckpointData {
|
||
/** 当前批次索引 */
|
||
currentBatchIndex: number;
|
||
|
||
/** 当前处理的项索引(在整个数组中的位置) */
|
||
currentIndex: number;
|
||
|
||
/** 已处理的批次数 */
|
||
processedBatches: number;
|
||
|
||
/** 总批次数 */
|
||
totalBatches: number;
|
||
|
||
/** 中间结果(可选) */
|
||
intermediateResult?: any;
|
||
|
||
/** 额外元数据 */
|
||
metadata?: Record<string, any>;
|
||
|
||
/** 最后更新时间 */
|
||
lastUpdate?: Date;
|
||
}
|
||
|
||
/**
|
||
* pg-boss Job 数据结构
|
||
*/
|
||
interface PgBossJob {
|
||
id: string;
|
||
name: string;
|
||
data: any; // JSONB
|
||
state: string;
|
||
priority: number;
|
||
retry_limit: number;
|
||
retry_count: number;
|
||
retry_delay: number;
|
||
retry_backoff: boolean;
|
||
start_after: Date;
|
||
started_on: Date | null;
|
||
singleton_key: string | null;
|
||
singleton_on: Date | null;
|
||
expire_in: any; // interval
|
||
created_on: Date;
|
||
completed_on: Date | null;
|
||
keep_until: Date;
|
||
}
|
||
|
||
/**
|
||
* 断点续传服务
|
||
*
|
||
* @example
|
||
* ```typescript
|
||
* const service = new CheckpointService(prisma);
|
||
*
|
||
* // 保存断点到 pg-boss job.data
|
||
* await service.saveCheckpoint(jobId, {
|
||
* currentBatchIndex: 5,
|
||
* currentIndex: 250,
|
||
* processedBatches: 5,
|
||
* totalBatches: 20
|
||
* });
|
||
*
|
||
* // 从 pg-boss job.data 读取断点
|
||
* const checkpoint = await service.loadCheckpoint(jobId);
|
||
* if (checkpoint) {
|
||
* startFrom = checkpoint.currentIndex;
|
||
* }
|
||
*
|
||
* // 清除断点
|
||
* await service.clearCheckpoint(jobId);
|
||
* ```
|
||
*/
|
||
export class CheckpointService {
|
||
constructor(private prisma: PrismaClient) {}
|
||
|
||
/**
|
||
* 保存任务断点(更新 pg-boss job.data)
|
||
*
|
||
* @param jobId pg-boss 任务ID
|
||
* @param checkpoint 断点数据
|
||
*/
|
||
async saveCheckpoint(jobId: string, checkpoint: CheckpointData): Promise<void> {
|
||
try {
|
||
// 读取当前 job.data
|
||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||
SELECT id, data
|
||
FROM platform_schema.job
|
||
WHERE id = ${jobId}::uuid
|
||
LIMIT 1
|
||
`;
|
||
const job = rows[0] || null;
|
||
|
||
if (!job) {
|
||
throw new Error(`Job not found: ${jobId}`);
|
||
}
|
||
|
||
// 合并断点数据到 job.data
|
||
const updatedData = {
|
||
...(job.data || {}),
|
||
checkpoint: {
|
||
...checkpoint,
|
||
lastUpdate: new Date()
|
||
}
|
||
};
|
||
|
||
// 更新 job.data
|
||
await this.prisma.$executeRaw`
|
||
UPDATE platform_schema.job
|
||
SET data = ${JSON.stringify(updatedData)}::jsonb
|
||
WHERE id = ${jobId}::uuid
|
||
`;
|
||
|
||
console.log(`[CheckpointService] Checkpoint saved for job: ${jobId}`, {
|
||
batchIndex: checkpoint.currentBatchIndex,
|
||
index: checkpoint.currentIndex
|
||
});
|
||
|
||
} catch (error) {
|
||
console.error(`[CheckpointService] Failed to save checkpoint for job ${jobId}:`, error);
|
||
throw error;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 加载任务断点(从 pg-boss job.data 读取)
|
||
*
|
||
* @param jobId pg-boss 任务ID
|
||
* @returns 断点数据,如果不存在则返回 null
|
||
*/
|
||
async loadCheckpoint(jobId: string): Promise<CheckpointData | null> {
|
||
try {
|
||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||
SELECT id, data
|
||
FROM platform_schema.job
|
||
WHERE id = ${jobId}::uuid
|
||
LIMIT 1
|
||
`;
|
||
const job = rows[0] || null;
|
||
|
||
if (!job || !job.data?.checkpoint) {
|
||
return null;
|
||
}
|
||
|
||
return job.data.checkpoint as CheckpointData;
|
||
|
||
} catch (error) {
|
||
console.error(`[CheckpointService] Failed to load checkpoint for job ${jobId}:`, error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 清除任务断点(从 pg-boss job.data 中删除)
|
||
*
|
||
* @param jobId pg-boss 任务ID
|
||
*/
|
||
async clearCheckpoint(jobId: string): Promise<void> {
|
||
try {
|
||
// 读取当前 job.data
|
||
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
|
||
SELECT id, data
|
||
FROM platform_schema.job
|
||
WHERE id = ${jobId}::uuid
|
||
LIMIT 1
|
||
`;
|
||
const job = rows[0] || null;
|
||
|
||
if (!job) {
|
||
console.log(`[CheckpointService] Job not found: ${jobId}`);
|
||
return;
|
||
}
|
||
|
||
// 删除 checkpoint 字段
|
||
const updatedData = { ...(job.data || {}) };
|
||
delete updatedData.checkpoint;
|
||
|
||
// 更新 job.data
|
||
await this.prisma.$executeRaw`
|
||
UPDATE platform_schema.job
|
||
SET data = ${JSON.stringify(updatedData)}::jsonb
|
||
WHERE id = ${jobId}::uuid
|
||
`;
|
||
|
||
console.log(`[CheckpointService] Checkpoint cleared for job: ${jobId}`);
|
||
|
||
} catch (error) {
|
||
console.error(`[CheckpointService] Failed to clear checkpoint for job ${jobId}:`, error);
|
||
throw error;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 获取任务的批次进度
|
||
*
|
||
* @param jobId pg-boss 任务ID
|
||
* @returns 批次进度信息
|
||
*/
|
||
async getProgress(jobId: string): Promise<{
|
||
currentBatch: number;
|
||
totalBatches: number;
|
||
processedBatches: number;
|
||
percentage: number;
|
||
} | null> {
|
||
try {
|
||
const checkpoint = await this.loadCheckpoint(jobId);
|
||
|
||
if (!checkpoint) {
|
||
return null;
|
||
}
|
||
|
||
const percentage = checkpoint.totalBatches > 0
|
||
? Math.round((checkpoint.processedBatches / checkpoint.totalBatches) * 100)
|
||
: 0;
|
||
|
||
return {
|
||
currentBatch: checkpoint.currentBatchIndex,
|
||
totalBatches: checkpoint.totalBatches,
|
||
processedBatches: checkpoint.processedBatches,
|
||
percentage
|
||
};
|
||
|
||
} catch (error) {
|
||
console.error(`[CheckpointService] Failed to get progress for job ${jobId}:`, error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 检查任务是否可以从断点恢复
|
||
*
|
||
* @param jobId pg-boss 任务ID
|
||
* @returns 是否存在有效断点
|
||
*/
|
||
async canResume(jobId: string): Promise<boolean> {
|
||
const checkpoint = await this.loadCheckpoint(jobId);
|
||
return checkpoint !== null && checkpoint.processedBatches < checkpoint.totalBatches;
|
||
}
|
||
}
|
||
|
||
// 导出类(不导出单例,由使用方创建实例)
|
||
// export const checkpointService = new CheckpointService(prisma);
|