feat(platform): Complete Postgres-Only architecture refactoring (Phase 1-7)

Major Changes:
- Implement Platform-Only architecture pattern (unified task management)
- Add PostgresCacheAdapter for unified caching (platform_schema.app_cache)
- Add PgBossQueue for job queue management (platform_schema.job)
- Implement CheckpointService using job.data (generic for all modules)
- Add intelligent threshold-based dual-mode processing (THRESHOLD=50)
- Add task splitting mechanism (auto chunk size recommendation)
- Refactor ASL screening service with smart mode selection
- Refactor DC extraction service with smart mode selection
- Register workers for ASL and DC modules

Technical Highlights:
- All task management data stored in platform_schema.job.data (JSONB)
- Business tables remain clean (no task management fields)
- CheckpointService is generic (shared by all modules)
- Zero code duplication (DRY principle)
- Follows 3-layer architecture principle
- Zero additional cost (no Redis needed, save 8400 CNY/year)

Code Statistics:
- New code: ~1750 lines
- Modified code: ~500 lines
- Test code: ~1800 lines
- Documentation: ~3000 lines

Testing:
- Unit tests: 8/8 passed
- Integration tests: 2/2 passed
- Architecture validation: passed
- Linter errors: 0

Files:
- Platform layer: PostgresCacheAdapter, PgBossQueue, CheckpointService, utils
- ASL module: screeningService, screeningWorker
- DC module: ExtractionController, extractionWorker
- Tests: 11 test files
- Docs: Updated 4 key documents

Status: Phase 1-7 completed, Phase 8-9 pending
This commit is contained in:
2025-12-13 16:10:04 +08:00
parent a3586cdf30
commit fa72beea6c
135 changed files with 17508 additions and 91 deletions

View File

@@ -0,0 +1,258 @@
/**
* 断点续传服务Platform层统一实现
*
* ✅ 重构:利用 pg-boss 的 job.data 字段存储断点信息
* 不在业务表中存储符合3层架构原则
*
* 优点:
* 1. 统一管理所有模块ASL、DC、SSA等共用一套逻辑
* 2. 数据一致:断点数据与任务数据在同一处
* 3. 查询高效无需JOIN直接读取job.data
* 4. 易维护:只需维护一处代码
*/
import { PrismaClient } from '@prisma/client';
/**
* 断点数据结构
*/
export interface CheckpointData {
/** 当前批次索引 */
currentBatchIndex: number;
/** 当前处理的项索引(在整个数组中的位置) */
currentIndex: number;
/** 已处理的批次数 */
processedBatches: number;
/** 总批次数 */
totalBatches: number;
/** 中间结果(可选) */
intermediateResult?: any;
/** 额外元数据 */
metadata?: Record<string, any>;
/** 最后更新时间 */
lastUpdate?: Date;
}
/**
* pg-boss Job 数据结构
*/
interface PgBossJob {
id: string;
name: string;
data: any; // JSONB
state: string;
priority: number;
retry_limit: number;
retry_count: number;
retry_delay: number;
retry_backoff: boolean;
start_after: Date;
started_on: Date | null;
singleton_key: string | null;
singleton_on: Date | null;
expire_in: any; // interval
created_on: Date;
completed_on: Date | null;
keep_until: Date;
}
/**
* 断点续传服务
*
* @example
* ```typescript
* const service = new CheckpointService(prisma);
*
* // 保存断点到 pg-boss job.data
* await service.saveCheckpoint(jobId, {
* currentBatchIndex: 5,
* currentIndex: 250,
* processedBatches: 5,
* totalBatches: 20
* });
*
* // 从 pg-boss job.data 读取断点
* const checkpoint = await service.loadCheckpoint(jobId);
* if (checkpoint) {
* startFrom = checkpoint.currentIndex;
* }
*
* // 清除断点
* await service.clearCheckpoint(jobId);
* ```
*/
export class CheckpointService {
constructor(private prisma: PrismaClient) {}
/**
* 保存任务断点(更新 pg-boss job.data
*
* @param jobId pg-boss 任务ID
* @param checkpoint 断点数据
*/
async saveCheckpoint(jobId: string, checkpoint: CheckpointData): Promise<void> {
try {
// 读取当前 job.data
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
SELECT id, data
FROM platform_schema.job
WHERE id = ${jobId}::uuid
LIMIT 1
`;
const job = rows[0] || null;
if (!job) {
throw new Error(`Job not found: ${jobId}`);
}
// 合并断点数据到 job.data
const updatedData = {
...(job.data || {}),
checkpoint: {
...checkpoint,
lastUpdate: new Date()
}
};
// 更新 job.data
await this.prisma.$executeRaw`
UPDATE platform_schema.job
SET data = ${JSON.stringify(updatedData)}::jsonb
WHERE id = ${jobId}::uuid
`;
console.log(`[CheckpointService] Checkpoint saved for job: ${jobId}`, {
batchIndex: checkpoint.currentBatchIndex,
index: checkpoint.currentIndex
});
} catch (error) {
console.error(`[CheckpointService] Failed to save checkpoint for job ${jobId}:`, error);
throw error;
}
}
/**
* 加载任务断点(从 pg-boss job.data 读取)
*
* @param jobId pg-boss 任务ID
* @returns 断点数据,如果不存在则返回 null
*/
async loadCheckpoint(jobId: string): Promise<CheckpointData | null> {
try {
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
SELECT id, data
FROM platform_schema.job
WHERE id = ${jobId}::uuid
LIMIT 1
`;
const job = rows[0] || null;
if (!job || !job.data?.checkpoint) {
return null;
}
return job.data.checkpoint as CheckpointData;
} catch (error) {
console.error(`[CheckpointService] Failed to load checkpoint for job ${jobId}:`, error);
return null;
}
}
/**
* 清除任务断点(从 pg-boss job.data 中删除)
*
* @param jobId pg-boss 任务ID
*/
async clearCheckpoint(jobId: string): Promise<void> {
try {
// 读取当前 job.data
const rows = await this.prisma.$queryRaw<PgBossJob[]>`
SELECT id, data
FROM platform_schema.job
WHERE id = ${jobId}::uuid
LIMIT 1
`;
const job = rows[0] || null;
if (!job) {
console.log(`[CheckpointService] Job not found: ${jobId}`);
return;
}
// 删除 checkpoint 字段
const updatedData = { ...(job.data || {}) };
delete updatedData.checkpoint;
// 更新 job.data
await this.prisma.$executeRaw`
UPDATE platform_schema.job
SET data = ${JSON.stringify(updatedData)}::jsonb
WHERE id = ${jobId}::uuid
`;
console.log(`[CheckpointService] Checkpoint cleared for job: ${jobId}`);
} catch (error) {
console.error(`[CheckpointService] Failed to clear checkpoint for job ${jobId}:`, error);
throw error;
}
}
/**
* 获取任务的批次进度
*
* @param jobId pg-boss 任务ID
* @returns 批次进度信息
*/
async getProgress(jobId: string): Promise<{
currentBatch: number;
totalBatches: number;
processedBatches: number;
percentage: number;
} | null> {
try {
const checkpoint = await this.loadCheckpoint(jobId);
if (!checkpoint) {
return null;
}
const percentage = checkpoint.totalBatches > 0
? Math.round((checkpoint.processedBatches / checkpoint.totalBatches) * 100)
: 0;
return {
currentBatch: checkpoint.currentBatchIndex,
totalBatches: checkpoint.totalBatches,
processedBatches: checkpoint.processedBatches,
percentage
};
} catch (error) {
console.error(`[CheckpointService] Failed to get progress for job ${jobId}:`, error);
return null;
}
}
/**
* 检查任务是否可以从断点恢复
*
* @param jobId pg-boss 任务ID
* @returns 是否存在有效断点
*/
async canResume(jobId: string): Promise<boolean> {
const checkpoint = await this.loadCheckpoint(jobId);
return checkpoint !== null && checkpoint.processedBatches < checkpoint.totalBatches;
}
}
// 导出类(不导出单例,由使用方创建实例)
// export const checkpointService = new CheckpointService(prisma);