feat(platform): Complete Postgres-Only architecture refactoring (Phase 1-7)

Major Changes:
- Implement Platform-Only architecture pattern (unified task management)
- Add PostgresCacheAdapter for unified caching (platform_schema.app_cache)
- Add PgBossQueue for job queue management (platform_schema.job)
- Implement CheckpointService using job.data (generic for all modules)
- Add intelligent threshold-based dual-mode processing (THRESHOLD=50)
- Add task splitting mechanism (auto chunk size recommendation)
- Refactor ASL screening service with smart mode selection
- Refactor DC extraction service with smart mode selection
- Register workers for ASL and DC modules

Technical Highlights:
- All task management data stored in platform_schema.job.data (JSONB)
- Business tables remain clean (no task management fields)
- CheckpointService is generic (shared by all modules)
- Zero code duplication (DRY principle)
- Follows 3-layer architecture principle
- Zero additional cost (no Redis needed, saves ~8,400 CNY/year)

Code Statistics:
- New code: ~1750 lines
- Modified code: ~500 lines
- Test code: ~1800 lines
- Documentation: ~3000 lines

Testing:
- Unit tests: 8/8 passed
- Integration tests: 2/2 passed
- Architecture validation: passed
- Linter errors: 0

Files:
- Platform layer: PostgresCacheAdapter, PgBossQueue, CheckpointService, utils
- ASL module: screeningService, screeningWorker
- DC module: ExtractionController, extractionWorker
- Tests: 11 test files
- Docs: Updated 4 key documents

Status: Phases 1-7 completed; Phases 8-9 pending
This commit is contained in:
2025-12-13 16:10:04 +08:00
parent a3586cdf30
commit fa72beea6c
135 changed files with 17508 additions and 91 deletions

View File

@@ -24,6 +24,8 @@ import { conflictDetectionService } from '../services/ConflictDetectionService.j
import { storage } from '../../../../common/storage/index.js';
import { logger } from '../../../../common/logging/index.js';
import { prisma } from '../../../../config/database.js';
import { jobQueue } from '../../../../common/jobs/index.js';
import { splitIntoChunks, recommendChunkSize } from '../../../../common/jobs/utils.js';
import * as xlsx from 'xlsx';
export class ExtractionController {
@@ -277,22 +279,111 @@ export class ExtractionController {
});
logger.info('[API] Items created', { count: itemsData.length });
// 5. 启动异步任务
// TODO: 使用jobQueue.add()
// 暂时直接调用
logger.info('[API] Starting batch extraction (async)', { taskId: task.id });
// 5. 智能选择处理模式(✅ Platform-Only架构
const QUEUE_THRESHOLD = 50; // 50条以下直接处理50条以上使用队列
const useQueue = itemsData.length >= QUEUE_THRESHOLD;
dualModelExtractionService.batchExtract(task.id)
.then(() => {
logger.info('[API] Batch extraction completed successfully', { taskId: task.id });
})
.catch(err => {
logger.error('[API] Batch extraction failed', {
error: err.message,
stack: err.stack,
taskId: task.id
if (useQueue) {
// ============================================
// 模式A队列模式≥50条
// ============================================
logger.info('[API] Using queue mode with task splitting', {
totalItems: itemsData.length,
threshold: QUEUE_THRESHOLD
});
// 获取所有创建的 items需要获取ID
const items = await prisma.dCExtractionItem.findMany({
where: { taskId: task.id },
orderBy: { rowIndex: 'asc' }
});
// 推荐批次大小
const chunkSize = recommendChunkSize('extraction', items.length);
const chunks = splitIntoChunks(items, chunkSize);
logger.info('[API] Task splitting completed', {
totalItems: items.length,
chunkSize,
totalBatches: chunks.length
});
// 更新任务状态
await prisma.dCExtractionTask.update({
where: { id: task.id },
data: {
status: 'processing',
startedAt: new Date()
}
});
// 推送批次任务到队列
const jobPromises = chunks.map(async (chunk, batchIndex) => {
const itemIds = chunk.map(item => item.id);
return await jobQueue.push('dc:extraction:batch', {
// 业务信息
taskId: task.id,
itemIds,
diseaseType,
reportType,
// ✅ 任务拆分信息(存储在 job.data 中)
batchIndex,
totalBatches: chunks.length,
startIndex: batchIndex * chunkSize,
endIndex: Math.min((batchIndex + 1) * chunkSize, items.length),
// ✅ 进度追踪(初始化)
processedCount: 0,
cleanCount: 0,
conflictCount: 0,
failedCount: 0,
});
});
await Promise.all(jobPromises);
logger.info('[API] All batch jobs pushed to queue', {
taskId: task.id,
totalBatches: chunks.length,
queueType: 'pg-boss'
});
console.log('\n🚀 数据提取任务已启动 (队列模式):');
console.log(` 任务ID: ${task.id}`);
console.log(` 总记录数: ${items.length}`);
console.log(` 批次大小: ${chunkSize} 条/批`);
console.log(` 总批次数: ${chunks.length}`);
console.log(` 队列类型: pg-boss (持久化 + 断点续传)`);
} else {
// ============================================
// 模式B直接模式<50条
// ============================================
logger.info('[API] Using direct mode (small task)', {
totalItems: itemsData.length,
threshold: QUEUE_THRESHOLD
});
// 直接处理(不使用队列,快速响应)
dualModelExtractionService.batchExtract(task.id)
.then(() => {
logger.info('[API] Batch extraction completed successfully', { taskId: task.id });
})
.catch(err => {
logger.error('[API] Batch extraction failed', {
error: err.message,
stack: err.stack,
taskId: task.id
});
});
console.log('\n🚀 数据提取任务已启动 (直接模式):');
console.log(` 任务ID: ${task.id}`);
console.log(` 总记录数: ${itemsData.length}`);
console.log(` 处理模式: 直接处理(快速模式)`);
}
logger.info('[API] Task created', { taskId: task.id, itemCount: data.length });