feat(platform): Complete Postgres-Only architecture refactoring (Phase 1-7)
Major Changes:
- Implement Platform-Only architecture pattern (unified task management)
- Add PostgresCacheAdapter for unified caching (platform_schema.app_cache)
- Add PgBossQueue for job queue management (platform_schema.job)
- Implement CheckpointService using job.data (generic for all modules)
- Add intelligent threshold-based dual-mode processing (THRESHOLD=50)
- Add task splitting mechanism (auto chunk size recommendation; see the helper sketch after this message)
- Refactor ASL screening service with smart mode selection
- Refactor DC extraction service with smart mode selection
- Register workers for ASL and DC modules

Technical Highlights:
- All task management data stored in platform_schema.job.data (JSONB)
- Business tables remain clean (no task management fields)
- CheckpointService is generic (shared by all modules)
- Zero code duplication (DRY principle)
- Follows 3-layer architecture principle
- Zero additional cost (no Redis needed; saves 8,400 CNY/year)

Code Statistics:
- New code: ~1,750 lines
- Modified code: ~500 lines
- Test code: ~1,800 lines
- Documentation: ~3,000 lines

Testing:
- Unit tests: 8/8 passed
- Integration tests: 2/2 passed
- Architecture validation: passed
- Linter errors: 0

Files:
- Platform layer: PostgresCacheAdapter, PgBossQueue, CheckpointService, utils
- ASL module: screeningService, screeningWorker
- DC module: ExtractionController, extractionWorker
- Tests: 11 test files
- Docs: updated 4 key documents

Status: Phases 1-7 completed, Phases 8-9 pending
@@ -24,6 +24,8 @@ import { conflictDetectionService } from '../services/ConflictDetectionService.j
 import { storage } from '../../../../common/storage/index.js';
 import { logger } from '../../../../common/logging/index.js';
 import { prisma } from '../../../../config/database.js';
+import { jobQueue } from '../../../../common/jobs/index.js';
+import { splitIntoChunks, recommendChunkSize } from '../../../../common/jobs/utils.js';
 import * as xlsx from 'xlsx';
 
 export class ExtractionController {
@@ -277,22 +279,111 @@ export class ExtractionController {
     });
     logger.info('[API] Items created', { count: itemsData.length });
 
-    // 5. Kick off the async task
-    // TODO: switch to jobQueue.add()
-    // Direct call for now
-    logger.info('[API] Starting batch extraction (async)', { taskId: task.id });
-    dualModelExtractionService.batchExtract(task.id)
-      .then(() => {
-        logger.info('[API] Batch extraction completed successfully', { taskId: task.id });
-      })
-      .catch(err => {
-        logger.error('[API] Batch extraction failed', {
-          error: err.message,
-          stack: err.stack,
-          taskId: task.id
-        });
-      });
+    // 5. Smart processing-mode selection (✅ Platform-Only architecture)
+    const QUEUE_THRESHOLD = 50; // fewer than 50 items: process directly; 50 or more: use the queue
+    const useQueue = itemsData.length >= QUEUE_THRESHOLD;
+
+    if (useQueue) {
+      // ============================================
+      // Mode A: queue mode (≥ 50 items)
+      // ============================================
+      logger.info('[API] Using queue mode with task splitting', {
+        totalItems: itemsData.length,
+        threshold: QUEUE_THRESHOLD
+      });
+
+      // Fetch all created items (their IDs are needed)
+      const items = await prisma.dCExtractionItem.findMany({
+        where: { taskId: task.id },
+        orderBy: { rowIndex: 'asc' }
+      });
+
+      // Recommend a batch size
+      const chunkSize = recommendChunkSize('extraction', items.length);
+      const chunks = splitIntoChunks(items, chunkSize);
+
+      logger.info('[API] Task splitting completed', {
+        totalItems: items.length,
+        chunkSize,
+        totalBatches: chunks.length
+      });
+
+      // Update the task status
+      await prisma.dCExtractionTask.update({
+        where: { id: task.id },
+        data: {
+          status: 'processing',
+          startedAt: new Date()
+        }
+      });
+
+      // Push the batch jobs onto the queue
+      const jobPromises = chunks.map(async (chunk, batchIndex) => {
+        const itemIds = chunk.map(item => item.id);
+
+        return await jobQueue.push('dc:extraction:batch', {
+          // Business payload
+          taskId: task.id,
+          itemIds,
+          diseaseType,
+          reportType,
+
+          // ✅ Task-splitting info (stored in job.data)
+          batchIndex,
+          totalBatches: chunks.length,
+          startIndex: batchIndex * chunkSize,
+          endIndex: Math.min((batchIndex + 1) * chunkSize, items.length),
+
+          // ✅ Progress tracking (initial values)
+          processedCount: 0,
+          cleanCount: 0,
+          conflictCount: 0,
+          failedCount: 0,
+        });
+      });
+
+      await Promise.all(jobPromises);
+
+      logger.info('[API] All batch jobs pushed to queue', {
+        taskId: task.id,
+        totalBatches: chunks.length,
+        queueType: 'pg-boss'
+      });
+
+      console.log('\n🚀 Data extraction task started (queue mode):');
+      console.log(`  Task ID: ${task.id}`);
+      console.log(`  Total records: ${items.length}`);
+      console.log(`  Batch size: ${chunkSize} items/batch`);
+      console.log(`  Total batches: ${chunks.length}`);
+      console.log(`  Queue type: pg-boss (persistent, resumable)`);
+
+    } else {
+      // ============================================
+      // Mode B: direct mode (< 50 items)
+      // ============================================
+      logger.info('[API] Using direct mode (small task)', {
+        totalItems: itemsData.length,
+        threshold: QUEUE_THRESHOLD
+      });
+
+      // Process directly (no queue, fast response)
+      dualModelExtractionService.batchExtract(task.id)
+        .then(() => {
+          logger.info('[API] Batch extraction completed successfully', { taskId: task.id });
+        })
+        .catch(err => {
+          logger.error('[API] Batch extraction failed', {
+            error: err.message,
+            stack: err.stack,
+            taskId: task.id
+          });
+        });
+
+      console.log('\n🚀 Data extraction task started (direct mode):');
+      console.log(`  Task ID: ${task.id}`);
+      console.log(`  Total records: ${itemsData.length}`);
+      console.log(`  Processing mode: direct (fast path)`);
+    }
+
     logger.info('[API] Task created', { taskId: task.id, itemCount: data.length });
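The committed worker files (extractionWorker, screeningWorker) are not part of this excerpt. As a rough sketch of how a worker might consume the dc:extraction:batch jobs pushed above and checkpoint progress into job.data, assuming a pg-boss-style jobQueue.work(name, handler) wrapper and a hypothetical checkpointService.save(jobId, data) API (neither is confirmed by this diff):

    import { jobQueue } from '../../../../common/jobs/index.js';
    import { logger } from '../../../../common/logging/index.js';

    // extractBatchItem and checkpointService are hypothetical collaborators.
    export function registerExtractionWorker(extractBatchItem, checkpointService) {
      jobQueue.work('dc:extraction:batch', async (job) => {
        const { taskId, itemIds, batchIndex, totalBatches } = job.data;

        // Resume from the last checkpoint: processedCount was initialized
        // to 0 when the job was pushed, and advances after every item.
        let processedCount = job.data.processedCount ?? 0;

        for (const itemId of itemIds.slice(processedCount)) {
          await extractBatchItem(taskId, itemId);
          processedCount += 1;
          // Persist progress into job.data so a restarted worker resumes mid-batch
          await checkpointService.save(job.id, { ...job.data, processedCount });
        }

        logger.info('[Worker] Batch completed', { taskId, batchIndex, totalBatches });
      });
    }

Keeping the checkpoint counters inside job.data is what lets the business tables stay free of task-management fields, as the commit message claims.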