Files
AIclinicalresearch/backend/src/modules/asl/fulltext-screening/services/FulltextScreeningService.ts
HaHafeng ef967d7d7c build(backend): Complete Node.js backend deployment preparation
Major changes:
- Add Docker configuration (Dockerfile, .dockerignore)
- Fix 200+ TypeScript compilation errors
- Add Prisma schema relations for all models (30+ relations)
- Update tsconfig.json to relax non-critical checks
- Optimize Docker build with local dist strategy

Technical details:
- Exclude test files from TypeScript compilation
- Add manual relations for ASL, PKB, DC, AIA modules
- Use type assertions for JSON/Buffer compatibility
- Fix pg-boss, extractionWorker, and other legacy code issues

Build result:
- Docker image: 838MB (compressed ~186MB)
- Successfully pushed to ACR
- Zero TypeScript compilation errors

Related docs:
- Update deployment documentation
- Add Python microservice SAE deployment guide
2025-12-24 22:12:00 +08:00

716 lines
21 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 全文复筛服务
*
* 功能:
* - 批量处理文献全文筛选
* - 集成LLM服务、验证器、冲突检测
* - 并发控制与进度跟踪
* - 容错与重试机制
*
* @module FulltextScreeningService
*/
import { PrismaClient } from '@prisma/client';
import PQueue from 'p-queue';
import { LLM12FieldsService, LLM12FieldsMode } from '../../common/llm/LLM12FieldsService.js';
import { MedicalLogicValidator } from '../../common/validation/MedicalLogicValidator.js';
import { EvidenceChainValidator } from '../../common/validation/EvidenceChainValidator.js';
import { ConflictDetectionService } from '../../common/validation/ConflictDetectionService.js';
import { logger } from '../../../../common/logging/index.js';
const prisma = new PrismaClient();
// =====================================================
// 类型定义
// =====================================================
export interface FulltextScreeningConfig {
modelA: string;
modelB: string;
promptVersion?: string;
concurrency?: number; // 并发数默认3
maxRetries?: number; // 最大重试次数默认2
skipExtraction?: boolean; // 跳过全文提取(用于测试)
}
export interface ScreeningProgress {
taskId: string;
status: 'pending' | 'running' | 'completed' | 'failed';
totalCount: number;
processedCount: number;
successCount: number;
failedCount: number;
degradedCount: number;
totalTokens: number;
totalCost: number;
startedAt: Date | null;
completedAt: Date | null;
estimatedEndAt: Date | null;
currentLiterature?: string;
}
export interface SingleLiteratureResult {
success: boolean;
isDegraded: boolean;
error?: string;
tokens: number;
cost: number;
}
// =====================================================
// 全文复筛服务
// =====================================================
export class FulltextScreeningService {
private llmService: LLM12FieldsService;
private medicalLogicValidator: MedicalLogicValidator;
private evidenceChainValidator: EvidenceChainValidator;
private conflictDetectionService: ConflictDetectionService;
constructor() {
this.llmService = new LLM12FieldsService();
this.medicalLogicValidator = new MedicalLogicValidator();
this.evidenceChainValidator = new EvidenceChainValidator();
this.conflictDetectionService = new ConflictDetectionService();
}
// =====================================================
// 1. 任务处理入口
// =====================================================
/**
* 启动全文复筛任务
*
* @param projectId - 项目ID
* @param literatureIds - 文献ID列表
* @param config - 筛选配置
* @returns 任务ID
*/
async createAndProcessTask(
projectId: string,
literatureIds: string[],
config: FulltextScreeningConfig
): Promise<string> {
logger.info('Creating fulltext screening task', {
projectId,
literatureCount: literatureIds.length,
config,
});
// 1. 获取项目和文献数据
const project = await prisma.aslScreeningProject.findUnique({
where: { id: projectId },
});
if (!project) {
throw new Error(`Project not found: ${projectId}`);
}
const literatures = await prisma.aslLiterature.findMany({
where: {
id: { in: literatureIds },
projectId,
},
});
if (literatures.length === 0) {
throw new Error('No valid literatures found');
}
logger.info(`Found ${literatures.length} literatures to process`);
// 2. 创建任务记录
const task = await prisma.aslFulltextScreeningTask.create({
data: {
id: `task_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
projectId,
modelA: config.modelA,
modelB: config.modelB,
promptVersion: config.promptVersion || 'v1.0.0-mvp',
status: 'pending',
totalCount: literatures.length,
processedCount: 0,
successCount: 0,
failedCount: 0,
degradedCount: 0,
totalTokens: 0,
totalCost: 0,
},
});
logger.info(`Task created: ${task.id}`);
// 3. 异步处理任务(不等待完成)
this.processTaskInBackground(task.id, literatures, project, config).catch((error) => {
logger.error('Task processing failed', { taskId: task.id, error });
});
return task.id;
}
/**
* 后台处理任务(核心逻辑)
*
* @param taskId - 任务ID
* @param literatures - 文献列表
* @param project - 项目信息
* @param config - 筛选配置
*/
private async processTaskInBackground(
taskId: string,
literatures: any[],
project: any,
config: FulltextScreeningConfig
): Promise<void> {
const startTime = Date.now();
try {
// 1. 更新任务状态为运行中
await prisma.aslFulltextScreeningTask.update({
where: { id: taskId },
data: {
status: 'running',
startedAt: new Date(),
},
});
logger.info(`Task started: ${taskId}`, {
totalCount: literatures.length,
concurrency: config.concurrency || 3,
});
// 2. 构建PICOS上下文
const picosContext = {
P: project.picoCriteria.P || '',
I: project.picoCriteria.I || '',
C: project.picoCriteria.C || '',
O: project.picoCriteria.O || '',
S: project.picoCriteria.S || '',
inclusionCriteria: project.inclusionCriteria || '',
exclusionCriteria: project.exclusionCriteria || '',
};
// 3. 并发处理文献
const concurrency = config.concurrency || 3;
const queue = new PQueue({ concurrency });
let processedCount = 0;
let successCount = 0;
let failedCount = 0;
let degradedCount = 0;
let totalTokens = 0;
let totalCost = 0;
const tasks = literatures.map((literature, index) =>
queue.add(async () => {
const litStartTime = Date.now();
logger.info(`[${index + 1}/${literatures.length}] Processing: ${literature.title}`);
try {
// 处理单篇文献
const result = await this.screenLiteratureWithRetry(
taskId,
project.id,
literature,
picosContext,
config
);
// 更新统计
processedCount++;
if (result.success) {
successCount++;
if (result.isDegraded) {
degradedCount++;
}
} else {
failedCount++;
}
totalTokens += result.tokens;
totalCost += result.cost;
// 更新进度
await this.updateTaskProgress(taskId, {
processedCount,
successCount,
failedCount,
degradedCount,
totalTokens,
totalCost,
startTime,
});
const litDuration = Date.now() - litStartTime;
logger.info(
`[${index + 1}/${literatures.length}] ✅ Success: ${literature.title} (${litDuration}ms, ${result.tokens} tokens, $${result.cost.toFixed(4)})`
);
} catch (error: any) {
processedCount++;
failedCount++;
logger.error(`[${index + 1}/${literatures.length}] ❌ Failed: ${literature.title}`, {
error: error.message,
});
// 更新进度(失败)
await this.updateTaskProgress(taskId, {
processedCount,
successCount,
failedCount,
degradedCount,
totalTokens,
totalCost,
startTime,
});
}
})
);
// 等待所有任务完成
await Promise.all(tasks);
// 4. 完成任务
await this.completeTask(taskId, {
status: 'completed',
totalTokens,
totalCost,
successCount,
failedCount,
degradedCount,
});
const duration = Date.now() - startTime;
logger.info(`Task completed: ${taskId}`, {
duration: `${(duration / 1000).toFixed(1)}s`,
totalCount: literatures.length,
successCount,
failedCount,
degradedCount,
totalTokens,
totalCost: `$${totalCost.toFixed(4)}`,
});
} catch (error: any) {
logger.error(`Task failed: ${taskId}`, { error: error.message, stack: error.stack });
await prisma.aslFulltextScreeningTask.update({
where: { id: taskId },
data: {
status: 'failed',
completedAt: new Date(),
errorMessage: error.message,
errorStack: error.stack,
},
});
}
}
// =====================================================
// 2. 单篇文献筛选
// =====================================================
/**
* 处理单篇文献(带重试)
*
* @param taskId - 任务ID
* @param projectId - 项目ID
* @param literature - 文献信息
* @param picosContext - PICOS上下文
* @param config - 筛选配置
* @returns 处理结果
*/
private async screenLiteratureWithRetry(
taskId: string,
projectId: string,
literature: any,
picosContext: any,
config: FulltextScreeningConfig
): Promise<SingleLiteratureResult> {
const maxRetries = config.maxRetries || 2;
for (let attempt = 1; attempt <= maxRetries; attempt++) {
try {
return await this.screenLiterature(taskId, projectId, literature, picosContext, config);
} catch (error: any) {
logger.warn(`Retry ${attempt}/${maxRetries} for literature ${literature.id}`, {
error: error.message,
});
if (attempt === maxRetries) {
// 最后一次重试失败,抛出错误
throw error;
}
// 等待后重试(指数退避)
await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
}
}
throw new Error('Unreachable code');
}
/**
* 处理单篇文献(核心逻辑)
*
* @param taskId - 任务ID
* @param projectId - 项目ID
* @param literature - 文献信息
* @param picosContext - PICOS上下文
* @param config - 筛选配置
* @returns 处理结果
*/
private async screenLiterature(
taskId: string,
projectId: string,
literature: any,
picosContext: any,
config: FulltextScreeningConfig
): Promise<SingleLiteratureResult> {
// 1. 获取PDF Buffer
let pdfBuffer: Buffer;
let filename: string;
if (config.skipExtraction) {
// 测试模式创建一个简单的文本Buffer模拟PDF
const testContent = `# ${literature.title}\n\n## Abstract\n${literature.abstract}`;
pdfBuffer = Buffer.from(testContent, 'utf-8');
filename = `test_${literature.id}.txt`;
logger.info(`[TEST MODE] Using title+abstract as test PDF`);
} else {
// 生产模式从存储中获取PDF
if (!literature.pdfStorageRef) {
throw new Error(`No PDF available for literature ${literature.id}`);
}
// TODO: 从OSS/Dify加载PDF Buffer
// pdfBuffer = await pdfStorageService.downloadPDF(literature.pdfStorageRef);
// 临时方案:使用测试数据
const testContent = `# ${literature.title}\n\n## Abstract\n${literature.abstract}`;
pdfBuffer = Buffer.from(testContent, 'utf-8');
filename = `${literature.id}.pdf`;
logger.warn(`[TODO] PDF loading not implemented, using test data for ${literature.id}`);
}
// 2. 调用LLM服务双模型
const llmResult = await this.llmService.processDualModels(
LLM12FieldsMode.SCREENING,
config.modelA,
config.modelB,
pdfBuffer,
filename,
picosContext
);
// 检查至少有一个模型成功
if (!llmResult.resultA && !llmResult.resultB) {
throw new Error(`Both models failed in dual-model processing`);
}
// 3. 验证器处理
// 3.1 医学逻辑验证
const medicalLogicIssuesA = llmResult.resultA?.result
? this.medicalLogicValidator.validate(llmResult.resultA.result)
: [];
const medicalLogicIssuesB = llmResult.resultB?.result
? this.medicalLogicValidator.validate(llmResult.resultB.result)
: [];
// 3.2 证据链验证
const evidenceChainIssuesA = llmResult.resultA?.result
? this.evidenceChainValidator.validate(llmResult.resultA.result)
: [];
const evidenceChainIssuesB = llmResult.resultB?.result
? this.evidenceChainValidator.validate(llmResult.resultB.result)
: [];
// 3.3 冲突检测
let conflictResult = null;
if (llmResult.resultA?.result && llmResult.resultB?.result) {
conflictResult = this.conflictDetectionService.detectScreeningConflict(
llmResult.resultA.result,
llmResult.resultB.result
);
}
// 4. 保存结果到数据库
const totalTokens = (llmResult.resultA?.tokenUsage || 0) + (llmResult.resultB?.tokenUsage || 0);
const totalCost = (llmResult.resultA?.cost || 0) + (llmResult.resultB?.cost || 0);
await prisma.aslFulltextScreeningResult.create({
data: {
id: `result_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
taskId,
projectId,
literatureId: literature.id,
// Model A 结果
modelAName: config.modelA,
modelAStatus: llmResult.resultA ? 'success' : 'failed',
modelAFields: llmResult.resultA?.result?.fields || null,
modelAOverall: llmResult.resultA?.result?.overall || null,
modelAProcessingLog: llmResult.resultA?.result?.processingLog || null,
modelAVerification: llmResult.resultA?.result?.verification || null,
modelATokens: llmResult.resultA?.tokenUsage || 0,
modelACost: llmResult.resultA?.cost || 0,
modelAError: null,
// Model B 结果
modelBName: config.modelB,
modelBStatus: llmResult.resultB ? 'success' : 'failed',
modelBFields: llmResult.resultB?.result?.fields || null,
modelBOverall: llmResult.resultB?.result?.overall || null,
modelBProcessingLog: llmResult.resultB?.result?.processingLog || null,
modelBVerification: llmResult.resultB?.result?.verification || null,
modelBTokens: llmResult.resultB?.tokenUsage || 0,
modelBCost: llmResult.resultB?.cost || 0,
modelBError: null,
// 验证结果
medicalLogicIssues: {
modelA: medicalLogicIssuesA,
modelB: medicalLogicIssuesB,
} as any,
evidenceChainIssues: {
modelA: evidenceChainIssuesA,
modelB: evidenceChainIssuesB,
} as any,
// 冲突检测
isConflict: conflictResult ? conflictResult.hasConflict : false,
conflictSeverity: conflictResult?.severity || null,
conflictFields: conflictResult?.conflictFields || [],
conflictDetails: (conflictResult || null) as any,
reviewPriority: conflictResult?.reviewPriority || 50,
// 处理状态
processingStatus: 'completed',
isDegraded: llmResult.degradedMode || false,
degradedModel: llmResult.failedModel || null,
processedAt: new Date(),
promptVersion: config.promptVersion || 'v1.0.0-mvp',
// 原始输出(用于审计)
rawOutputA: (llmResult.resultA || null) as any,
rawOutputB: (llmResult.resultB || null) as any,
},
});
// 5. 返回结果
return {
success: true,
isDegraded: llmResult.degradedMode || false,
tokens: totalTokens,
cost: totalCost,
};
}
// =====================================================
// 3. 进度更新
// =====================================================
/**
* 更新任务进度
*
* @param taskId - 任务ID
* @param progress - 进度信息
*/
private async updateTaskProgress(
taskId: string,
progress: {
processedCount: number;
successCount: number;
failedCount: number;
degradedCount: number;
totalTokens: number;
totalCost: number;
startTime: number;
}
): Promise<void> {
// 计算预估结束时间
const elapsed = Date.now() - progress.startTime;
const avgTimePerItem = elapsed / progress.processedCount;
const task = await prisma.aslFulltextScreeningTask.findUnique({
where: { id: taskId },
});
if (!task) {
logger.warn(`Task not found: ${taskId}`);
return;
}
const remainingItems = task.totalCount - progress.processedCount;
const estimatedRemainingTime = avgTimePerItem * remainingItems;
const estimatedEndAt = new Date(Date.now() + estimatedRemainingTime);
// 更新数据库
await prisma.aslFulltextScreeningTask.update({
where: { id: taskId },
data: {
processedCount: progress.processedCount,
successCount: progress.successCount,
failedCount: progress.failedCount,
degradedCount: progress.degradedCount,
totalTokens: progress.totalTokens,
totalCost: progress.totalCost,
estimatedEndAt,
},
});
}
// =====================================================
// 4. 任务完成
// =====================================================
/**
* 标记任务完成
*
* @param taskId - 任务ID
* @param summary - 任务摘要
*/
private async completeTask(
taskId: string,
summary: {
status: 'completed' | 'failed';
totalTokens: number;
totalCost: number;
successCount: number;
failedCount: number;
degradedCount: number;
}
): Promise<void> {
await prisma.aslFulltextScreeningTask.update({
where: { id: taskId },
data: {
status: summary.status,
completedAt: new Date(),
totalTokens: summary.totalTokens,
totalCost: summary.totalCost,
successCount: summary.successCount,
failedCount: summary.failedCount,
degradedCount: summary.degradedCount,
},
});
logger.info(`Task marked as ${summary.status}: ${taskId}`);
}
// =====================================================
// 5. 查询接口
// =====================================================
/**
* 获取任务进度
*
* @param taskId - 任务ID
* @returns 进度信息
*/
async getTaskProgress(taskId: string): Promise<ScreeningProgress | null> {
const task = await prisma.aslFulltextScreeningTask.findUnique({
where: { id: taskId },
});
if (!task) {
return null;
}
return {
taskId: task.id,
status: task.status as any,
totalCount: task.totalCount,
processedCount: task.processedCount,
successCount: task.successCount,
failedCount: task.failedCount,
degradedCount: task.degradedCount,
totalTokens: task.totalTokens || 0,
totalCost: task.totalCost || 0,
startedAt: task.startedAt,
completedAt: task.completedAt,
estimatedEndAt: task.estimatedEndAt,
};
}
/**
* 获取任务结果列表
*
* @param taskId - 任务ID
* @param filter - 过滤条件
* @returns 结果列表
*/
async getTaskResults(
taskId: string,
filter?: {
conflictOnly?: boolean;
page?: number;
pageSize?: number;
}
): Promise<{ results: any[]; total: number }> {
const page = filter?.page || 1;
const pageSize = filter?.pageSize || 50;
const skip = (page - 1) * pageSize;
const where: any = { taskId };
if (filter?.conflictOnly) {
where.isConflict = true;
}
const [results, total] = await Promise.all([
prisma.aslFulltextScreeningResult.findMany({
where,
include: {
literature: {
select: {
id: true,
title: true,
authors: true,
journal: true,
publicationYear: true,
},
},
},
orderBy: [
{ isConflict: 'desc' },
{ reviewPriority: 'desc' },
{ processedAt: 'desc' },
],
skip,
take: pageSize,
}),
prisma.aslFulltextScreeningResult.count({ where }),
]);
return { results, total };
}
/**
* 更新人工复核决策
*
* @param resultId - 结果ID
* @param decision - 决策信息
*/
async updateReviewDecision(
resultId: string,
decision: {
finalDecision: 'include' | 'exclude';
finalDecisionBy: string;
exclusionReason?: string;
reviewNotes?: string;
}
): Promise<void> {
await prisma.aslFulltextScreeningResult.update({
where: { id: resultId },
data: {
finalDecision: decision.finalDecision,
finalDecisionBy: decision.finalDecisionBy,
finalDecisionAt: new Date(),
exclusionReason: decision.exclusionReason || null,
reviewNotes: decision.reviewNotes || null,
},
});
logger.info(`Review decision updated: ${resultId}`, { decision: decision.finalDecision });
}
}
// 导出单例
export const fulltextScreeningService = new FulltextScreeningService();