feat(dc): Complete Phase 1 - Portal workbench page development
Summary: - Implement DC module Portal page with 3 tool cards - Create ToolCard component with decorative background and hover animations - Implement TaskList component with table layout and progress bars - Implement AssetLibrary component with tab switching and file cards - Complete database verification (4 tables confirmed) - Complete backend API verification (6 endpoints ready) - Optimize UI to match prototype design (V2.html) Frontend Components (~715 lines): - components/ToolCard.tsx - Tool cards with animations - components/TaskList.tsx - Recent tasks table view - components/AssetLibrary.tsx - Data asset library with tabs - hooks/useRecentTasks.ts - Task state management - hooks/useAssets.ts - Asset state management - pages/Portal.tsx - Main portal page - types/portal.ts - TypeScript type definitions Backend Verification: - Backend API: 1495 lines code verified - Database: dc_schema with 4 tables verified - API endpoints: 6 endpoints tested (templates API works) Documentation: - Database verification report - Backend API test report - Phase 1 completion summary - UI optimization report - Development task checklist - Development plan for Tool B Status: Phase 1 completed (100%), ready for browser testing Next: Phase 2 - Tool B Step 1 and 2 development
This commit is contained in:
@@ -0,0 +1,393 @@
|
||||
/**
 * DC module - dual-model extraction service
 *
 * Features:
 * - Concurrently calls DeepSeek-V3 and Qwen-Max for text extraction
 * - PII masking
 * - JSON parsing with fallbacks
 * - Token accounting
 * - Async task management
 *
 * Reused platform capabilities:
 * - LLMFactory: LLM invocation
 * - jobQueue: async tasks
 * - logger: logging
 * - prisma: database access
 */
|
||||
|
||||
import { LLMFactory } from '../../../../common/llm/adapters/LLMFactory.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
import { prisma } from '../../../../config/database.js';
|
||||
|
||||
/**
 * Input for a single dual-model extraction call.
 */
export interface ExtractionInput {
  /** Source text to extract from; it is PII-masked before being placed in the prompt. */
  text: string;
  /**
   * Fields to extract. `name` is the key that must be present in the model's
   * JSON output; `desc` is a description of the field.
   */
  fields: { name: string; desc: string }[];
  /** Prompt template; the masked source text is appended after it. */
  promptTemplate: string;
}
|
||||
|
||||
/**
 * Result of running one model over one piece of text.
 */
export interface ExtractionOutput {
  /** Extracted values keyed by field name. */
  result: Record<string, string>;
  /** Token count reported by the model for this call (0 when none is reported). */
  tokensUsed: number;
  /** Unparsed model response, kept for auditing and debugging. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- kept as `any` so existing callers keep compiling
  rawOutput: any;
}
|
||||
|
||||
/**
 * Runs the same extraction prompt against two LLMs (DeepSeek-V3 and Qwen-Max)
 * and records where their answers agree or conflict.
 *
 * Responsibilities visible in this class:
 * - masking PII in the source text before it is sent to a model,
 * - building the prompt and calling both models concurrently,
 * - tolerant parsing of the models' JSON output,
 * - driving a whole extraction task item by item and persisting progress.
 */
export class DualModelExtractionService {
  /** Placeholder stored for every field when a model's output cannot be parsed. */
  private static readonly PARSE_FAILED = '解析失败';

  /**
   * Extracts the requested fields with both models concurrently.
   *
   * @param input  Text, field definitions and prompt template.
   * @param taskId Task id, used for log correlation only.
   * @param itemId Item id, used for log correlation only.
   * @returns The output of both models (A = DeepSeek, B = Qwen).
   * @throws Error when either model call fails; the message lists the
   *         underlying reason for each failed model.
   */
  async extract(input: ExtractionInput, taskId: string, itemId: string): Promise<{
    resultA: ExtractionOutput;
    resultB: ExtractionOutput;
  }> {
    try {
      logger.info('[DualExtraction] Starting extraction', { taskId, itemId });

      // 1. Mask PII before anything leaves the process.
      const maskedText = this.maskPII(input.text);

      // 2. Build the prompt.
      const prompt = this.buildPrompt(maskedText, input.fields, input.promptTemplate);

      // 3. Call both models concurrently; allSettled lets us report both failures.
      const [resultA, resultB] = await Promise.allSettled([
        this.callModel('deepseek', prompt, input.fields),
        this.callModel('qwen', prompt, input.fields)
      ]);

      // 4. Both results are required; fail if either model failed.
      if (resultA.status === 'rejected' || resultB.status === 'rejected') {
        const failures: string[] = [];
        if (resultA.status === 'rejected') {
          failures.push(`deepseek: ${this.describeError(resultA.reason)}`);
        }
        if (resultB.status === 'rejected') {
          failures.push(`qwen: ${this.describeError(resultB.reason)}`);
        }

        logger.error('[DualExtraction] One or both models failed', {
          taskId,
          itemId,
          errorA: resultA.status === 'rejected' ? resultA.reason : null,
          errorB: resultB.status === 'rejected' ? resultB.reason : null
        });

        // Keep the reasons in the message: callers persist String(error).
        throw new Error(`Dual model extraction failed: ${failures.join('; ')}`);
      }

      logger.info('[DualExtraction] Extraction completed', {
        taskId,
        itemId,
        tokensA: resultA.value.tokensUsed,
        tokensB: resultB.value.tokensUsed
      });

      return {
        resultA: resultA.value,
        resultB: resultB.value
      };
    } catch (error) {
      logger.error('[DualExtraction] Extraction failed', { error, taskId, itemId });
      throw error;
    }
  }

  /**
   * Masks PII in free text using regular expressions:
   * - ID-card number: 330102********1234 (first 6 and last 4 characters kept)
   * - mobile number:  138****5678
   * - name following "患者" or "姓名:": first character kept, rest replaced by `*`
   *
   * ID-card numbers are masked BEFORE mobile numbers. An 18-digit ID with a
   * 19xx birth year always contains an 11-digit run that looks like a mobile
   * number; masking phones first would corrupt the ID so the ID pattern no
   * longer matches, leaving most of it exposed.
   */
  private maskPII(text: string): string {
    let masked = text;

    // ID-card number: keep the 6-digit region code and the last 4 characters.
    masked = masked.replace(
      /\d{6}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dxX]/g,
      (match) => match.substring(0, 6) + '********' + match.substring(14)
    );

    // Mobile number: keep the first 3 and last 4 digits.
    masked = masked.replace(
      /1[3-9]\d{9}/g,
      (match) => match.substring(0, 3) + '****' + match.substring(7)
    );

    // Simple name masking for "患者xxx" / "姓名:xxx": keep the first character only.
    masked = masked.replace(
      /(患者|姓名[::])\s*([^\s,。,]{2,4})/g,
      (_match, prefix: string, name: string) => prefix + name[0] + '*'.repeat(name.length - 1)
    );

    return masked;
  }

  /**
   * Builds the final prompt: the template, then the (masked) source text,
   * then an instruction to answer with JSON only.
   *
   * `fields` is accepted for interface stability but is not used here.
   */
  private buildPrompt(text: string, fields: { name: string; desc: string }[], template: string): string {
    // Joined explicitly so source indentation can never leak into the prompt.
    return [
      template,
      '',
      '**病历原文:**',
      text,
      '',
      '请严格按照JSON格式输出,不要有任何额外文字。'
    ].join('\n');
  }

  /**
   * Calls a single model and parses its answer.
   *
   * @param modelType Which model to call.
   * @param prompt    Fully built prompt.
   * @param fields    Fields expected in the model's JSON answer.
   * @returns Parsed result, token usage and the raw response text.
   * @throws Rethrows whatever the LLM client throws, after logging it.
   */
  private async callModel(
    modelType: 'deepseek' | 'qwen',
    prompt: string,
    fields: { name: string; desc: string }[]
  ): Promise<ExtractionOutput> {
    const tag = `[${modelType.toUpperCase()}]`;

    try {
      // Obtain the client through the shared LLM factory.
      const modelName = modelType === 'deepseek' ? 'deepseek-v3' : 'qwen-max';
      const llm = LLMFactory.createLLM(modelName);

      logger.info(`${tag} Calling model`, { modelName });

      const response = await llm.generateText(prompt, {
        temperature: 0, // maximum determinism
        maxTokens: 1000
      });

      logger.info(`${tag} Model responded`, {
        modelName,
        tokensUsed: response.tokensUsed
      });

      // Tolerant JSON parsing (see parseJSON).
      const result = this.parseJSON(response.text, fields);

      return {
        result,
        tokensUsed: response.tokensUsed || 0,
        rawOutput: response.text
      };
    } catch (error) {
      logger.error(`${tag} Model call failed`, { error, modelType });
      throw error;
    }
  }

  /**
   * Parses a model answer into `{ fieldName: string }`, trying in order:
   * 1. the whole text as JSON,
   * 2. the contents of a fenced code block,
   * 3. each balanced `{...}` object found in the text, left to right.
   *
   * The first candidate that parses and contains every requested field wins.
   * The result holds exactly the requested fields, in `fields` order, with
   * every value converted to a string, so results from different models can
   * be compared reliably. If nothing parses, every field is set to the
   * parse-failure placeholder.
   */
  private parseJSON(text: string, fields: { name: string; desc: string }[]): Record<string, string> {
    for (const candidate of this.jsonCandidates(text)) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(candidate);
      } catch {
        continue; // not valid JSON; try the next candidate
      }

      if (this.validateFields(parsed, fields)) {
        const normalized: Record<string, string> = {};
        for (const field of fields) {
          normalized[field.name] = this.stringifyValue(parsed[field.name]);
        }
        return normalized;
      }
    }

    // Every strategy failed: return a placeholder for each field.
    logger.warn('[JSON] All parse strategies failed', { text });
    const emptyResult: Record<string, string> = {};
    for (const field of fields) {
      emptyResult[field.name] = DualModelExtractionService.PARSE_FAILED;
    }
    return emptyResult;
  }

  /** Yields candidate JSON strings for parseJSON, most specific strategy first. */
  private *jsonCandidates(text: string): Generator<string> {
    // Strategy 1: the text itself.
    yield text;

    // Strategy 2: a fenced code block, with or without the "json" tag.
    const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
    if (fenced) {
      yield fenced[1];
    }

    // Strategy 3: balanced objects. Restarting one character after each "{"
    // also reaches objects nested inside a wrapper or behind a stray "{".
    let searchFrom = 0;
    for (;;) {
      const start = text.indexOf('{', searchFrom);
      if (start === -1) {
        return;
      }
      const end = this.findObjectEnd(text, start);
      if (end !== -1) {
        yield text.slice(start, end + 1);
      }
      searchFrom = start + 1;
    }
  }

  /**
   * Returns the index of the "}" that closes the "{" at `start`, or -1 if it
   * is never closed. Braces inside JSON string literals are ignored.
   */
  private findObjectEnd(text: string, start: number): number {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];

      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }

    return -1;
  }

  /** Converts a parsed JSON value to the string stored in a result. */
  private stringifyValue(value: unknown): string {
    if (typeof value === 'string') {
      return value;
    }
    if (value === null || value === undefined) {
      return '';
    }
    if (typeof value === 'object') {
      return JSON.stringify(value);
    }
    return String(value);
  }

  /**
   * Checks that `parsed` is a plain (non-array) object containing every
   * requested field as an own property.
   */
  private validateFields(
    parsed: unknown,
    fields: { name: string; desc: string }[]
  ): parsed is Record<string, unknown> {
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return false;
    }

    // Use the prototype method: model output could contain a "hasOwnProperty" key.
    return fields.every(f => Object.prototype.hasOwnProperty.call(parsed, f.name));
  }

  /** True when every value of a non-empty result is the parse-failure placeholder. */
  private isParseFailure(result: Record<string, string>): boolean {
    const values = Object.values(result);
    return values.length > 0 && values.every(v => v === DualModelExtractionService.PARSE_FAILED);
  }

  /** Short, human-readable description of an unknown thrown value. */
  private describeError(reason: unknown): string {
    return reason instanceof Error ? reason.message : String(reason);
  }

  /**
   * Runs extraction for every item of a task, sequentially, persisting each
   * item's result and the task's running counters.
   *
   * A failing item is marked `failed` and does not stop the batch. Any other
   * failure marks the task `failed` and is rethrown.
   *
   * @param taskId Id of the extraction task to process.
   * @throws Error when the task or its template is missing or malformed, or
   *         when a database operation outside the per-item extraction fails.
   */
  async batchExtract(taskId: string): Promise<void> {
    try {
      logger.info('[Batch] Starting batch extraction', { taskId });

      // 1. Load the task with its items.
      const task = await prisma.dCExtractionTask.findUnique({
        where: { id: taskId },
        include: { items: true }
      });

      if (!task) {
        throw new Error(`Task not found: ${taskId}`);
      }

      // 2. Mark the task as running.
      await prisma.dCExtractionTask.update({
        where: { id: taskId },
        data: {
          status: 'processing',
          startedAt: new Date()
        }
      });

      // 3. Load the template for this disease/report type.
      const template = await prisma.dCTemplate.findUnique({
        where: {
          diseaseType_reportType: {
            diseaseType: task.diseaseType,
            reportType: task.reportType
          }
        }
      });

      if (!template) {
        throw new Error(`Template not found: ${task.diseaseType}/${task.reportType}`);
      }

      // Fail fast on a malformed template instead of failing every item later.
      const rawFields: unknown = template.fields;
      if (!Array.isArray(rawFields)) {
        throw new Error(`Template fields are not an array: ${task.diseaseType}/${task.reportType}`);
      }
      const fields = rawFields as { name: string; desc: string }[];

      // 4. Process items one at a time.
      let processedCount = 0;
      let cleanCount = 0;
      let conflictCount = 0;
      let totalTokens = 0;

      for (const item of task.items) {
        try {
          const { resultA, resultB } = await this.extract(
            {
              text: item.originalText,
              fields,
              promptTemplate: template.promptTemplate
            },
            taskId,
            item.id
          );

          // Simple conflict check (a dedicated ConflictDetectionService is planned).
          // An unparseable answer is always a conflict: otherwise two failed
          // parses would compare equal and the placeholder would be adopted
          // as the final result.
          const hasConflict =
            this.isParseFailure(resultA.result) ||
            this.isParseFailure(resultB.result) ||
            fields.some(f => resultA.result[f.name] !== resultB.result[f.name]);

          await prisma.dCExtractionItem.update({
            where: { id: item.id },
            data: {
              resultA: resultA.result,
              resultB: resultB.result,
              tokensA: resultA.tokensUsed,
              tokensB: resultB.tokensUsed,
              status: hasConflict ? 'conflict' : 'clean',
              // Auto-adopt the result when both models agree.
              // NOTE(review): if finalResult is a Prisma Json column, a plain
              // null may need Prisma.JsonNull/DbNull - confirm against the schema.
              finalResult: hasConflict ? null : resultA.result
            }
          });

          processedCount++;
          if (hasConflict) {
            conflictCount++;
          } else {
            cleanCount++;
          }
          totalTokens += resultA.tokensUsed + resultB.tokensUsed;

          // Persist progress after every item.
          await prisma.dCExtractionTask.update({
            where: { id: taskId },
            data: {
              processedCount,
              cleanCount,
              conflictCount,
              totalTokens
            }
          });
        } catch (error) {
          logger.error('[Batch] Item extraction failed', { error, itemId: item.id });

          await prisma.dCExtractionItem.update({
            where: { id: item.id },
            data: {
              status: 'failed',
              error: String(error)
            }
          });
        }
      }

      // 5. Mark the task as finished.
      await prisma.dCExtractionTask.update({
        where: { id: taskId },
        data: {
          status: 'completed',
          completedAt: new Date()
        }
      });

      logger.info('[Batch] Batch extraction completed', {
        taskId,
        processedCount,
        cleanCount,
        conflictCount,
        totalTokens
      });
    } catch (error) {
      logger.error('[Batch] Batch extraction failed', { error, taskId });

      // Best effort: this update itself fails when the task does not exist or
      // the database is unavailable, and must not mask the original error.
      try {
        await prisma.dCExtractionTask.update({
          where: { id: taskId },
          data: {
            status: 'failed',
            error: String(error)
          }
        });
      } catch (updateError) {
        logger.error('[Batch] Failed to mark task as failed', { error: updateError, taskId });
      }

      throw error;
    }
  }
}
|
||||
|
||||
// 导出单例
|
||||
export const dualModelExtractionService = new DualModelExtractionService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user