feat(dc): Complete Phase 1 - Portal workbench page development

Summary:
- Implement DC module Portal page with 3 tool cards
- Create ToolCard component with decorative background and hover animations
- Implement TaskList component with table layout and progress bars
- Implement AssetLibrary component with tab switching and file cards
- Complete database verification (4 tables confirmed)
- Complete backend API verification (6 endpoints ready)
- Optimize UI to match prototype design (V2.html)

Frontend Components (~715 lines):
- components/ToolCard.tsx - Tool cards with animations
- components/TaskList.tsx - Recent tasks table view
- components/AssetLibrary.tsx - Data asset library with tabs
- hooks/useRecentTasks.ts - Task state management
- hooks/useAssets.ts - Asset state management
- pages/Portal.tsx - Main portal page
- types/portal.ts - TypeScript type definitions

Backend Verification:
- Backend API: 1495 lines code verified
- Database: dc_schema with 4 tables verified
- API endpoints: 6 endpoints tested (templates API works)

Documentation:
- Database verification report
- Backend API test report
- Phase 1 completion summary
- UI optimization report
- Development task checklist
- Development plan for Tool B

Status: Phase 1 completed (100%), ready for browser testing
Next: Phase 2 - Tool B Step 1 and 2 development
This commit is contained in:
2025-12-02 21:53:24 +08:00
parent f240aa9236
commit d4d33528c7
83 changed files with 21863 additions and 1601 deletions

View File

@@ -0,0 +1,393 @@
/**
* DC模块 - 双模型提取服务
*
* 功能:
* - 并发调用DeepSeek-V3和Qwen-Max进行文本提取
* - PII脱敏处理
* - JSON解析与容错
* - Token统计
* - 异步任务管理
*
* 平台能力复用:
* - ✅ LLMFactory: LLM调用
* - ✅ jobQueue: 异步任务
* - ✅ logger: 日志记录
* - ✅ prisma: 数据库操作
*/
import { LLMFactory } from '../../../../common/llm/adapters/LLMFactory.js';
import { logger } from '../../../../common/logging/index.js';
import { prisma } from '../../../../config/database.js';
export interface ExtractionInput {
text: string;
fields: { name: string; desc: string }[];
promptTemplate: string;
}
export interface ExtractionOutput {
result: Record<string, string>;
tokensUsed: number;
rawOutput: any;
}
export class DualModelExtractionService {
/**
* 双模型并发提取
*
* @param input 提取输入
* @param taskId 任务ID
* @param itemId 记录ID
* @returns 双模型结果
*/
async extract(input: ExtractionInput, taskId: string, itemId: string): Promise<{
resultA: ExtractionOutput;
resultB: ExtractionOutput;
}> {
try {
logger.info('[DualExtraction] Starting extraction', { taskId, itemId });
// 1. PII脱敏
const maskedText = this.maskPII(input.text);
// 2. 构建Prompt
const prompt = this.buildPrompt(maskedText, input.fields, input.promptTemplate);
// 3. 并发调用两个模型DeepSeek & Qwen
const [resultA, resultB] = await Promise.allSettled([
this.callModel('deepseek', prompt, input.fields),
this.callModel('qwen', prompt, input.fields)
]);
// 4. 处理结果
if (resultA.status === 'rejected' || resultB.status === 'rejected') {
logger.error('[DualExtraction] One or both models failed', {
taskId,
itemId,
errorA: resultA.status === 'rejected' ? resultA.reason : null,
errorB: resultB.status === 'rejected' ? resultB.reason : null
});
throw new Error('Dual model extraction failed');
}
logger.info('[DualExtraction] Extraction completed', {
taskId,
itemId,
tokensA: resultA.value.tokensUsed,
tokensB: resultB.value.tokensUsed
});
return {
resultA: resultA.value,
resultB: resultB.value
};
} catch (error) {
logger.error('[DualExtraction] Extraction failed', { error, taskId, itemId });
throw error;
}
}
/**
* PII脱敏
*
* 使用正则表达式替换敏感信息:
* - 姓名:张**
* - 身份证号3301********1234
* - 手机号138****5678
*/
private maskPII(text: string): string {
let masked = text;
// 手机号脱敏138****5678
masked = masked.replace(/1[3-9]\d{9}/g, (match) => {
return match.substring(0, 3) + '****' + match.substring(7);
});
// 身份证号脱敏330102********1234
masked = masked.replace(/\d{6}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dxX]/g, (match) => {
return match.substring(0, 6) + '********' + match.substring(14);
});
// 简单的姓名脱敏匹配患者xxx、姓名xxx
masked = masked.replace(/(患者|姓名[:])\s*([^\s。,]{2,4})/g, (match, prefix, name) => {
if (name.length === 2) {
return prefix + name[0] + '*';
}
return prefix + name[0] + '*'.repeat(name.length - 1);
});
return masked;
}
/**
* 构建Prompt
*/
private buildPrompt(text: string, fields: { name: string; desc: string }[], template: string): string {
// 在模板末尾添加病历文本
return `${template}
**病历原文:**
${text}
请严格按照JSON格式输出不要有任何额外文字。`;
}
/**
* 调用单个模型
*/
private async callModel(
modelType: 'deepseek' | 'qwen',
prompt: string,
fields: { name: string; desc: string }[]
): Promise<ExtractionOutput> {
try {
// 使用LLMFactory获取LLM客户端
const modelName = modelType === 'deepseek' ? 'deepseek-v3' : 'qwen-max';
const llm = LLMFactory.createLLM(modelName);
logger.info(`[${modelType.toUpperCase()}] Calling model`, { modelName });
// 调用LLM
const response = await llm.generateText(prompt, {
temperature: 0, // 最大确定性
maxTokens: 1000
});
logger.info(`[${modelType.toUpperCase()}] Model responded`, {
modelName,
tokensUsed: response.tokensUsed
});
// 解析JSON3层容错
const result = this.parseJSON(response.text, fields);
return {
result,
tokensUsed: response.tokensUsed || 0,
rawOutput: response.text
};
} catch (error) {
logger.error(`[${modelType.toUpperCase()}] Model call failed`, { error, modelType });
throw error;
}
}
/**
* 解析JSON3层容错策略
*
* 1. 直接JSON.parse
* 2. 提取```json代码块
* 3. 提取{}内容
*/
private parseJSON(text: string, fields: { name: string; desc: string }[]): Record<string, string> {
// 策略1直接解析
try {
const parsed = JSON.parse(text);
if (this.validateFields(parsed, fields)) {
return parsed;
}
} catch (e) {
// 继续下一个策略
}
// 策略2提取```json代码块
const codeBlockMatch = text.match(/```json\s*\n([\s\S]*?)\n```/);
if (codeBlockMatch) {
try {
const parsed = JSON.parse(codeBlockMatch[1]);
if (this.validateFields(parsed, fields)) {
return parsed;
}
} catch (e) {
// 继续下一个策略
}
}
// 策略3提取第一个完整的{}对象
const objectMatch = text.match(/\{[\s\S]*\}/);
if (objectMatch) {
try {
const parsed = JSON.parse(objectMatch[0]);
if (this.validateFields(parsed, fields)) {
return parsed;
}
} catch (e) {
// 解析失败
}
}
// 所有策略失败,返回空对象
logger.warn('[JSON] All parse strategies failed', { text });
const emptyResult: Record<string, string> = {};
fields.forEach(f => {
emptyResult[f.name] = '解析失败';
});
return emptyResult;
}
/**
* 验证字段完整性
*/
private validateFields(parsed: any, fields: { name: string; desc: string }[]): boolean {
if (!parsed || typeof parsed !== 'object') {
return false;
}
// 检查所有必需字段是否存在
return fields.every(f => parsed.hasOwnProperty(f.name));
}
/**
* 批量提取(异步任务)
*
* @param taskId 任务ID
*/
async batchExtract(taskId: string): Promise<void> {
try {
logger.info('[Batch] Starting batch extraction', { taskId });
// 1. 获取任务
const task = await prisma.dCExtractionTask.findUnique({
where: { id: taskId },
include: { items: true }
});
if (!task) {
throw new Error(`Task not found: ${taskId}`);
}
// 2. 更新任务状态
await prisma.dCExtractionTask.update({
where: { id: taskId },
data: {
status: 'processing',
startedAt: new Date()
}
});
// 3. 获取模板
const template = await prisma.dCTemplate.findUnique({
where: {
diseaseType_reportType: {
diseaseType: task.diseaseType,
reportType: task.reportType
}
}
});
if (!template) {
throw new Error(`Template not found: ${task.diseaseType}/${task.reportType}`);
}
const fields = template.fields as { name: string; desc: string }[];
// 4. 逐条处理
let processedCount = 0;
let cleanCount = 0;
let conflictCount = 0;
let totalTokens = 0;
for (const item of task.items) {
try {
// 双模型提取
const { resultA, resultB } = await this.extract(
{
text: item.originalText,
fields,
promptTemplate: template.promptTemplate
},
taskId,
item.id
);
// 检测冲突由ConflictDetectionService处理这里暂时简单比较
const hasConflict = JSON.stringify(resultA.result) !== JSON.stringify(resultB.result);
// 更新记录
await prisma.dCExtractionItem.update({
where: { id: item.id },
data: {
resultA: resultA.result,
resultB: resultB.result,
tokensA: resultA.tokensUsed,
tokensB: resultB.tokensUsed,
status: hasConflict ? 'conflict' : 'clean',
finalResult: hasConflict ? null : resultA.result // 一致时自动采纳
}
});
processedCount++;
if (hasConflict) {
conflictCount++;
} else {
cleanCount++;
}
totalTokens += resultA.tokensUsed + resultB.tokensUsed;
// 更新任务进度
await prisma.dCExtractionTask.update({
where: { id: taskId },
data: {
processedCount,
cleanCount,
conflictCount,
totalTokens
}
});
} catch (error) {
logger.error('[Batch] Item extraction failed', { error, itemId: item.id });
await prisma.dCExtractionItem.update({
where: { id: item.id },
data: {
status: 'failed',
error: String(error)
}
});
}
}
// 5. 完成任务
await prisma.dCExtractionTask.update({
where: { id: taskId },
data: {
status: 'completed',
completedAt: new Date()
}
});
logger.info('[Batch] Batch extraction completed', {
taskId,
processedCount,
cleanCount,
conflictCount,
totalTokens
});
} catch (error) {
logger.error('[Batch] Batch extraction failed', { error, taskId });
// 更新任务为失败状态
await prisma.dCExtractionTask.update({
where: { id: taskId },
data: {
status: 'failed',
error: String(error)
}
});
throw error;
}
}
}
// 导出单例
export const dualModelExtractionService = new DualModelExtractionService();