feat(dc): Complete Phase 1 - Portal workbench page development
Summary: - Implement DC module Portal page with 3 tool cards - Create ToolCard component with decorative background and hover animations - Implement TaskList component with table layout and progress bars - Implement AssetLibrary component with tab switching and file cards - Complete database verification (4 tables confirmed) - Complete backend API verification (6 endpoints ready) - Optimize UI to match prototype design (V2.html) Frontend Components (~715 lines): - components/ToolCard.tsx - Tool cards with animations - components/TaskList.tsx - Recent tasks table view - components/AssetLibrary.tsx - Data asset library with tabs - hooks/useRecentTasks.ts - Task state management - hooks/useAssets.ts - Asset state management - pages/Portal.tsx - Main portal page - types/portal.ts - TypeScript type definitions Backend Verification: - Backend API: 1495 lines code verified - Database: dc_schema with 4 tables verified - API endpoints: 6 endpoints tested (templates API works) Documentation: - Database verification report - Backend API test report - Phase 1 completion summary - UI optimization report - Development task checklist - Development plan for Tool B Status: Phase 1 completed (100%), ready for browser testing Next: Phase 2 - Tool B Step 1 and 2 development
This commit is contained in:
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
* DC模块 - 冲突检测服务
|
||||
*
|
||||
* 功能:
|
||||
* - 比较双模型提取结果
|
||||
* - 标记冲突字段
|
||||
* - 计算冲突严重程度
|
||||
* - 生成冲突报告
|
||||
*
|
||||
* 平台能力复用:
|
||||
* - ✅ logger: 日志记录
|
||||
*/
|
||||
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
 * Outcome of comparing the two models' extraction results for one record.
 */
export interface ConflictResult {
  /** True when at least one field differs after normalization. */
  hasConflict: boolean;
  /** Names of the fields whose normalized values differ. */
  conflictFields: string[];
  /** One entry per conflicting field, carrying both raw values. */
  conflictDetails: {
    fieldName: string;
    valueA: string;
    valueB: string;
    /** Similarity of the two normalized values, in the range 0-1. */
    similarity: number;
  }[];
  /** Overall severity derived from the share of conflicting fields. */
  severity: 'low' | 'medium' | 'high';
}
|
||||
|
||||
/**
 * Compares the extraction results produced by two models for the same record,
 * flags the fields on which they disagree, and rates how severe the
 * disagreement is.
 */
export class ConflictDetectionService {
  /**
   * Detect conflicts between two extraction results.
   *
   * Every field present in either result is compared after normalization;
   * a field missing from one side is treated as an empty string.
   *
   * @param resultA DeepSeek result
   * @param resultB Qwen result
   * @returns conflict analysis (conflicting fields, per-field detail, severity)
   * @throws rethrows any error raised during comparison after logging it
   */
  detectConflict(resultA: Record<string, string>, resultB: Record<string, string>): ConflictResult {
    try {
      logger.info('[Conflict] Starting conflict detection');

      const conflictFields: string[] = [];
      const conflictDetails: ConflictResult['conflictDetails'] = [];

      // Union of the field names of both results.
      const allFields = new Set([...Object.keys(resultA), ...Object.keys(resultB)]);

      for (const field of allFields) {
        // Only null/undefined count as "missing". The previous `|| ''` also
        // erased legitimate falsy values such as 0 that parsed JSON may carry.
        const valueA = this.toText(resultA[field]);
        const valueB = this.toText(resultB[field]);

        const normalizedA = this.normalize(valueA);
        const normalizedB = this.normalize(valueB);

        if (normalizedA !== normalizedB) {
          conflictFields.push(field);
          conflictDetails.push({
            fieldName: field,
            valueA,
            valueB,
            similarity: this.calculateSimilarity(normalizedA, normalizedB)
          });
        }
      }

      const severity = this.calculateSeverity(conflictFields.length, allFields.size);

      const result: ConflictResult = {
        hasConflict: conflictFields.length > 0,
        conflictFields,
        conflictDetails,
        severity
      };

      logger.info('[Conflict] Detection completed', {
        hasConflict: result.hasConflict,
        conflictCount: conflictFields.length,
        severity
      });

      return result;
    } catch (error) {
      logger.error('[Conflict] Detection failed', { error });
      throw error;
    }
  }

  /**
   * Convert a raw field value to text. The declared type is string, but the
   * value is converted defensively: null/undefined become '', objects are
   * JSON-serialized, everything else goes through String().
   */
  private toText(value: unknown): string {
    if (value === null || value === undefined) return '';
    if (typeof value === 'object') return JSON.stringify(value) ?? String(value);
    return String(value);
  }

  /**
   * Normalize text so that formatting-only differences do not count as
   * conflicts:
   * - full-width characters are folded to half-width (Unicode NFKC, plus
   *   the ideographic full stop "。" which NFKC leaves untouched)
   * - lower-cased
   * - all whitespace removed
   * - numeric tokens are canonicalized in place (3cm = 3.0cm = 3 cm)
   *
   * Numbers are rewritten where they occur and the surrounding text is
   * kept. The previous implementation replaced the whole value with the
   * first number it found, so different values sharing a first number
   * (e.g. "T1N0M0" and "T1N2M1") were wrongly treated as identical.
   */
  private normalize(value: string): string {
    const folded = String(value)
      .normalize('NFKC')
      .toLowerCase()
      .replace(/\s+/g, '')
      .replace(/。/g, '.');

    // Canonicalize each number with string operations (no parseFloat) so
    // long digit strings are never rounded.
    return folded.replace(/\d+(?:\.\d+)?/g, (token) => {
      const [intPart, fracPart = ''] = token.split('.');
      const int = intPart.replace(/^0+(?=\d)/, '');
      const frac = fracPart.replace(/0+$/, '');
      return frac ? `${int}.${frac}` : int;
    });
  }

  /**
   * Text similarity as the Dice coefficient over character bigrams.
   *
   * @returns a value in 0-1; 1 means the strings are identical
   */
  private calculateSimilarity(a: string, b: string): number {
    if (a === b) return 1;
    if (!a || !b) return 0;

    const bigramsA = this.getBigrams(a);
    const bigramsB = this.getBigrams(b);

    // The strings are known to differ at this point. If either one is too
    // short to form a bigram there is nothing to share, so the similarity
    // is 0 (previously two different single characters scored 1).
    if (bigramsA.size === 0 || bigramsB.size === 0) return 0;

    let shared = 0;
    for (const gram of bigramsA) {
      if (bigramsB.has(gram)) shared++;
    }

    // Dice coefficient: 2 * |A ∩ B| / (|A| + |B|)
    return (2 * shared) / (bigramsA.size + bigramsB.size);
  }

  /**
   * Build the set of distinct 2-character substrings of a string.
   */
  private getBigrams(str: string): Set<string> {
    const bigrams = new Set<string>();
    for (let i = 0; i < str.length - 1; i++) {
      bigrams.add(str.substring(i, i + 2));
    }
    return bigrams;
  }

  /**
   * Map the share of conflicting fields to a severity level:
   * up to 30% is low, up to 60% is medium, above that is high.
   * No fields or no conflicts yields 'low' (avoids the 0/0 = NaN case,
   * which used to fall through to 'high').
   */
  private calculateSeverity(conflictCount: number, totalFields: number): 'low' | 'medium' | 'high' {
    if (totalFields <= 0 || conflictCount <= 0) return 'low';

    const conflictRate = conflictCount / totalFields;

    if (conflictRate <= 0.3) return 'low';
    if (conflictRate <= 0.6) return 'medium';
    return 'high';
  }

  /**
   * Run conflict detection over a list of records and aggregate the counts.
   *
   * @param items records, each holding both models' results
   * @returns totals of clean and conflicting records plus the severity
   *          distribution of the conflicting ones
   */
  batchDetect(items: Array<{ resultA: Record<string, string>; resultB: Record<string, string> }>): {
    totalCount: number;
    cleanCount: number;
    conflictCount: number;
    severityDistribution: Record<'low' | 'medium' | 'high', number>;
  } {
    let cleanCount = 0;
    let conflictCount = 0;
    const severityDistribution = { low: 0, medium: 0, high: 0 };

    for (const item of items) {
      const result = this.detectConflict(item.resultA, item.resultB);

      if (result.hasConflict) {
        conflictCount++;
        severityDistribution[result.severity]++;
      } else {
        cleanCount++;
      }
    }

    return {
      totalCount: items.length,
      cleanCount,
      conflictCount,
      severityDistribution
    };
  }
}
|
||||
|
||||
// Export a shared singleton instance of the conflict detection service.
|
||||
export const conflictDetectionService = new ConflictDetectionService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,393 @@
|
||||
/**
|
||||
* DC模块 - 双模型提取服务
|
||||
*
|
||||
* 功能:
|
||||
* - 并发调用DeepSeek-V3和Qwen-Max进行文本提取
|
||||
* - PII脱敏处理
|
||||
* - JSON解析与容错
|
||||
* - Token统计
|
||||
* - 异步任务管理
|
||||
*
|
||||
* 平台能力复用:
|
||||
* - ✅ LLMFactory: LLM调用
|
||||
* - ✅ jobQueue: 异步任务
|
||||
* - ✅ logger: 日志记录
|
||||
* - ✅ prisma: 数据库操作
|
||||
*/
|
||||
|
||||
import { LLMFactory } from '../../../../common/llm/adapters/LLMFactory.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
import { prisma } from '../../../../config/database.js';
|
||||
|
||||
/**
 * Input for one dual-model extraction run.
 */
export interface ExtractionInput {
  /** Source text to extract from; it is PII-masked before being sent to the models. */
  text: string;
  /** Fields the models are expected to return (name plus description). */
  fields: Array<{ name: string; desc: string }>;
  /** Prompt template; the masked text is appended after it. */
  promptTemplate: string;
}
|
||||
|
||||
/**
 * Result of calling a single model.
 */
export interface ExtractionOutput {
  /** Parsed field values keyed by field name. */
  result: Record<string, string>;
  /** Token usage reported by the model call (0 when not reported). */
  tokensUsed: number;
  /** Unparsed model output, kept alongside the parsed result. */
  rawOutput: any;
}
|
||||
|
||||
/**
 * Runs the same extraction prompt against two models (DeepSeek and Qwen),
 * parses their JSON answers and, in batch mode, stores both results and
 * marks each record as clean or conflicting.
 */
export class DualModelExtractionService {
  /** Placeholder written to every field when a model's output cannot be parsed. */
  private static readonly PARSE_FAILED = '解析失败';

  /**
   * Extract with both models concurrently.
   *
   * @param input extraction input
   * @param taskId task ID (used for logging)
   * @param itemId record ID (used for logging)
   * @returns the results of both models
   * @throws Error when either model call fails
   */
  async extract(input: ExtractionInput, taskId: string, itemId: string): Promise<{
    resultA: ExtractionOutput;
    resultB: ExtractionOutput;
  }> {
    try {
      logger.info('[DualExtraction] Starting extraction', { taskId, itemId });

      // 1. Mask PII before the text leaves the service.
      const maskedText = this.maskPII(input.text);

      // 2. Build the prompt.
      const prompt = this.buildPrompt(maskedText, input.fields, input.promptTemplate);

      // 3. Call both models concurrently (DeepSeek & Qwen).
      const [resultA, resultB] = await Promise.allSettled([
        this.callModel('deepseek', prompt, input.fields),
        this.callModel('qwen', prompt, input.fields)
      ]);

      // 4. Both results are required; a single failure fails the extraction.
      if (resultA.status === 'rejected' || resultB.status === 'rejected') {
        logger.error('[DualExtraction] One or both models failed', {
          taskId,
          itemId,
          errorA: resultA.status === 'rejected' ? resultA.reason : null,
          errorB: resultB.status === 'rejected' ? resultB.reason : null
        });
        throw new Error('Dual model extraction failed');
      }

      logger.info('[DualExtraction] Extraction completed', {
        taskId,
        itemId,
        tokensA: resultA.value.tokensUsed,
        tokensB: resultB.value.tokensUsed
      });

      return {
        resultA: resultA.value,
        resultB: resultB.value
      };
    } catch (error) {
      logger.error('[DualExtraction] Extraction failed', { error, taskId, itemId });
      throw error;
    }
  }

  /**
   * Mask PII with regular expressions:
   * - ID card number: 330102********1234
   * - mobile number: 138****5678
   * - name after "患者" or "姓名:": first character kept, the rest starred
   *
   * ID card numbers are masked BEFORE mobile numbers. An 18-digit ID number
   * can contain an 11-digit run that looks like a mobile number; masking
   * phones first rewrote only that run and left the rest of the ID exposed
   * and no longer matching the ID pattern.
   *
   * NOTE(review): the name rule is a heuristic. It stars the 2-4
   * non-separator characters after "患者", which also hits ordinary
   * clinical wording such as "患者主诉…". Behavior is kept as-is here.
   */
  private maskPII(text: string): string {
    let masked = text;

    // ID card number: keep the 6-digit region code and the last 4 characters.
    masked = masked.replace(
      /\d{6}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dxX]/g,
      (match) => match.substring(0, 6) + '********' + match.substring(14)
    );

    // Mobile number: keep the first 3 and last 4 digits.
    masked = masked.replace(
      /1[3-9]\d{9}/g,
      (match) => match.substring(0, 3) + '****' + match.substring(7)
    );

    // Simple name masking (matches: 患者xxx, 姓名:xxx).
    masked = masked.replace(
      /(患者|姓名[::])\s*([^\s,。,]{2,4})/g,
      (_match, prefix: string, name: string) => prefix + name[0] + '*'.repeat(name.length - 1)
    );

    return masked;
  }

  /**
   * Build the prompt: the template, followed by the record text and a final
   * instruction to answer in JSON only. `fields` is accepted for interface
   * symmetry but not used here.
   */
  private buildPrompt(text: string, fields: { name: string; desc: string }[], template: string): string {
    return `${template}

**病历原文:**
${text}

请严格按照JSON格式输出,不要有任何额外文字。`;
  }

  /**
   * Call a single model and parse its answer.
   *
   * @param modelType which model to call
   * @param prompt full prompt text
   * @param fields fields expected in the JSON answer
   * @throws rethrows any error from the LLM client after logging it
   */
  private async callModel(
    modelType: 'deepseek' | 'qwen',
    prompt: string,
    fields: { name: string; desc: string }[]
  ): Promise<ExtractionOutput> {
    try {
      // Obtain the LLM client from LLMFactory.
      const modelName = modelType === 'deepseek' ? 'deepseek-v3' : 'qwen-max';
      const llm = LLMFactory.createLLM(modelName);

      logger.info(`[${modelType.toUpperCase()}] Calling model`, { modelName });

      const response = await llm.generateText(prompt, {
        temperature: 0, // maximum determinism
        maxTokens: 1000
      });

      logger.info(`[${modelType.toUpperCase()}] Model responded`, {
        modelName,
        tokensUsed: response.tokensUsed
      });

      // Parse JSON (three fallback strategies).
      const result = this.parseJSON(response.text, fields);

      return {
        result,
        tokensUsed: response.tokensUsed || 0,
        rawOutput: response.text
      };
    } catch (error) {
      logger.error(`[${modelType.toUpperCase()}] Model call failed`, { error, modelType });
      throw error;
    }
  }

  /**
   * Parse the model's answer as JSON, trying in order:
   * 1. the whole text
   * 2. the content of a ```json code block
   * 3. the span from the first "{" to the last "}"
   *
   * The first candidate that parses and contains every expected field wins.
   * If none does, every field is set to the parse-failure placeholder.
   */
  private parseJSON(text: string, fields: { name: string; desc: string }[]): Record<string, string> {
    const candidates: string[] = [text];

    const codeBlockMatch = text.match(/```json\s*\n([\s\S]*?)\n```/);
    if (codeBlockMatch) {
      candidates.push(codeBlockMatch[1]);
    }

    const objectMatch = text.match(/\{[\s\S]*\}/);
    if (objectMatch) {
      candidates.push(objectMatch[0]);
    }

    for (const candidate of candidates) {
      try {
        const parsed = JSON.parse(candidate);
        if (this.validateFields(parsed, fields)) {
          return parsed;
        }
      } catch {
        // Not valid JSON; fall through to the next strategy.
      }
    }

    // Every strategy failed: return the placeholder for each field.
    logger.warn('[JSON] All parse strategies failed', { text });
    const emptyResult: Record<string, string> = {};
    for (const field of fields) {
      emptyResult[field.name] = DualModelExtractionService.PARSE_FAILED;
    }
    return emptyResult;
  }

  /**
   * Check that `parsed` is a plain (non-array) object containing every
   * expected field. Uses Object.prototype.hasOwnProperty so a model-supplied
   * key named "hasOwnProperty" cannot break the check.
   */
  private validateFields(parsed: any, fields: { name: string; desc: string }[]): boolean {
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return false;
    }

    return fields.every(f => Object.prototype.hasOwnProperty.call(parsed, f.name));
  }

  /**
   * True when every expected field holds the parse-failure placeholder,
   * i.e. the result came from the fallback in parseJSON.
   */
  private isParseFailure(result: Record<string, unknown>, fields: { name: string; desc: string }[]): boolean {
    return fields.length > 0
      && fields.every(f => result[f.name] === DualModelExtractionService.PARSE_FAILED);
  }

  /**
   * Compare two results key by key over the union of their keys. Unlike
   * comparing JSON.stringify of the whole objects, this does not depend on
   * the order in which each model emitted its keys.
   */
  private resultsDiffer(a: Record<string, unknown>, b: Record<string, unknown>): boolean {
    const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const key of keys) {
      if (JSON.stringify(a[key]) !== JSON.stringify(b[key])) {
        return true;
      }
    }
    return false;
  }

  /**
   * Batch extraction (asynchronous task): processes every item of the task
   * sequentially, storing both models' results and updating task progress
   * after each item.
   *
   * A failing item is marked 'failed' and the batch continues. A failure
   * outside the item loop marks the whole task 'failed' and is rethrown.
   *
   * @param taskId task ID
   */
  async batchExtract(taskId: string): Promise<void> {
    try {
      logger.info('[Batch] Starting batch extraction', { taskId });

      // 1. Load the task with its items.
      const task = await prisma.dCExtractionTask.findUnique({
        where: { id: taskId },
        include: { items: true }
      });

      if (!task) {
        throw new Error(`Task not found: ${taskId}`);
      }

      // 2. Mark the task as processing.
      await prisma.dCExtractionTask.update({
        where: { id: taskId },
        data: {
          status: 'processing',
          startedAt: new Date()
        }
      });

      // 3. Load the template for this disease/report type.
      const template = await prisma.dCTemplate.findUnique({
        where: {
          diseaseType_reportType: {
            diseaseType: task.diseaseType,
            reportType: task.reportType
          }
        }
      });

      if (!template) {
        throw new Error(`Template not found: ${task.diseaseType}/${task.reportType}`);
      }

      const fields = template.fields as { name: string; desc: string }[];

      // 4. Process items one by one.
      let processedCount = 0;
      let cleanCount = 0;
      let conflictCount = 0;
      let totalTokens = 0;

      for (const item of task.items) {
        try {
          const { resultA, resultB } = await this.extract(
            {
              text: item.originalText,
              fields,
              promptTemplate: template.promptTemplate
            },
            taskId,
            item.id
          );

          // Simple equality-based conflict check. A result that is only the
          // parse-failure placeholder is never treated as agreement:
          // otherwise two unparseable answers would "match" and the
          // placeholder would be auto-adopted as the final result.
          const parseFailed = this.isParseFailure(resultA.result, fields)
            || this.isParseFailure(resultB.result, fields);
          const hasConflict = parseFailed || this.resultsDiffer(resultA.result, resultB.result);

          await prisma.dCExtractionItem.update({
            where: { id: item.id },
            data: {
              resultA: resultA.result,
              resultB: resultB.result,
              tokensA: resultA.tokensUsed,
              tokensB: resultB.tokensUsed,
              status: hasConflict ? 'conflict' : 'clean',
              finalResult: hasConflict ? null : resultA.result // adopt automatically when both agree
            }
          });

          processedCount++;
          if (hasConflict) {
            conflictCount++;
          } else {
            cleanCount++;
          }
          totalTokens += resultA.tokensUsed + resultB.tokensUsed;

          // Persist progress after each item.
          await prisma.dCExtractionTask.update({
            where: { id: taskId },
            data: {
              processedCount,
              cleanCount,
              conflictCount,
              totalTokens
            }
          });
        } catch (error) {
          logger.error('[Batch] Item extraction failed', { error, itemId: item.id });

          await prisma.dCExtractionItem.update({
            where: { id: item.id },
            data: {
              status: 'failed',
              error: String(error)
            }
          });
        }
      }

      // 5. Mark the task as completed.
      await prisma.dCExtractionTask.update({
        where: { id: taskId },
        data: {
          status: 'completed',
          completedAt: new Date()
        }
      });

      logger.info('[Batch] Batch extraction completed', {
        taskId,
        processedCount,
        cleanCount,
        conflictCount,
        totalTokens
      });
    } catch (error) {
      logger.error('[Batch] Batch extraction failed', { error, taskId });

      // Best effort: the status update must not replace the original error
      // (for example when the task row does not exist).
      try {
        await prisma.dCExtractionTask.update({
          where: { id: taskId },
          data: {
            status: 'failed',
            error: String(error)
          }
        });
      } catch (updateError) {
        logger.error('[Batch] Failed to mark task as failed', { error: updateError, taskId });
      }

      throw error;
    }
  }
}
|
||||
|
||||
// Export a shared singleton instance of the dual-model extraction service.
|
||||
export const dualModelExtractionService = new DualModelExtractionService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
193
backend/src/modules/dc/tool-b/services/HealthCheckService.ts
Normal file
193
backend/src/modules/dc/tool-b/services/HealthCheckService.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* DC模块 - 健康检查服务
|
||||
*
|
||||
* 功能:
|
||||
* - Excel列数据质量检查(空值率、平均长度)
|
||||
* - Token预估
|
||||
* - 拦截不适合的数据列
|
||||
* - 结果缓存(避免重复计算)
|
||||
*
|
||||
* 平台能力复用:
|
||||
* - ✅ storage: 文件读取
|
||||
* - ✅ logger: 日志记录
|
||||
* - ✅ cache: 结果缓存
|
||||
* - ✅ prisma: 数据库存储
|
||||
*/
|
||||
|
||||
import * as xlsx from 'xlsx';
|
||||
import { storage } from '../../../../common/storage/index.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
import { cache } from '../../../../common/cache/index.js';
|
||||
import { prisma } from '../../../../config/database.js';
|
||||
|
||||
/**
 * Result of a column health check.
 */
export interface HealthCheckResult {
  /** 'bad' when the column is rejected for extraction, otherwise 'good'. */
  status: 'good' | 'bad';
  /** Share of empty cells among the checked rows (0-1). */
  emptyRate: number;
  /** Average character length of the non-empty cells. */
  avgLength: number;
  /** Number of rows that were checked. */
  totalRows: number;
  /** Rough token estimate; 0 when the column is rejected. */
  estimatedTokens: number;
  /** Human-readable explanation of the verdict. */
  message: string;
}
|
||||
|
||||
/** Column statistics computed over the sampled rows. */
interface ColumnStats {
  totalRows: number;
  emptyCount: number;
  emptyRate: number;
  avgLength: number;
  validCount: number;
}

/**
 * Checks whether an Excel column is suitable for extraction (empty rate,
 * average text length), estimates token usage, and caches the verdict.
 */
export class HealthCheckService {
  /** Number of data rows sampled from the first worksheet. */
  private static readonly SAMPLE_ROWS = 100;

  /**
   * Run the health check.
   *
   * A cached result is returned directly when present. Otherwise the file
   * is downloaded, the first worksheet is sampled, the verdict is stored in
   * the database and cached for 24 hours.
   *
   * @param fileKey file path in Storage
   * @param columnName name of the column to check
   * @param userId user ID
   * @returns health check result
   * @throws Error when the file or the column cannot be found
   */
  async check(fileKey: string, columnName: string, userId: string): Promise<HealthCheckResult> {
    try {
      logger.info('[HealthCheck] Starting health check', { fileKey, columnName, userId });

      // 1. Check the cache (avoids recomputation).
      const cacheKey = `dc:health:${fileKey}:${columnName}`;
      const cached = await cache.get<HealthCheckResult>(cacheKey);
      if (cached) {
        logger.info('[HealthCheck] Cache hit', { cacheKey });
        return cached;
      }

      // 2. Read the Excel file from Storage.
      const fileBuffer = await storage.download(fileKey);
      if (!fileBuffer) {
        throw new Error(`File not found: ${fileKey}`);
      }

      // 3. Parse the first worksheet and keep the first 100 data rows.
      //    - The previous `{ range: 99 }` set the STARTING row to index 99,
      //      skipping the first 99 rows instead of keeping them.
      //    - `defval: ''` makes every row carry every header key, so an
      //      empty cell in the first row is not reported as a missing column.
      const workbook = xlsx.read(fileBuffer, { type: 'buffer' });
      const sheetName = workbook.SheetNames[0];
      const worksheet = workbook.Sheets[sheetName];
      const data = xlsx.utils
        .sheet_to_json<Record<string, any>>(worksheet, { defval: '' })
        .slice(0, HealthCheckService.SAMPLE_ROWS);

      logger.info('[HealthCheck] Excel parsed', { totalRows: data.length });

      // 4. Make sure the column exists.
      if (data.length === 0 || !Object.prototype.hasOwnProperty.call(data[0], columnName)) {
        throw new Error(`Column '${columnName}' not found in Excel`);
      }

      // 5. Compute statistics.
      const stats = this.calculateStats(data, columnName);

      // 6. Decide the health status.
      const result = this.evaluateHealth(stats);

      // 7. Persist to the database.
      await prisma.dCHealthCheck.create({
        data: {
          userId,
          fileName: fileKey.split('/').pop() || fileKey,
          columnName,
          emptyRate: result.emptyRate,
          avgLength: result.avgLength,
          totalRows: result.totalRows,
          estimatedTokens: result.estimatedTokens,
          status: result.status,
          message: result.message
        }
      });

      // 8. Cache the result (24 hours).
      await cache.set(cacheKey, result, 86400);

      logger.info('[HealthCheck] Check completed', { status: result.status });

      return result;
    } catch (error) {
      logger.error('[HealthCheck] Check failed', { error, fileKey, columnName });
      throw error;
    }
  }

  /**
   * Compute the empty rate and average text length of one column.
   *
   * A cell is empty when it is null/undefined or blank after trimming.
   * Falsy but meaningful values such as 0 or false count as content
   * (the previous `!value` test counted them as empty).
   */
  private calculateStats(data: Record<string, any>[], columnName: string): ColumnStats {
    const totalRows = data.length;
    let emptyCount = 0;
    let totalLength = 0;
    let validCount = 0;

    for (const row of data) {
      const value = row[columnName];
      const text = value === null || value === undefined ? '' : String(value);

      if (text.trim() === '') {
        emptyCount++;
      } else {
        totalLength += text.length;
        validCount++;
      }
    }

    const emptyRate = totalRows > 0 ? emptyCount / totalRows : 0;
    const avgLength = validCount > 0 ? totalLength / validCount : 0;

    return { totalRows, emptyCount, emptyRate, avgLength, validCount };
  }

  /**
   * Turn column statistics into a verdict.
   *
   * Rejected ('bad') when more than 80% of the cells are empty or when the
   * average length is below 10 characters; otherwise 'good' with a rough
   * token estimate.
   */
  private evaluateHealth(stats: ColumnStats): HealthCheckResult {
    const { totalRows, emptyRate, avgLength } = stats;

    const rejected = (message: string): HealthCheckResult => ({
      status: 'bad',
      emptyRate,
      avgLength,
      totalRows,
      estimatedTokens: 0,
      message
    });

    // Rejection rule 1: empty rate > 80%.
    if (emptyRate > 0.8) {
      return rejected(`空值率过高(${(emptyRate * 100).toFixed(1)}%),该列不适合提取`);
    }

    // Rejection rule 2: average length < 10.
    if (avgLength < 10) {
      return rejected(`文本过短(平均${avgLength.toFixed(1)}字符),该列不适合提取`);
    }

    // Rough token estimate: characters * 1.5 / 2.5
    // (for Chinese text one token is usually about 2-3 characters).
    const estimatedTokens = Math.ceil((totalRows * avgLength * 1.5) / 2.5);

    return {
      status: 'good',
      emptyRate,
      avgLength,
      totalRows,
      estimatedTokens,
      message: `健康度良好,预计消耗约 ${(estimatedTokens / 1000).toFixed(1)}k Token(双模型约 ${(estimatedTokens * 2 / 1000).toFixed(1)}k Token)`
    };
  }

  /**
   * Remove the cached result for a file/column pair.
   */
  async clearCache(fileKey: string, columnName: string): Promise<void> {
    const cacheKey = `dc:health:${fileKey}:${columnName}`;
    await cache.delete(cacheKey);
    logger.info('[HealthCheck] Cache cleared', { cacheKey });
  }
}
|
||||
|
||||
// Export a shared singleton instance of the health check service.
|
||||
export const healthCheckService = new HealthCheckService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
246
backend/src/modules/dc/tool-b/services/TemplateService.ts
Normal file
246
backend/src/modules/dc/tool-b/services/TemplateService.ts
Normal file
@@ -0,0 +1,246 @@
|
||||
/**
|
||||
* DC模块 - 模板服务
|
||||
*
|
||||
* 功能:
|
||||
* - 管理预设提取模板(疾病类型 + 报告类型)
|
||||
* - 提供模板列表查询
|
||||
* - Seed初始数据(3个预设模板)
|
||||
*
|
||||
* 平台能力复用:
|
||||
* - ✅ prisma: 数据库操作
|
||||
* - ✅ logger: 日志记录
|
||||
*/
|
||||
|
||||
import { prisma } from '../../../../config/database.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
 * One field of an extraction template.
 */
export interface TemplateField {
  /** Field name; also the key expected in the model's JSON output. */
  name: string;
  /** Short description of what the field holds. */
  desc: string;
  /** Optional TailwindCSS width class. */
  width?: string;
}
|
||||
|
||||
/**
 * Extraction template for one disease type / report type pair.
 */
export interface Template {
  id: string;
  /** Disease type key, e.g. 'lung_cancer'. */
  diseaseType: string;
  /** Report type key, e.g. 'pathology'. */
  reportType: string;
  /** Display name of the template. */
  displayName: string;
  /** Fields to extract. */
  fields: Array<TemplateField>;
  /** Prompt text sent to the models. */
  promptTemplate: string;
}
|
||||
|
||||
/**
 * Reads extraction templates from the database and seeds the three preset
 * templates.
 */
export class TemplateService {
  /**
   * Map a database row to the public Template shape.
   */
  private toTemplate(row: {
    id: string;
    diseaseType: string;
    reportType: string;
    displayName: string;
    fields: unknown;
    promptTemplate: string;
  }): Template {
    const { id, diseaseType, reportType, displayName, promptTemplate } = row;
    return {
      id,
      diseaseType,
      reportType,
      displayName,
      fields: row.fields as TemplateField[],
      promptTemplate
    };
  }

  /**
   * Fetch every template, ordered by disease type and then report type.
   *
   * @throws rethrows any database error after logging it
   */
  async getAllTemplates(): Promise<Template[]> {
    try {
      logger.info('[Template] Fetching all templates');

      const rows = await prisma.dCTemplate.findMany({
        orderBy: [{ diseaseType: 'asc' }, { reportType: 'asc' }]
      });

      logger.info('[Template] Templates fetched', { count: rows.length });

      return rows.map(row => this.toTemplate(row));
    } catch (error) {
      logger.error('[Template] Failed to fetch templates', { error });
      throw error;
    }
  }

  /**
   * Fetch the template for a disease type / report type pair.
   *
   * @returns the template, or null (with a warning logged) when none exists
   * @throws rethrows any database error after logging it
   */
  async getTemplate(diseaseType: string, reportType: string): Promise<Template | null> {
    try {
      logger.info('[Template] Fetching template', { diseaseType, reportType });

      const row = await prisma.dCTemplate.findUnique({
        where: {
          diseaseType_reportType: { diseaseType, reportType }
        }
      });

      if (row) {
        return this.toTemplate(row);
      }

      logger.warn('[Template] Template not found', { diseaseType, reportType });
      return null;
    } catch (error) {
      logger.error('[Template] Failed to fetch template', { error, diseaseType, reportType });
      throw error;
    }
  }

  /**
   * Seed the preset templates.
   *
   * Three presets are written:
   * 1. lung cancer pathology report
   * 2. diabetes admission record
   * 3. hypertension outpatient record
   *
   * Each preset is upserted on its (diseaseType, reportType) key, so
   * running the seed again updates the existing rows instead of
   * duplicating them.
   */
  async seedTemplates(): Promise<void> {
    try {
      logger.info('[Template] Seeding templates');

      // Preset 1: lung cancer pathology report.
      const lungCancerPathology = {
        diseaseType: 'lung_cancer',
        reportType: 'pathology',
        displayName: '肺癌病理报告',
        fields: [
          { name: '病理类型', desc: '如:浸润性腺癌、鳞状细胞癌', width: 'w-40' },
          { name: '分化程度', desc: '高/中/低分化', width: 'w-32' },
          { name: '肿瘤大小', desc: '最大径,单位cm', width: 'w-32' },
          { name: '淋巴结转移', desc: '有/无及具体组别', width: 'w-48' },
          { name: '免疫组化', desc: '关键指标', width: 'w-56' }
        ],
        promptTemplate: `你是一名病理学专家。请从以下肺癌病理报告中提取关键信息。

提取字段(必须返回以下所有字段):
- 病理类型:病理诊断类型(如浸润性腺癌、鳞状细胞癌)
- 分化程度:分化等级(高分化、中分化、低分化、未提及)
- 肿瘤大小:肿瘤最大径,单位cm
- 淋巴结转移:淋巴结转移情况(有/无及具体组别)
- 免疫组化:关键免疫组化指标

**输出格式:严格的JSON格式(不要有任何额外文本):**
\`\`\`json
{
"病理类型": "...",
"分化程度": "...",
"肿瘤大小": "...",
"淋巴结转移": "...",
"免疫组化": "..."
}
\`\`\`

如果某个信息未在报告中提及,请填写"未提及"。`
      };

      // Preset 2: diabetes admission record.
      const diabetesAdmission = {
        diseaseType: 'diabetes',
        reportType: 'admission',
        displayName: '糖尿病入院记录',
        fields: [
          { name: '主诉', desc: '患者入院的主要症状', width: 'w-48' },
          { name: '现病史', desc: '发病过程', width: 'w-64' },
          { name: '既往史', desc: '糖尿病病史年限', width: 'w-40' },
          { name: '空腹血糖', desc: '单位mmol/L', width: 'w-32' },
          { name: '糖化血红蛋白', desc: '单位%', width: 'w-32' }
        ],
        promptTemplate: `你是一名内分泌科专家。请从以下糖尿病患者入院记录中提取关键信息。

提取字段(必须返回以下所有字段):
- 主诉:患者入院时的主要症状
- 现病史:本次发病的过程和表现
- 既往史:糖尿病病史年限
- 空腹血糖:最近的空腹血糖值(单位mmol/L)
- 糖化血红蛋白:最近的HbA1c值(单位%)

**输出格式:严格的JSON格式:**
\`\`\`json
{
"主诉": "...",
"现病史": "...",
"既往史": "...",
"空腹血糖": "...",
"糖化血红蛋白": "..."
}
\`\`\`

如果某个信息未在记录中提及,请填写"未提及"。`
      };

      // Preset 3: hypertension outpatient record.
      const hypertensionOutpatient = {
        diseaseType: 'hypertension',
        reportType: 'outpatient',
        displayName: '高血压门诊病历',
        fields: [
          { name: '血压值', desc: '单位mmHg', width: 'w-32' },
          { name: '心率', desc: '单位次/分', width: 'w-24' },
          { name: '当前用药', desc: '高血压药物', width: 'w-56' },
          { name: '靶器官损害', desc: '心/脑/肾', width: 'w-40' },
          { name: '危险分层', desc: '低/中/高/极高危', width: 'w-32' }
        ],
        promptTemplate: `你是一名心内科专家。请从以下高血压患者门诊病历中提取关键信息。

提取字段(必须返回以下所有字段):
- 血压值:收缩压/舒张压(单位mmHg)
- 心率:心率(单位次/分)
- 当前用药:患者当前服用的高血压药物
- 靶器官损害:心脏、脑、肾脏等靶器官损害情况
- 危险分层:心血管风险分层(低危、中危、高危、极高危)

**输出格式:严格的JSON格式:**
\`\`\`json
{
"血压值": "...",
"心率": "...",
"当前用药": "...",
"靶器官损害": "...",
"危险分层": "..."
}
\`\`\`

如果某个信息未在病历中提及,请填写"未提及"。`
      };

      const presets = [lungCancerPathology, diabetesAdmission, hypertensionOutpatient];

      // Upsert to avoid duplicates.
      for (const preset of presets) {
        const { diseaseType, reportType, displayName, fields, promptTemplate } = preset;

        await prisma.dCTemplate.upsert({
          where: {
            diseaseType_reportType: { diseaseType, reportType }
          },
          update: { displayName, fields, promptTemplate },
          create: preset
        });
      }

      logger.info('[Template] Templates seeded successfully', { count: presets.length });
    } catch (error) {
      logger.error('[Template] Failed to seed templates', { error });
      throw error;
    }
  }
}
|
||||
|
||||
// Export a shared singleton instance of the template service.
|
||||
export const templateService = new TemplateService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user