Major Changes: - Database: Install pg_bigm/pgvector plugins, create test database - Python service: v1.0 -> v1.1, add pymupdf4llm/openpyxl/pypandoc - Node.js backend: v1.3 -> v1.7, fix pino-pretty and ES Module imports - Frontend: v1.2 -> v1.3, skip TypeScript check for deployment - Code recovery: Restore empty files from local backup Technical Fixes: - Fix pino-pretty error in production (conditional loading) - Fix ES Module import paths (add .js extensions) - Fix OSSAdapter TypeScript errors - Update Prisma Schema (63 models, 16 schemas) - Update environment variables (DATABASE_URL, EXTRACTION_SERVICE_URL, OSS) - Remove deprecated variables (REDIS_URL, DIFY_API_URL, DIFY_API_KEY) Documentation: - Create 0126 deployment folder with 8 documents - Update database development standards v2.0 - Update SAE deployment status records Deployment Status: - PostgreSQL: ai_clinical_research_test with plugins - Python: v1.1 @ 172.17.173.84:8000 - Backend: v1.7 @ 172.17.173.89:3001 - Frontend: v1.3 @ 172.17.173.90:80 Tested: All services running successfully on SAE
294 lines
5.6 KiB
TypeScript
294 lines
5.6 KiB
TypeScript
/**
 * DC module - conflict detection service
 *
 * Features:
 * - Compare the extraction results of two models
 * - Flag conflicting fields
 * - Compute conflict severity
 * - Generate a conflict report
 *
 * Reused platform capabilities:
 * - ✅ logger: logging
 */
|
||
|
||
import { logger } from '../../../../common/logging/index.js';
|
||
|
||
/**
 * Outcome of comparing two extraction results for the same record.
 */
export interface ConflictResult {
  /** Whether at least one field was flagged as conflicting. */
  hasConflict: boolean;

  /** Names of the fields whose values disagree. */
  conflictFields: string[];

  /** One entry per conflicting field, carrying both raw values. */
  conflictDetails: {
    /** Name of the conflicting field. */
    fieldName: string;
    /** Value taken from the first result. */
    valueA: string;
    /** Value taken from the second result. */
    valueB: string;
    /** Similarity score in the range 0-1. */
    similarity: number;
  }[];

  /** Overall severity bucket for the comparison. */
  severity: 'low' | 'medium' | 'high';
}
|
||
|
||
/**
 * Compares two extraction results field by field, flags the fields whose
 * normalized values differ, scores how similar the differing values are, and
 * buckets the overall disagreement into a severity level.
 */
export class ConflictDetectionService {
  /**
   * CJK punctuation mapped to its ASCII counterpart. NFKC already folds the
   * full-width forms; the ideographic full stop is not covered by NFKC, and
   * the remaining entries act as a fallback on runtimes where
   * `String.prototype.normalize` is a no-op.
   */
  private static readonly PUNCTUATION_MAP: Readonly<Record<string, string>> = {
    ',': ',',
    '。': '.',
    ';': ';',
    ':': ':',
    '!': '!',
    '?': '?',
  };

  /**
   * Detects conflicts between two extraction results.
   *
   * Every field present in either result is compared after normalization; a
   * field missing (or null) on one side is treated as the empty string.
   *
   * @param resultA Extraction result of the first model (DeepSeek, per the original notes).
   * @param resultB Extraction result of the second model (Qwen, per the original notes).
   * @returns The conflicting fields, their raw values and similarity, and the overall severity.
   * @throws Rethrows any error raised during detection after logging it.
   */
  detectConflict(resultA: Record<string, string>, resultB: Record<string, string>): ConflictResult {
    try {
      logger.info('[Conflict] Starting conflict detection');

      const conflictFields: string[] = [];
      const conflictDetails: ConflictResult['conflictDetails'] = [];

      // Union of the field names of both results.
      const allFields = new Set([...Object.keys(resultA), ...Object.keys(resultB)]);

      for (const field of allFields) {
        // `??` (not `||`) so that only missing/null values fall back to ''.
        const valueA = resultA[field] ?? '';
        const valueB = resultB[field] ?? '';

        const normalizedA = this.normalize(valueA);
        const normalizedB = this.normalize(valueB);

        if (normalizedA !== normalizedB) {
          conflictFields.push(field);
          conflictDetails.push({
            fieldName: field,
            valueA,
            valueB,
            similarity: this.calculateSimilarity(normalizedA, normalizedB),
          });
        }
      }

      const severity = this.calculateSeverity(conflictFields.length, allFields.size);

      const result: ConflictResult = {
        hasConflict: conflictFields.length > 0,
        conflictFields,
        conflictDetails,
        severity,
      };

      logger.info('[Conflict] Detection completed', {
        hasConflict: result.hasConflict,
        conflictCount: conflictFields.length,
        severity,
      });

      return result;
    } catch (error) {
      logger.error('[Conflict] Detection failed', { error });
      throw error;
    }
  }

  /**
   * Normalizes a value for comparison.
   *
   * - folds full-width / compatibility characters to their plain form (NFKC)
   * - lowercases
   * - removes all whitespace
   * - maps CJK punctuation to ASCII
   * - canonicalizes every number in place (3cm = 3.0cm = 3 cm)
   *
   * Numbers are rewritten where they occur instead of replacing the whole
   * string with the first number found, so surrounding text, dates and later
   * numbers still take part in the comparison.
   */
  private normalize(value: string): string {
    const folded = String(value)
      .normalize('NFKC') // before lowercasing: folding can introduce uppercase letters
      .toLowerCase()
      .replace(/\s+/g, '')
      .replace(/[,。;:!?]/g, (ch) => ConflictDetectionService.PUNCTUATION_MAP[ch] ?? ch);

    return this.canonicalizeNumbers(folded);
  }

  /**
   * Rewrites each numeric token to a canonical textual form: leading zeros of
   * the integer part and trailing zeros of the fraction are dropped
   * ("03.50" -> "3.5", "3.0" -> "3"). Done on the text rather than through
   * `parseFloat` so long digit runs do not lose precision.
   */
  private canonicalizeNumbers(text: string): string {
    return text.replace(
      /(\d+)(?:\.(\d+))?/g,
      (_token: string, intPart: string, fracPart: string | undefined) => {
        const integer = intPart.replace(/^0+(?=\d)/, '');
        const fraction = (fracPart ?? '').replace(/0+$/, '');
        return fraction ? `${integer}.${fraction}` : integer;
      },
    );
  }

  /**
   * Computes text similarity as the Dice coefficient over character bigram sets.
   *
   * @returns A value in the range 0-1, where 1 means identical.
   */
  private calculateSimilarity(a: string, b: string): number {
    if (a === b) return 1;
    if (!a || !b) return 0;

    const bigramsA = this.getBigrams(a);
    const bigramsB = this.getBigrams(b);

    // A string shorter than two characters has no bigrams. The strings are
    // already known to differ here, so there is no overlap to measure.
    if (bigramsA.size === 0 || bigramsB.size === 0) return 0;

    let shared = 0;
    for (const gram of bigramsA) {
      if (bigramsB.has(gram)) shared++;
    }

    // Dice coefficient: 2 * |A ∩ B| / (|A| + |B|)
    return (2 * shared) / (bigramsA.size + bigramsB.size);
  }

  /**
   * Builds the set of distinct two-character substrings of `str`.
   */
  private getBigrams(str: string): Set<string> {
    const bigrams = new Set<string>();
    for (let i = 0; i < str.length - 1; i++) {
      bigrams.add(str.substring(i, i + 2));
    }
    return bigrams;
  }

  /**
   * Buckets the share of conflicting fields into a severity level.
   *
   * - up to 30% -> 'low'
   * - above 30% up to 60% -> 'medium'
   * - above 60% -> 'high'
   *
   * With no fields at all there is nothing to disagree on, so the result is
   * 'low' (guards against 0 / 0 = NaN, which would otherwise fall through to 'high').
   */
  private calculateSeverity(conflictCount: number, totalFields: number): 'low' | 'medium' | 'high' {
    if (totalFields <= 0 || conflictCount <= 0) return 'low';

    const conflictRate = conflictCount / totalFields;

    if (conflictRate <= 0.3) return 'low';
    if (conflictRate <= 0.6) return 'medium';
    return 'high';
  }

  /**
   * Runs conflict detection over a batch of result pairs and aggregates the outcome.
   *
   * @param items Pairs of extraction results to compare.
   * @returns Total / clean / conflicting counts, plus the severity distribution
   *          of the conflicting items only.
   */
  batchDetect(items: Array<{ resultA: Record<string, string>; resultB: Record<string, string> }>): {
    totalCount: number;
    cleanCount: number;
    conflictCount: number;
    severityDistribution: Record<'low' | 'medium' | 'high', number>;
  } {
    let cleanCount = 0;
    let conflictCount = 0;
    const severityDistribution = { low: 0, medium: 0, high: 0 };

    for (const item of items) {
      const result = this.detectConflict(item.resultA, item.resultB);

      if (result.hasConflict) {
        conflictCount++;
        severityDistribution[result.severity]++;
      } else {
        cleanCount++;
      }
    }

    return {
      totalCount: items.length,
      cleanCount,
      conflictCount,
      severityDistribution,
    };
  }
}
|
||
|
||
// 导出单例
|
||
// Shared module-level instance, constructed once when this module is first evaluated.
export const conflictDetectionService = new ConflictDetectionService();
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|