Files
AIclinicalresearch/backend/src/modules/dc/tool-b/services/ConflictDetectionService.ts
HaHafeng 2481b786d8 deploy: Complete 0126-27 deployment - database upgrade, services update, code recovery
Major Changes:
- Database: Install pg_bigm/pgvector plugins, create test database
- Python service: v1.0 -> v1.1, add pymupdf4llm/openpyxl/pypandoc
- Node.js backend: v1.3 -> v1.7, fix pino-pretty and ES Module imports
- Frontend: v1.2 -> v1.3, skip TypeScript check for deployment
- Code recovery: Restore empty files from local backup

Technical Fixes:
- Fix pino-pretty error in production (conditional loading)
- Fix ES Module import paths (add .js extensions)
- Fix OSSAdapter TypeScript errors
- Update Prisma Schema (63 models, 16 schemas)
- Update environment variables (DATABASE_URL, EXTRACTION_SERVICE_URL, OSS)
- Remove deprecated variables (REDIS_URL, DIFY_API_URL, DIFY_API_KEY)

Documentation:
- Create 0126 deployment folder with 8 documents
- Update database development standards v2.0
- Update SAE deployment status records

Deployment Status:
- PostgreSQL: ai_clinical_research_test with plugins
- Python: v1.1 @ 172.17.173.84:8000
- Backend: v1.7 @ 172.17.173.89:3001
- Frontend: v1.3 @ 172.17.173.90:80

Tested: All services running successfully on SAE
2026-01-27 08:13:27 +08:00

294 lines
5.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* DC模块 - 冲突检测服务
*
* 功能:
* - 比较双模型提取结果
* - 标记冲突字段
* - 计算冲突严重程度
* - 生成冲突报告
*
* 平台能力复用:
* - ✅ logger: 日志记录
*/
import { logger } from '../../../../common/logging/index.js';
/**
 * Outcome of comparing the extraction results of two models.
 */
export interface ConflictResult {
  /** True when at least one field differs after normalization. */
  hasConflict: boolean;
  /** Names of the fields whose values disagree. */
  conflictFields: string[];
  /** Per-field detail for every disagreement. */
  conflictDetails: {
    /** Name of the conflicting field. */
    fieldName: string;
    /** Value taken from the first result. */
    valueA: string;
    /** Value taken from the second result. */
    valueB: string;
    /** Similarity score in the range 0-1. */
    similarity: number;
  }[];
  /** Overall severity of the disagreement. */
  severity: 'low' | 'medium' | 'high';
}
/**
 * Compares the extraction results produced by two models, flags the fields
 * on which they disagree, scores how similar the disagreeing values are and
 * rates the overall severity of the disagreement.
 */
export class ConflictDetectionService {
  /** Highest conflict rate (inclusive) still rated `'low'`. */
  private static readonly LOW_MAX_RATE = 0.3;

  /** Highest conflict rate (inclusive) still rated `'medium'`. */
  private static readonly MEDIUM_MAX_RATE = 0.6;

  /**
   * Numbers with more significant digits than this cannot be represented
   * exactly as a double, so they are compared as text instead.
   */
  private static readonly MAX_EXACT_DIGITS = 15;

  /**
   * Detects conflicts between two extraction results.
   *
   * Every field present in either result is compared after normalization
   * (see {@link normalize}); a field missing from one side is treated as an
   * empty string.
   *
   * @param resultA Result of the first model (DeepSeek)
   * @param resultB Result of the second model (Qwen)
   * @returns The conflicting fields, per-field details and overall severity
   * @throws Rethrows any error raised during detection after logging it
   */
  detectConflict(resultA: Record<string, string>, resultB: Record<string, string>): ConflictResult {
    try {
      logger.info('[Conflict] Starting conflict detection');

      const conflictFields: string[] = [];
      const conflictDetails: ConflictResult['conflictDetails'] = [];

      // Union of the field names found in either result.
      const allFields = new Set([...Object.keys(resultA), ...Object.keys(resultB)]);

      for (const field of allFields) {
        const valueA = this.readField(resultA, field);
        const valueB = this.readField(resultB, field);

        const normalizedA = this.normalize(valueA);
        const normalizedB = this.normalize(valueB);
        if (normalizedA === normalizedB) {
          continue;
        }

        conflictFields.push(field);
        conflictDetails.push({
          fieldName: field,
          valueA,
          valueB,
          similarity: this.calculateSimilarity(normalizedA, normalizedB)
        });
      }

      const severity = this.calculateSeverity(conflictFields.length, allFields.size);
      const result: ConflictResult = {
        hasConflict: conflictFields.length > 0,
        conflictFields,
        conflictDetails,
        severity
      };

      logger.info('[Conflict] Detection completed', {
        hasConflict: result.hasConflict,
        conflictCount: conflictFields.length,
        severity
      });
      return result;
    } catch (error) {
      logger.error('[Conflict] Detection failed', { error });
      throw error;
    }
  }

  /**
   * Reads one field of a result as text.
   *
   * Only own properties are read, so a field named e.g. `constructor` that
   * exists on one side only is not resolved through the prototype chain.
   * Missing, `null` and `undefined` values become `''`. Non-string values
   * that reach this point at runtime (e.g. a numeric `0` from parsed JSON)
   * are converted instead of being treated as missing.
   */
  private readField(record: Record<string, string>, field: string): string {
    if (!Object.prototype.hasOwnProperty.call(record, field)) {
      return '';
    }
    const raw: unknown = record[field];
    if (raw === null || raw === undefined) {
      return '';
    }
    if (typeof raw === 'string') {
      return raw;
    }
    // String(object) would yield "[object Object]" for every object.
    return typeof raw === 'object' ? String(JSON.stringify(raw)) : String(raw);
  }

  /**
   * Normalizes a value for comparison.
   *
   * - full-width characters are converted to their half-width form
   * - the text is lower-cased
   * - all whitespace is removed
   * - a value that consists solely of a number with an optional unit suffix
   *   gets its number canonicalized, so `3cm`, `3.0cm` and `3 cm` are equal
   *
   * Values that merely contain a number (dates, ranges, free text) are left
   * untouched so that differing text around the number is still detected.
   */
  private normalize(value: string): string {
    const text = String(value)
      // Full-width ASCII variants (U+FF01-U+FF5E) sit exactly 0xFEE0 above
      // their half-width counterparts.
      .replace(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xfee0))
      // Ideographic full stop.
      .replace(/\u3002/g, '.')
      .toLowerCase()
      .replace(/\s+/g, '');

    // Anchored on both ends: the whole value must be "<number><unit>".
    const numeric = /^([+-]?(?:\d+\.?\d*|\.\d+))([^\d]*)$/.exec(text);
    if (!numeric) {
      return text;
    }

    const digits = numeric[1] ?? '';
    const unit = numeric[2] ?? '';
    if (digits.replace(/\D/g, '').length > ConflictDetectionService.MAX_EXACT_DIGITS) {
      // Too long to round-trip through a double without losing digits.
      return text;
    }
    return `${parseFloat(digits)}${unit}`;
  }

  /**
   * Computes the similarity of two strings as a Dice coefficient over their
   * sets of 2-grams.
   *
   * When either string is too short to contain a 2-gram, single characters
   * are compared instead, so two different one-character values score 0.
   *
   * @returns A value in the range 0-1; 1 means the strings are identical
   */
  private calculateSimilarity(a: string, b: string): number {
    if (a === b) return 1;
    if (!a || !b) return 0;

    let gramsA = this.getBigrams(a);
    let gramsB = this.getBigrams(b);
    if (gramsA.size === 0 || gramsB.size === 0) {
      gramsA = new Set(a);
      gramsB = new Set(b);
    }

    let shared = 0;
    for (const gram of gramsA) {
      if (gramsB.has(gram)) {
        shared++;
      }
    }

    // Dice coefficient: 2 * |A ∩ B| / (|A| + |B|)
    return (2 * shared) / (gramsA.size + gramsB.size);
  }

  /**
   * Builds the set of 2-grams (adjacent character pairs) of a string.
   */
  private getBigrams(str: string): Set<string> {
    const bigrams = new Set<string>();
    for (let i = 0; i < str.length - 1; i++) {
      bigrams.add(str.substring(i, i + 2));
    }
    return bigrams;
  }

  /**
   * Rates the severity from the share of conflicting fields:
   * at most 30% is `'low'`, at most 60% is `'medium'`, above that `'high'`.
   *
   * Returns `'low'` when there are no fields or no conflicts, which also
   * avoids dividing by zero for two empty results.
   */
  private calculateSeverity(conflictCount: number, totalFields: number): 'low' | 'medium' | 'high' {
    if (totalFields <= 0 || conflictCount <= 0) return 'low';

    const conflictRate = conflictCount / totalFields;
    if (conflictRate <= ConflictDetectionService.LOW_MAX_RATE) return 'low';
    if (conflictRate <= ConflictDetectionService.MEDIUM_MAX_RATE) return 'medium';
    return 'high';
  }

  /**
   * Runs conflict detection over a batch of result pairs.
   *
   * @param items Pairs of extraction results to compare
   * @returns Totals of clean and conflicting pairs, plus the severity
   *          distribution of the conflicting pairs
   */
  batchDetect(items: Array<{ resultA: Record<string, string>; resultB: Record<string, string> }>): {
    totalCount: number;
    cleanCount: number;
    conflictCount: number;
    severityDistribution: Record<'low' | 'medium' | 'high', number>;
  } {
    let cleanCount = 0;
    let conflictCount = 0;
    const severityDistribution: Record<'low' | 'medium' | 'high', number> = { low: 0, medium: 0, high: 0 };

    for (const item of items) {
      const result = this.detectConflict(item.resultA, item.resultB);
      if (result.hasConflict) {
        conflictCount++;
        severityDistribution[result.severity]++;
      } else {
        cleanCount++;
      }
    }

    return {
      totalCount: items.length,
      cleanCount,
      conflictCount,
      severityDistribution
    };
  }
}
// Export a module-level singleton instance of the service.
export const conflictDetectionService = new ConflictDetectionService();