Files
AIclinicalresearch/backend/src/modules/dc/tool-b/services/ConflictDetectionService.ts
HaHafeng 74cf346453 feat(dc/tool-c): Add missing value imputation feature with 6 methods and MICE
Major features:
1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and Backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)

2. Auto precision detection:
   - Automatically match decimal places of original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)

3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data

4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)

5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and removal

Modified files:
Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
Tests: test_fillna_operations.py (774 lines), test scripts and docs
Docs: 5 documentation files updated

Known issues:
- MICE imputation has DataFrame shape mismatch issue (under debugging)
- Workaround: Use 6 simple imputation methods first

Status: Development complete, MICE debugging in progress
Lines added: ~2000 lines across 3 tiers
2025-12-10 13:06:00 +08:00

229 lines
5.5 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* DC模块 - 冲突检测服务
*
* 功能:
* - 比较双模型提取结果
* - 标记冲突字段
* - 计算冲突严重程度
* - 生成冲突报告
*
* 平台能力复用:
* - ✅ logger: 日志记录
*/
import { logger } from '../../../../common/logging/index.js';
export interface ConflictResult {
hasConflict: boolean;
conflictFields: string[];
conflictDetails: Array<{
fieldName: string;
valueA: string;
valueB: string;
similarity: number; // 0-1, 相似度
}>;
severity: 'low' | 'medium' | 'high';
}
export class ConflictDetectionService {
/**
* 检测冲突
*
* @param resultA DeepSeek结果
* @param resultB Qwen结果
* @returns 冲突分析结果
*/
detectConflict(resultA: Record<string, string>, resultB: Record<string, string>): ConflictResult {
try {
logger.info('[Conflict] Starting conflict detection');
const conflictFields: string[] = [];
const conflictDetails: ConflictResult['conflictDetails'] = [];
// 获取所有字段
const allFields = new Set([...Object.keys(resultA), ...Object.keys(resultB)]);
// 逐字段比较
for (const field of allFields) {
const valueA = resultA[field] || '';
const valueB = resultB[field] || '';
// 归一化后比较
const normalizedA = this.normalize(valueA);
const normalizedB = this.normalize(valueB);
if (normalizedA !== normalizedB) {
// 检测到冲突
const similarity = this.calculateSimilarity(normalizedA, normalizedB);
conflictFields.push(field);
conflictDetails.push({
fieldName: field,
valueA,
valueB,
similarity
});
}
}
// 计算严重程度
const severity = this.calculateSeverity(conflictFields.length, allFields.size);
const result: ConflictResult = {
hasConflict: conflictFields.length > 0,
conflictFields,
conflictDetails,
severity
};
logger.info('[Conflict] Detection completed', {
hasConflict: result.hasConflict,
conflictCount: conflictFields.length,
severity
});
return result;
} catch (error) {
logger.error('[Conflict] Detection failed', { error });
throw error;
}
}
/**
* 归一化文本
*
* - 去除空格
* - 转小写
* - 半角化
* - 数值归一化3cm = 3.0cm = 3 cm
*/
private normalize(value: string): string {
let normalized = String(value)
.toLowerCase()
.trim()
.replace(/\s+/g, '') // 去除所有空格
.replace(/[,。;:!?]/g, (match) => { // 全角转半角
return {
'': ',',
'。': '.',
'': ';',
'': ':',
'': '!',
'': '?'
}[match] || match;
});
// 数值归一化:提取数字
const numberMatch = normalized.match(/(\d+\.?\d*)\s*(cm|mm|kg|mg|ml|%)?/);
if (numberMatch) {
const num = parseFloat(numberMatch[1]);
const unit = numberMatch[2] || '';
normalized = `${num}${unit}`;
}
return normalized;
}
/**
* 计算文本相似度Dice Coefficient
*
* 范围0-11表示完全相同
*/
private calculateSimilarity(a: string, b: string): number {
if (a === b) return 1;
if (!a || !b) return 0;
// 生成2-gram
const bigramsA = this.getBigrams(a);
const bigramsB = this.getBigrams(b);
if (bigramsA.size === 0 && bigramsB.size === 0) return 1;
if (bigramsA.size === 0 || bigramsB.size === 0) return 0;
// 计算交集
const intersection = new Set([...bigramsA].filter(x => bigramsB.has(x)));
// Dice系数2 * |A ∩ B| / (|A| + |B|)
const similarity = (2 * intersection.size) / (bigramsA.size + bigramsB.size);
return similarity;
}
/**
* 生成2-gram集合
*/
private getBigrams(str: string): Set<string> {
const bigrams = new Set<string>();
for (let i = 0; i < str.length - 1; i++) {
bigrams.add(str.substring(i, i + 2));
}
return bigrams;
}
/**
* 计算冲突严重程度
*/
private calculateSeverity(conflictCount: number, totalFields: number): 'low' | 'medium' | 'high' {
const conflictRate = conflictCount / totalFields;
if (conflictRate === 0) return 'low';
if (conflictRate <= 0.3) return 'low'; // ≤30%
if (conflictRate <= 0.6) return 'medium'; // 30%-60%
return 'high'; // >60%
}
/**
* 批量检测冲突
*
* @param items 提取记录数组
* @returns 冲突统计
*/
batchDetect(items: Array<{ resultA: Record<string, string>; resultB: Record<string, string> }>): {
totalCount: number;
cleanCount: number;
conflictCount: number;
severityDistribution: Record<'low' | 'medium' | 'high', number>;
} {
let cleanCount = 0;
let conflictCount = 0;
const severityDistribution = { low: 0, medium: 0, high: 0 };
for (const item of items) {
const result = this.detectConflict(item.resultA, item.resultB);
if (result.hasConflict) {
conflictCount++;
severityDistribution[result.severity]++;
} else {
cleanCount++;
}
}
return {
totalCount: items.length,
cleanCount,
conflictCount,
severityDistribution
};
}
}
// 导出单例
export const conflictDetectionService = new ConflictDetectionService();