// AIclinicalresearch/backend/src/modules/dc/tool-b/services/ConflictDetectionService.ts

/**
 * DC模块 - 冲突检测服务
 *
 * 功能：
 * - 比较双模型提取结果
 * - 标记冲突字段
 * - 计算冲突严重程度
 * - 生成冲突报告
 *
 * 平台能力复用：
 * - ✅ logger: 日志记录
 */

import { logger } from '../../../../common/logging/index.js';

/**
 * Outcome of comparing the two models' extraction results for one record.
 */
export interface ConflictResult {
  /** True when at least one field differs after normalization. */
  hasConflict: boolean;

  /** Names of the fields whose normalized values differ. */
  conflictFields: string[];

  /** One entry per conflicting field, carrying both values as compared. */
  conflictDetails: {
    /** Name of the conflicting field. */
    fieldName: string;
    /** Value taken from the first result. */
    valueA: string;
    /** Value taken from the second result. */
    valueB: string;
    /** Similarity score in the range 0-1. */
    similarity: number;
  }[];

  /** Overall severity bucket for the record. */
  severity: 'low' | 'medium' | 'high';
}

/**
 * Compares the extraction results of two models field by field, flags the
 * fields that disagree, scores how similar the disagreeing values are, and
 * buckets each record into a severity level.
 */
export class ConflictDetectionService {
  /** Full-width punctuation mapped to its half-width counterpart. */
  private static readonly HALF_WIDTH_PUNCTUATION: Readonly<Record<string, string>> = {
    '，': ',',
    '。': '.',
    '；': ';',
    '：': ':',
    '！': '!',
    '？': '?'
  };

  /** Matches every full-width punctuation mark that has a half-width mapping. */
  private static readonly FULL_WIDTH_PATTERN = /[，。；：！？]/g;

  /** Matches every decimal number: integer part plus optional fraction. */
  private static readonly NUMBER_PATTERN = /(\d+)(?:\.(\d+))?/g;

  /**
   * Detect conflicts between two extraction results.
   *
   * Every field present in either result is compared after normalization;
   * a field missing from one side is treated as an empty string.
   *
   * @param resultA Result of the first model (DeepSeek)
   * @param resultB Result of the second model (Qwen)
   * @returns Conflict analysis for the record
   * @throws Rethrows any error raised during detection after logging it
   */
  detectConflict(resultA: Record<string, string>, resultB: Record<string, string>): ConflictResult {
    try {
      logger.info('[Conflict] Starting conflict detection');

      const conflictFields: string[] = [];
      const conflictDetails: ConflictResult['conflictDetails'] = [];

      // Union of the field names of both results.
      const allFields = new Set([...Object.keys(resultA), ...Object.keys(resultB)]);

      for (const field of allFields) {
        // `??` (not `||`) so that falsy-but-present runtime values such as 0
        // are not mistaken for a missing field; String() keeps the reported
        // values consistent with the declared `string` type.
        const valueA = String(resultA[field] ?? '');
        const valueB = String(resultB[field] ?? '');

        const normalizedA = this.normalize(valueA);
        const normalizedB = this.normalize(valueB);

        if (normalizedA !== normalizedB) {
          conflictFields.push(field);
          conflictDetails.push({
            fieldName: field,
            valueA,
            valueB,
            similarity: this.calculateSimilarity(normalizedA, normalizedB)
          });
        }
      }

      const severity = this.calculateSeverity(conflictFields.length, allFields.size);

      const result: ConflictResult = {
        hasConflict: conflictFields.length > 0,
        conflictFields,
        conflictDetails,
        severity
      };

      logger.info('[Conflict] Detection completed', {
        hasConflict: result.hasConflict,
        conflictCount: conflictFields.length,
        severity
      });

      return result;
    } catch (error) {
      logger.error('[Conflict] Detection failed', { error });
      throw error;
    }
  }

  /**
   * Normalize a value for comparison.
   *
   * - lower-cases the text
   * - removes all whitespace
   * - converts full-width punctuation to half-width
   * - canonicalizes every number in place (3cm = 3.0cm = 3 cm)
   *
   * Numbers are rewritten where they stand, so the surrounding text still
   * takes part in the comparison. They are canonicalized by string
   * manipulation rather than `parseFloat`, so long digit strings do not
   * lose precision.
   *
   * @param value Raw field value
   * @returns Normalized text used for equality and similarity checks
   */
  private normalize(value: string): string {
    return String(value)
      .toLowerCase()
      .trim()
      .replace(/\s+/g, '')
      .replace(
        ConflictDetectionService.FULL_WIDTH_PATTERN,
        (mark) => ConflictDetectionService.HALF_WIDTH_PUNCTUATION[mark] ?? mark
      )
      .replace(
        ConflictDetectionService.NUMBER_PATTERN,
        (_match: string, intPart: string, fracPart: string | undefined) => {
          // Drop leading zeros but always keep at least one digit.
          const integer = intPart.replace(/^0+(?=\d)/, '');
          // Drop trailing zeros of the fraction; "3.0" collapses to "3".
          const fraction = (fracPart ?? '').replace(/0+$/, '');
          return fraction ? `${integer}.${fraction}` : integer;
        }
      );
  }

  /**
   * Compute text similarity as a Dice coefficient over character bigrams.
   *
   * @param a First normalized string
   * @param b Second normalized string
   * @returns A value in 0-1, where 1 means identical
   */
  private calculateSimilarity(a: string, b: string): number {
    if (a === b) return 1;
    if (!a || !b) return 0;

    const bigramsA = this.getBigrams(a);
    const bigramsB = this.getBigrams(b);

    // A string shorter than two characters has no bigrams. The strings are
    // already known to differ here, so there is no measurable overlap.
    if (bigramsA.size === 0 || bigramsB.size === 0) return 0;

    let shared = 0;
    for (const gram of bigramsA) {
      if (bigramsB.has(gram)) shared++;
    }

    // Dice coefficient: 2 * |A ∩ B| / (|A| + |B|)
    return (2 * shared) / (bigramsA.size + bigramsB.size);
  }

  /**
   * Build the set of distinct 2-character substrings of a string.
   *
   * @param str Input text
   * @returns Set of bigrams; empty when the text has fewer than two characters
   */
  private getBigrams(str: string): Set<string> {
    const bigrams = new Set<string>();
    for (let i = 0; i < str.length - 1; i++) {
      bigrams.add(str.substring(i, i + 2));
    }
    return bigrams;
  }

  /**
   * Bucket a record by the share of its fields that conflict.
   *
   * @param conflictCount Number of conflicting fields
   * @param totalFields Number of fields compared
   * @returns 'low' for a rate of at most 30% (or when nothing was compared),
   *          'medium' for at most 60%, otherwise 'high'
   */
  private calculateSeverity(conflictCount: number, totalFields: number): 'low' | 'medium' | 'high' {
    // Without this guard 0/0 yields NaN, which fails every comparison below
    // and would report 'high' for an empty, conflict-free comparison.
    if (totalFields <= 0) return 'low';

    const conflictRate = conflictCount / totalFields;

    if (conflictRate <= 0.3) return 'low';
    if (conflictRate <= 0.6) return 'medium';
    return 'high';
  }

  /**
   * Run conflict detection over a batch of records and aggregate the outcome.
   *
   * @param items Records, each holding the two results to compare
   * @returns Total, clean and conflicting record counts, plus the severity
   *          distribution of the conflicting records
   */
  batchDetect(items: Array<{ resultA: Record<string, string>; resultB: Record<string, string> }>): {
    totalCount: number;
    cleanCount: number;
    conflictCount: number;
    severityDistribution: Record<'low' | 'medium' | 'high', number>;
  } {
    let cleanCount = 0;
    let conflictCount = 0;
    const severityDistribution = { low: 0, medium: 0, high: 0 };

    for (const item of items) {
      const result = this.detectConflict(item.resultA, item.resultB);

      if (result.hasConflict) {
        conflictCount++;
        severityDistribution[result.severity]++;
      } else {
        cleanCount++;
      }
    }

    return {
      totalCount: items.length,
      cleanCount,
      conflictCount,
      severityDistribution
    };
  }
}

// Export a module-level instance of the service.
export const conflictDetectionService = new ConflictDetectionService();