Files
AIclinicalresearch/backend/src/modules/asl/common/llm/LLM12FieldsService.ts
HaHafeng 7c3cc12b2e feat(iit): Complete CRA Agent V3.0 P1 - ChatOrchestrator with LLM Function Calling
P1 Architecture: Lightweight ReAct (Function Calling loop, max 3 rounds)

Core changes:
- Add ToolDefinition/ToolCall types to LLM adapters (DeepSeek + CloseAI + Claude)
- Replace 6 old tools with 4 semantic tools: read_report, look_up_data, check_quality, search_knowledge
- Create ChatOrchestrator (~160 lines) replacing ChatService (1,442 lines)
- Wire WechatCallbackController to ChatOrchestrator, deprecate ChatService
- Fix nullable content (string | null) across 12+ LLM consumer files

E2E test results: 8/8 scenarios passed (100%)
- QC report query, critical issues, patient data, trend, on-demand QC
- Knowledge base search, project overview, data modification refusal

Net code reduction: ~1,100 lines
Tested: E2E P1 chat test 8/8 passed with DeepSeek API

Made-with: Cursor
2026-02-26 14:27:09 +08:00

561 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { logger } from '../../../../common/logging/index.js';
import { LLMFactory } from '../../../../common/llm/adapters/LLMFactory.js';
import { ILLMAdapter, ModelType } from '../../../../common/llm/adapters/types.js';
import { cache } from '../../../../common/cache/index.js';
import { PromptBuilder, PICOSContext, DEFAULT_MVP_CONFIG } from './PromptBuilder.js';
import { ExtractionClient } from '../../../../common/document/ExtractionClient.js';
import { calculateTokens } from '../utils/tokenCalculator.js';
import { jsonrepair } from 'jsonrepair';
import * as crypto from 'crypto';
/**
 * Model name mapping: maps user-friendly model names to the internal ModelType.
 * Kept consistent with the title/abstract pre-screening stage.
 */
const MODEL_NAME_MAP: Record<string, ModelType> = {
  'deepseek-chat': 'deepseek-v3',
  'deepseek-v3': 'deepseek-v3',
  'qwen-max': 'qwen3-72b', // ⭐ qwen-max = latest/strongest Qwen model
  'qwen-plus': 'qwen3-72b', // qwen-plus = Qwen2.5-72B (second choice)
  'qwen3-72b': 'qwen3-72b',
  'qwen-long': 'qwen-long',
  'gpt-4o': 'gpt-5', // ⭐ gpt-4o maps to gpt-5
  'gpt-5-pro': 'gpt-5',
  'gpt-5': 'gpt-5',
  'claude-sonnet-4.5': 'claude-4.5', // ⭐ claude-sonnet-4.5 alias
  'claude-sonnet-4-5-20250929': 'claude-4.5',
  'claude-4.5': 'claude-4.5',
};
/**
 * LLM processing mode.
 */
export enum LLM12FieldsMode {
  SCREENING = '12fields-screening', // assessment mode (full-text re-screening)
  EXTRACTION = '12fields-extraction', // extraction mode (full-text extraction, future)
}
/**
 * Result of one LLM processing run.
 */
export interface LLMResult {
  result: any; // parsed JSON result
  processingTime: number; // wall-clock processing time (ms)
  tokenUsage: number; // token usage (estimated via calculateTokens)
  cost: number; // cost in CNY
  extractionMethod: string; // 'nougat' | 'pymupdf'
  structuredFormat: boolean; // true when the extracted text is structured Markdown
  rawResponse: string; // raw LLM response (for debugging)
}
/**
 * Nougat extraction options.
 */
interface NougatExtractionOptions {
  preferNougat: boolean; // prefer Nougat first (works best on English papers)
  nougatQualityThreshold: number; // quality threshold 0.0-1.0; below it, fall back to PyMuPDF
}
/**
 * LLM 12-field processing service.
 *
 * Responsibilities:
 * 1. Full-text extraction (Nougat-first, PyMuPDF fallback)
 * 2. Dynamic prompt assembly via PromptBuilder
 * 3. LLM invocation (DeepSeek-V3, Qwen3, GPT, Claude — resolved through LLMFactory)
 * 4. Result caching (1-hour TTL)
 * 5. Dual-model parallel invocation with degraded-mode fault tolerance
 */
export class LLM12FieldsService {
  private promptBuilder: PromptBuilder;
  private extractionClient: ExtractionClient;
  private nougatOptions: NougatExtractionOptions;

  constructor(options?: {
    promptBuilder?: PromptBuilder;
    extractionClient?: ExtractionClient;
    nougatOptions?: Partial<NougatExtractionOptions>;
  }) {
    this.promptBuilder = options?.promptBuilder || new PromptBuilder();
    this.extractionClient = options?.extractionClient || new ExtractionClient();
    this.nougatOptions = {
      preferNougat: true,
      nougatQualityThreshold: 0.8,
      ...options?.nougatOptions,
    };
  }

  /**
   * Process the 12 fields (screening or extraction).
   *
   * Strategy: feed the full text in a single request, relying on prompt
   * engineering to keep the output well-formed.
   *
   * @param mode screening or extraction mode
   * @param model user-friendly model name ('deepseek-v3' | 'qwen-max' | 'deepseek-chat' | ...)
   * @param pdfBuffer raw PDF bytes
   * @param filename original file name (passed to the extraction service)
   * @param picosContext PICOS context used for prompt assembly and cache keying
   * @returns parsed result plus timing/token/cost metadata
   * @throws Error when the model name is unsupported, the LLM fails after all
   *         retries, or the response cannot be parsed as JSON
   */
  async process12Fields(
    mode: LLM12FieldsMode,
    model: string,
    pdfBuffer: Buffer,
    filename: string,
    picosContext: PICOSContext
  ): Promise<LLMResult> {
    const startTime = Date.now();
    logger.info(`Starting 12-fields processing with model: ${model}, mode: ${mode}`);

    // Map the user-facing model name to the internal ModelType; fail fast on unknowns.
    const modelType = MODEL_NAME_MAP[model];
    if (!modelType) {
      throw new Error(
        `Unsupported model name: ${model}. Supported models: ${Object.keys(MODEL_NAME_MAP).join(', ')}`
      );
    }

    // Step 1: extract the full text (Nougat preferred).
    const { fullTextMarkdown, extractionMethod, structuredFormat } =
      await this.extractFullTextStructured(pdfBuffer, filename);
    logger.info(
      `Full-text extracted, method: ${extractionMethod}, structured: ${structuredFormat}, length: ${fullTextMarkdown.length} chars`
    );

    // Step 2: cache lookup (keyed by mode + model + text + PICOS).
    const cacheKey = this.generateCacheKey(mode, model, fullTextMarkdown, picosContext);
    const cached = await this.checkCache(cacheKey);
    if (cached) {
      logger.info('Cache hit, returning cached result');
      return cached;
    }

    // Step 3: assemble the prompt.
    const { systemPrompt, userPrompt } = await this.promptBuilder.buildFullPrompt({
      picosContext,
      fullTextContent: fullTextMarkdown,
      documentFormat: structuredFormat ? 'markdown' : 'plaintext',
      estimatedWordCount: Math.floor(fullTextMarkdown.length / 1.5), // rough word-count estimate
      modelName: model,
      includeCochraneStandards: DEFAULT_MVP_CONFIG.cochraneStandards,
      includeFewShotExamples: DEFAULT_MVP_CONFIG.fewShotExamples,
    });
    logger.info(
      `Prompt built, system: ${systemPrompt.length} chars, user: ${userPrompt.length} chars`
    );

    // Step 4: call the LLM (with retry/backoff).
    const llmAdapter = LLMFactory.getAdapter(modelType);
    const llmResponse = await this.callLLMWithRetry(
      llmAdapter,
      systemPrompt,
      userPrompt,
      mode
    );

    // Step 5: parse the response (3-layer fault-tolerant JSON parsing).
    const parsedResult = this.parseResponse(llmResponse);

    // Step 6: estimate tokens and cost.
    const tokenUsage = calculateTokens(systemPrompt + userPrompt + llmResponse);
    const cost = this.calculateCost(model, tokenUsage);

    const result: LLMResult = {
      result: parsedResult,
      processingTime: Date.now() - startTime,
      tokenUsage,
      cost,
      extractionMethod,
      structuredFormat,
      rawResponse: llmResponse,
    };

    // Step 7: cache the result.
    await this.cacheResult(cacheKey, result);
    logger.info(
      `12-fields processing completed, time: ${result.processingTime}ms, tokens: ${tokenUsage}, cost: ¥${cost.toFixed(4)}`
    );
    return result;
  }

  /**
   * Dual-model parallel invocation (fault-tolerant).
   *
   * Uses Promise.allSettled so one model's failure does not sink the other.
   *
   * Fault-tolerance policy:
   * - both succeed: return both results
   * - one fails: return the surviving result, flagged as degraded mode
   * - both fail: throw with both error messages
   *
   * NOTE(review): modelA/modelB carry defaults but precede required
   * parameters, so TypeScript callers must always pass them explicitly —
   * the defaults only document the intended pairing. Kept as-is to
   * preserve the public signature.
   */
  async processDualModels(
    mode: LLM12FieldsMode,
    modelA: string = 'deepseek-v3',
    modelB: string = 'qwen-max',
    pdfBuffer: Buffer,
    filename: string,
    picosContext: PICOSContext
  ): Promise<{
    resultA: LLMResult | null;
    resultB: LLMResult | null;
    degradedMode: boolean;
    failedModel?: string;
  }> {
    logger.info(`Starting dual-model processing: ${modelA} + ${modelB}`);

    // allSettled: one rejection must not affect the other call.
    const [settledA, settledB] = await Promise.allSettled([
      this.process12Fields(mode, modelA, pdfBuffer, filename, picosContext),
      this.process12Fields(mode, modelB, pdfBuffer, filename, picosContext),
    ]);

    const resultA = settledA.status === 'fulfilled' ? settledA.value : null;
    const resultB = settledB.status === 'fulfilled' ? settledB.value : null;

    // Case 1: both models failed ❌
    if (!resultA && !resultB) {
      const errorA = settledA.status === 'rejected' ? settledA.reason : 'unknown';
      const errorB = settledB.status === 'rejected' ? settledB.reason : 'unknown';
      logger.error('Both models failed', {
        modelA,
        modelB,
        errorA: errorA?.message || String(errorA),
        errorB: errorB?.message || String(errorB)
      });
      throw new Error(
        `Both models (${modelA} and ${modelB}) failed to process. ` +
        `${modelA} error: ${errorA?.message || errorA}. ` +
        `${modelB} error: ${errorB?.message || errorB}.`
      );
    }

    // Case 2: model A failed, fall back to model B ⚠️
    if (!resultA && resultB) {
      const errorA = settledA.status === 'rejected' ? settledA.reason : 'unknown';
      logger.warn(`Model ${modelA} failed, using ${modelB} only (degraded mode)`, {
        failedModel: modelA,
        error: errorA?.message || String(errorA),
        successModelCost: resultB.cost
      });
      return {
        resultA: null,
        resultB,
        degradedMode: true,
        failedModel: modelA
      };
    }

    // Case 3: model B failed, fall back to model A ⚠️
    if (resultA && !resultB) {
      const errorB = settledB.status === 'rejected' ? settledB.reason : 'unknown';
      logger.warn(`Model ${modelB} failed, using ${modelA} only (degraded mode)`, {
        failedModel: modelB,
        error: errorB?.message || String(errorB),
        successModelCost: resultA.cost
      });
      return {
        resultA,
        resultB: null,
        degradedMode: true,
        failedModel: modelB
      };
    }

    // Case 4: both models succeeded ✅
    logger.info(
      `Dual-model processing completed successfully, total cost: ¥${(resultA!.cost + resultB!.cost).toFixed(4)}`
    );
    return {
      resultA,
      resultB,
      degradedMode: false
    };
  }

  /**
   * Extract the full text (Nougat-first strategy).
   *
   * NOTE(review): the Nougat attempt, the PyMuPDF fallback, and the final
   * buffer fallback all call the same extractionClient.extractPdf(...) — the
   * client presumably decides the method internally and reports it via
   * result.method. Confirm against ExtractionClient before changing.
   */
  private async extractFullTextStructured(
    pdfBuffer: Buffer,
    filename: string
  ): Promise<{
    fullTextMarkdown: string;
    extractionMethod: 'nougat' | 'pymupdf';
    structuredFormat: boolean;
  }> {
    logger.info('Extracting full-text with Nougat-first strategy...');

    // Step 1: language detection is intentionally skipped for performance —
    // we try Nougat directly and downgrade on failure or low quality.

    // Step 2: try Nougat first (best results on English papers).
    if (this.nougatOptions.preferNougat) {
      try {
        const nougatResult = await this.extractionClient.extractPdf(pdfBuffer, filename);
        // Accept only if the client actually used Nougat AND quality clears the threshold.
        if (
          nougatResult.method === 'nougat' &&
          (nougatResult.quality || 0) >= this.nougatOptions.nougatQualityThreshold
        ) {
          logger.info('✅ Using Nougat extraction (structured Markdown)');
          return {
            fullTextMarkdown: nougatResult.text,
            extractionMethod: 'nougat',
            structuredFormat: true, // Nougat emits Markdown
          };
        } else {
          logger.warn(
            `⚠️ Nougat quality too low (${nougatResult.quality}), falling back to PyMuPDF`
          );
        }
      } catch (error) {
        logger.warn(`⚠️ Nougat extraction failed: ${(error as Error).message}, falling back to PyMuPDF`);
      }
    }

    // Step 3: downgrade to PyMuPDF (plaintext).
    logger.info('Using PyMuPDF extraction (plaintext)');
    try {
      const pymupdfResult = await this.extractionClient.extractPdf(pdfBuffer, filename);
      return {
        fullTextMarkdown: pymupdfResult.text,
        extractionMethod: 'pymupdf',
        structuredFormat: false, // PyMuPDF emits plain text
      };
    } catch (error) {
      // Step 4: last-resort fallback — treat the buffer as text (test mode).
      logger.warn(`⚠️ PyMuPDF extraction also failed: ${(error as Error).message}, using buffer content directly`);
      const textContent = pdfBuffer.toString('utf-8');
      logger.info('✅ Using buffer content as plain text (test mode)');
      return {
        fullTextMarkdown: textContent,
        extractionMethod: 'pymupdf', // labelled pymupdf for consistency downstream
        structuredFormat: false,
      };
    }
  }

  /**
   * Call the LLM with retry and exponential backoff.
   *
   * @param adapter resolved LLM adapter
   * @param systemPrompt system message content
   * @param userPrompt user message content
   * @param _mode reserved (unused) — kept for signature stability
   * @param maxRetries number of retries after the first attempt (default 2 → 3 attempts)
   * @returns the response content ('' when the adapter returns null content)
   * @throws Error when every attempt fails
   */
  private async callLLMWithRetry(
    adapter: ILLMAdapter,
    systemPrompt: string,
    userPrompt: string,
    _mode: LLM12FieldsMode,
    maxRetries: number = 2
  ): Promise<string> {
    let lastError: Error | null = null;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        logger.info(`LLM call attempt ${attempt + 1}/${maxRetries + 1}`);
        const response = await adapter.chat(
          [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userPrompt },
          ],
          {
            temperature: 0.1, // low temperature for output consistency
            maxTokens: 8000, // enough for 12 fields + processing log
          }
        );
        return response.content ?? '';
      } catch (error) {
        lastError = error as Error;
        logger.error(`LLM call attempt ${attempt + 1} failed: ${(error as Error).message}`);
        if (attempt < maxRetries) {
          // Exponential backoff: 1s, 2s, 4s, ...
          const waitTime = Math.pow(2, attempt) * 1000;
          logger.info(`Retrying in ${waitTime}ms...`);
          await new Promise((resolve) => setTimeout(resolve, waitTime));
        }
      }
    }
    throw new Error(`LLM call failed after ${maxRetries + 1} attempts: ${lastError?.message}`);
  }

  /**
   * Parse the LLM response with a 3-layer fault-tolerance strategy:
   *
   * Layer 1: strict JSON.parse
   * Layer 2: automatic repair via jsonrepair
   * Layer 3: extract a fenced code block / brace span, then parse (and repair)
   *
   * @throws Error when all three layers fail
   */
  private parseResponse(response: string): any {
    // Layer 1: strict JSON parsing.
    try {
      const result = JSON.parse(response);
      logger.info('JSON parsed successfully (Layer 1: strict)');
      return result;
    } catch (layer1Error) {
      logger.warn('Layer 1 failed: strict JSON parsing failed, trying Layer 2...');
    }

    // Layer 2: automatic JSON repair.
    try {
      const repaired = jsonrepair(response);
      const result = JSON.parse(repaired);
      logger.warn('JSON auto-repaired (Layer 2)', {
        originalLength: response.length,
        repairedLength: repaired.length,
        message: 'LLM output had format issues, auto-repaired successfully'
      });
      return result;
    } catch (layer2Error) {
      logger.warn('Layer 2 failed: JSON repair failed, trying Layer 3...');
    }

    // Layer 3: extract from code blocks.
    let layer3Error: Error | null = null;
    try {
      // Match several code-block shapes, most specific first.
      const patterns = [
        /```json\s*\n([\s\S]*?)\n```/, // ```json ... ```
        /```\s*\n([\s\S]*?)\n```/, // ``` ... ```
        /\{[\s\S]*\}/, // bare {...} span
      ];
      for (const pattern of patterns) {
        const match = response.match(pattern);
        if (match) {
          const extracted = match[1] || match[0];
          // Try strict parsing of the extracted span first.
          try {
            const result = JSON.parse(extracted);
            logger.warn('JSON extracted from code block (Layer 3)', {
              pattern: pattern.source,
              message: 'LLM wrapped JSON in code block'
            });
            return result;
          } catch {
            // Then try repairing the extracted span.
            const repaired = jsonrepair(extracted);
            const result = JSON.parse(repaired);
            logger.warn('JSON extracted and repaired (Layer 3)', {
              pattern: pattern.source,
              message: 'LLM wrapped JSON in code block with format issues'
            });
            return result;
          }
        }
      }
      throw new Error('No valid JSON found in response');
    } catch (error) {
      layer3Error = error as Error;
      logger.error('All 3 layers failed to parse JSON');
    }

    // Final failure: log details and surface a descriptive error.
    const err = layer3Error || new Error('Unknown parsing error');
    logger.error('Failed to parse LLM response after all 3 layers', {
      error: err.message,
      responsePreview: response.substring(0, 500),
      responseLength: response.length
    });
    throw new Error(
      `Invalid JSON response from LLM after 3 parsing attempts: ${err.message}. ` +
      `Please check logs for response preview.`
    );
  }

  /**
   * Build the cache key: mode + model + SHA-256 digest (first 16 hex chars)
   * of the full text combined with the PICOS context.
   */
  private generateCacheKey(
    mode: LLM12FieldsMode,
    model: string,
    fullText: string,
    picosContext: PICOSContext
  ): string {
    const hash = crypto
      .createHash('sha256')
      .update(fullText + JSON.stringify(picosContext))
      .digest('hex')
      .substring(0, 16);
    return `llm:${mode}:${model}:${hash}`;
  }

  /**
   * Read a cached result. Cache failures are non-fatal (treated as a miss).
   */
  private async checkCache(cacheKey: string): Promise<LLMResult | null> {
    try {
      const cached = await cache.get(cacheKey);
      return cached ? JSON.parse(cached) : null;
    } catch (error) {
      logger.warn(`Cache check failed: ${(error as Error).message}`);
      return null;
    }
  }

  /**
   * Cache a result for 1 hour. Cache failures are logged and swallowed.
   */
  private async cacheResult(cacheKey: string, result: LLMResult): Promise<void> {
    try {
      await cache.set(cacheKey, JSON.stringify(result), 3600);
      logger.info(`Result cached with key: ${cacheKey}`);
    } catch (error) {
      logger.warn(`Cache set failed: ${(error as Error).message}`);
    }
  }

  /**
   * Estimate the cost in CNY for a run.
   *
   * Fix: the table was previously keyed only by a handful of user-facing
   * names, so aliases such as 'gpt-5', 'claude-sonnet-4.5' or 'qwen3-72b'
   * silently fell through to the ¥0.01/1K default. The name is now
   * normalized through MODEL_NAME_MAP first, with internal ModelType keys
   * added; legacy keys are kept for backward compatibility.
   */
  private calculateCost(model: string, tokenUsage: number): number {
    // Cost table (CNY per 1K tokens).
    const COST_TABLE: Record<string, number> = {
      // Internal ModelType keys (post-normalization).
      'deepseek-v3': 0.001, // ¥0.001/1K tokens
      'qwen3-72b': 0.004, // ¥0.004/1K tokens (qwen-max tier)
      'gpt-5': 0.03, // ≈ ¥0.03/1K tokens
      'claude-4.5': 0.02, // ≈ ¥0.02/1K tokens
      // Legacy user-facing keys (kept for direct lookups).
      'qwen-max': 0.004, // ¥0.004/1K tokens
      'qwen-plus': 0.002, // ¥0.002/1K tokens
      'qwen-turbo': 0.0008, // ¥0.0008/1K tokens
      'gpt-4o': 0.03, // $0.005/1K tokens ≈ ¥0.03/1K tokens
      'claude-3.5-sonnet': 0.02, // $0.003/1K tokens ≈ ¥0.02/1K tokens
    };
    // Normalize the alias first (e.g. 'deepseek-chat' -> 'deepseek-v3'),
    // then fall back to the raw name, then to a conservative default.
    const normalized = MODEL_NAME_MAP[model] ?? model;
    const costPerK = COST_TABLE[normalized] ?? COST_TABLE[model] ?? 0.01;
    return (tokenUsage / 1000) * costPerK;
  }
}
/**
 * Shared LLM12FieldsService singleton (default options).
 */
export const llm12FieldsService = new LLM12FieldsService();