import { logger } from '../../../../common/logging/index.js';
import { LLMFactory } from '../../../../common/llm/adapters/LLMFactory.js';
import { ILLMAdapter, ModelType } from '../../../../common/llm/adapters/types.js';
import { cache } from '../../../../common/cache/index.js';
import { PromptBuilder, PICOSContext, DEFAULT_MVP_CONFIG } from './PromptBuilder.js';
import { ExtractionClient } from '../../../../common/document/ExtractionClient.js';
import { calculateTokens } from '../utils/tokenCalculator.js';
import { jsonrepair } from 'jsonrepair';
import * as crypto from 'crypto';

/**
 * Model name mapping: translates user-facing model names to the internal ModelType.
 * Kept consistent with the title/abstract screening stage.
 */
const MODEL_NAME_MAP: Record<string, ModelType> = {
  'deepseek-chat': 'deepseek-v3',
  'deepseek-v3': 'deepseek-v3',
  'qwen-max': 'qwen3-72b', // qwen-max = latest and strongest Qwen model
  'qwen-plus': 'qwen3-72b', // qwen-plus = Qwen2.5-72B (second choice)
  'qwen3-72b': 'qwen3-72b',
  'qwen-long': 'qwen-long',
  'gpt-4o': 'gpt-5', // gpt-4o is mapped to gpt-5
  'gpt-5-pro': 'gpt-5',
  'gpt-5': 'gpt-5',
  'claude-sonnet-4.5': 'claude-4.5', // claude-sonnet-4.5 alias
  'claude-sonnet-4-5-20250929': 'claude-4.5',
  'claude-4.5': 'claude-4.5',
};

/**
 * Price table in CNY per 1K tokens, keyed by model name.
 * Hoisted to module level so it is built once rather than on every cost calculation.
 */
const COST_PER_1K_TOKENS_CNY: Record<string, number> = {
  'deepseek-v3': 0.001, // ¥0.001/1K tokens
  'qwen-max': 0.004, // ¥0.004/1K tokens
  'qwen-plus': 0.002, // ¥0.002/1K tokens
  'qwen-turbo': 0.0008, // ¥0.0008/1K tokens
  'gpt-4o': 0.03, // $0.005/1K tokens ≈ ¥0.03/1K tokens
  'claude-3.5-sonnet': 0.02, // $0.003/1K tokens ≈ ¥0.02/1K tokens
};

/** Price (CNY per 1K tokens) applied when a model has no entry in the price table. */
const DEFAULT_COST_PER_1K_TOKENS_CNY = 0.01;

/**
 * Patterns used to pull a JSON payload out of a free-form LLM response, tried in order.
 * None of them carries the `g` flag, so sharing them across calls is stateless.
 */
const JSON_EXTRACTION_PATTERNS: readonly RegExp[] = [
  /```json\s*\n([\s\S]*?)\n```/, // ```json ... ```
  /```\s*\n([\s\S]*?)\n```/, // ``` ... ```
  /\{[\s\S]*\}/, // bare {...}
];

/** Returns a printable message for anything that can be thrown or used as a rejection reason. */
function describeError(error: unknown): string {
  return error instanceof Error && error.message ? error.message : String(error);
}

/** True when a parsed JSON value is an object or an array (i.e. not a bare scalar or null). */
function isJsonContainer(value: unknown): boolean {
  return typeof value === 'object' && value !== null;
}

/**
 * LLM processing mode.
 */
export enum LLM12FieldsMode {
  SCREENING = '12fields-screening', // assessment mode (full-text re-screening)
  EXTRACTION = '12fields-extraction', // extraction mode (full-text extraction, future)
}

/**
 * Result of one LLM processing run.
 */
export interface LLMResult {
  result: any; // parsed JSON result
  processingTime: number; // processing time in milliseconds
  tokenUsage: number; // number of tokens used
  cost: number; // cost in CNY
  extractionMethod: string; // 'nougat' | 'pymupdf'
  structuredFormat: boolean; // whether the extracted text is structured (Markdown)
  rawResponse: string; // raw LLM response (for debugging)
}

/**
 * Nougat extraction options.
 */
interface NougatExtractionOptions {
  preferNougat: boolean; // whether to try Nougat first (English papers)
  nougatQualityThreshold: number; // Nougat quality threshold (0.0-1.0); below it, fall back to PyMuPDF
}

/**
 * LLM 12-fields processing service.
 *
 * Responsibilities:
 * 1. Full-text extraction (Nougat first)
 * 2. Dynamic prompt assembly
 * 3. LLM invocation (DeepSeek-V3, Qwen3-Max, ...)
 * 4. Result caching
 * 5. Parallel invocation of two models
 */
export class LLM12FieldsService {
  private readonly promptBuilder: PromptBuilder;
  private readonly extractionClient: ExtractionClient;
  private readonly nougatOptions: NougatExtractionOptions;

  /**
   * @param options Optional collaborators and Nougat settings; anything omitted
   *                falls back to a freshly constructed default.
   */
  constructor(options?: {
    promptBuilder?: PromptBuilder;
    extractionClient?: ExtractionClient;
    nougatOptions?: Partial<NougatExtractionOptions>;
  }) {
    this.promptBuilder = options?.promptBuilder ?? new PromptBuilder();
    this.extractionClient = options?.extractionClient ?? new ExtractionClient();
    this.nougatOptions = {
      preferNougat: true,
      nougatQualityThreshold: 0.8,
      ...options?.nougatOptions,
    };
  }

  /**
   * Runs the 12-fields pipeline (screening or extraction) for one model.
   *
   * Strategy: the full text is sent in a single request and quality is driven
   * by prompt engineering.
   *
   * @param mode         Processing mode.
   * @param model        User-facing model name, e.g. 'deepseek-v3', 'qwen-max', 'deepseek-chat'.
   * @param pdfBuffer    Raw PDF bytes.
   * @param filename     File name forwarded to the extraction client.
   * @param picosContext PICOS context forwarded to the prompt builder and the cache key.
   * @returns The parsed result together with timing, token and cost metadata.
   * @throws Error when the model name is not in MODEL_NAME_MAP, when the LLM call
   *         fails after all retries, or when the response cannot be parsed as JSON.
   */
  async process12Fields(
    mode: LLM12FieldsMode,
    model: string,
    pdfBuffer: Buffer,
    filename: string,
    picosContext: PICOSContext
  ): Promise<LLMResult> {
    const startTime = Date.now();
    logger.info(`Starting 12-fields processing with model: ${model}, mode: ${mode}`);

    // Resolve the user-facing name to the internal ModelType.
    const modelType = MODEL_NAME_MAP[model];
    if (!modelType) {
      throw new Error(
        `Unsupported model name: ${model}. Supported models: ${Object.keys(MODEL_NAME_MAP).join(', ')}`
      );
    }

    // Step 1: extract the full text (Nougat first).
    const { fullTextMarkdown, extractionMethod, structuredFormat } =
      await this.extractFullTextStructured(pdfBuffer, filename);
    logger.info(
      `Full-text extracted, method: ${extractionMethod}, structured: ${structuredFormat}, length: ${fullTextMarkdown.length} chars`
    );

    // Step 2: check the cache.
    const cacheKey = this.generateCacheKey(mode, model, fullTextMarkdown, picosContext);
    const cached = await this.checkCache(cacheKey);
    if (cached) {
      logger.info('Cache hit, returning cached result');
      return cached;
    }

    // Step 3: build the prompt.
    const { systemPrompt, userPrompt } = await this.promptBuilder.buildFullPrompt({
      picosContext,
      fullTextContent: fullTextMarkdown,
      documentFormat: structuredFormat ? 'markdown' : 'plaintext',
      estimatedWordCount: Math.floor(fullTextMarkdown.length / 1.5), // rough word-count estimate
      modelName: model,
      includeCochraneStandards: DEFAULT_MVP_CONFIG.cochraneStandards,
      includeFewShotExamples: DEFAULT_MVP_CONFIG.fewShotExamples,
    });
    logger.info(
      `Prompt built, system: ${systemPrompt.length} chars, user: ${userPrompt.length} chars`
    );

    // Step 4: call the LLM.
    const llmAdapter = LLMFactory.getAdapter(modelType);
    const llmResponse = await this.callLLMWithRetry(llmAdapter, systemPrompt, userPrompt, mode);

    // Step 5: parse the response.
    const parsedResult = this.parseResponse(llmResponse);

    // Step 6: compute token usage and cost.
    const tokenUsage = calculateTokens(systemPrompt + userPrompt + llmResponse);
    const cost = this.calculateCost(model, tokenUsage);

    const result: LLMResult = {
      result: parsedResult,
      processingTime: Date.now() - startTime,
      tokenUsage,
      cost,
      extractionMethod,
      structuredFormat,
      rawResponse: llmResponse,
    };

    // Step 7: cache the result.
    await this.cacheResult(cacheKey, result);

    logger.info(
      `12-fields processing completed, time: ${result.processingTime}ms, tokens: ${tokenUsage}, cost: ¥${cost.toFixed(4)}`
    );

    return result;
  }

  /**
   * Runs two models in parallel (fault-tolerant variant).
   *
   * Promise.allSettled is used so that one model failing does not affect the other.
   *
   * Outcomes:
   * - both succeed: both results are returned, degradedMode is false
   * - exactly one fails: the surviving result is returned, degradedMode is true
   *   and failedModel names the model that failed
   * - both fail: an Error carrying both failure messages is thrown
   *
   * @throws Error when both models fail.
   */
  async processDualModels(
    mode: LLM12FieldsMode,
    modelA: string = 'deepseek-v3',
    modelB: string = 'qwen-max',
    pdfBuffer: Buffer,
    filename: string,
    picosContext: PICOSContext
  ): Promise<{
    resultA: LLMResult | null;
    resultB: LLMResult | null;
    degradedMode: boolean;
    failedModel?: string;
  }> {
    logger.info(`Starting dual-model processing: ${modelA} + ${modelB}`);

    // allSettled: a rejection from one model must not discard the other's result.
    const [settledA, settledB] = await Promise.allSettled([
      this.process12Fields(mode, modelA, pdfBuffer, filename, picosContext),
      this.process12Fields(mode, modelB, pdfBuffer, filename, picosContext),
    ]);

    const resultA = settledA.status === 'fulfilled' ? settledA.value : null;
    const resultB = settledB.status === 'fulfilled' ? settledB.value : null;
    const errorA: unknown = settledA.status === 'rejected' ? settledA.reason : 'unknown';
    const errorB: unknown = settledB.status === 'rejected' ? settledB.reason : 'unknown';

    // Case: both models succeeded.
    if (resultA && resultB) {
      logger.info(
        `Dual-model processing completed successfully, total cost: ¥${(resultA.cost + resultB.cost).toFixed(4)}`
      );
      return { resultA, resultB, degradedMode: false };
    }

    // Case: model A failed, continue with model B only.
    if (resultB) {
      logger.warn(`Model ${modelA} failed, using ${modelB} only (degraded mode)`, {
        failedModel: modelA,
        error: describeError(errorA),
        successModelCost: resultB.cost,
      });
      return { resultA: null, resultB, degradedMode: true, failedModel: modelA };
    }

    // Case: model B failed, continue with model A only.
    if (resultA) {
      logger.warn(`Model ${modelB} failed, using ${modelA} only (degraded mode)`, {
        failedModel: modelB,
        error: describeError(errorB),
        successModelCost: resultA.cost,
      });
      return { resultA, resultB: null, degradedMode: true, failedModel: modelB };
    }

    // Case: both models failed.
    logger.error('Both models failed', {
      modelA,
      modelB,
      errorA: describeError(errorA),
      errorB: describeError(errorB),
    });
    throw new Error(
      `Both models (${modelA} and ${modelB}) failed to process. ` +
        `${modelA} error: ${describeError(errorA)}. ` +
        `${modelB} error: ${describeError(errorB)}.`
    );
  }

  /**
   * Extracts the full text of a PDF, preferring Nougat output.
   *
   * Language detection is intentionally skipped: Nougat is tried directly and
   * the method falls back when it fails or its quality is below the threshold.
   *
   * NOTE(review): the fallback step calls the same `extractPdf` method with the
   * same arguments as the first attempt; whether the client returns something
   * different on the second call cannot be told from this file - confirm.
   *
   * @returns The extracted text, the method label and whether the text is Markdown.
   */
  private async extractFullTextStructured(
    pdfBuffer: Buffer,
    filename: string
  ): Promise<{
    fullTextMarkdown: string;
    extractionMethod: 'nougat' | 'pymupdf';
    structuredFormat: boolean;
  }> {
    logger.info('Extracting full-text with Nougat-first strategy...');

    // Step 1: try Nougat first (the original note says it works best on English papers).
    if (this.nougatOptions.preferNougat) {
      try {
        const nougatResult = await this.extractionClient.extractPdf(pdfBuffer, filename);

        // Accept only a Nougat result whose quality reaches the threshold.
        if (
          nougatResult.method === 'nougat' &&
          (nougatResult.quality || 0) >= this.nougatOptions.nougatQualityThreshold
        ) {
          logger.info('✅ Using Nougat extraction (structured Markdown)');
          return {
            fullTextMarkdown: nougatResult.text,
            extractionMethod: 'nougat',
            structuredFormat: true, // Nougat output is Markdown
          };
        }
        logger.warn(
          `⚠️ Nougat quality too low (${nougatResult.quality}), falling back to PyMuPDF`
        );
      } catch (error) {
        logger.warn(
          `⚠️ Nougat extraction failed: ${describeError(error)}, falling back to PyMuPDF`
        );
      }
    }

    // Step 2: fall back to PyMuPDF.
    logger.info('Using PyMuPDF extraction (plaintext)');
    try {
      const pymupdfResult = await this.extractionClient.extractPdf(pdfBuffer, filename);
      return {
        fullTextMarkdown: pymupdfResult.text,
        extractionMethod: 'pymupdf',
        structuredFormat: false, // PyMuPDF output is plain text
      };
    } catch (error) {
      // Step 3: last resort - decode the buffer itself as UTF-8 (test mode).
      // NOTE(review): for a real PDF this yields raw PDF bytes, not readable text.
      logger.warn(
        `⚠️ PyMuPDF extraction also failed: ${describeError(error)}, using buffer content directly`
      );
      const textContent = pdfBuffer.toString('utf-8');
      logger.info('✅ Using buffer content as plain text (test mode)');
      return {
        fullTextMarkdown: textContent,
        extractionMethod: 'pymupdf', // labelled pymupdf to stay consistent with the plain-text path
        structuredFormat: false,
      };
    }
  }

  /**
   * Calls the LLM, retrying with exponential backoff (1s, 2s, 4s, ...).
   *
   * @param maxRetries Number of retries after the first attempt.
   * @returns The response content, or an empty string when the adapter returns none.
   * @throws Error when every attempt fails.
   */
  private async callLLMWithRetry(
    adapter: ILLMAdapter,
    systemPrompt: string,
    userPrompt: string,
    _mode: LLM12FieldsMode,
    maxRetries: number = 2
  ): Promise<string> {
    let lastError: Error | null = null;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        logger.info(`LLM call attempt ${attempt + 1}/${maxRetries + 1}`);

        const response = await adapter.chat(
          [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userPrompt },
          ],
          {
            temperature: 0.1, // low temperature for more consistent output
            maxTokens: 8000, // enough for the 12 fields plus the processing log
          }
        );

        return response.content ?? '';
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        logger.error(`LLM call attempt ${attempt + 1} failed: ${describeError(error)}`);

        if (attempt < maxRetries) {
          // Exponential backoff.
          const waitTime = Math.pow(2, attempt) * 1000;
          logger.info(`Retrying in ${waitTime}ms...`);
          await new Promise((resolve) => setTimeout(resolve, waitTime));
        }
      }
    }

    throw new Error(`LLM call failed after ${maxRetries + 1} attempts: ${lastError?.message}`);
  }

  /**
   * Parses the LLM response with a three-layer fallback.
   *
   * Layer 1: strict JSON.parse of the whole response.
   * Layer 2: automatic repair of the whole response (jsonrepair).
   * Layer 3: extract a fenced code block or a bare {...} span, then parse or repair it.
   *
   * A repaired value is accepted only when it is an object or an array: the
   * repair step can turn unquoted prose into a quoted string, which would
   * otherwise be reported as a successful parse. Strictly valid JSON is returned
   * as-is, whatever its type.
   *
   * In Layer 3 every pattern is tried independently, so a fenced block that
   * cannot be repaired does not prevent the bare {...} pattern from being tried.
   *
   * @throws Error when no layer yields a usable value.
   */
  private parseResponse(response: string): any {
    // Layer 1: strict JSON parsing.
    try {
      const result = JSON.parse(response);
      logger.info('JSON parsed successfully (Layer 1: strict)');
      return result;
    } catch {
      logger.warn('Layer 1 failed: strict JSON parsing failed, trying Layer 2...');
    }

    // Layer 2: automatic repair of the whole response.
    try {
      const repaired = jsonrepair(response);
      const result = JSON.parse(repaired);
      if (!isJsonContainer(result)) {
        throw new Error('Repaired output is not a JSON object or array');
      }
      logger.warn('JSON auto-repaired (Layer 2)', {
        originalLength: response.length,
        repairedLength: repaired.length,
        message: 'LLM output had format issues, auto-repaired successfully',
      });
      return result;
    } catch {
      logger.warn('Layer 2 failed: JSON repair failed, trying Layer 3...');
    }

    // Layer 3: extract a candidate span and parse or repair it.
    let layer3Error = new Error('No valid JSON found in response');
    for (const pattern of JSON_EXTRACTION_PATTERNS) {
      const match = response.match(pattern);
      if (!match) {
        continue;
      }
      // Prefer the capture group (fence contents); the bare pattern has none.
      const extracted = match[1] || match[0];

      // Try the extracted span as strict JSON first.
      try {
        const result = JSON.parse(extracted);
        logger.warn('JSON extracted from code block (Layer 3)', {
          pattern: pattern.source,
          message: 'LLM wrapped JSON in code block',
        });
        return result;
      } catch {
        // Not strict JSON - try repairing it below.
      }

      try {
        const repaired = jsonrepair(extracted);
        const result = JSON.parse(repaired);
        if (!isJsonContainer(result)) {
          throw new Error('Repaired output is not a JSON object or array');
        }
        logger.warn('JSON extracted and repaired (Layer 3)', {
          pattern: pattern.source,
          message: 'LLM wrapped JSON in code block with format issues',
        });
        return result;
      } catch (error) {
        // Remember the failure and move on to the next pattern.
        layer3Error = error instanceof Error ? error : new Error(String(error));
      }
    }
    logger.error('All 3 layers failed to parse JSON');

    // Final failure: log details for diagnosis.
    logger.error('Failed to parse LLM response after all 3 layers', {
      error: layer3Error.message,
      responsePreview: response.substring(0, 500),
      responseLength: response.length,
    });

    throw new Error(
      `Invalid JSON response from LLM after 3 parsing attempts: ${layer3Error.message}. ` +
        `Please check logs for response preview.`
    );
  }

  /**
   * Builds the cache key from the mode, the model name and the first 16 hex
   * characters of a SHA-256 digest over the full text plus the serialized PICOS context.
   */
  private generateCacheKey(
    mode: LLM12FieldsMode,
    model: string,
    fullText: string,
    picosContext: PICOSContext
  ): string {
    const hash = crypto
      .createHash('sha256')
      .update(fullText + JSON.stringify(picosContext))
      .digest('hex')
      .substring(0, 16);

    return `llm:${mode}:${model}:${hash}`;
  }

  /**
   * Looks up a cached result. Cache failures are logged and treated as a miss.
   *
   * @returns The cached result, or null when absent or when the lookup fails.
   */
  private async checkCache(cacheKey: string): Promise<LLMResult | null> {
    try {
      const cached = await cache.get(cacheKey);
      return cached ? JSON.parse(cached) : null;
    } catch (error) {
      logger.warn(`Cache check failed: ${describeError(error)}`);
      return null;
    }
  }

  /**
   * Stores a result in the cache. Failures are logged and otherwise ignored so
   * that a cache outage never fails the request.
   */
  private async cacheResult(cacheKey: string, result: LLMResult): Promise<void> {
    try {
      // Cache for one hour per the original note (TTL argument 3600, presumably seconds).
      await cache.set(cacheKey, JSON.stringify(result), 3600);
      logger.info(`Result cached with key: ${cacheKey}`);
    } catch (error) {
      logger.warn(`Cache set failed: ${describeError(error)}`);
    }
  }

  /**
   * Computes the cost in CNY.
   *
   * The price is looked up by the caller's model name first, then by the name
   * MODEL_NAME_MAP resolves it to (so an alias such as 'deepseek-chat' is priced
   * like 'deepseek-v3'), and finally falls back to the default price.
   */
  private calculateCost(model: string, tokenUsage: number): number {
    const resolvedName: string | undefined = MODEL_NAME_MAP[model];
    const aliasPrice =
      resolvedName !== undefined ? COST_PER_1K_TOKENS_CNY[resolvedName] : undefined;
    const costPerK =
      COST_PER_1K_TOKENS_CNY[model] || aliasPrice || DEFAULT_COST_PER_1K_TOKENS_CNY;
    return (tokenUsage / 1000) * costPerK;
  }
}

/**
 * Shared LLM12FieldsService instance.
 */
export const llm12FieldsService = new LLM12FieldsService();