/**
 * RVW manuscript review module - methodology evaluation service
 * @module rvw/services/methodologyService
 *
 * Phase 3.5.5 rework: prompts are obtained from PromptService instead of being read from files
 * - Supports gray-release preview (debuggers see DRAFT, regular users see ACTIVE)
 * - Three-level fallback (database → cache → built-in fallback)
 */
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { ModelType } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { createHash } from 'crypto';
import { prisma } from '../../../config/database.js';
import { getPromptService } from '../../../common/prompt/index.js';
import {
  MethodologyCheckpoint,
  MethodologyIssue,
  MethodologyPart,
  MethodologyReview
} from '../types/index.js';
import { parseJSONFromLLMResponse } from './utils.js';
import { composeRvwSystemPrompt, getRvwProtocol } from './promptProtocols.js';

/** Conclusions the service may emit; anything else is dropped during normalization. */
const METHODOLOGY_CONCLUSIONS = ['直接接收', '小修', '大修', '拒稿'] as const;
type MethodologyConclusion = typeof METHODOLOGY_CONCLUSIONS[number];

/** Checkpoint titles; checkpoint id N maps to index N - 1. */
const METHODOLOGY_CHECKPOINT_ITEMS = [
  '设计类型界定',
  '纳入/排除标准',
  '样本代表性',
  '对照组设置',
  '干预与观察细节',
  '效应指标选择',
  '设计要素完整性',
  '样本量估算',
  '质控与伦理',
  '基础参数明示',
  '分布特征',
  '多因素调整',
  '缺失值处理',
  '一致性检查',
  '前提条件检验',
  '多重比较校正',
  '统计量规范性',
  '效应量表达',
  '逻辑一致性',
  '图表准确性',
] as const;

/** Statuses accepted for a checkpoint; unknown values are coerced to 'not_mentioned'. */
const METHODOLOGY_CHECKPOINT_STATUSES = ['pass', 'minor_issue', 'major_issue', 'not_mentioned'] as const;
type MethodologyCheckpointStatus = typeof METHODOLOGY_CHECKPOINT_STATUSES[number];

type SectionKey = 'A' | 'B' | 'C';

/** One divide-and-conquer sub-task: a named part covering checkpoint ids start..end (inclusive). */
interface MethodologySectionDef {
  key: SectionKey;
  part: string;
  start: number;
  end: number;
}

/** Normalized result of evaluating a single section. */
interface SectionReviewResult {
  part: string;
  score: number;
  issues: MethodologyIssue[];
  checkpoints: MethodologyCheckpoint[];
}

const METHODOLOGY_SECTION_DEFS: MethodologySectionDef[] = [
  { key: 'A', part: '科研设计评估', start: 1, end: 9 },
  { key: 'B', part: '统计学方法描述评估', start: 10, end: 14 },
  { key: 'C', part: '统计分析与结果评估', start: 15, end: 20 },
];

/** Finding text used for checkpoints the model returned nothing usable for. */
const UNCOVERED_CHECKPOINT_FINDING = '该检查点未被模型明确覆盖,请人工复核。';

/**
 * Maps a 0-100 score to a conclusion using the thresholds 90 / 75 / 60.
 * @param score overall score
 * @returns the conclusion for that score band
 */
function inferConclusionFromScore(score: number): MethodologyConclusion {
  if (score >= 90) return '直接接收';
  if (score >= 75) return '小修';
  if (score >= 60) return '大修';
  return '拒稿';
}

/**
 * Lists the checkpoints (id + title) that belong to a section.
 * @param section section definition
 * @returns one entry per id in section.start..section.end, in ascending order
 */
function getCheckpointItemsBySection(section: MethodologySectionDef): Array<{ id: number; item: string }> {
  const result: Array<{ id: number; item: string }> = [];
  for (let id = section.start; id <= section.end; id += 1) {
    result.push({ id, item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1] });
  }
  return result;
}

/**
 * Builds the output-protocol text appended to the system prompt of a section sub-task.
 *
 * The segments are joined with single spaces so the emitted prompt text stays
 * exactly as it was; only the checkpoint list itself is newline-separated.
 * @param section section definition
 * @returns protocol text describing the required JSON shape and constraints
 */
function buildSectionProtocol(section: MethodologySectionDef): string {
  const checkpointLines = getCheckpointItemsBySection(section)
    .map(cp => `${cp.id}. ${cp.item}`)
    .join('\n');
  const sampleItem = METHODOLOGY_CHECKPOINT_ITEMS[section.start - 1];

  const segments = [
    `【系统输出协议(分治子任务-${section.key},研发固化)】`,
    '请严格仅输出 JSON(不要 Markdown、不要代码块、不要解释文字),结构如下:',
    `{ "part": "${section.part}", "score": 0,`,
    '"issues": [ { "type": "问题类型", "severity": "major", "description": "问题描述", "location": "位置(如:方法学第2段)", "suggestion": "可执行修改建议" } ],',
    `"checkpoints": [ { "id": ${section.start}, "item": "${sampleItem}", "status": "major_issue", "finding": "该检查点发现", "suggestion": "可执行建议" } ] }`,
    '约束:',
    `1) 仅评估本子任务范围:id ${section.start}-${section.end}`,
    '2) checkpoints 必须严格覆盖本范围全部 id(不可缺失、不可越界)',
    '3) checkpoints[].status 只能是 "pass" | "minor_issue" | "major_issue" | "not_mentioned"',
    '4) score 必须是 0-100 数字',
    '5) issues 为该分项问题清单,无问题时返回 []',
    '本子任务检查点如下:',
    checkpointLines,
  ];
  return segments.join(' ');
}

/**
 * Returns the trimmed string when `value` is a non-blank string, otherwise undefined.
 */
function readTrimmed(value: unknown): string | undefined {
  if (typeof value !== 'string') return undefined;
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}

/**
 * Structural check for a parsed section response.
 *
 * Deliberately lenient about coverage: it only requires that at least one
 * checkpoint carries an integer id inside the section's range. Missing ids
 * are filled in later by normalizeSectionReview.
 * @param result parsed LLM output
 * @param section section the output is supposed to describe
 * @returns true when issues/checkpoints are arrays, score is a number, part is a string
 *          and at least one checkpoint id is in range
 */
function isValidSectionReview(result: unknown, section: MethodologySectionDef): result is SectionReviewResult {
  if (!result || typeof result !== 'object') return false;
  const data = result as Record<string, unknown>;
  if (!Array.isArray(data.issues)) return false;
  if (!Array.isArray(data.checkpoints)) return false;
  if (typeof data.score !== 'number') return false;
  if (typeof data.part !== 'string') return false;

  return data.checkpoints.some((cp: unknown) => {
    if (!cp || typeof cp !== 'object') return false;
    const id = Number((cp as Record<string, unknown>).id);
    return Number.isInteger(id) && id >= section.start && id <= section.end;
  });
}

/**
 * Coerces an arbitrary value into a clean issue list.
 * Non-array input yields []; non-object rows are dropped; blank or missing
 * fields get placeholder text; any severity other than 'major' becomes 'minor'.
 * @param input raw `issues` value from the model
 * @returns normalized issues
 */
function normalizeMethodologyIssues(input: unknown): MethodologyIssue[] {
  if (!Array.isArray(input)) return [];
  return input
    .filter(row => row && typeof row === 'object')
    .map((row) => {
      const issue = row as Record<string, unknown>;
      return {
        type: readTrimmed(issue.type) ?? '未分类问题',
        severity: issue.severity === 'major' ? 'major' : 'minor',
        description: readTrimmed(issue.description) ?? '未提供详细描述',
        location: readTrimmed(issue.location) ?? '未标注',
        suggestion: readTrimmed(issue.suggestion) ?? '请补充可执行修改建议',
      };
    });
}

/**
 * Normalizes one raw checkpoint row.
 * @param row raw element of a `checkpoints` array (may be anything)
 * @param minId smallest accepted id (inclusive)
 * @param maxId largest accepted id (inclusive)
 * @returns the cleaned checkpoint, or null when the row is not an object or its id
 *          is not an integer within [minId, maxId]
 */
function normalizeCheckpointRow(row: unknown, minId: number, maxId: number): MethodologyCheckpoint | null {
  // Guard first: model output may contain null / primitive entries.
  if (!row || typeof row !== 'object') return null;
  const data = row as Record<string, unknown>;

  const id = Number(data.id);
  if (!Number.isInteger(id) || id < minId || id > maxId) return null;

  const status: MethodologyCheckpointStatus =
    typeof data.status === 'string' &&
    METHODOLOGY_CHECKPOINT_STATUSES.includes(data.status as MethodologyCheckpointStatus)
      ? (data.status as MethodologyCheckpointStatus)
      : 'not_mentioned';

  return {
    id,
    item: readTrimmed(data.item) ?? METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
    status,
    finding: readTrimmed(data.finding) ?? '该检查点未被充分展开',
    suggestion: readTrimmed(data.suggestion),
  };
}

/**
 * Produces exactly one checkpoint per id in startId..endId.
 * Usable rows from `input` are kept (a later row with the same id replaces an
 * earlier one); ids with no usable row get a 'not_mentioned' placeholder.
 * @param input raw `checkpoints` value (non-arrays are treated as empty)
 * @param startId first id of the range (inclusive)
 * @param endId last id of the range (inclusive)
 * @returns checkpoints ordered by ascending id
 */
function normalizeCheckpointRange(input: unknown, startId: number, endId: number): MethodologyCheckpoint[] {
  const byId = new Map<number, MethodologyCheckpoint>();
  if (Array.isArray(input)) {
    for (const row of input) {
      const checkpoint = normalizeCheckpointRow(row, startId, endId);
      if (checkpoint) byId.set(checkpoint.id, checkpoint);
    }
  }

  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = startId; id <= endId; id += 1) {
    checkpoints.push(
      byId.get(id) ?? {
        id,
        item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status: 'not_mentioned',
        finding: UNCOVERED_CHECKPOINT_FINDING,
      }
    );
  }
  return checkpoints;
}

/**
 * Cleans a validated section response: normalizes issues, rounds and clamps the
 * score to 0-100, restricts checkpoints to the section's id range and fills gaps.
 * The returned `part` is always the section's own name, not the model's.
 * @param raw validated section response
 * @param section section definition
 * @returns normalized section result
 */
function normalizeSectionReview(raw: SectionReviewResult, section: MethodologySectionDef): SectionReviewResult {
  const issues = normalizeMethodologyIssues(raw.issues);
  const score = Math.max(0, Math.min(100, Math.round(raw.score)));
  const checkpoints = normalizeCheckpointRange(raw.checkpoints, section.start, section.end);

  return {
    part: section.part,
    score,
    issues,
    checkpoints,
  };
}

/**
 * Builds the degraded result used when a section could not be evaluated:
 * score 60, one 'minor' issue describing the failure, and every checkpoint
 * marked 'not_mentioned'.
 * @param section section that failed
 * @param reason failure description embedded in the generated texts
 * @returns placeholder section result
 */
function buildSectionFallback(section: MethodologySectionDef, reason: string): SectionReviewResult {
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push({
      id,
      item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status: 'not_mentioned',
      finding: `分段评估失败:${reason}`,
      suggestion: '建议重试或人工复核该检查点。',
    });
  }

  return {
    part: section.part,
    score: 60,
    issues: [{
      type: '执行降级',
      severity: 'minor',
      description: `该分段评估未正常完成(${reason})`,
      location: '系统执行层',
      suggestion: '建议重试方法学评估任务或查看后端日志。',
    }],
    checkpoints,
  };
}

/**
 * Structural check for a parsed whole-manuscript (legacy mode) response.
 * @param result parsed LLM output
 * @returns true when overall_score is a number, parts is an array, summary is a string,
 *          and conclusion / checkpoints are either absent or a string / an array
 */
function isValidMethodologyReview(result: unknown): result is MethodologyReview {
  if (!result || typeof result !== 'object') return false;
  const data = result as Record<string, unknown>;
  if (typeof data.overall_score !== 'number') return false;
  if (!Array.isArray(data.parts)) return false;
  if (typeof data.summary !== 'string') return false;
  if (data.conclusion != null && typeof data.conclusion !== 'string') return false;
  if (data.checkpoints != null && !Array.isArray(data.checkpoints)) return false;
  return true;
}

/**
 * Normalizes a checkpoint list to the full set: one entry for every defined
 * checkpoint id, in order, with placeholders for anything missing or unusable.
 * @param input raw `checkpoints` value
 * @returns one checkpoint per entry of METHODOLOGY_CHECKPOINT_ITEMS
 */
function normalizeMethodologyCheckpoints(input: unknown): MethodologyCheckpoint[] {
  return normalizeCheckpointRange(input, 1, METHODOLOGY_CHECKPOINT_ITEMS.length);
}

/**
 * Final clean-up of a review: drops a conclusion that is not one of the known
 * values, normalizes checkpoints to the full set and logs a warning when any
 * checkpoint ended up 'not_mentioned'.
 * @param result validated review
 * @returns review with sanitized conclusion and checkpoints; other fields are passed through
 */
function normalizeMethodologyReview(result: MethodologyReview): MethodologyReview {
  const conclusion = (result.conclusion && METHODOLOGY_CONCLUSIONS.includes(result.conclusion as MethodologyConclusion))
    ? result.conclusion
    : undefined;
  const checkpoints = normalizeMethodologyCheckpoints(result.checkpoints);
  const missingCount = checkpoints.filter(cp => cp.status === 'not_mentioned').length;
  if (missingCount > 0) {
    logger.warn('[RVW:Methodology] 20项检查点覆盖不完整', { missingCount });
  }
  return {
    ...result,
    conclusion,
    checkpoints,
  };
}

/**
 * Merges per-section results into one review.
 *
 * overall_score is the rounded mean of the finite section scores (60 when there
 * are none). The summary lists how many checkpoints are major/minor issues and
 * names up to three of them. The conclusion starts from the score and is then
 * replaced by count-based rules.
 * @param sections normalized (or fallback) section results
 * @returns aggregated, normalized review
 */
function aggregateMethodologySections(sections: SectionReviewResult[]): MethodologyReview {
  const parts: MethodologyPart[] = sections.map(section => ({
    part: section.part,
    score: section.score,
    issues: section.issues,
  }));
  const checkpoints = normalizeMethodologyCheckpoints(sections.flatMap(section => section.checkpoints));

  const validScores = parts.map(part => part.score).filter(score => Number.isFinite(score));
  const overall_score = validScores.length > 0
    ? Math.round(validScores.reduce((sum, score) => sum + score, 0) / validScores.length)
    : 60;

  const majorCount = checkpoints.filter(cp => cp.status === 'major_issue').length;
  const minorCount = checkpoints.filter(cp => cp.status === 'minor_issue').length;
  const uncoveredCount = checkpoints.filter(cp => cp.status === 'not_mentioned').length;

  const topFindings = checkpoints
    .filter(cp => cp.status === 'major_issue' || cp.status === 'minor_issue')
    .slice(0, 3)
    .map(cp => `${cp.id}.${cp.item}`)
    .join(';');

  // NOTE(review): the "no defects" wording is chosen from major/minor counts only, so it
  // is also used when checkpoints are uncovered (e.g. a fallback section) — confirm intended.
  const summary = majorCount + minorCount === 0
    ? '方法学20项检查点未发现明确缺陷,整体统计学规范性较好。'
    : `方法学评估发现 ${majorCount} 个严重问题、${minorCount} 个一般问题${topFindings ? `,重点涉及:${topFindings}` : ''}。`;

  // NOTE(review): the count-based rules replace the score-based conclusion outright, which
  // can make it more lenient (score < 60 with one major issue yields '小修') — confirm intended.
  let conclusion: MethodologyConclusion = inferConclusionFromScore(overall_score);
  if (majorCount >= 8) conclusion = '拒稿';
  else if (majorCount >= 4 || uncoveredCount >= 4) conclusion = '大修';
  else if (majorCount >= 1 || minorCount >= 3 || uncoveredCount > 0) conclusion = '小修';

  return normalizeMethodologyReview({
    overall_score,
    summary,
    conclusion,
    checkpoints,
    parts,
  });
}

/**
 * Evaluates one section with the LLM.
 *
 * The first response is parsed and validated; if that fails for any reason, a
 * second "JSON repair" call asks the model to restructure its own first answer.
 * @param llmAdapter adapter used for both calls
 * @param businessPrompt business system prompt; the section protocol is appended to it
 * @param text manuscript text
 * @param section section to evaluate
 * @returns normalized section result
 * @throws when the repair call fails or its output is still not a valid section review
 */
async function reviewMethodologySection(
  llmAdapter: ReturnType<typeof LLMFactory.getAdapter>,
  businessPrompt: string,
  text: string,
  section: MethodologySectionDef
): Promise<SectionReviewResult> {
  const protocol = buildSectionProtocol(section);
  const messages = [
    { role: 'system' as const, content: `${businessPrompt}\n\n${protocol}` },
    {
      role: 'user' as const,
      content: `请仅评估“${section.part}”(检查点 ${section.start}-${section.end}),并按协议返回 JSON。\n\n稿件内容如下:\n${text}`,
    },
  ];

  const response = await llmAdapter.chat(messages, {
    temperature: 0.2,
    maxTokens: 2800,
  });
  const content = response.content ?? '';

  try {
    const parsed = parseJSONFromLLMResponse(content);
    if (!isValidSectionReview(parsed, section)) {
      throw new Error('section json invalid');
    }
    return normalizeSectionReview(parsed, section);
  } catch (error) {
    // Record why the first attempt was rejected before spending a second LLM call.
    logger.warn('[RVW:Methodology] 分段首次解析失败,尝试 LLM 结构化修复', {
      section: section.part,
      reason: error instanceof Error ? error.message : String(error),
    });

    const repairMessages = [
      {
        role: 'system' as const,
        content: `你是 JSON 结构化助手。把输入文本转成目标 JSON。\n\n${protocol}`,
      },
      {
        role: 'user' as const,
        content: `请将以下方法学评估文本重组为目标 JSON(仅检查点 ${section.start}-${section.end}):\n\n${content}`,
      },
    ];
    const repaired = await llmAdapter.chat(repairMessages, {
      temperature: 0.1,
      maxTokens: 1800,
    });
    const repairedContent = repaired.content ?? '';
    const repairedParsed = parseJSONFromLLMResponse(repairedContent);
    if (!isValidSectionReview(repairedParsed, section)) {
      throw new Error('section repair invalid');
    }
    return normalizeSectionReview(repairedParsed, section);
  }
}

/**
 * Single-call evaluation of the whole manuscript, used when every section
 * sub-task failed. Falls back to repairMethodologyToJson when the response
 * cannot be parsed or validated.
 * @param businessPrompt business system prompt
 * @param text manuscript text
 * @param modelType model used for the call
 * @returns normalized review
 */
async function reviewMethodologyLegacy(
  businessPrompt: string,
  text: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  const llmAdapter = LLMFactory.getAdapter(modelType);
  const messages = [
    { role: 'system' as const, content: composeRvwSystemPrompt('methodology', businessPrompt) },
    { role: 'user' as const, content: `请对以下稿件进行方法学评估。\n\n稿件内容如下:\n${text}` },
  ];

  const response = await llmAdapter.chat(messages, {
    temperature: 0.3,
    maxTokens: 5000,
  });
  const methContent = response.content ?? '';

  try {
    const result = parseJSONFromLLMResponse(methContent);
    if (!isValidMethodologyReview(result)) throw new Error('invalid json');
    return normalizeMethodologyReview(result);
  } catch {
    // repairMethodologyToJson logs the failed first attempt itself.
    return repairMethodologyToJson(methContent, modelType);
  }
}

/**
 * Asks the LLM to restructure a free-form methodology evaluation into the target JSON.
 * @param rawContent text of the first, unparseable response
 * @param modelType model used for the repair call
 * @returns normalized review
 * @throws when the repaired output is still not a valid review
 */
async function repairMethodologyToJson(
  rawContent: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  logger.warn('[RVW:Methodology] 首次解析失败,尝试 LLM 结构化修复');
  const llmAdapter = LLMFactory.getAdapter(modelType);
  const repairMessages = [
    {
      role: 'system' as const,
      content: `你是 JSON 结构化助手。你的唯一任务是把输入文本转换成目标 JSON。\n\n${getRvwProtocol('methodology')}`,
    },
    {
      role: 'user' as const,
      content: `请将以下“方法学评估文本”重组为目标 JSON。\n\n${rawContent}`,
    },
  ];

  const repaired = await llmAdapter.chat(repairMessages, {
    temperature: 0.1,
    maxTokens: 4000,
  });
  const repairedContent = repaired.content ?? '';
  const parsed = parseJSONFromLLMResponse(repairedContent);
  if (!isValidMethodologyReview(parsed)) {
    throw new Error('方法学评估结果结构化修复失败(JSON字段不完整)');
  }
  return normalizeMethodologyReview(parsed);
}

/**
 * Methodology evaluation.
 *
 * Loads the business prompt, evaluates the sections in parallel and merges the
 * results. A failed section is replaced by a fallback result; if every section
 * fails, the single-call legacy mode is used instead.
 * @param text manuscript text
 * @param modelType model type
 * @param userId user id (used to decide gray-release preview)
 * @returns evaluation result
 * @throws Error wrapping the message of any failure from prompt loading, the legacy path or aggregation
 */
export async function reviewMethodology(
  text: string,
  modelType: ModelType = 'deepseek-v3',
  userId?: string
): Promise<MethodologyReview> {
  try {
    // 1. Load the system prompt from PromptService (supports gray-release preview)
    const promptService = getPromptService(prisma);
    const { content: businessPrompt, isDraft, version } = await promptService.get(
      'RVW_METHODOLOGY',
      {},
      { userId }
    );
    // Short hash so logs can tell prompt revisions apart without printing the prompt.
    const promptFingerprint = createHash('sha1').update(businessPrompt).digest('hex').slice(0, 12);

    logger.info('[RVW:Methodology] Prompt 已加载', {
      userId,
      isDraft,
      version,
      promptFingerprint,
    });

    const llmAdapter = LLMFactory.getAdapter(modelType);
    logger.info('[RVW:Methodology] 开始分治并行评估', {
      modelType,
      sections: METHODOLOGY_SECTION_DEFS.map(section => `${section.part}(${section.start}-${section.end})`),
    });

    // allSettled: one failing section must not discard the others.
    const settled = await Promise.allSettled(
      METHODOLOGY_SECTION_DEFS.map(section =>
        reviewMethodologySection(llmAdapter, businessPrompt, text, section)
      )
    );

    const sectionResults: SectionReviewResult[] = [];
    let fulfilledCount = 0;
    for (let i = 0; i < settled.length; i += 1) {
      const outcome = settled[i];
      const section = METHODOLOGY_SECTION_DEFS[i];
      if (outcome.status === 'fulfilled') {
        fulfilledCount += 1;
        sectionResults.push(outcome.value);
      } else {
        const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
        logger.warn('[RVW:Methodology] 分段评估失败,使用降级结果', {
          section: section.part,
          reason,
        });
        sectionResults.push(buildSectionFallback(section, reason));
      }
    }

    if (fulfilledCount === 0) {
      logger.warn('[RVW:Methodology] 分治并行全部失败,回退 legacy 模式');
      return await reviewMethodologyLegacy(businessPrompt, text, modelType);
    }

    const merged = aggregateMethodologySections(sectionResults);
    logger.info('[RVW:Methodology] 分治评估完成', {
      fulfilledSections: fulfilledCount,
      overallScore: merged.overall_score,
      conclusion: merged.conclusion,
      missingCheckpoints: merged.checkpoints?.filter(cp => cp.status === 'not_mentioned').length ?? 0,
    });
    return merged;
  } catch (error) {
    logger.error('[RVW:Methodology] 方法学评估失败', {
      error: error instanceof Error ? error.message : 'Unknown error',
      stack: error instanceof Error ? error.stack : undefined,
    });
    throw new Error(`方法学评估失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}