Files
AIclinicalresearch/backend/src/modules/rvw/services/methodologyService.ts
HaHafeng ba464082cb feat(core): finalize rvw stability updates and pending module changes
Summary:
- Harden RVW prompt protocol handling and methodology review flow with 20-checkpoint coverage, divide-and-conquer execution, and timeout tuning
- Update RVW frontend methodology report rendering to show real structured outputs and grouped checkpoint sections
- Include pending backend/frontend updates across IIT admin, SSA, extraction forensics, and related integration files
- Sync system and RVW status documentation, deployment checklist, and RVW architecture/plan docs

Validation:
- Verified lint diagnostics for touched RVW backend/frontend files show no new errors
- Kept backup dump files and local test artifacts untracked

Made-with: Cursor
2026-03-14 00:00:04 +08:00

507 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
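/**
 * RVW manuscript review module - methodology assessment service
 * @module rvw/services/methodologyService
 *
 * Phase 3.5.5 refactor: use PromptService instead of reading prompt files
 * - Supports gray-release preview (debuggers see DRAFT, regular users see ACTIVE)
 * - Three-level fallback (database → cache → built-in default)
 */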
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { ModelType } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { createHash } from 'crypto';
import { prisma } from '../../../config/database.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { MethodologyCheckpoint, MethodologyIssue, MethodologyPart, MethodologyReview } from '../types/index.js';
import { parseJSONFromLLMResponse } from './utils.js';
import { composeRvwSystemPrompt, getRvwProtocol } from './promptProtocols.js';
/**
 * The four editorial conclusions a methodology review may carry, listed in the
 * order inferConclusionFromScore assigns them from the highest score band down.
 */
const METHODOLOGY_CONCLUSIONS = [
  '直接接收',
  '小修',
  '大修',
  '拒稿',
] as const;

/** Union of the literal strings listed in METHODOLOGY_CONCLUSIONS. */
type MethodologyConclusion = (typeof METHODOLOGY_CONCLUSIONS)[number];
/**
 * Display names of the 20 methodology checkpoints, in id order.
 * A checkpoint's id is its 1-based position in this list; code in this file
 * looks names up with `METHODOLOGY_CHECKPOINT_ITEMS[id - 1]`, so the order must not change.
 */
const METHODOLOGY_CHECKPOINT_ITEMS = [
// ids 1-9: the range METHODOLOGY_SECTION_DEFS assigns to section A
'设计类型界定',
'纳入/排除标准',
'样本代表性',
'对照组设置',
'干预与观察细节',
'效应指标选择',
'设计要素完整性',
'样本量估算',
'质控与伦理',
// ids 10-14: the range METHODOLOGY_SECTION_DEFS assigns to section B
'基础参数明示',
'分布特征',
'多因素调整',
'缺失值处理',
'一致性检查',
// ids 15-20: the range METHODOLOGY_SECTION_DEFS assigns to section C
'前提条件检验',
'多重比较校正',
'统计量规范性',
'效应量表达',
'逻辑一致性',
'图表准确性',
] as const;
/**
 * Status values accepted for a checkpoint. The normalizers in this file replace
 * any other value with 'not_mentioned'.
 */
const METHODOLOGY_CHECKPOINT_STATUSES = [
  'pass',
  'minor_issue',
  'major_issue',
  'not_mentioned',
] as const;

/** Union of the literal strings listed in METHODOLOGY_CHECKPOINT_STATUSES. */
type MethodologyCheckpointStatus = (typeof METHODOLOGY_CHECKPOINT_STATUSES)[number];

/** Letter identifying one of the three review sub-tasks. */
type SectionKey = 'A' | 'B' | 'C';
/** One sub-task of the divide-and-conquer review: a label plus an inclusive checkpoint-id range. */
interface MethodologySectionDef {
/** Sub-task letter; interpolated into the protocol header by buildSectionProtocol. */
key: SectionKey;
/** Section title; used as the `part` value of the section's result. */
part: string;
/** First checkpoint id covered by the section (inclusive). */
start: number;
/** Last checkpoint id covered by the section (inclusive). */
end: number;
}
/** Result of reviewing one section; also the JSON shape requested from the model by buildSectionProtocol. */
interface SectionReviewResult {
/** Section title. */
part: string;
/** Section score; normalizeSectionReview rounds it and clamps it into 0-100. */
score: number;
/** Issues reported for this section. */
issues: MethodologyIssue[];
/** Checkpoint entries for this section's id range. */
checkpoints: MethodologyCheckpoint[];
}
/**
 * Partition of checkpoint ids 1-20 into the three sub-tasks that reviewMethodology
 * evaluates in parallel. Ranges are inclusive and together cover every id exactly once.
 */
const METHODOLOGY_SECTION_DEFS: MethodologySectionDef[] = [
{ key: 'A', part: '科研设计评估', start: 1, end: 9 },
{ key: 'B', part: '统计学方法描述评估', start: 10, end: 14 },
{ key: 'C', part: '统计分析与结果评估', start: 15, end: 20 },
];
/**
 * Translate a numeric methodology score into an editorial conclusion.
 *
 * @param score Overall score to classify.
 * @returns '直接接收' for 90 and above, '小修' for 75 and above, '大修' for 60 and
 *          above, and '拒稿' for anything else.
 */
function inferConclusionFromScore(score: number): MethodologyConclusion {
  // Score bands from highest floor to lowest; the first floor the score reaches wins.
  const bands: ReadonlyArray<readonly [number, MethodologyConclusion]> = [
    [90, '直接接收'],
    [75, '小修'],
    [60, '大修'],
  ];
  for (const [floor, verdict] of bands) {
    if (score >= floor) return verdict;
  }
  // Below every floor. NaN also lands here because it fails every comparison.
  return '拒稿';
}
/**
 * List the checkpoints that belong to a section.
 *
 * @param section Section whose inclusive `start`..`end` id range is expanded.
 * @returns One `{ id, item }` pair per id in the range, in ascending id order,
 *          where `item` is the name stored at position `id - 1` of
 *          METHODOLOGY_CHECKPOINT_ITEMS.
 */
function getCheckpointItemsBySection(section: MethodologySectionDef): Array<{ id: number; item: string }> {
  const count = section.end - section.start + 1;
  return Array.from({ length: count }, (_, offset) => {
    const id = section.start + offset;
    return { id, item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1] };
  });
}
/**
 * Build the output-protocol text for one section sub-task.
 *
 * The returned string instructs the model to reply with JSON only, shows an example
 * object (filled with the section title and the section's first checkpoint), lists
 * the constraints on the reply, and ends with the numbered checkpoints of the section.
 * Everything between the backticks, including line breaks, is emitted verbatim as
 * part of the prompt.
 *
 * @param section Section whose key, title and id range are interpolated into the text.
 * @returns The protocol as a single multi-line string.
 */
function buildSectionProtocol(section: MethodologySectionDef): string {
// One "id. item" line per checkpoint in the section's range.
const checkpointLines = getCheckpointItemsBySection(section)
.map(cp => `${cp.id}. ${cp.item}`)
.join('\n');
return `【系统输出协议(分治子任务-${section.key},研发固化)】
请严格仅输出 JSON不要 Markdown、不要代码块、不要解释文字结构如下
{
"part": "${section.part}",
"score": 0,
"issues": [
{
"type": "问题类型",
"severity": "major",
"description": "问题描述",
"location": "位置方法学第2段",
"suggestion": "可执行修改建议"
}
],
"checkpoints": [
{
"id": ${section.start},
"item": "${METHODOLOGY_CHECKPOINT_ITEMS[section.start - 1]}",
"status": "major_issue",
"finding": "该检查点发现",
"suggestion": "可执行建议"
}
]
}
约束:
1) 仅评估本子任务范围id ${section.start}-${section.end}
2) checkpoints 必须严格覆盖本范围全部 id不可缺失、不可越界
3) checkpoints[].status 只能是 "pass" | "minor_issue" | "major_issue" | "not_mentioned"
4) score 必须是 0-100 数字
5) issues 为该分项问题清单,无问题时返回 []
本子任务检查点如下:
${checkpointLines}`;
}
/**
 * Type guard for the JSON a section sub-task is expected to return.
 *
 * The check is deliberately loose: `issues` and `checkpoints` must be arrays,
 * `score` a number and `part` a string, and at least one checkpoint entry must
 * carry an integer id (after `Number()` coercion) inside the section's range.
 * Full coverage of the range is not required here.
 *
 * @param result  Parsed model output of unknown shape.
 * @param section Section whose inclusive id range the checkpoints are matched against.
 * @returns `true` when the value has the expected shape and hits the range at least once.
 */
function isValidSectionReview(result: unknown, section: MethodologySectionDef): result is SectionReviewResult {
  if (typeof result !== 'object' || !result) return false;

  const { issues, checkpoints, score, part } = result as Record<string, unknown>;
  if (!Array.isArray(issues) || !Array.isArray(checkpoints)) return false;
  if (typeof score !== 'number' || typeof part !== 'string') return false;

  // Ids that belong to this section.
  const inRange = new Set<number>();
  for (let id = section.start; id <= section.end; id += 1) {
    inRange.add(id);
  }

  // Accept as soon as one entry carries an in-range integer id.
  for (const entry of checkpoints as unknown[]) {
    if (!entry || typeof entry !== 'object') continue;
    const id = Number((entry as Record<string, unknown>).id);
    if (Number.isInteger(id) && inRange.has(id)) return true;
  }
  return false;
}
/**
 * Coerce an untrusted list of issues into well-formed MethodologyIssue objects.
 *
 * Entries that are not objects are dropped. Text fields are trimmed and replaced
 * by a placeholder when missing, blank or not a string. `severity` is 'major'
 * only when it is exactly that string; every other value becomes 'minor'.
 *
 * @param input Value of unknown shape, typically the `issues` field of model output.
 * @returns The normalized issues, or an empty array when `input` is not an array.
 */
function normalizeMethodologyIssues(input: unknown): MethodologyIssue[] {
  if (!Array.isArray(input)) return [];

  // Trimmed text when `value` is a non-blank string, otherwise the placeholder.
  const textOr = (value: unknown, placeholder: string): string => {
    if (typeof value !== 'string') return placeholder;
    const trimmed = value.trim();
    return trimmed ? trimmed : placeholder;
  };

  const issues: MethodologyIssue[] = [];
  for (const row of input) {
    if (!row || typeof row !== 'object') continue;
    const fields = row as Record<string, unknown>;
    issues.push({
      type: textOr(fields.type, '未分类问题'),
      severity: fields.severity === 'major' ? 'major' : 'minor',
      description: textOr(fields.description, '未提供详细描述'),
      location: textOr(fields.location, '未标注'),
      suggestion: textOr(fields.suggestion, '请补充可执行修改建议'),
    });
  }
  return issues;
}
/**
 * Turn a validated-but-untrusted section result into a complete, well-formed one.
 *
 * - `issues` are normalized with normalizeMethodologyIssues.
 * - `score` is rounded and clamped into 0-100.
 * - Checkpoints are keyed by id; entries outside the section's range, with a
 *   non-integer id, or that are not objects at all are ignored. When the same id
 *   appears twice the later entry wins. Unknown statuses become 'not_mentioned'.
 * - Every id in the section's range is present in the output; ids the model did
 *   not cover get a placeholder entry asking for manual review.
 *
 * @param raw     Section result as parsed from model output.
 * @param section Section definition supplying the title and the inclusive id range.
 * @returns A result whose `part` is the section title and whose checkpoints cover
 *          exactly the section's ids in ascending order.
 */
function normalizeSectionReview(raw: SectionReviewResult, section: MethodologySectionDef): SectionReviewResult {
  const issues = normalizeMethodologyIssues(raw.issues);
  const score = Math.max(0, Math.min(100, Math.round(raw.score)));

  const checkpointMap = new Map<number, MethodologyCheckpoint>();
  if (Array.isArray(raw.checkpoints)) {
    // The array comes from model output, so individual entries are treated as
    // unknown: isValidSectionReview only requires ONE usable entry, which means
    // null or primitive entries can still reach this point.
    for (const entry of raw.checkpoints as unknown[]) {
      if (!entry || typeof entry !== 'object') continue;
      const cp = entry as Record<string, unknown>;
      const id = Number(cp.id);
      if (!Number.isInteger(id) || id < section.start || id > section.end) continue;
      const status = typeof cp.status === 'string' && METHODOLOGY_CHECKPOINT_STATUSES.includes(cp.status as MethodologyCheckpointStatus)
        ? cp.status as MethodologyCheckpointStatus
        : 'not_mentioned';
      checkpointMap.set(id, {
        id,
        item: typeof cp.item === 'string' && cp.item.trim() ? cp.item.trim() : METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status,
        finding: typeof cp.finding === 'string' && cp.finding.trim() ? cp.finding.trim() : '该检查点未被充分展开',
        suggestion: typeof cp.suggestion === 'string' && cp.suggestion.trim() ? cp.suggestion.trim() : undefined,
      });
    }
  }

  // Emit one entry per id in the range, back-filling the ones the model skipped.
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push(
      checkpointMap.get(id) ?? {
        id,
        item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status: 'not_mentioned',
        finding: '该检查点未被模型明确覆盖,请人工复核。',
      }
    );
  }

  return {
    part: section.part,
    score,
    issues,
    checkpoints,
  };
}
/**
 * Build the degraded result used when a section's evaluation failed.
 *
 * Every checkpoint in the section's range is reported as 'not_mentioned' with the
 * failure reason in its finding, the score is fixed at 60, and a single minor
 * issue records that the section did not complete normally.
 *
 * @param section Section that failed.
 * @param reason  Failure description; embedded in the findings and the issue text.
 * @returns A complete SectionReviewResult standing in for the failed section.
 */
function buildSectionFallback(section: MethodologySectionDef, reason: string): SectionReviewResult {
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push({
      id,
      item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status: 'not_mentioned',
      finding: `分段评估失败:${reason}`,
      suggestion: '建议重试或人工复核该检查点。',
    });
  }
  return {
    part: section.part,
    score: 60,
    issues: [{
      type: '执行降级',
      severity: 'minor',
      // The parenthesis around the reason is closed so the message reads as a complete phrase.
      description: `该分段评估未正常完成(${reason})`,
      location: '系统执行层',
      suggestion: '建议重试方法学评估任务或查看后端日志。',
    }],
    checkpoints,
  };
}
/**
 * Type guard for a whole-manuscript methodology review parsed from model output.
 *
 * Required: numeric `overall_score`, array `parts`, string `summary`.
 * Optional: `conclusion` (string when present) and `checkpoints` (array when
 * present); `null` and `undefined` both count as absent.
 *
 * @param result Parsed value of unknown shape.
 * @returns `true` when the value satisfies the checks above.
 */
function isValidMethodologyReview(result: unknown): result is MethodologyReview {
  if (typeof result !== 'object' || result === null) return false;

  const { overall_score, parts, summary, conclusion, checkpoints } = result as Record<string, unknown>;

  const requiredOk =
    typeof overall_score === 'number' && Array.isArray(parts) && typeof summary === 'string';
  const conclusionOk = conclusion == null || typeof conclusion === 'string';
  const checkpointsOk = checkpoints == null || Array.isArray(checkpoints);

  return requiredOk && conclusionOk && checkpointsOk;
}
/**
 * Produce the full list of checkpoints from untrusted input.
 *
 * Usable entries are objects whose id (number, or coerced with `Number()`) is an
 * integer from 1 to 20. For each one, an unknown status becomes 'not_mentioned',
 * a blank name falls back to the name in METHODOLOGY_CHECKPOINT_ITEMS, and a blank
 * finding gets a placeholder. When an id repeats, the later entry wins.
 *
 * @param input Value of unknown shape; anything that is not an array contributes nothing.
 * @returns One checkpoint per entry of METHODOLOGY_CHECKPOINT_ITEMS, in id order,
 *          with placeholder entries for ids that had no usable input.
 */
function normalizeMethodologyCheckpoints(input: unknown): MethodologyCheckpoint[] {
  // Trimmed text when `value` is a non-blank string, otherwise `undefined`.
  const nonBlank = (value: unknown): string | undefined => {
    if (typeof value !== 'string') return undefined;
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
  };

  const byId = new Map<number, MethodologyCheckpoint>();
  const rows: unknown[] = Array.isArray(input) ? input : [];
  rows.forEach((entry) => {
    if (!entry || typeof entry !== 'object') return;
    const fields = entry as Record<string, unknown>;

    const id = typeof fields.id === 'number' ? fields.id : Number(fields.id);
    const idUsable = Number.isInteger(id) && id >= 1 && id <= 20;
    if (!idUsable) return;

    const statusKnown =
      typeof fields.status === 'string' &&
      METHODOLOGY_CHECKPOINT_STATUSES.includes(fields.status as MethodologyCheckpointStatus);
    const status: MethodologyCheckpointStatus = statusKnown
      ? (fields.status as MethodologyCheckpointStatus)
      : 'not_mentioned';

    byId.set(id, {
      id,
      item: nonBlank(fields.item) ?? METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status,
      finding: nonBlank(fields.finding) ?? '该检查点未被充分展开',
      suggestion: nonBlank(fields.suggestion),
    });
  });

  // Walk the canonical list so the output always has every id, in order.
  const complete: MethodologyCheckpoint[] = [];
  METHODOLOGY_CHECKPOINT_ITEMS.forEach((item, index) => {
    const id = index + 1;
    const known = byId.get(id);
    complete.push(
      known ?? {
        id,
        item,
        status: 'not_mentioned',
        finding: '该检查点未被模型明确覆盖,请人工复核。',
      }
    );
  });
  return complete;
}
/**
 * Normalize the optional parts of a methodology review.
 *
 * `checkpoints` is replaced by the complete list from normalizeMethodologyCheckpoints,
 * and `conclusion` is kept only when it is one of METHODOLOGY_CONCLUSIONS (otherwise
 * it becomes `undefined`). All other fields are copied through unchanged. A warning
 * is logged when any checkpoint ends up with status 'not_mentioned'.
 *
 * @param result Review that already passed structural validation.
 * @returns A copy of the review with normalized `conclusion` and `checkpoints`.
 */
function normalizeMethodologyReview(result: MethodologyReview): MethodologyReview {
  const checkpoints = normalizeMethodologyCheckpoints(result.checkpoints);

  let missingCount = 0;
  for (const cp of checkpoints) {
    if (cp.status === 'not_mentioned') missingCount += 1;
  }
  if (missingCount > 0) {
    logger.warn('[RVW:Methodology] 20项检查点覆盖不完整', { missingCount });
  }

  const conclusionRecognized =
    Boolean(result.conclusion) &&
    METHODOLOGY_CONCLUSIONS.includes(result.conclusion as MethodologyConclusion);

  return {
    ...result,
    conclusion: conclusionRecognized ? result.conclusion : undefined,
    checkpoints,
  };
}
/**
 * Merge the per-section results into one overall methodology review.
 *
 * - `parts` carries each section's title, score and issues.
 * - `checkpoints` is the normalized union of all section checkpoints.
 * - `overall_score` is the rounded mean of the finite section scores (60 when
 *   there is none).
 * - `summary` reports the number of major and minor checkpoints and names up to
 *   three of them.
 * - `conclusion` starts from the score band and is then overridden by the
 *   checkpoint-count rules below.
 *
 * @param sections Section results, one per sub-task (real or fallback).
 * @returns The aggregated review, passed through normalizeMethodologyReview.
 */
function aggregateMethodologySections(sections: SectionReviewResult[]): MethodologyReview {
  const parts: MethodologyPart[] = sections.map(section => ({
    part: section.part,
    score: section.score,
    issues: section.issues,
  }));
  const checkpoints = normalizeMethodologyCheckpoints(sections.flatMap(section => section.checkpoints));

  // Non-finite section scores (e.g. NaN) are left out of the mean.
  const validScores = parts.map(part => part.score).filter(score => Number.isFinite(score));
  const overall_score = validScores.length > 0
    ? Math.round(validScores.reduce((sum, score) => sum + score, 0) / validScores.length)
    : 60;

  const majorCount = checkpoints.filter(cp => cp.status === 'major_issue').length;
  const minorCount = checkpoints.filter(cp => cp.status === 'minor_issue').length;
  const uncoveredCount = checkpoints.filter(cp => cp.status === 'not_mentioned').length;

  // Name the first three problematic checkpoints. They are joined with a list
  // separator so consecutive entries stay readable instead of running together.
  const topFindings = checkpoints
    .filter(cp => cp.status === 'major_issue' || cp.status === 'minor_issue')
    .slice(0, 3)
    .map(cp => `${cp.id}.${cp.item}`)
    .join('、');
  const summary = majorCount + minorCount === 0
    ? '方法学20项检查点未发现明确缺陷整体统计学规范性较好。'
    : `方法学评估发现 ${majorCount} 个严重问题、${minorCount} 个一般问题${topFindings ? `,重点涉及:${topFindings}` : ''}`;

  // NOTE(review): these count-based rules replace the score-based conclusion
  // outright, so they can also produce a milder conclusion than the score band
  // (e.g. a score below 60 with a single major checkpoint yields '小修').
  // Confirm with the product owner whether that is intended.
  let conclusion: MethodologyConclusion = inferConclusionFromScore(overall_score);
  if (majorCount >= 8) conclusion = '拒稿';
  else if (majorCount >= 4 || uncoveredCount >= 4) conclusion = '大修';
  else if (majorCount >= 1 || minorCount >= 3 || uncoveredCount > 0) conclusion = '小修';

  return normalizeMethodologyReview({
    overall_score,
    summary,
    conclusion,
    checkpoints,
    parts,
  });
}
/**
 * Evaluate one section of the methodology review with the model.
 *
 * The model is asked for the section's JSON. If its reply cannot be parsed or
 * fails validation, the failure is logged and a second "repair" request asks the
 * model to restructure that reply into the target JSON. An empty reply is rejected
 * immediately, because there is no text for the repair request to restructure.
 *
 * @param llmAdapter     Adapter used for both the review and the repair request.
 * @param businessPrompt Business prompt placed before the section protocol.
 * @param text           Manuscript text to evaluate.
 * @param section        Section (title and checkpoint range) to evaluate.
 * @returns The normalized section result.
 * @throws Error when the reply is empty, when the repaired reply is still invalid,
 *         or when a model call or parse in the repair path fails.
 */
async function reviewMethodologySection(
  llmAdapter: ReturnType<typeof LLMFactory.getAdapter>,
  businessPrompt: string,
  text: string,
  section: MethodologySectionDef
): Promise<SectionReviewResult> {
  const protocol = buildSectionProtocol(section);

  // Parse, validate and normalize one model reply; throws when it is unusable.
  const parseSectionReply = (reply: string, invalidMessage: string): SectionReviewResult => {
    const parsed = parseJSONFromLLMResponse<SectionReviewResult>(reply);
    if (!isValidSectionReview(parsed, section)) {
      throw new Error(invalidMessage);
    }
    return normalizeSectionReview(parsed, section);
  };

  const messages = [
    { role: 'system' as const, content: `${businessPrompt}\n\n${protocol}` },
    { role: 'user' as const, content: `请仅评估“${section.part}”(检查点 ${section.start}-${section.end}),并按协议返回 JSON。\n\n稿件内容如下\n${text}` },
  ];
  const response = await llmAdapter.chat(messages, {
    temperature: 0.2,
    maxTokens: 2800,
  });
  const content = response.content ?? '';

  // With nothing to restructure, a repair request could only return invented
  // content; fail so the caller can substitute its fallback for this section.
  if (!content.trim()) {
    throw new Error('section response empty');
  }

  try {
    return parseSectionReply(content, 'section json invalid');
  } catch (error) {
    // Record why the first attempt was rejected before spending a second call.
    logger.warn('[RVW:Methodology] 分段首次解析失败,尝试 LLM 结构化修复', {
      section: section.part,
      reason: error instanceof Error ? error.message : String(error),
    });
    const repairMessages = [
      {
        role: 'system' as const,
        content: `你是 JSON 结构化助手。把输入文本转成目标 JSON。\n\n${protocol}`,
      },
      {
        role: 'user' as const,
        content: `请将以下方法学评估文本重组为目标 JSON仅检查点 ${section.start}-${section.end}\n\n${content}`,
      },
    ];
    const repaired = await llmAdapter.chat(repairMessages, {
      temperature: 0.1,
      maxTokens: 1800,
    });
    return parseSectionReply(repaired.content ?? '', 'section repair invalid');
  }
}
/**
 * Single-request review of the whole manuscript.
 *
 * Sends one request built from the composed RVW system prompt and the manuscript.
 * When the reply cannot be parsed, fails validation or fails normalization, the
 * raw reply is handed to repairMethodologyToJson.
 *
 * @param businessPrompt Business prompt passed to composeRvwSystemPrompt.
 * @param text           Manuscript text to evaluate.
 * @param modelType      Model used for the request (and for a repair, if needed).
 * @returns The normalized review.
 */
async function reviewMethodologyLegacy(
  businessPrompt: string,
  text: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  const adapter = LLMFactory.getAdapter(modelType);
  const systemPrompt = composeRvwSystemPrompt('methodology', businessPrompt);
  const userPrompt = `请对以下稿件进行方法学评估。\n\n稿件内容如下\n${text}`;

  const response = await adapter.chat(
    [
      { role: 'system' as const, content: systemPrompt },
      { role: 'user' as const, content: userPrompt },
    ],
    { temperature: 0.3, maxTokens: 5000 }
  );
  const rawReview = response.content ?? '';

  try {
    const candidate = parseJSONFromLLMResponse<MethodologyReview>(rawReview);
    if (isValidMethodologyReview(candidate)) {
      return normalizeMethodologyReview(candidate);
    }
    throw new Error('invalid json');
  } catch {
    // Any failure above is routed to the repair request.
    return repairMethodologyToJson(rawReview, modelType);
  }
}
/**
 * Ask the model to restructure a free-form review into the methodology JSON.
 *
 * Logs a warning, sends the raw text together with the methodology protocol from
 * getRvwProtocol, then parses, validates and normalizes the reply.
 *
 * @param rawContent Model reply that could not be used as-is.
 * @param modelType  Model used for the repair request.
 * @returns The normalized review.
 * @throws Error when the repaired reply does not pass isValidMethodologyReview;
 *         errors from the model call or the parser propagate unchanged.
 */
async function repairMethodologyToJson(
  rawContent: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  logger.warn('[RVW:Methodology] 首次解析失败,尝试 LLM 结构化修复');

  const adapter = LLMFactory.getAdapter(modelType);
  const systemPrompt = `你是 JSON 结构化助手。你的唯一任务是把输入文本转换成目标 JSON。\n\n${getRvwProtocol('methodology')}`;
  const userPrompt = `请将以下“方法学评估文本”重组为目标 JSON。\n\n${rawContent}`;

  const reply = await adapter.chat(
    [
      { role: 'system' as const, content: systemPrompt },
      { role: 'user' as const, content: userPrompt },
    ],
    { temperature: 0.1, maxTokens: 4000 }
  );

  const candidate = parseJSONFromLLMResponse<MethodologyReview>(reply.content ?? '');
  if (isValidMethodologyReview(candidate)) {
    return normalizeMethodologyReview(candidate);
  }
  throw new Error('方法学评估结果结构化修复失败JSON字段不完整');
}
/**
 * Methodology review of a manuscript.
 *
 * Loads the 'RVW_METHODOLOGY' prompt, evaluates every entry of
 * METHODOLOGY_SECTION_DEFS in parallel and merges the results. A section that
 * fails is replaced by its fallback result; when every section fails, the
 * single-request legacy review is used instead.
 *
 * @param text      Manuscript text.
 * @param modelType Model to use.
 * @param userId    User id, forwarded to the prompt service (used for the
 *                  gray-release preview decision).
 * @returns The review result.
 * @throws Error with the prefix '方法学评估失败: ' wrapping any failure.
 */
export async function reviewMethodology(
  text: string,
  modelType: ModelType = 'deepseek-v3',
  userId?: string
): Promise<MethodologyReview> {
  try {
    // Step 1: load the business prompt from PromptService.
    const prompt = await getPromptService(prisma).get(
      'RVW_METHODOLOGY',
      {},
      { userId }
    );
    const businessPrompt = prompt.content;
    logger.info('[RVW:Methodology] Prompt 已加载', {
      userId,
      isDraft: prompt.isDraft,
      version: prompt.version,
      // Short SHA-1 prefix identifying the prompt text in the log.
      promptFingerprint: createHash('sha1').update(businessPrompt).digest('hex').slice(0, 12),
    });

    // Step 2: start one request per section and wait for all of them to settle.
    const llmAdapter = LLMFactory.getAdapter(modelType);
    logger.info('[RVW:Methodology] 开始分治并行评估', {
      modelType,
      sections: METHODOLOGY_SECTION_DEFS.map(({ part, start, end }) => `${part}(${start}-${end})`),
    });
    const pending = METHODOLOGY_SECTION_DEFS.map(section =>
      reviewMethodologySection(llmAdapter, businessPrompt, text, section)
    );
    const settled = await Promise.allSettled(pending);

    // Step 3: keep successful sections and substitute a fallback for failed ones.
    let fulfilledCount = 0;
    const sectionResults: SectionReviewResult[] = settled.map((outcome, index) => {
      if (outcome.status === 'fulfilled') {
        fulfilledCount += 1;
        return outcome.value;
      }
      const section = METHODOLOGY_SECTION_DEFS[index];
      const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
      logger.warn('[RVW:Methodology] 分段评估失败,使用降级结果', {
        section: section.part,
        reason,
      });
      return buildSectionFallback(section, reason);
    });

    // Step 4: with no usable section at all, fall back to the single-request review.
    if (fulfilledCount === 0) {
      logger.warn('[RVW:Methodology] 分治并行全部失败,回退 legacy 模式');
      return await reviewMethodologyLegacy(businessPrompt, text, modelType);
    }

    // Step 5: merge the sections and report the outcome.
    const merged = aggregateMethodologySections(sectionResults);
    logger.info('[RVW:Methodology] 分治评估完成', {
      fulfilledSections: fulfilledCount,
      overallScore: merged.overall_score,
      conclusion: merged.conclusion,
      missingCheckpoints: merged.checkpoints?.filter(cp => cp.status === 'not_mentioned').length ?? 0,
    });
    return merged;
  } catch (error) {
    const isError = error instanceof Error;
    logger.error('[RVW:Methodology] 方法学评估失败', {
      error: isError ? error.message : 'Unknown error',
      stack: isError ? error.stack : undefined,
    });
    throw new Error(`方法学评估失败: ${isError ? error.message : 'Unknown error'}`);
  }
}