feat(core): finalize rvw stability updates and pending module changes

Summary:
- Harden RVW prompt protocol handling and methodology review flow with 20-checkpoint coverage, divide-and-conquer execution, and timeout tuning
- Update RVW frontend methodology report rendering to show real structured outputs and grouped checkpoint sections
- Include pending backend/frontend updates across IIT admin, SSA, extraction forensics, and related integration files
- Sync system and RVW status documentation, deployment checklist, and RVW architecture/plan docs

Validation:
- Verified lint diagnostics for touched RVW backend/frontend files show no new errors
- Kept backup dump files and local test artifacts untracked

Made-with: Cursor
This commit is contained in:
2026-03-14 00:00:04 +08:00
parent 6edfad032f
commit ba464082cb
35 changed files with 1575 additions and 268 deletions

View File

@@ -10,10 +10,415 @@
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { ModelType } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { createHash } from 'crypto';
import { prisma } from '../../../config/database.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { MethodologyReview } from '../types/index.js';
import { MethodologyCheckpoint, MethodologyIssue, MethodologyPart, MethodologyReview } from '../types/index.js';
import { parseJSONFromLLMResponse } from './utils.js';
import { composeRvwSystemPrompt, getRvwProtocol } from './promptProtocols.js';
// Allowed overall conclusions, listed from mildest to strictest:
// accept as-is, minor revision, major revision, reject.
const METHODOLOGY_CONCLUSIONS = ['直接接收', '小修', '大修', '拒稿'] as const;
type MethodologyConclusion = typeof METHODOLOGY_CONCLUSIONS[number];
// Labels of the 20 methodology checkpoints. Checkpoint ids are 1-based, so the
// label for checkpoint `id` lives at index `id - 1`.
const METHODOLOGY_CHECKPOINT_ITEMS = [
'设计类型界定',
'纳入/排除标准',
'样本代表性',
'对照组设置',
'干预与观察细节',
'效应指标选择',
'设计要素完整性',
'样本量估算',
'质控与伦理',
'基础参数明示',
'分布特征',
'多因素调整',
'缺失值处理',
'一致性检查',
'前提条件检验',
'多重比较校正',
'统计量规范性',
'效应量表达',
'逻辑一致性',
'图表准确性',
] as const;
// Status values a checkpoint may carry; anything else is coerced to
// 'not_mentioned' by the normalizers in this file.
const METHODOLOGY_CHECKPOINT_STATUSES = ['pass', 'minor_issue', 'major_issue', 'not_mentioned'] as const;
type MethodologyCheckpointStatus = typeof METHODOLOGY_CHECKPOINT_STATUSES[number];
// Identifier of one divide-and-conquer sub-task.
type SectionKey = 'A' | 'B' | 'C';
// Describes one sub-task: its key, its display name (`part`) and the inclusive
// range of checkpoint ids (`start`..`end`) it is responsible for.
interface MethodologySectionDef {
key: SectionKey;
part: string;
start: number;
end: number;
}
// Result of reviewing a single section: a 0-100 score, the issues found and the
// per-checkpoint findings for that section's id range.
interface SectionReviewResult {
part: string;
score: number;
issues: MethodologyIssue[];
checkpoints: MethodologyCheckpoint[];
}
// The three sections together cover checkpoint ids 1-20 without overlap:
// A = research design (1-9), B = description of statistical methods (10-14),
// C = statistical analysis and results (15-20).
const METHODOLOGY_SECTION_DEFS: MethodologySectionDef[] = [
{ key: 'A', part: '科研设计评估', start: 1, end: 9 },
{ key: 'B', part: '统计学方法描述评估', start: 10, end: 14 },
{ key: 'C', part: '统计分析与结果评估', start: 15, end: 20 },
];
/**
 * Derives a review conclusion from a numeric score using fixed cut-offs.
 *
 * @param score Overall score; values of 90+, 75+ and 60+ map to the three
 *   progressively stricter non-reject conclusions.
 * @returns The conclusion for the first cut-off the score reaches, or the
 *   reject conclusion when it reaches none (this includes NaN).
 */
function inferConclusionFromScore(score: number): MethodologyConclusion {
  // Ordered from the highest cut-off down so the first match wins.
  const cutoffs: ReadonlyArray<readonly [number, MethodologyConclusion]> = [
    [90, '直接接收'],
    [75, '小修'],
    [60, '大修'],
  ];
  for (const [minimum, verdict] of cutoffs) {
    if (score >= minimum) {
      return verdict;
    }
  }
  return '拒稿';
}
/**
 * Lists the checkpoints that belong to a section.
 *
 * @param section Section whose inclusive id range (`start`..`end`) is expanded.
 * @returns One `{ id, item }` pair per id in the range, in ascending order,
 *   where `item` is the label stored at index `id - 1` of the checkpoint list.
 */
function getCheckpointItemsBySection(section: MethodologySectionDef): Array<{ id: number; item: string }> {
  const span = section.end - section.start + 1;
  // A non-positive span yields an empty array, matching a loop that never runs.
  return Array.from({ length: span }, (_unused, offset) => {
    const id = section.start + offset;
    return { id, item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1] };
  });
}
/**
 * Builds the output-protocol text for one divide-and-conquer sub-task.
 *
 * The returned string asks the model for JSON only and contains, in order:
 * a header naming the section key, a JSON template (with one sample issue and
 * one sample checkpoint that uses the section's first id and label), a list of
 * numbered constraints (id range, allowed status values, 0-100 score, empty
 * `issues` when nothing is found) and finally the `id. label` list of the
 * section's checkpoints.
 *
 * NOTE(review): the template text is sent to the model verbatim, so any edit
 * to the literal below changes runtime behaviour. A few phrases appear to lack
 * punctuation (e.g. the sample `location` value) — confirm against the
 * original file before touching them.
 *
 * @param section Section whose key, name and id range are interpolated.
 * @returns The protocol text for that section.
 */
function buildSectionProtocol(section: MethodologySectionDef): string {
// One "id. label" line per checkpoint in this section.
const checkpointLines = getCheckpointItemsBySection(section)
.map(cp => `${cp.id}. ${cp.item}`)
.join('\n');
return `【系统输出协议(分治子任务-${section.key},研发固化)】
请严格仅输出 JSON不要 Markdown、不要代码块、不要解释文字结构如下
{
"part": "${section.part}",
"score": 0,
"issues": [
{
"type": "问题类型",
"severity": "major",
"description": "问题描述",
"location": "位置方法学第2段",
"suggestion": "可执行修改建议"
}
],
"checkpoints": [
{
"id": ${section.start},
"item": "${METHODOLOGY_CHECKPOINT_ITEMS[section.start - 1]}",
"status": "major_issue",
"finding": "该检查点发现",
"suggestion": "可执行建议"
}
]
}
约束:
1) 仅评估本子任务范围id ${section.start}-${section.end}
2) checkpoints 必须严格覆盖本范围全部 id不可缺失、不可越界
3) checkpoints[].status 只能是 "pass" | "minor_issue" | "major_issue" | "not_mentioned"
4) score 必须是 0-100 数字
5) issues 为该分项问题清单,无问题时返回 []
本子任务检查点如下:
${checkpointLines}`;
}
/**
 * Checks that a parsed model reply is usable as a section review.
 *
 * Required: a string `part`, a finite numeric `score`, array `issues`, and an
 * array `checkpoints` holding at least one entry whose `id` falls inside the
 * section's inclusive id range. Full coverage of the range is deliberately not
 * required here: ids outside the range are dropped and missing ones are filled
 * in later by the normalizer.
 *
 * @param result Value produced by parsing the model reply.
 * @param section Section the reply is supposed to cover.
 * @returns `true` when the reply has the shape described above.
 */
function isValidSectionReview(result: unknown, section: MethodologySectionDef): result is SectionReviewResult {
  if (!result || typeof result !== 'object') return false;
  const data = result as Record<string, unknown>;

  if (typeof data.part !== 'string') return false;
  // `typeof NaN === 'number'`, so also require a finite value; otherwise a
  // NaN/Infinity score would leak into the per-part scores.
  if (typeof data.score !== 'number' || !Number.isFinite(data.score)) return false;
  if (!Array.isArray(data.issues) || !Array.isArray(data.checkpoints)) return false;

  return (data.checkpoints as unknown[]).some((cp) => {
    if (!cp || typeof cp !== 'object') return false;
    // Numeric strings such as "12" are accepted, as in the normalizers.
    const id = Number((cp as Record<string, unknown>).id);
    return Number.isInteger(id) && id >= section.start && id <= section.end;
  });
}
/**
 * Converts an untrusted `issues` value into a clean issue list.
 *
 * Non-array input yields an empty list and non-object entries are skipped.
 * String fields are trimmed; a missing, non-string or blank field is replaced
 * by a fixed placeholder. `severity` is 'major' only when it is exactly that
 * string, otherwise 'minor'.
 *
 * @param input Raw value taken from a model reply.
 * @returns The normalized issues, in input order.
 */
function normalizeMethodologyIssues(input: unknown): MethodologyIssue[] {
  if (!Array.isArray(input)) return [];

  // Trimmed text when usable, otherwise the supplied placeholder.
  const textOr = (value: unknown, placeholder: string): string => {
    if (typeof value !== 'string') return placeholder;
    const trimmed = value.trim();
    return trimmed ? trimmed : placeholder;
  };

  const normalized: MethodologyIssue[] = [];
  for (const entry of input) {
    if (!entry || typeof entry !== 'object') continue;
    const fields = entry as Record<string, unknown>;
    normalized.push({
      type: textOr(fields.type, '未分类问题'),
      severity: fields.severity === 'major' ? 'major' : 'minor',
      description: textOr(fields.description, '未提供详细描述'),
      location: textOr(fields.location, '未标注'),
      suggestion: textOr(fields.suggestion, '请补充可执行修改建议'),
    });
  }
  return normalized;
}
/**
 * Cleans up a validated section review so it covers exactly the section's
 * checkpoint range.
 *
 * - `issues` go through `normalizeMethodologyIssues`.
 * - `score` is rounded and clamped to 0-100.
 * - Checkpoints outside the section's id range are dropped; when an id occurs
 *   more than once the last occurrence wins; unknown statuses become
 *   'not_mentioned'; blank labels fall back to the canonical label.
 * - Every id in the range that the model did not return is filled with a
 *   'not_mentioned' placeholder asking for manual review.
 * - `part` is always taken from the section definition, not from the model.
 *
 * Checkpoint entries come straight from the model and are only loosely
 * validated upstream, so null or non-object entries are skipped instead of
 * being dereferenced (which used to throw and discard the whole section).
 *
 * @param raw Section review as parsed from the model reply.
 * @param section Section definition the review belongs to.
 * @returns The normalized review with one checkpoint per id in the range.
 */
function normalizeSectionReview(raw: SectionReviewResult, section: MethodologySectionDef): SectionReviewResult {
  const issues = normalizeMethodologyIssues(raw.issues);
  const score = Math.max(0, Math.min(100, Math.round(raw.score)));

  // Trimmed text, or undefined when the value is not a non-blank string.
  const usableText = (value: unknown): string | undefined => {
    if (typeof value !== 'string') return undefined;
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
  };

  const checkpointMap = new Map<number, MethodologyCheckpoint>();
  if (Array.isArray(raw.checkpoints)) {
    for (const entry of raw.checkpoints as unknown[]) {
      // Guard against `null`, numbers, strings, etc. inside the array.
      if (!entry || typeof entry !== 'object') continue;
      const cp = entry as Record<string, unknown>;

      const id = Number(cp.id);
      if (!Number.isInteger(id) || id < section.start || id > section.end) continue;

      const status = typeof cp.status === 'string' && METHODOLOGY_CHECKPOINT_STATUSES.includes(cp.status as MethodologyCheckpointStatus)
        ? cp.status as MethodologyCheckpointStatus
        : 'not_mentioned';

      checkpointMap.set(id, {
        id,
        item: usableText(cp.item) ?? METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status,
        finding: usableText(cp.finding) ?? '该检查点未被充分展开',
        suggestion: usableText(cp.suggestion),
      });
    }
  }

  // Emit the range in id order, filling gaps with placeholders.
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push(
      checkpointMap.get(id) ?? {
        id,
        item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status: 'not_mentioned',
        finding: '该检查点未被模型明确覆盖,请人工复核。',
      }
    );
  }

  return {
    part: section.part,
    score,
    issues,
    checkpoints,
  };
}
/**
 * Builds the degraded result used when a section review could not be obtained.
 *
 * Every checkpoint in the section's range is marked 'not_mentioned' with the
 * failure reason in its finding, the section receives a score of 60, and a
 * single minor issue records that the evaluation was degraded.
 *
 * @param section Section whose review failed.
 * @param reason Human-readable failure reason, embedded in the output texts.
 * @returns A placeholder section review covering the whole id range.
 */
function buildSectionFallback(section: MethodologySectionDef, reason: string): SectionReviewResult {
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push({
      id,
      item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status: 'not_mentioned',
      finding: `分段评估失败:${reason}`,
      suggestion: '建议重试或人工复核该检查点。',
    });
  }

  return {
    part: section.part,
    score: 60,
    issues: [{
      type: '执行降级',
      severity: 'minor',
      // The closing parenthesis was missing, leaving an unbalanced "(" in the
      // text shown to users.
      description: `该分段评估未正常完成(${reason})`,
      location: '系统执行层',
      suggestion: '建议重试方法学评估任务或查看后端日志。',
    }],
    checkpoints,
  };
}
/**
 * Checks that a parsed value has the minimum shape of a full methodology
 * review: numeric `overall_score`, array `parts`, string `summary`, and — when
 * present (not null/undefined) — a string `conclusion` and array `checkpoints`.
 *
 * @param result Value produced by parsing a model reply.
 * @returns `true` when all of the above hold.
 */
function isValidMethodologyReview(result: unknown): result is MethodologyReview {
  if (typeof result !== 'object' || result === null) return false;

  const { overall_score, parts, summary, conclusion, checkpoints } = result as Record<string, unknown>;

  const hasRequiredFields =
    typeof overall_score === 'number' && Array.isArray(parts) && typeof summary === 'string';
  const conclusionOk = conclusion == null || typeof conclusion === 'string';
  const checkpointsOk = checkpoints == null || Array.isArray(checkpoints);

  return hasRequiredFields && conclusionOk && checkpointsOk;
}
/**
 * Produces the complete, ordered list of 20 checkpoints from untrusted input.
 *
 * Entries that are not objects, or whose `id` is not an integer in 1-20, are
 * ignored. When an id occurs more than once the last occurrence wins. Unknown
 * statuses become 'not_mentioned', blank labels fall back to the canonical
 * label and blank findings to a fixed placeholder. Ids that never appear are
 * filled with a 'not_mentioned' placeholder asking for manual review.
 *
 * @param input Raw `checkpoints` value (anything other than an array is
 *   treated as empty).
 * @returns One checkpoint per canonical label, in id order.
 */
function normalizeMethodologyCheckpoints(input: unknown): MethodologyCheckpoint[] {
  // Trimmed text, or undefined when the value is not a non-blank string.
  const usableText = (value: unknown): string | undefined => {
    if (typeof value !== 'string') return undefined;
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
  };

  const byId = new Map<number, MethodologyCheckpoint>();
  const candidates: unknown[] = Array.isArray(input) ? input : [];

  for (const candidate of candidates) {
    if (typeof candidate !== 'object' || candidate === null) continue;
    const fields = candidate as Record<string, unknown>;

    const id = Number(fields.id);
    const idInRange = Number.isInteger(id) && id >= 1 && id <= 20;
    if (!idInRange) continue;

    let status: MethodologyCheckpointStatus = 'not_mentioned';
    if (typeof fields.status === 'string' && METHODOLOGY_CHECKPOINT_STATUSES.includes(fields.status as MethodologyCheckpointStatus)) {
      status = fields.status as MethodologyCheckpointStatus;
    }

    byId.set(id, {
      id,
      item: usableText(fields.item) ?? METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status,
      finding: usableText(fields.finding) ?? '该检查点未被充分展开',
      suggestion: usableText(fields.suggestion),
    });
  }

  // Walk the canonical labels so the output is always complete and ordered.
  const complete: MethodologyCheckpoint[] = [];
  for (let index = 0; index < METHODOLOGY_CHECKPOINT_ITEMS.length; index += 1) {
    const id = index + 1;
    complete.push(
      byId.get(id) ?? {
        id,
        item: METHODOLOGY_CHECKPOINT_ITEMS[index],
        status: 'not_mentioned',
        finding: '该检查点未被模型明确覆盖,请人工复核。',
      }
    );
  }
  return complete;
}
/**
 * Normalizes a full methodology review.
 *
 * The checkpoint list is rebuilt via `normalizeMethodologyCheckpoints`, a
 * warning is logged when any checkpoint ends up 'not_mentioned', and a
 * `conclusion` that is not one of the allowed values is cleared to
 * `undefined`. All other fields are passed through unchanged.
 *
 * @param result Review that already passed structural validation.
 * @returns A copy of the review with normalized `conclusion` and `checkpoints`.
 */
function normalizeMethodologyReview(result: MethodologyReview): MethodologyReview {
  const checkpoints = normalizeMethodologyCheckpoints(result.checkpoints);

  let missingCount = 0;
  for (const checkpoint of checkpoints) {
    if (checkpoint.status === 'not_mentioned') missingCount += 1;
  }
  if (missingCount > 0) {
    logger.warn('[RVW:Methodology] 20项检查点覆盖不完整', { missingCount });
  }

  const hasKnownConclusion =
    Boolean(result.conclusion) &&
    METHODOLOGY_CONCLUSIONS.includes(result.conclusion as MethodologyConclusion);

  return {
    ...result,
    conclusion: hasKnownConclusion ? result.conclusion : undefined,
    checkpoints,
  };
}
/**
 * Merges the per-section reviews into one overall methodology review.
 *
 * - `parts` carries each section's name, score and issues.
 * - `checkpoints` is the normalized union of all section checkpoints.
 * - `overall_score` is the rounded mean of the finite section scores, or 60
 *   when there is none.
 * - `summary` reports the number of major/minor checkpoint findings and up to
 *   three affected checkpoints; when nothing was flagged but some checkpoints
 *   were never covered it says so instead of claiming a clean result.
 * - `conclusion` is the stricter of the score-based conclusion and the
 *   conclusion implied by the finding counts. The counts may only escalate:
 *   previously they replaced the score-based value outright, so a failing
 *   score combined with a single finding was softened to a minor revision.
 *
 * @param sections Section reviews (real or fallback) to combine.
 * @returns The aggregated, normalized review.
 */
function aggregateMethodologySections(sections: SectionReviewResult[]): MethodologyReview {
  const parts: MethodologyPart[] = sections.map(section => ({
    part: section.part,
    score: section.score,
    issues: section.issues,
  }));
  const checkpoints = normalizeMethodologyCheckpoints(sections.flatMap(section => section.checkpoints));

  const validScores = parts.map(part => part.score).filter(score => Number.isFinite(score));
  const overall_score = validScores.length > 0
    ? Math.round(validScores.reduce((sum, score) => sum + score, 0) / validScores.length)
    : 60;

  const countWithStatus = (status: MethodologyCheckpointStatus): number =>
    checkpoints.filter(cp => cp.status === status).length;
  const majorCount = countWithStatus('major_issue');
  const minorCount = countWithStatus('minor_issue');
  const uncoveredCount = countWithStatus('not_mentioned');

  // Separate the entries so they do not run together ("1.x2.y").
  const topFindings = checkpoints
    .filter(cp => cp.status === 'major_issue' || cp.status === 'minor_issue')
    .slice(0, 3)
    .map(cp => `${cp.id}.${cp.item}`)
    .join('、');

  let summary: string;
  if (majorCount + minorCount > 0) {
    summary = `方法学评估发现 ${majorCount} 个严重问题、${minorCount} 个一般问题${topFindings ? `,重点涉及:${topFindings}` : ''}`;
  } else if (uncoveredCount > 0) {
    // Nothing flagged, but part of the checklist was never evaluated (e.g. a
    // section fell back), so a "no defects" summary would be misleading.
    summary = `方法学评估未发现明确缺陷,但有 ${uncoveredCount} 项检查点未被覆盖,请人工复核。`;
  } else {
    summary = '方法学20项检查点未发现明确缺陷整体统计学规范性较好。';
  }

  // Conclusion implied by the finding counts alone.
  let countBased: MethodologyConclusion = '直接接收';
  if (majorCount >= 8) countBased = '拒稿';
  else if (majorCount >= 4 || uncoveredCount >= 4) countBased = '大修';
  else if (majorCount >= 1 || minorCount >= 3 || uncoveredCount > 0) countBased = '小修';

  // METHODOLOGY_CONCLUSIONS is ordered mildest -> strictest, so the index
  // doubles as a severity rank; keep whichever conclusion is stricter.
  const scoreBased = inferConclusionFromScore(overall_score);
  const severityOf = (value: MethodologyConclusion): number => METHODOLOGY_CONCLUSIONS.indexOf(value);
  const conclusion: MethodologyConclusion = severityOf(countBased) > severityOf(scoreBased)
    ? countBased
    : scoreBased;

  return normalizeMethodologyReview({
    overall_score,
    summary,
    conclusion,
    checkpoints,
    parts,
  });
}
/**
 * Runs the methodology review for a single section.
 *
 * The model is asked once with the business prompt plus the section protocol.
 * If its reply cannot be parsed or fails validation, the reply text is sent
 * back to the model once more with a "restructure into JSON" prompt.
 *
 * @param llmAdapter Adapter used for both model calls.
 * @param businessPrompt Business-level system prompt.
 * @param text Manuscript text to review.
 * @param section Section (checkpoint range) to evaluate.
 * @returns The normalized section review.
 * @throws Error when the first reply is empty, or when the repaired reply is
 *   still unparsable/invalid. Errors from the model calls propagate as-is.
 */
async function reviewMethodologySection(
  llmAdapter: ReturnType<typeof LLMFactory.getAdapter>,
  businessPrompt: string,
  text: string,
  section: MethodologySectionDef
): Promise<SectionReviewResult> {
  const protocol = buildSectionProtocol(section);
  const messages = [
    { role: 'system' as const, content: `${businessPrompt}\n\n${protocol}` },
    { role: 'user' as const, content: `请仅评估“${section.part}”(检查点 ${section.start}-${section.end}),并按协议返回 JSON。\n\n稿件内容如下\n${text}` },
  ];
  const response = await llmAdapter.chat(messages, {
    temperature: 0.2,
    maxTokens: 2800,
  });
  const content = response.content ?? '';

  try {
    const parsed = parseJSONFromLLMResponse<SectionReviewResult>(content);
    if (!isValidSectionReview(parsed, section)) {
      throw new Error('section json invalid');
    }
    return normalizeSectionReview(parsed, section);
  } catch (error: unknown) {
    // Record why the first pass was rejected instead of discarding it.
    logger.warn('[RVW:Methodology] 分段结果解析失败,尝试 LLM 结构化修复', {
      section: section.part,
      reason: error instanceof Error ? error.message : String(error),
    });

    // With no reply text there is nothing to restructure; asking the model
    // anyway would only invite a made-up review.
    if (!content.trim()) {
      throw new Error('section response empty');
    }

    const repairMessages = [
      {
        role: 'system' as const,
        content: `你是 JSON 结构化助手。把输入文本转成目标 JSON。\n\n${protocol}`,
      },
      {
        role: 'user' as const,
        content: `请将以下方法学评估文本重组为目标 JSON仅检查点 ${section.start}-${section.end}\n\n${content}`,
      },
    ];
    const repaired = await llmAdapter.chat(repairMessages, {
      temperature: 0.1,
      maxTokens: 1800,
    });
    const repairedContent = repaired.content ?? '';
    const repairedParsed = parseJSONFromLLMResponse<SectionReviewResult>(repairedContent);
    if (!isValidSectionReview(repairedParsed, section)) {
      throw new Error('section repair invalid');
    }
    return normalizeSectionReview(repairedParsed, section);
  }
}
/**
 * Single-call review path: asks the model for the whole methodology review in
 * one request and falls back to `repairMethodologyToJson` when the reply
 * cannot be parsed, validated or normalized.
 *
 * @param businessPrompt Business-level system prompt.
 * @param text Manuscript text to review.
 * @param modelType Model used for the request (and for a repair, if needed).
 * @returns The normalized methodology review.
 */
async function reviewMethodologyLegacy(
  businessPrompt: string,
  text: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  const adapter = LLMFactory.getAdapter(modelType);
  const systemContent = composeRvwSystemPrompt('methodology', businessPrompt);
  const userContent = `请对以下稿件进行方法学评估。\n\n稿件内容如下\n${text}`;

  const reply = await adapter.chat(
    [
      { role: 'system' as const, content: systemContent },
      { role: 'user' as const, content: userContent },
    ],
    { temperature: 0.3, maxTokens: 5000 }
  );
  const rawReview = reply.content ?? '';

  // Stays undefined whenever the direct path fails for any reason.
  let direct: MethodologyReview | undefined;
  try {
    const candidate = parseJSONFromLLMResponse<MethodologyReview>(rawReview);
    if (isValidMethodologyReview(candidate)) {
      direct = normalizeMethodologyReview(candidate);
    }
  } catch {
    direct = undefined;
  }

  return direct ?? repairMethodologyToJson(rawReview, modelType);
}
/**
 * Second-chance path for the single-call review: asks the model to restructure
 * a previous free-form reply into the methodology JSON protocol.
 *
 * @param rawContent The reply text that failed parsing/validation.
 * @param modelType Model used for the repair request.
 * @returns The normalized methodology review parsed from the repaired reply.
 * @throws Error when the repaired reply still lacks the required structure.
 */
async function repairMethodologyToJson(
  rawContent: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  logger.warn('[RVW:Methodology] 首次解析失败,尝试 LLM 结构化修复');

  const adapter = LLMFactory.getAdapter(modelType);
  const systemContent = `你是 JSON 结构化助手。你的唯一任务是把输入文本转换成目标 JSON。\n\n${getRvwProtocol('methodology')}`;
  const userContent = `请将以下“方法学评估文本”重组为目标 JSON。\n\n${rawContent}`;

  const reply = await adapter.chat(
    [
      { role: 'system' as const, content: systemContent },
      { role: 'user' as const, content: userContent },
    ],
    { temperature: 0.1, maxTokens: 4000 }
  );

  const candidate = parseJSONFromLLMResponse<MethodologyReview>(reply.content ?? '');
  if (isValidMethodologyReview(candidate)) {
    return normalizeMethodologyReview(candidate);
  }
  throw new Error('方法学评估结果结构化修复失败JSON字段不完整');
}
/**
* 方法学评估
@@ -30,44 +435,62 @@ export async function reviewMethodology(
try {
// 1. 从 PromptService 获取系统Prompt支持灰度预览
const promptService = getPromptService(prisma);
const { content: systemPrompt, isDraft } = await promptService.get(
const { content: businessPrompt, isDraft, version } = await promptService.get(
'RVW_METHODOLOGY',
{},
{ userId }
);
const promptFingerprint = createHash('sha1').update(businessPrompt).digest('hex').slice(0, 12);
if (isDraft) {
logger.info('[RVW:Methodology] 使用 DRAFT 版本 Prompt调试模式', { userId });
}
logger.info('[RVW:Methodology] Prompt 已加载', {
userId,
isDraft,
version,
promptFingerprint,
});
// 2. 构建消息
const messages = [
{ role: 'system' as const, content: systemPrompt },
{ role: 'user' as const, content: `请对以下稿件进行方法学评估:\n\n${text}` },
];
// 3. 调用LLM
logger.info('[RVW:Methodology] 开始方法学评估', { modelType });
const llmAdapter = LLMFactory.getAdapter(modelType);
const response = await llmAdapter.chat(messages, {
temperature: 0.3,
maxTokens: 8000,
});
const methContent = response.content ?? '';
logger.info('[RVW:Methodology] 评估完成', {
modelType,
responseLength: methContent.length
logger.info('[RVW:Methodology] 开始分治并行评估', {
modelType,
sections: METHODOLOGY_SECTION_DEFS.map(section => `${section.part}(${section.start}-${section.end})`),
});
// 4. 解析JSON响应
const result = parseJSONFromLLMResponse<MethodologyReview>(methContent);
// 5. 验证响应格式
if (!result || typeof result.overall_score !== 'number' || !Array.isArray(result.parts)) {
throw new Error('LLM返回的数据格式不正确');
const settled = await Promise.allSettled(
METHODOLOGY_SECTION_DEFS.map(section =>
reviewMethodologySection(llmAdapter, businessPrompt, text, section)
)
);
const sectionResults: SectionReviewResult[] = [];
let fulfilledCount = 0;
for (let i = 0; i < settled.length; i += 1) {
const outcome = settled[i];
const section = METHODOLOGY_SECTION_DEFS[i];
if (outcome.status === 'fulfilled') {
fulfilledCount += 1;
sectionResults.push(outcome.value);
} else {
const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
logger.warn('[RVW:Methodology] 分段评估失败,使用降级结果', {
section: section.part,
reason,
});
sectionResults.push(buildSectionFallback(section, reason));
}
}
return result;
if (fulfilledCount === 0) {
logger.warn('[RVW:Methodology] 分治并行全部失败,回退 legacy 模式');
return await reviewMethodologyLegacy(businessPrompt, text, modelType);
}
const merged = aggregateMethodologySections(sectionResults);
logger.info('[RVW:Methodology] 分治评估完成', {
fulfilledSections: fulfilledCount,
overallScore: merged.overall_score,
conclusion: merged.conclusion,
missingCheckpoints: merged.checkpoints?.filter(cp => cp.status === 'not_mentioned').length ?? 0,
});
return merged;
} catch (error) {
logger.error('[RVW:Methodology] 方法学评估失败', {
error: error instanceof Error ? error.message : 'Unknown error',