Summary: - Harden RVW prompt protocol handling and methodology review flow with 20-checkpoint coverage, divide-and-conquer execution, and timeout tuning - Update RVW frontend methodology report rendering to show real structured outputs and grouped checkpoint sections - Include pending backend/frontend updates across IIT admin, SSA, extraction forensics, and related integration files - Sync system and RVW status documentation, deployment checklist, and RVW architecture/plan docs Validation: - Verified lint diagnostics for touched RVW backend/frontend files show no new errors - Kept backup dump files and local test artifacts untracked Made-with: Cursor
507 lines
19 KiB
TypeScript
507 lines
19 KiB
TypeScript
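/**
 * RVW manuscript review module - methodology assessment service
 * @module rvw/services/methodologyService
 *
 * Phase 3.5.5 refactor: prompts are loaded through PromptService instead of being read from files
 * - Supports gray-release preview (debuggers see DRAFT, regular users see ACTIVE)
 * - Three-level fallback (database -> cache -> built-in default)
 */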
|
||
|
||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||
import { ModelType } from '../../../common/llm/adapters/types.js';
|
||
import { logger } from '../../../common/logging/index.js';
|
||
import { createHash } from 'crypto';
|
||
import { prisma } from '../../../config/database.js';
|
||
import { getPromptService } from '../../../common/prompt/index.js';
|
||
import { MethodologyCheckpoint, MethodologyIssue, MethodologyPart, MethodologyReview } from '../types/index.js';
|
||
import { parseJSONFromLLMResponse } from './utils.js';
|
||
import { composeRvwSystemPrompt, getRvwProtocol } from './promptProtocols.js';
|
||
|
||
/**
 * Editorial verdicts a methodology review may carry, listed from the most
 * favourable (accept as-is) to the least favourable (reject).
 */
const METHODOLOGY_CONCLUSIONS = ['直接接收', '小修', '大修', '拒稿'] as const;

/** Union of the literal verdict strings in METHODOLOGY_CONCLUSIONS. */
type MethodologyConclusion = (typeof METHODOLOGY_CONCLUSIONS)[number];
|
||
/**
 * Display names of the methodology checkpoints.
 * Checkpoint id N (1-based) is stored at index N - 1.
 */
const METHODOLOGY_CHECKPOINT_ITEMS = [
  // ids 1-9
  '设计类型界定',
  '纳入/排除标准',
  '样本代表性',
  '对照组设置',
  '干预与观察细节',
  '效应指标选择',
  '设计要素完整性',
  '样本量估算',
  '质控与伦理',
  // ids 10-14
  '基础参数明示',
  '分布特征',
  '多因素调整',
  '缺失值处理',
  '一致性检查',
  // ids 15-20
  '前提条件检验',
  '多重比较校正',
  '统计量规范性',
  '效应量表达',
  '逻辑一致性',
  '图表准确性',
] as const;
|
||
/** Status values accepted for a single checkpoint; any other value is normalized to 'not_mentioned'. */
const METHODOLOGY_CHECKPOINT_STATUSES = ['pass', 'minor_issue', 'major_issue', 'not_mentioned'] as const;

/** Union of the literal status strings in METHODOLOGY_CHECKPOINT_STATUSES. */
type MethodologyCheckpointStatus = (typeof METHODOLOGY_CHECKPOINT_STATUSES)[number];

/** Identifier of one divide-and-conquer sub-task. */
type SectionKey = 'A' | 'B' | 'C';
|
||
|
||
/** Describes one sub-task of the review: a named part and the inclusive checkpoint id range it owns. */
interface MethodologySectionDef {
  /** Short sub-task identifier, embedded in the protocol header. */
  key: SectionKey;
  /** Human-readable part name, also used as the `part` field of the result. */
  part: string;
  /** First checkpoint id (1-based, inclusive) covered by this section. */
  start: number;
  /** Last checkpoint id (1-based, inclusive) covered by this section. */
  end: number;
}
|
||
|
||
/** Result of reviewing a single section, as requested from the model by the section protocol. */
interface SectionReviewResult {
  /** Name of the reviewed part. */
  part: string;
  /** Section score; the protocol asks for a number in 0-100. */
  score: number;
  /** Issues found in this section; empty when none were reported. */
  issues: MethodologyIssue[];
  /** Per-checkpoint findings for the ids owned by this section. */
  checkpoints: MethodologyCheckpoint[];
}
|
||
|
||
/**
 * The three sub-tasks the review is split into. Their id ranges are contiguous
 * and together cover checkpoints 1-20.
 */
const METHODOLOGY_SECTION_DEFS: MethodologySectionDef[] = [
  { key: 'A', part: '科研设计评估', start: 1, end: 9 },
  { key: 'B', part: '统计学方法描述评估', start: 10, end: 14 },
  { key: 'C', part: '统计分析与结果评估', start: 15, end: 20 },
];
|
||
|
||
/**
 * Maps a methodology score to an editorial verdict.
 *
 * Thresholds: >= 90 accept as-is, >= 75 minor revision, >= 60 major revision.
 * Anything lower is a rejection; NaN also lands here because it fails every comparison.
 *
 * @param score Overall or section score, expected in 0-100.
 * @returns The verdict string for that score band.
 */
function inferConclusionFromScore(score: number): MethodologyConclusion {
  if (score >= 90) return '直接接收';
  if (score >= 75) return '小修';
  if (score >= 60) return '大修';
  return '拒稿';
}
|
||
|
||
/**
 * Lists the checkpoints owned by a section, in ascending id order.
 *
 * @param section Section whose inclusive [start, end] id range is expanded.
 * @returns One `{ id, item }` pair per id; `item` is the checklist name stored at index id - 1.
 */
function getCheckpointItemsBySection(section: MethodologySectionDef): Array<{ id: number; item: string }> {
  // An inverted range yields an empty list.
  const count = Math.max(0, section.end - section.start + 1);
  return Array.from({ length: count }, (_, offset) => {
    const id = section.start + offset;
    return { id, item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1] };
  });
}
|
||
|
||
/**
 * Builds the output-protocol text appended to the system prompt for one section.
 *
 * The text shows the model the exact JSON shape to return (with the section's
 * first checkpoint as the example entry), states the constraints on ids, status
 * values and score, and ends with the numbered list of checkpoints in scope.
 *
 * @param section Section the protocol is generated for.
 * @returns The protocol text (Chinese, as sent to the model).
 */
function buildSectionProtocol(section: MethodologySectionDef): string {
  const checkpointLines = getCheckpointItemsBySection(section)
    .map(cp => `${cp.id}. ${cp.item}`)
    .join('\n');

  // The template body is flush-left on purpose: every character inside the
  // backticks, including leading whitespace, is part of the prompt.
  return `【系统输出协议(分治子任务-${section.key},研发固化)】
请严格仅输出 JSON(不要 Markdown、不要代码块、不要解释文字),结构如下:
{
"part": "${section.part}",
"score": 0,
"issues": [
{
"type": "问题类型",
"severity": "major",
"description": "问题描述",
"location": "位置(如:方法学第2段)",
"suggestion": "可执行修改建议"
}
],
"checkpoints": [
{
"id": ${section.start},
"item": "${METHODOLOGY_CHECKPOINT_ITEMS[section.start - 1]}",
"status": "major_issue",
"finding": "该检查点发现",
"suggestion": "可执行建议"
}
]
}
约束:
1) 仅评估本子任务范围:id ${section.start}-${section.end}
2) checkpoints 必须严格覆盖本范围全部 id(不可缺失、不可越界)
3) checkpoints[].status 只能是 "pass" | "minor_issue" | "major_issue" | "not_mentioned"
4) score 必须是 0-100 数字
5) issues 为该分项问题清单,无问题时返回 []
本子任务检查点如下:
${checkpointLines}`;
}
|
||
|
||
/**
 * Structural check for a parsed section response.
 *
 * Requires `part` (string), `score` (number), `issues` (array) and `checkpoints`
 * (array), plus at least one checkpoint whose id, after numeric coercion, is an
 * integer inside the section's range. Full coverage is deliberately not required
 * here; it is enough that the response is about the right section.
 *
 * @param result Parsed model output of unknown shape.
 * @param section Section the response is expected to belong to.
 * @returns true when the value is usable as a SectionReviewResult.
 */
function isValidSectionReview(result: unknown, section: MethodologySectionDef): result is SectionReviewResult {
  if (!result || typeof result !== 'object') return false;

  const data = result as Record<string, unknown>;
  if (!Array.isArray(data.issues)) return false;
  if (!Array.isArray(data.checkpoints)) return false;
  if (typeof data.score !== 'number') return false;
  if (typeof data.part !== 'string') return false;

  // Non-object entries are tolerated: they simply never count as in range.
  return (data.checkpoints as unknown[]).some((cp) => {
    if (!cp || typeof cp !== 'object') return false;
    const id = Number((cp as Record<string, unknown>).id);
    return Number.isInteger(id) && id >= section.start && id <= section.end;
  });
}
|
||
|
||
/**
 * Coerces a model-supplied issue list into well-formed MethodologyIssue objects.
 *
 * Non-array input yields an empty list and non-object entries are dropped.
 * Each text field is trimmed; a missing, non-string or blank field gets a fixed
 * placeholder. `severity` is 'major' only when it is exactly 'major', else 'minor'.
 *
 * @param input Raw `issues` value from the model response.
 * @returns Normalized issues in their original order.
 */
function normalizeMethodologyIssues(input: unknown): MethodologyIssue[] {
  if (!Array.isArray(input)) return [];

  // Returns the trimmed string, or the fallback when the value is not a non-blank string.
  const textOr = (value: unknown, fallback: string): string =>
    typeof value === 'string' && value.trim() ? value.trim() : fallback;

  return input
    .filter((row): row is Record<string, unknown> => Boolean(row) && typeof row === 'object')
    .map((issue) => ({
      type: textOr(issue.type, '未分类问题'),
      severity: issue.severity === 'major' ? 'major' : 'minor',
      description: textOr(issue.description, '未提供详细描述'),
      location: textOr(issue.location, '未标注'),
      suggestion: textOr(issue.suggestion, '请补充可执行修改建议'),
    }));
}
|
||
|
||
/**
 * Normalizes a validated section response into a complete, in-range result.
 *
 * - `issues` go through normalizeMethodologyIssues.
 * - `score` is rounded and clamped to 0-100.
 * - Checkpoints outside the section's id range are discarded, unknown statuses
 *   become 'not_mentioned', and blank text fields get placeholders. If an id
 *   appears more than once, the last occurrence wins.
 * - Every id in the section's range is present in the output; ids the model
 *   skipped are filled with a 'not_mentioned' placeholder asking for manual review.
 * - `part` is always taken from the section definition, not from the model.
 *
 * @param raw Response that already passed isValidSectionReview.
 * @param section Section the response belongs to.
 * @returns The normalized section result, checkpoints in ascending id order.
 */
function normalizeSectionReview(raw: SectionReviewResult, section: MethodologySectionDef): SectionReviewResult {
  const issues = normalizeMethodologyIssues(raw.issues);
  const score = Math.max(0, Math.min(100, Math.round(raw.score)));

  const checkpointMap = new Map<number, MethodologyCheckpoint>();
  const rawCheckpoints: unknown[] = Array.isArray(raw.checkpoints) ? raw.checkpoints : [];
  for (const entry of rawCheckpoints) {
    // The validator only needs ONE well-formed checkpoint, so null or primitive
    // entries can still reach this point. Skip them rather than letting a
    // property access throw and discard the whole response.
    if (!entry || typeof entry !== 'object') continue;
    const cp = entry as Record<string, unknown>;

    const id = Number(cp.id);
    if (!Number.isInteger(id) || id < section.start || id > section.end) continue;

    const status = typeof cp.status === 'string' && METHODOLOGY_CHECKPOINT_STATUSES.includes(cp.status as MethodologyCheckpointStatus)
      ? cp.status as MethodologyCheckpointStatus
      : 'not_mentioned';

    checkpointMap.set(id, {
      id,
      item: typeof cp.item === 'string' && cp.item.trim() ? cp.item.trim() : METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
      status,
      finding: typeof cp.finding === 'string' && cp.finding.trim() ? cp.finding.trim() : '该检查点未被充分展开',
      suggestion: typeof cp.suggestion === 'string' && cp.suggestion.trim() ? cp.suggestion.trim() : undefined,
    });
  }

  // Emit exactly one entry per id in range, filling gaps the model left.
  const checkpoints: MethodologyCheckpoint[] = [];
  for (let id = section.start; id <= section.end; id += 1) {
    checkpoints.push(
      checkpointMap.get(id) ?? {
        id,
        item: METHODOLOGY_CHECKPOINT_ITEMS[id - 1],
        status: 'not_mentioned',
        finding: '该检查点未被模型明确覆盖,请人工复核。',
      }
    );
  }

  return {
    part: section.part,
    score,
    issues,
    checkpoints,
  };
}
|
||
|
||
/**
 * Builds the placeholder result used when a section review could not be completed.
 *
 * Every checkpoint in the section is marked 'not_mentioned' with the failure
 * reason in its finding, the score is fixed at 60, and a single minor
 * "execution degraded" issue records the failure.
 *
 * @param section Section whose review failed.
 * @param reason Short failure description, embedded in the finding and issue text.
 * @returns A structurally complete SectionReviewResult for the failed section.
 */
function buildSectionFallback(section: MethodologySectionDef, reason: string): SectionReviewResult {
  const checkpoints: MethodologyCheckpoint[] = getCheckpointItemsBySection(section).map(({ id, item }) => ({
    id,
    item,
    status: 'not_mentioned',
    finding: `分段评估失败:${reason}`,
    suggestion: '建议重试或人工复核该检查点。',
  }));

  return {
    part: section.part,
    score: 60,
    issues: [{
      type: '执行降级',
      severity: 'minor',
      description: `该分段评估未正常完成(${reason})`,
      location: '系统执行层',
      suggestion: '建议重试方法学评估任务或查看后端日志。',
    }],
    checkpoints,
  };
}
|
||
|
||
/**
 * Structural check for a whole-manuscript (legacy, single-call) review response.
 *
 * Requires `overall_score` (number), `parts` (array) and `summary` (string).
 * `conclusion` and `checkpoints` are optional, but when present (not
 * null/undefined) must be a string and an array respectively.
 *
 * @param result Parsed model output of unknown shape.
 * @returns true when the value is usable as a MethodologyReview.
 */
function isValidMethodologyReview(result: unknown): result is MethodologyReview {
  if (!result || typeof result !== 'object') return false;

  const data = result as Record<string, unknown>;
  if (typeof data.overall_score !== 'number') return false;
  if (!Array.isArray(data.parts)) return false;
  if (typeof data.summary !== 'string') return false;
  // `!= null` treats both null and undefined as "absent".
  if (data.conclusion != null && typeof data.conclusion !== 'string') return false;
  if (data.checkpoints != null && !Array.isArray(data.checkpoints)) return false;
  return true;
}
|
||
|
||
/**
 * Produces the full checkpoint list (one entry per checklist item, ascending id)
 * from whatever checkpoint data the model returned.
 *
 * Entries that are not objects, or whose id is not an integer within the
 * checklist, are ignored. Unknown statuses become 'not_mentioned' and blank
 * text fields get placeholders. If an id appears more than once, the last
 * occurrence wins. Ids with no usable entry are filled with a 'not_mentioned'
 * placeholder asking for manual review.
 *
 * @param input Raw `checkpoints` value; anything other than an array is treated as empty.
 * @returns Exactly one checkpoint per item in METHODOLOGY_CHECKPOINT_ITEMS.
 */
function normalizeMethodologyCheckpoints(input: unknown): MethodologyCheckpoint[] {
  // Derive the upper bound from the checklist so the two cannot drift apart.
  const maxId = METHODOLOGY_CHECKPOINT_ITEMS.length;
  const normalizedMap = new Map<number, MethodologyCheckpoint>();

  if (Array.isArray(input)) {
    for (const cp of input) {
      if (!cp || typeof cp !== 'object') continue;
      const row = cp as Record<string, unknown>;

      const id = typeof row.id === 'number' ? row.id : Number(row.id);
      if (!Number.isInteger(id) || id < 1 || id > maxId) continue;

      const status = typeof row.status === 'string' && METHODOLOGY_CHECKPOINT_STATUSES.includes(row.status as MethodologyCheckpointStatus)
        ? row.status as MethodologyCheckpointStatus
        : 'not_mentioned';
      const item = typeof row.item === 'string' && row.item.trim()
        ? row.item.trim()
        : METHODOLOGY_CHECKPOINT_ITEMS[id - 1];
      const finding = typeof row.finding === 'string' && row.finding.trim()
        ? row.finding.trim()
        : '该检查点未被充分展开';
      const suggestion = typeof row.suggestion === 'string' && row.suggestion.trim()
        ? row.suggestion.trim()
        : undefined;

      normalizedMap.set(id, { id, item, status, finding, suggestion });
    }
  }

  return METHODOLOGY_CHECKPOINT_ITEMS.map((item, idx) => {
    const id = idx + 1;
    return normalizedMap.get(id) ?? {
      id,
      item,
      status: 'not_mentioned',
      finding: '该检查点未被模型明确覆盖,请人工复核。',
    };
  });
}
|
||
|
||
/**
 * Normalizes a structurally valid review.
 *
 * `conclusion` is kept only when it is one of the known verdict strings,
 * otherwise it becomes undefined. `checkpoints` is rebuilt into the full
 * per-item list. A warning is logged when any checkpoint ends up 'not_mentioned'.
 * All other fields of the input are passed through unchanged.
 *
 * @param result Review that already passed structural validation.
 * @returns A copy of the review with normalized `conclusion` and `checkpoints`.
 */
function normalizeMethodologyReview(result: MethodologyReview): MethodologyReview {
  const conclusion = (result.conclusion && METHODOLOGY_CONCLUSIONS.includes(result.conclusion as MethodologyConclusion))
    ? result.conclusion
    : undefined;

  const checkpoints = normalizeMethodologyCheckpoints(result.checkpoints);

  const missingCount = checkpoints.filter(cp => cp.status === 'not_mentioned').length;
  if (missingCount > 0) {
    logger.warn('[RVW:Methodology] 20项检查点覆盖不完整', { missingCount });
  }

  return {
    ...result,
    conclusion,
    checkpoints,
  };
}
|
||
|
||
/**
 * Merges per-section results into one MethodologyReview.
 *
 * - `parts` carries each section's name, score and issues.
 * - `checkpoints` is the normalized union of all section checkpoints.
 * - `overall_score` is the rounded mean of the finite section scores, or 60 when
 *   there is none.
 * - `summary` reports the number of major/minor checkpoints and names up to
 *   three of them. When no issue was found but some checkpoints were never
 *   evaluated, it says so instead of claiming a clean result.
 * - `conclusion` is the STRICTER of the score-based verdict and the verdict
 *   implied by the checkpoint counts, so a high issue count can tighten the
 *   verdict but can never soften a low score.
 *
 * @param sections Section results (normalized or fallback), one per section.
 * @returns The aggregated, normalized review.
 */
function aggregateMethodologySections(sections: SectionReviewResult[]): MethodologyReview {
  const parts: MethodologyPart[] = sections.map(section => ({
    part: section.part,
    score: section.score,
    issues: section.issues,
  }));

  const checkpoints = normalizeMethodologyCheckpoints(sections.flatMap(section => section.checkpoints));

  const validScores = parts.map(part => part.score).filter(score => Number.isFinite(score));
  const overall_score = validScores.length > 0
    ? Math.round(validScores.reduce((sum, score) => sum + score, 0) / validScores.length)
    : 60;

  const majorCount = checkpoints.filter(cp => cp.status === 'major_issue').length;
  const minorCount = checkpoints.filter(cp => cp.status === 'minor_issue').length;
  const uncoveredCount = checkpoints.filter(cp => cp.status === 'not_mentioned').length;

  const topFindings = checkpoints
    .filter(cp => cp.status === 'major_issue' || cp.status === 'minor_issue')
    .slice(0, 3)
    .map(cp => `${cp.id}.${cp.item}`)
    .join(';');

  let summary: string;
  if (majorCount + minorCount > 0) {
    summary = `方法学评估发现 ${majorCount} 个严重问题、${minorCount} 个一般问题${topFindings ? `,重点涉及:${topFindings}` : ''}。`;
  } else if (uncoveredCount > 0) {
    // Unevaluated checkpoints are not evidence of quality; do not report a clean bill.
    summary = `方法学评估未发现明确缺陷,但有 ${uncoveredCount} 项检查点未被覆盖,请人工复核。`;
  } else {
    summary = '方法学20项检查点未发现明确缺陷,整体统计学规范性较好。';
  }

  // Verdict implied by the checkpoint counts alone; the most favourable verdict when no rule fires.
  let byFindings: MethodologyConclusion = '直接接收';
  if (majorCount >= 8) byFindings = '拒稿';
  else if (majorCount >= 4 || uncoveredCount >= 4) byFindings = '大修';
  else if (majorCount >= 1 || minorCount >= 3 || uncoveredCount > 0) byFindings = '小修';

  // METHODOLOGY_CONCLUSIONS is ordered from most to least favourable, so a
  // larger index means a stricter verdict.
  const byScore = inferConclusionFromScore(overall_score);
  const conclusion: MethodologyConclusion =
    METHODOLOGY_CONCLUSIONS.indexOf(byFindings) > METHODOLOGY_CONCLUSIONS.indexOf(byScore)
      ? byFindings
      : byScore;

  return normalizeMethodologyReview({
    overall_score,
    summary,
    conclusion,
    checkpoints,
    parts,
  });
}
|
||
|
||
/**
 * Reviews one section of the checklist with the model.
 *
 * First pass: the business prompt plus the section protocol is sent as the system
 * message and the manuscript as the user message. If the reply cannot be parsed,
 * validated or normalized, the failure is logged and a second "repair" call asks
 * the model to restructure its own reply into the target JSON.
 *
 * @param llmAdapter Adapter used for both calls.
 * @param businessPrompt Business-level system prompt.
 * @param text Manuscript text.
 * @param section Section to evaluate.
 * @returns The normalized section result.
 * @throws If either model call rejects, or if the repaired reply is still not
 *         parseable/valid. The caller is expected to handle the rejection.
 */
async function reviewMethodologySection(
  llmAdapter: ReturnType<typeof LLMFactory.getAdapter>,
  businessPrompt: string,
  text: string,
  section: MethodologySectionDef
): Promise<SectionReviewResult> {
  const messages = [
    { role: 'system' as const, content: `${businessPrompt}\n\n${buildSectionProtocol(section)}` },
    { role: 'user' as const, content: `请仅评估“${section.part}”(检查点 ${section.start}-${section.end}),并按协议返回 JSON。\n\n稿件内容如下:\n${text}` },
  ];
  const response = await llmAdapter.chat(messages, {
    temperature: 0.2,
    maxTokens: 2800,
  });
  const content = response.content ?? '';

  try {
    const parsed = parseJSONFromLLMResponse<SectionReviewResult>(content);
    if (!isValidSectionReview(parsed, section)) {
      throw new Error('section json invalid');
    }
    return normalizeSectionReview(parsed, section);
  } catch (error) {
    // Record why the first pass was rejected before spending a second model call on it.
    logger.warn('[RVW:Methodology] 分段首次解析失败,尝试 LLM 结构化修复', {
      section: section.part,
      reason: error instanceof Error ? error.message : String(error),
    });

    const repairMessages = [
      {
        role: 'system' as const,
        content: `你是 JSON 结构化助手。把输入文本转成目标 JSON。\n\n${buildSectionProtocol(section)}`,
      },
      {
        role: 'user' as const,
        content: `请将以下方法学评估文本重组为目标 JSON(仅检查点 ${section.start}-${section.end}):\n\n${content}`,
      },
    ];
    const repaired = await llmAdapter.chat(repairMessages, {
      temperature: 0.1,
      maxTokens: 1800,
    });
    const repairedContent = repaired.content ?? '';
    const repairedParsed = parseJSONFromLLMResponse<SectionReviewResult>(repairedContent);
    if (!isValidSectionReview(repairedParsed, section)) {
      throw new Error('section repair invalid');
    }
    return normalizeSectionReview(repairedParsed, section);
  }
}
|
||
|
||
/**
 * Single-call review of the whole manuscript.
 *
 * Sends the composed methodology system prompt and the manuscript in one
 * request. If the reply cannot be parsed or fails structural validation, it is
 * handed to repairMethodologyToJson for a restructuring attempt.
 *
 * @param businessPrompt Business-level system prompt.
 * @param text Manuscript text.
 * @param modelType Model used for the call (and for the repair call, if needed).
 * @returns The normalized review.
 * @throws If the model call rejects or the repair attempt fails.
 */
async function reviewMethodologyLegacy(
  businessPrompt: string,
  text: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  const llmAdapter = LLMFactory.getAdapter(modelType);
  const messages = [
    { role: 'system' as const, content: composeRvwSystemPrompt('methodology', businessPrompt) },
    { role: 'user' as const, content: `请对以下稿件进行方法学评估。\n\n稿件内容如下:\n${text}` },
  ];

  const response = await llmAdapter.chat(messages, {
    temperature: 0.3,
    maxTokens: 5000,
  });
  const methContent = response.content ?? '';

  try {
    const result = parseJSONFromLLMResponse<MethodologyReview>(methContent);
    if (!isValidMethodologyReview(result)) throw new Error('invalid json');
    return normalizeMethodologyReview(result);
  } catch {
    // The repair helper logs the fallback itself.
    return repairMethodologyToJson(methContent, modelType);
  }
}
|
||
|
||
/**
 * Asks the model to restructure a free-form methodology review into the target JSON.
 *
 * Used after the first parse of a whole-manuscript review failed. Logs a warning,
 * sends the raw reply together with the methodology protocol, then parses,
 * validates and normalizes the result.
 *
 * @param rawContent The unparseable reply from the first call.
 * @param modelType Model used for the repair call.
 * @returns The normalized review.
 * @throws If the model call rejects, the repaired reply cannot be parsed, or it
 *         fails structural validation.
 */
async function repairMethodologyToJson(
  rawContent: string,
  modelType: ModelType
): Promise<MethodologyReview> {
  logger.warn('[RVW:Methodology] 首次解析失败,尝试 LLM 结构化修复');

  const llmAdapter = LLMFactory.getAdapter(modelType);
  const repairMessages = [
    {
      role: 'system' as const,
      content: `你是 JSON 结构化助手。你的唯一任务是把输入文本转换成目标 JSON。\n\n${getRvwProtocol('methodology')}`,
    },
    {
      role: 'user' as const,
      content: `请将以下“方法学评估文本”重组为目标 JSON。\n\n${rawContent}`,
    },
  ];

  const repaired = await llmAdapter.chat(repairMessages, {
    temperature: 0.1,
    maxTokens: 4000,
  });

  const repairedContent = repaired.content ?? '';
  const parsed = parseJSONFromLLMResponse<MethodologyReview>(repairedContent);

  if (!isValidMethodologyReview(parsed)) {
    throw new Error('方法学评估结果结构化修复失败(JSON字段不完整)');
  }

  return normalizeMethodologyReview(parsed);
}
|
||
|
||
/**
 * Methodology review entry point.
 *
 * Loads the 'RVW_METHODOLOGY' prompt through PromptService, then reviews the
 * sections in METHODOLOGY_SECTION_DEFS concurrently. A section that fails is
 * replaced by a fallback result; if every section fails, the whole manuscript
 * is reviewed with the single-call legacy path instead.
 *
 * @param text Manuscript text.
 * @param modelType Model to use.
 * @param userId User id, forwarded to PromptService (used for gray-release preview).
 * @returns The aggregated methodology review.
 * @throws Error wrapping the original message when prompt loading, the legacy
 *         fallback, or aggregation fails.
 */
export async function reviewMethodology(
  text: string,
  modelType: ModelType = 'deepseek-v3',
  userId?: string
): Promise<MethodologyReview> {
  try {
    // 1. Load the system prompt from PromptService (supports gray-release preview).
    const promptService = getPromptService(prisma);
    const { content: businessPrompt, isDraft, version } = await promptService.get(
      'RVW_METHODOLOGY',
      {},
      { userId }
    );
    // Short fingerprint so logs identify the prompt revision without containing its text.
    const promptFingerprint = createHash('sha1').update(businessPrompt).digest('hex').slice(0, 12);

    logger.info('[RVW:Methodology] Prompt 已加载', {
      userId,
      isDraft,
      version,
      promptFingerprint,
    });

    // 2. Review all sections concurrently; allSettled keeps one failure from aborting the others.
    const llmAdapter = LLMFactory.getAdapter(modelType);
    logger.info('[RVW:Methodology] 开始分治并行评估', {
      modelType,
      sections: METHODOLOGY_SECTION_DEFS.map(section => `${section.part}(${section.start}-${section.end})`),
    });

    const settled = await Promise.allSettled(
      METHODOLOGY_SECTION_DEFS.map(section =>
        reviewMethodologySection(llmAdapter, businessPrompt, text, section)
      )
    );

    // 3. Collect results in section order, substituting a fallback for each failure.
    let fulfilledCount = 0;
    const sectionResults: SectionReviewResult[] = settled.map((outcome, index) => {
      if (outcome.status === 'fulfilled') {
        fulfilledCount += 1;
        return outcome.value;
      }
      const section = METHODOLOGY_SECTION_DEFS[index];
      const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
      logger.warn('[RVW:Methodology] 分段评估失败,使用降级结果', {
        section: section.part,
        reason,
      });
      return buildSectionFallback(section, reason);
    });

    // 4. Nothing usable from the split run: fall back to the single-call path.
    if (fulfilledCount === 0) {
      logger.warn('[RVW:Methodology] 分治并行全部失败,回退 legacy 模式');
      return await reviewMethodologyLegacy(businessPrompt, text, modelType);
    }

    // 5. Merge the section results into the final review.
    const merged = aggregateMethodologySections(sectionResults);
    logger.info('[RVW:Methodology] 分治评估完成', {
      fulfilledSections: fulfilledCount,
      overallScore: merged.overall_score,
      conclusion: merged.conclusion,
      missingCheckpoints: merged.checkpoints?.filter(cp => cp.status === 'not_mentioned').length ?? 0,
    });
    return merged;
  } catch (error) {
    logger.error('[RVW:Methodology] 方法学评估失败', {
      error: error instanceof Error ? error.message : 'Unknown error',
      stack: error instanceof Error ? error.stack : undefined,
    });
    throw new Error(`方法学评估失败: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}
|
||
|
||
|
||
|
||
|
||
|