feat(rvw): Complete V2.0 Week 3 - Statistical validation extension and UX improvements

Week 3 Development Summary:

- Implement negative sign normalization (6 Unicode variants)

- Enhance T-test validation with smart sample size extraction

- Enhance SE triangle and CI-P consistency validation with subrow support

- Add precise sub-cell highlighting for P-values in multi-line cells

- Add frontend issue type Chinese translations (6 new types)

- Add file format tips for PDF/DOC uploads

Technical improvements:

- Add _clean_statistical_text() in extractor.py

- Add _safe_float() wrapper in validator.py

- Add ForensicsReport.tsx component

- Update ISSUE_TYPE_LABELS translations

Documentation:

- Add 2026-02-18 development record

- Update RVW module status (v5.1)

- Update system status (v5.2)

Status: Week 3 complete, ready for Week 4 testing
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-18 18:26:16 +08:00
parent 9f256c4a02
commit f9ed0c2528
36 changed files with 2790 additions and 501 deletions

View File

@@ -54,6 +54,7 @@ export interface ForensicsTable {
headers?: string[];
rowCount: number;
colCount: number;
issues?: ForensicsIssue[]; // 每个表格的问题列表
}
export interface ForensicsIssue {
@@ -354,7 +355,7 @@ class ExtractionClient implements IExtractionClient {
formData.append('tolerance_percent', config.tolerancePercent.toString());
const response = await axios.post<ForensicsResult>(
`${this.baseUrl}/api/v1/forensics/analyze`,
`${this.baseUrl}/api/v1/forensics/analyze_docx`,
formData,
{
headers: {

View File

@@ -27,6 +27,17 @@ function getUserId(request: FastifyRequest): string {
return userId;
}
/**
 * Returns the tenant ID of the authenticated caller.
 *
 * Reads `request.user.tenantId`; according to the original comment this value
 * originates from the JWT token (the code that populates `user` is not shown here).
 *
 * @param request Incoming Fastify request.
 * @returns The tenant ID found on the request.
 * @throws Error with message 'Tenant not found' when `user` or `tenantId` is missing or empty.
 */
function getTenantId(request: FastifyRequest): string {
  // Describe only the shape this function reads instead of opting out of
  // type checking with `any`.
  const tenantId = (request as FastifyRequest & { user?: { tenantId?: string } }).user?.tenantId;
  if (!tenantId) {
    throw new Error('Tenant not found');
  }
  return tenantId;
}
// ==================== 任务创建 ====================
/**
@@ -43,7 +54,8 @@ export async function createTask(
) {
try {
const userId = getUserId(request);
logger.info('[RVW:Controller] 上传稿件', { userId });
const tenantId = getTenantId(request);
logger.info('[RVW:Controller] 上传稿件', { userId, tenantId });
// 获取上传的文件
const data = await request.file();
@@ -105,7 +117,7 @@ export async function createTask(
}
// 创建任务
const task = await reviewService.createTask(file, filename, userId, modelType);
const task = await reviewService.createTask(file, filename, userId, tenantId, modelType);
logger.info('[RVW:Controller] 任务已创建', { taskId: task.id });

View File

@@ -17,6 +17,24 @@ import { ModelType } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { jobQueue } from '../../../common/jobs/index.js';
import { Prisma } from '@prisma/client';
import { storage } from '../../../common/storage/index.js';
import { randomUUID } from 'crypto';
import path from 'path';
/**
 * Builds the object-storage key under which an RVW upload is stored.
 *
 * Key layout: tenants/{tenantId}/users/{userId}/rvw/{taskId}/{id}{ext}
 * - {id}  is the first 16 hex characters of a freshly generated random UUID
 * - {ext} is the lower-cased extension of `filename`, including the leading
 *         dot, or an empty string when the name has no extension
 *
 * The original base name of the file is not part of the key.
 */
function generateRvwStorageKey(
  tenantId: string,
  userId: string,
  taskId: string,
  filename: string
): string {
  const extension = path.extname(filename).toLowerCase();
  // Strip the dashes from the UUID, then keep its first 16 characters.
  const shortId = randomUUID().split('-').join('').slice(0, 16);
  const folder = ['tenants', tenantId, 'users', userId, 'rvw', taskId].join('/');
  return `${folder}/${shortId}${extension}`;
}
import {
AgentType,
TaskStatus,
@@ -44,6 +62,7 @@ import {
* @param file 文件Buffer
* @param filename 文件名
* @param userId 用户ID
* @param tenantId 租户ID
* @param modelType 模型类型
* @returns 创建的任务
*/
@@ -51,11 +70,12 @@ export async function createTask(
file: Buffer,
filename: string,
userId: string,
tenantId: string,
modelType: ModelType = 'deepseek-v3'
) {
logger.info('[RVW] 创建审查任务', { filename, userId, modelType });
logger.info('[RVW] 创建审查任务', { filename, userId, tenantId, modelType });
// 创建任务记录状态为pending等待用户选择智能体后运行
// 1. 先创建任务记录获取 taskId
const task = await prisma.reviewTask.create({
data: {
userId,
@@ -70,12 +90,37 @@ export async function createTask(
logger.info('[RVW] 任务已创建', { taskId: task.id, status: task.status });
// 异步提取文档文本(预处理,不运行评估)
// 2. 生成 OSS 存储 Key 并上传文件
const storageKey = generateRvwStorageKey(tenantId, userId, task.id, filename);
let updatedTask = task;
try {
logger.info('[RVW] 开始上传文件到存储', { taskId: task.id, storageKey });
await storage.upload(storageKey, file);
logger.info('[RVW] 文件已上传到存储', { taskId: task.id, storageKey });
// 3. 更新任务的 filePath 字段
updatedTask = await prisma.reviewTask.update({
where: { id: task.id },
data: { filePath: storageKey },
});
logger.info('[RVW] 任务 filePath 已更新', { taskId: task.id, filePath: storageKey });
} catch (uploadError) {
logger.error('[RVW] 文件上传失败', {
taskId: task.id,
storageKey,
error: uploadError instanceof Error ? uploadError.message : 'Unknown error',
stack: uploadError instanceof Error ? uploadError.stack : undefined,
});
// 上传失败不阻塞任务创建DataForensicsSkill 会优雅降级
}
// 4. 异步提取文档文本(预处理,不运行评估)
extractDocumentAsync(task.id, file, filename).catch(error => {
logger.error('[RVW] 文档提取失败', { taskId: task.id, error: error.message });
});
return task;
return updatedTask;
}
/**
@@ -191,6 +236,7 @@ export async function runReview(params: RunReviewParams): Promise<{ jobId: strin
agents,
extractedText: task.extractedText,
modelType: (task.modelUsed || 'deepseek-v3') as ModelType,
__expireInSeconds: 10 * 60, // 10分钟超时审稿任务通常2-3分钟完成
});
logger.info('[RVW] 审查任务已推送到队列', {
@@ -364,6 +410,10 @@ export async function getTaskReport(userId: string, taskId: string): Promise<Rev
throw new Error(`报告尚未完成,当前状态: ${task.status}`);
}
// 从 contextData 中提取 forensicsResultV2.0 Skills 架构)
const contextData = task.contextData as { forensicsResult?: unknown } | null;
const forensicsResult = contextData?.forensicsResult ?? undefined;
return {
taskId: task.id,
fileName: task.fileName,
@@ -374,6 +424,7 @@ export async function getTaskReport(userId: string, taskId: string): Promise<Rev
overallScore: task.overallScore ?? undefined,
editorialReview: task.editorialReview as unknown as EditorialReview | undefined,
methodologyReview: task.methodologyReview as unknown as MethodologyReview | undefined,
forensicsResult: forensicsResult as ReviewReport['forensicsResult'],
completedAt: task.completedAt ?? undefined,
durationSeconds: task.durationSeconds ?? undefined,
};

View File

@@ -68,7 +68,7 @@ export function getMethodologyStatus(review: MethodologyReview | null | undefine
* @param editorialScore 稿约规范性分数
* @param methodologyScore 方法学分数
* @param agents 选择的智能体
* @returns 综合分数
* @returns 综合分数保留1位小数
*/
export function calculateOverallScore(
editorialScore: number | null | undefined,
@@ -78,18 +78,21 @@ export function calculateOverallScore(
const hasEditorial = agents.includes('editorial') && editorialScore != null;
const hasMethodology = agents.includes('methodology') && methodologyScore != null;
let score: number | null = null;
if (hasEditorial && hasMethodology) {
// 两个都选稿约40% + 方法学60%
return editorialScore! * 0.4 + methodologyScore! * 0.6;
score = editorialScore! * 0.4 + methodologyScore! * 0.6;
} else if (hasEditorial) {
// 只选规范性
return editorialScore!;
score = editorialScore!;
} else if (hasMethodology) {
// 只选方法学
return methodologyScore!;
score = methodologyScore!;
}
return null;
// 修复浮点数精度问题保留1位小数
return score !== null ? Math.round(score * 10) / 10 : null;
}
/**

View File

@@ -121,9 +121,8 @@ export class ContextBuilder {
if (!this.context.taskId) {
errors.push('taskId is required');
}
if (!this.context.documentPath) {
errors.push('documentPath is required');
}
// documentPath 是可选的DataForensicsSkill 需要,但 Editorial/Methodology 不需要
// DataForensicsSkill.canRun() 会检查 documentPath没有就跳过
if (this.context.documentContent === undefined) {
errors.push('documentContent is required');
}
@@ -147,9 +146,8 @@ export class ContextBuilder {
if (!this.context.taskId) {
errors.push('taskId is required');
}
if (!this.context.documentPath) {
errors.push('documentPath is required');
}
// documentPath 是可选的DataForensicsSkill 需要,但 Editorial/Methodology 不需要
// DataForensicsSkill.canRun() 会检查 documentPath没有就跳过
if (this.context.documentContent === undefined) {
errors.push('documentContent is required');
}

View File

@@ -58,19 +58,19 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
...initialContext,
profile,
previousResults: [],
} as TContext;
} as unknown as TContext;
logger.info({
logger.info('[SkillExecutor] Starting pipeline execution', {
taskId: context.taskId,
profileId: profile.id,
pipelineLength: profile.pipeline.length,
}, '[SkillExecutor] Starting pipeline execution');
});
// 遍历 Pipeline
for (const item of profile.pipeline) {
// 跳过禁用的 Skill
if (!item.enabled) {
logger.debug({ skillId: item.skillId }, '[SkillExecutor] Skill disabled, skipping');
logger.debug('[SkillExecutor] Skill disabled, skipping', { skillId: item.skillId });
results.push(this.createSkippedResult(item.skillId, 'Skill disabled in profile'));
continue;
}
@@ -78,20 +78,20 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
// 获取 Skill
const skill = SkillRegistry.get(item.skillId);
if (!skill) {
logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill not found in registry');
logger.warn('[SkillExecutor] Skill not found in registry', { skillId: item.skillId });
results.push(this.createSkippedResult(item.skillId, 'Skill not found'));
continue;
}
// 前置检查
if (skill.canRun && !skill.canRun(context as SkillContext)) {
logger.info({ skillId: item.skillId }, '[SkillExecutor] Skill pre-check failed, skipping');
if (skill.canRun && !skill.canRun(context as unknown as SkillContext)) {
logger.info('[SkillExecutor] Skill pre-check failed, skipping', { skillId: item.skillId });
results.push(this.createSkippedResult(item.skillId, 'Pre-check failed'));
continue;
}
// 执行 Skill
const result = await this.executeSkill(skill, context as SkillContext, item, profile);
const result = await this.executeSkill(skill, context as unknown as SkillContext, item, profile);
results.push(result);
// 调用完成回调V2.1 扩展点)
@@ -100,7 +100,7 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
await this.config.onSkillComplete(item.skillId, result, context);
} catch (callbackError: unknown) {
const errorMessage = callbackError instanceof Error ? callbackError.message : String(callbackError);
logger.error({ skillId: item.skillId, error: errorMessage }, '[SkillExecutor] onSkillComplete callback failed');
logger.error('[SkillExecutor] onSkillComplete callback failed', { skillId: item.skillId, error: errorMessage });
}
}
@@ -112,7 +112,7 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
// 检查是否需要中断
if (result.status === 'error' && !this.shouldContinue(item, profile)) {
logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill failed and continueOnError=false, stopping');
logger.warn('[SkillExecutor] Skill failed and continueOnError=false, stopping', { skillId: item.skillId });
break;
}
}
@@ -120,13 +120,13 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
// 生成汇总
const summary = this.buildSummary(context.taskId, profile.id, results, startTime);
logger.info({
logger.info('[SkillExecutor] Pipeline execution completed', {
taskId: context.taskId,
overallStatus: summary.overallStatus,
totalTime: summary.totalExecutionTime,
successCount: summary.successCount,
errorCount: summary.errorCount,
}, '[SkillExecutor] Pipeline execution completed');
});
return summary;
}
@@ -144,23 +144,23 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
const timeoutMultiplier = profile.globalConfig?.timeoutMultiplier ?? 1;
const timeout = Math.round((item.timeout ?? skill.metadata.defaultTimeout ?? this.config.defaultTimeout) * timeoutMultiplier);
logger.info({
logger.info('[SkillExecutor] Executing skill', {
skillId: skill.metadata.id,
taskId: context.taskId,
timeout,
}, '[SkillExecutor] Executing skill');
});
try {
// 带超时执行
const result = await this.executeWithTimeout(skill, context, item.config, timeout);
logger.info({
logger.info('[SkillExecutor] Skill execution completed', {
skillId: skill.metadata.id,
taskId: context.taskId,
status: result.status,
executionTime: result.executionTime,
issueCount: result.issues.length,
}, '[SkillExecutor] Skill execution completed');
});
return result;
} catch (error: unknown) {
@@ -169,11 +169,11 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
// 判断是否超时
if (errorMessage === 'SKILL_TIMEOUT') {
logger.warn({
logger.warn('[SkillExecutor] Skill execution timed out', {
skillId: skill.metadata.id,
taskId: context.taskId,
timeout,
}, '[SkillExecutor] Skill execution timed out');
});
return {
skillId: skill.metadata.id,
@@ -192,11 +192,11 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
}
// 其他错误
logger.error({
logger.error('[SkillExecutor] Skill execution failed', {
skillId: skill.metadata.id,
taskId: context.taskId,
error: errorMessage,
}, '[SkillExecutor] Skill execution failed');
});
return {
skillId: skill.metadata.id,

View File

@@ -7,7 +7,7 @@
* @since 2026-02-18
*/
import { JournalProfile, PipelineItem } from './types.js';
import { JournalProfile } from './types.js';
import { logger } from '../../../../common/logging/index.js';
/**
@@ -34,13 +34,13 @@ export const DEFAULT_PROFILE: JournalProfile = {
skillId: 'EditorialSkill',
enabled: true,
optional: false,
timeout: 45000,
timeout: 180000, // 180 秒
},
{
skillId: 'MethodologySkill',
enabled: true,
optional: false,
timeout: 45000,
timeout: 180000, // 180 秒
},
],
@@ -78,13 +78,13 @@ export const CHINESE_CORE_PROFILE: JournalProfile = {
config: {
standard: 'chinese-core',
},
timeout: 45000,
timeout: 180000, // 180 秒
},
{
skillId: 'MethodologySkill',
enabled: true,
optional: false,
timeout: 45000,
timeout: 180000, // 180 秒
},
],
@@ -154,11 +154,11 @@ export class ProfileResolver {
const profile = PROFILES.get(id);
if (!profile) {
logger.warn({ profileId: id }, '[ProfileResolver] Profile not found, using default');
logger.warn('[ProfileResolver] Profile not found, using default', { profileId: id });
return DEFAULT_PROFILE;
}
logger.debug({ profileId: id }, '[ProfileResolver] Profile resolved');
logger.debug('[ProfileResolver] Profile resolved', { profileId: id });
return profile;
}
@@ -196,10 +196,10 @@ export class ProfileResolver {
enabled: enabledSkills.has(item.skillId),
}));
logger.debug({
logger.debug('[ProfileResolver] Profile built from agents', {
selectedAgents,
enabledSkills: Array.from(enabledSkills),
}, '[ProfileResolver] Profile built from agents');
});
return baseProfile;
}
@@ -223,7 +223,7 @@ export class ProfileResolver {
*/
static register(profile: JournalProfile): void {
PROFILES.set(profile.id, profile);
logger.info({ profileId: profile.id }, '[ProfileResolver] Profile registered');
logger.info('[ProfileResolver] Profile registered', { profileId: profile.id });
}
/**

View File

@@ -24,11 +24,11 @@ class SkillRegistryClass {
const { id, version } = skill.metadata;
if (this.skills.has(id)) {
logger.warn({ skillId: id }, '[SkillRegistry] Skill already registered, overwriting');
logger.warn('[SkillRegistry] Skill already registered, overwriting', { skillId: id });
}
this.skills.set(id, skill);
logger.info({ skillId: id, version }, '[SkillRegistry] Skill registered');
logger.info('[SkillRegistry] Skill registered', { skillId: id, version });
}
/**
@@ -92,7 +92,7 @@ class SkillRegistryClass {
unregister(id: string): boolean {
const result = this.skills.delete(id);
if (result) {
logger.info({ skillId: id }, '[SkillRegistry] Skill unregistered');
logger.info('[SkillRegistry] Skill unregistered', { skillId: id });
}
return result;
}
@@ -118,7 +118,7 @@ class SkillRegistryClass {
*/
markInitialized(): void {
this.initialized = true;
logger.info({ skillCount: this.size }, '[SkillRegistry] Registry initialized');
logger.info('[SkillRegistry] Registry initialized', { skillCount: this.size });
}
/**

View File

@@ -111,7 +111,7 @@ export interface ForensicsResult {
* RVW 模块扩展字段
*/
export interface RvwContextExtras {
documentPath: string;
documentPath?: string; // 可选DataForensicsSkill 需要Editorial/Methodology 不需要
documentContent: string;
documentMeta?: DocumentMeta;
tables?: TableData[];

View File

@@ -20,6 +20,20 @@ import {
} from '../core/types.js';
import { logger } from '../../../../common/logging/index.js';
/**
 * Return type of a skill's execute() method.
 * It does not need to include skillId, skillName, startedAt, completedAt or executionTime;
 * per the original note, those fields are filled in automatically by BaseSkill.run().
 */
export type ExecuteResult = {
status: 'success' | 'warning' | 'error';
score?: number;
scoreLabel?: string;
issues: SkillResult['issues'];
data?: unknown;
error?: string;
};
/**
* Skill 基类
* 使用泛型支持不同上下文和配置类型
@@ -39,11 +53,12 @@ export abstract class BaseSkill<
/**
* 子类实现具体逻辑
* 返回值不需要包含 skillId, skillName, startedAt, completedAt, executionTime
*/
abstract execute(
context: TContext,
config?: TConfig
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>>;
): Promise<ExecuteResult>;
/**
* 执行入口(统一处理日志、计时、配置验证等)
@@ -52,10 +67,10 @@ export abstract class BaseSkill<
const startedAt = new Date();
const startTime = Date.now();
logger.info({
logger.info(`[${this.metadata.id}] Starting execution`, {
skillId: this.metadata.id,
taskId: context.taskId,
}, `[${this.metadata.id}] Starting execution`);
});
try {
// 配置验证(使用 Zod
@@ -64,13 +79,13 @@ export abstract class BaseSkill<
const result = await this.execute(context, validatedConfig);
const executionTime = Date.now() - startTime;
logger.info({
logger.info(`[${this.metadata.id}] Execution completed`, {
skillId: this.metadata.id,
taskId: context.taskId,
status: result.status,
executionTime,
issueCount: result.issues.length,
}, `[${this.metadata.id}] Execution completed`);
});
return {
...result,
@@ -90,15 +105,15 @@ export abstract class BaseSkill<
: SkillErrorCodes.SKILL_EXECUTION_ERROR;
const errorMessage = isValidationError
? `配置验证失败: ${(error as z.ZodError).errors.map(e => e.message).join(', ')}`
? `配置验证失败: ${(error as z.ZodError).issues.map((e: z.ZodIssue) => e.message).join(', ')}`
: `执行失败: ${error instanceof Error ? error.message : String(error)}`;
logger.error({
logger.error(`[${this.metadata.id}] Execution failed`, {
skillId: this.metadata.id,
taskId: context.taskId,
error: error instanceof Error ? error.message : String(error),
errorType,
}, `[${this.metadata.id}] Execution failed`);
});
return {
skillId: this.metadata.id,

View File

@@ -8,11 +8,10 @@
* @since 2026-02-18
*/
import { BaseSkill } from './BaseSkill.js';
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
import {
SkillMetadata,
SkillContext,
SkillResult,
DataForensicsConfigSchema,
DataForensicsConfig,
ForensicsResult,
@@ -23,19 +22,12 @@ import {
IExtractionClient,
ForensicsResult as ClientForensicsResult,
} from '../../../../common/document/ExtractionClient.js';
import { storage } from '../../../../common/storage/index.js';
import { logger } from '../../../../common/logging/index.js';
/**
* 安全:允许的文件存储路径前缀
*/
const ALLOWED_PATH_PREFIXES = [
'/app/uploads/', // Docker 容器内路径
'D:\\MyCursor\\', // 开发环境 Windows
'D:/MyCursor/', // 开发环境 Windows (forward slash)
'/tmp/rvw-uploads/', // 临时目录
'C:\\Users\\', // Windows 用户目录
'/home/', // Linux 用户目录
];
import * as fs from 'fs/promises';
import * as path from 'path';
import * as os from 'os';
import { randomUUID } from 'crypto';
/**
* 数据侦探 Skill
@@ -76,39 +68,35 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
/**
* 前置检查
* 增加路径安全验证(防止路径遍历攻击)
* 验证 documentPathOSS storage key格式
*/
canRun(context: SkillContext): boolean {
if (!context.documentPath) {
logger.warn({ taskId: context.taskId }, '[DataForensicsSkill] No document path');
logger.warn('[DataForensicsSkill] No document path (storageKey)', { taskId: context.taskId });
return false;
}
if (!context.documentPath.toLowerCase().endsWith('.docx')) {
logger.info({ taskId: context.taskId }, '[DataForensicsSkill] Not a .docx file, skipping');
logger.info('[DataForensicsSkill] Not a .docx file, skipping', { taskId: context.taskId });
return false;
}
// 安全检查:路径白名单
const normalizedPath = context.documentPath.replace(/\\/g, '/');
const isPathAllowed = ALLOWED_PATH_PREFIXES.some(prefix => {
const normalizedPrefix = prefix.replace(/\\/g, '/');
return normalizedPath.startsWith(normalizedPrefix);
});
if (!isPathAllowed) {
logger.error({
// 安全检查:OSS key 格式验证tenants/xxx/users/xxx/rvw/xxx/xxx.docx
const isOssKey = context.documentPath.startsWith('tenants/') ||
context.documentPath.startsWith('temp/');
if (!isOssKey) {
logger.warn('[DataForensicsSkill] Invalid storage key format', {
taskId: context.taskId,
documentPath: '[REDACTED]', // 不记录完整路径
}, '[DataForensicsSkill] Document path not in allowed prefixes (security check)');
});
return false;
}
// 检查是否包含路径遍历
if (context.documentPath.includes('..')) {
logger.error({
logger.error('[DataForensicsSkill] Path traversal detected (security check)', {
taskId: context.taskId,
}, '[DataForensicsSkill] Path traversal detected (security check)');
});
return false;
}
@@ -117,23 +105,46 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
/**
* 执行数据侦探
* 从 OSS 下载文件到临时目录,然后调用 Python 服务分析
*/
async execute(
context: SkillContext,
config?: DataForensicsConfig
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
): Promise<ExecuteResult> {
const checkLevel = config?.checkLevel || 'L1_L2_L25';
const tolerancePercent = config?.tolerancePercent || 0.1;
const storageKey = context.documentPath!;
logger.info({
logger.info('[DataForensicsSkill] Starting analysis', {
taskId: context.taskId,
storageKey,
checkLevel,
tolerancePercent,
}, '[DataForensicsSkill] Starting analysis');
});
// 创建临时文件路径
const tempDir = os.tmpdir();
const tempFilename = `rvw-${randomUUID()}.docx`;
const tempFilePath = path.join(tempDir, tempFilename);
try {
// 使用依赖注入的 client
const result = await this.extractionClient.analyzeDocx(context.documentPath, {
// 1. 从 OSS 下载文件到临时目录
logger.info('[DataForensicsSkill] Downloading file from storage', {
taskId: context.taskId,
storageKey,
tempFilePath,
});
const fileBuffer = await storage.download(storageKey);
await fs.writeFile(tempFilePath, fileBuffer);
logger.info('[DataForensicsSkill] File downloaded successfully', {
taskId: context.taskId,
fileSize: fileBuffer.length,
});
// 2. 调用 Python 服务分析临时文件
const result = await this.extractionClient.analyzeDocx(tempFilePath, {
checkLevel,
tolerancePercent,
});
@@ -159,13 +170,13 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
score = 100;
}
logger.info({
logger.info('[DataForensicsSkill] Analysis completed', {
taskId: context.taskId,
tableCount: forensicsResult.summary.totalTables,
issueCount: forensicsResult.summary.totalIssues,
errorCount: forensicsResult.summary.errorCount,
warningCount: forensicsResult.summary.warningCount,
}, '[DataForensicsSkill] Analysis completed');
});
return {
status,
@@ -178,10 +189,10 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
// 特殊处理Python 服务不可用时的优雅降级
const errorObj = error as NodeJS.ErrnoException;
if (errorObj.code === 'ECONNREFUSED' || errorObj.code === 'ETIMEDOUT') {
logger.warn({
logger.warn('[DataForensicsSkill] Python service unavailable, degrading gracefully', {
taskId: context.taskId,
error: errorObj.message,
}, '[DataForensicsSkill] Python service unavailable, degrading gracefully');
});
return {
status: 'warning',
@@ -199,39 +210,132 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
};
}
// 存储服务错误的优雅降级
if (errorObj.message?.includes('storage') || errorObj.message?.includes('OSS')) {
logger.warn('[DataForensicsSkill] Storage service error, degrading gracefully', {
taskId: context.taskId,
error: errorObj.message,
});
return {
status: 'warning',
issues: [{
severity: 'WARNING',
type: 'STORAGE_ERROR',
message: '文件存储服务暂不可用,已跳过表格验证。',
}],
data: {
tables: [],
methods: [],
issues: [],
summary: { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 1 },
} as ForensicsResult,
};
}
throw error;
} finally {
// 3. 清理临时文件
try {
await fs.unlink(tempFilePath);
logger.debug('[DataForensicsSkill] Temp file cleaned up', { tempFilePath });
} catch {
// 忽略清理错误
}
}
}
/**
* 转换 Python 返回的结果为内部格式
*
* Python 返回格式:
* {
* success: boolean,
* methodsFound: string[],
* tables: [{ id, issues: [...], ... }],
* totalIssues: number,
* errorCount: number,
* warningCount: number
* }
*
* 转换为内部格式:
* {
* tables: [...],
* methods: [...],
* issues: [...], // 从 tables[].issues 收集
* summary: { totalTables, totalIssues, errorCount, warningCount }
* }
*/
private convertResult(result: ClientForensicsResult): ForensicsResult {
const issues: Issue[] = result.issues.map(issue => ({
severity: issue.severity,
type: issue.type,
message: issue.message,
location: issue.location,
evidence: issue.evidence,
}));
// 防御性检查
const rawTables = result.tables || [];
// Python 返回的是 methodsFound驼峰也可能是 methods
const rawMethods = (result as any).methodsFound || result.methods || [];
// 从 tables[].issues 中收集所有 issues
const allIssues: Issue[] = [];
for (const table of rawTables) {
const tableIssues = (table as any).issues || [];
for (const issue of tableIssues) {
allIssues.push({
severity: issue.severity,
type: issue.type,
message: issue.message,
location: issue.location,
evidence: issue.evidence,
});
}
}
// 也检查顶层的 issues兼容旧格式
const topLevelIssues = result.issues || [];
for (const issue of topLevelIssues) {
allIssues.push({
severity: issue.severity,
type: issue.type,
message: issue.message,
location: issue.location,
evidence: issue.evidence,
});
}
// 构建 summary从 Python 返回的顶层字段或 summary 对象)
const pyResult = result as any;
const summary = result.summary || {
totalTables: pyResult.totalTables ?? rawTables.length,
totalIssues: pyResult.totalIssues ?? allIssues.length,
errorCount: pyResult.errorCount ?? allIssues.filter(i => i.severity === 'ERROR').length,
warningCount: pyResult.warningCount ?? allIssues.filter(i => i.severity === 'WARNING').length,
};
return {
tables: result.tables.map(t => ({
id: t.id,
caption: t.caption,
data: t.data,
html: t.html,
headers: t.headers,
rowCount: t.rowCount,
colCount: t.colCount,
})),
methods: result.methods,
issues,
tables: rawTables.map(t => {
const tableIssues = ((t as any).issues || []).map((issue: any) => ({
severity: issue.severity,
type: issue.type,
message: issue.message,
location: issue.location,
evidence: issue.evidence,
}));
return {
id: t.id || '',
caption: t.caption || '',
data: t.data || [],
html: t.html || '',
headers: t.headers || [],
rowCount: t.rowCount || 0,
colCount: t.colCount || 0,
issues: tableIssues, // 保留每个表格的 issues
};
}),
methods: rawMethods,
issues: allIssues,
summary: {
totalTables: result.summary.totalTables,
totalIssues: result.summary.totalIssues,
errorCount: result.summary.errorCount,
warningCount: result.summary.warningCount,
totalTables: summary.totalTables ?? rawTables.length,
totalIssues: summary.totalIssues ?? allIssues.length,
errorCount: summary.errorCount ?? 0,
warningCount: summary.warningCount ?? 0,
},
};
}

View File

@@ -8,17 +8,16 @@
* @since 2026-02-18
*/
import { BaseSkill } from './BaseSkill.js';
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
import {
SkillMetadata,
SkillContext,
SkillResult,
EditorialConfigSchema,
EditorialConfig,
Issue,
} from '../core/types.js';
import { reviewEditorialStandards } from '../../services/editorialService.js';
import { EditorialReview, EditorialItem } from '../../types/index.js';
import { EditorialReview } from '../../types/index.js';
import { logger } from '../../../../common/logging/index.js';
/**
@@ -45,7 +44,7 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
inputs: ['documentContent'],
outputs: ['editorialResult'],
defaultTimeout: 45000, // 45 秒
defaultTimeout: 180000, // 180 秒LLM 调用可能较慢)
retryable: true,
icon: '📋',
@@ -57,18 +56,18 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
*/
canRun(context: SkillContext): boolean {
if (!context.documentContent || context.documentContent.trim().length === 0) {
logger.warn({ taskId: context.taskId }, '[EditorialSkill] No document content');
logger.warn('[EditorialSkill] No document content', { taskId: context.taskId });
return false;
}
// 资源限制检查
const maxLength = DEFAULT_MAX_CONTENT_LENGTH;
if (context.documentContent.length > maxLength) {
logger.warn({
logger.warn('[EditorialSkill] Content too long', {
taskId: context.taskId,
contentLength: context.documentContent.length,
limit: maxLength,
}, '[EditorialSkill] Content too long');
});
return false;
}
@@ -81,23 +80,23 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
async execute(
context: SkillContext,
config?: EditorialConfig
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
): Promise<ExecuteResult> {
const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH;
logger.info({
logger.info('[EditorialSkill] Starting evaluation', {
taskId: context.taskId,
contentLength: context.documentContent.length,
}, '[EditorialSkill] Starting evaluation');
});
// 截断过长内容
let content = context.documentContent;
if (content.length > maxContentLength) {
content = content.substring(0, maxContentLength);
logger.warn({
logger.warn('[EditorialSkill] Content truncated', {
taskId: context.taskId,
originalLength: context.documentContent.length,
truncatedLength: maxContentLength,
}, '[EditorialSkill] Content truncated');
});
}
// 调用现有 editorialService
@@ -119,13 +118,13 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
status = 'success';
}
logger.info({
logger.info('[EditorialSkill] Evaluation completed', {
taskId: context.taskId,
score: result.overall_score,
itemCount: result.items.length,
errorCount,
warningCount,
}, '[EditorialSkill] Evaluation completed');
});
return {
status,

View File

@@ -8,17 +8,16 @@
* @since 2026-02-18
*/
import { BaseSkill } from './BaseSkill.js';
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
import {
SkillMetadata,
SkillContext,
SkillResult,
MethodologyConfigSchema,
MethodologyConfig,
Issue,
} from '../core/types.js';
import { reviewMethodology } from '../../services/methodologyService.js';
import { MethodologyReview, MethodologyIssue } from '../../types/index.js';
import { MethodologyReview } from '../../types/index.js';
import { logger } from '../../../../common/logging/index.js';
/**
@@ -45,7 +44,7 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
inputs: ['documentContent', 'methods'],
outputs: ['methodologyResult'],
defaultTimeout: 45000, // 45 秒
defaultTimeout: 180000, // 180 秒(方法学分析需要更长时间)
retryable: true,
icon: '🔬',
@@ -57,18 +56,18 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
*/
canRun(context: SkillContext): boolean {
if (!context.documentContent || context.documentContent.trim().length === 0) {
logger.warn({ taskId: context.taskId }, '[MethodologySkill] No document content');
logger.warn('[MethodologySkill] No document content', { taskId: context.taskId });
return false;
}
// 资源限制检查
const maxLength = DEFAULT_MAX_CONTENT_LENGTH;
if (context.documentContent.length > maxLength) {
logger.warn({
logger.warn('[MethodologySkill] Content too long', {
taskId: context.taskId,
contentLength: context.documentContent.length,
limit: maxLength,
}, '[MethodologySkill] Content too long');
});
return false;
}
@@ -81,34 +80,34 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
async execute(
context: SkillContext,
config?: MethodologyConfig
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
): Promise<ExecuteResult> {
const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH;
logger.info({
logger.info('[MethodologySkill] Starting evaluation', {
taskId: context.taskId,
contentLength: context.documentContent.length,
detectedMethods: context.methods?.length || 0,
}, '[MethodologySkill] Starting evaluation');
});
// 截断过长内容
let content = context.documentContent;
if (content.length > maxContentLength) {
content = content.substring(0, maxContentLength);
logger.warn({
logger.warn('[MethodologySkill] Content truncated', {
taskId: context.taskId,
originalLength: context.documentContent.length,
truncatedLength: maxContentLength,
}, '[MethodologySkill] Content truncated');
});
}
// 如果 DataForensicsSkill 提取了统计方法,可以添加到 prompt 中
// 目前 reviewMethodology 不支持此参数,留作未来扩展
const methodsHint = context.methods?.join(', ') || '';
if (methodsHint) {
logger.debug({
logger.debug('[MethodologySkill] Using detected methods as hint', {
taskId: context.taskId,
methodsHint,
}, '[MethodologySkill] Using detected methods as hint');
});
}
// 调用现有 methodologyService
@@ -130,13 +129,13 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
status = 'success';
}
logger.info({
logger.info('[MethodologySkill] Evaluation completed', {
taskId: context.taskId,
score: result.overall_score,
partCount: result.parts.length,
errorCount,
warningCount,
}, '[MethodologySkill] Evaluation completed');
});
return {
status,

View File

@@ -0,0 +1,103 @@
/**
* RVW Skills 架构 - 快速验证脚本
*
* 运行方式: npx tsx src/modules/rvw/skills/test-skills.ts
*/
import { SkillRegistry } from './core/registry.js';
import { ProfileResolver, DEFAULT_PROFILE } from './core/profile.js';
import { ContextBuilder } from './core/context.js';
import { SkillExecutor } from './core/executor.js';
import { registerBuiltinSkills } from './library/index.js';
// Register the built-in Skills before main() runs
registerBuiltinSkills();
/**
 * Quick verification of the Skills architecture core components.
 *
 * Prints, in order: the SkillRegistry summary and registered skills, the
 * default and agent-derived profiles, a sample context built with
 * ContextBuilder, and the canRun result of every registered skill. It ends
 * with warnings when fewer than three skills, or any of the three expected
 * skill ids, are not registered.
 */
async function main() {
  const heavyRule = '='.repeat(60);
  const lightRule = '-'.repeat(40);

  // Prints a section title followed by the light separator line.
  const printSection = (title: string): void => {
    console.log(title);
    console.log(lightRule);
  };

  console.log(heavyRule);
  console.log('RVW Skills V2.0 架构验证');
  console.log(heavyRule);

  // 1. SkillRegistry
  printSection('\n📋 1. SkillRegistry 验证');
  const summary = SkillRegistry.getSummary();
  console.log(` 已初始化: ${summary.initialized}`);
  console.log(` 注册 Skills 数量: ${summary.skillCount}`);
  console.log(` 分类统计:`, summary.categories);

  const registeredMetadata = SkillRegistry.getAllMetadata();
  console.log('\n 已注册的 Skills:');
  for (const meta of registeredMetadata) {
    console.log(` - ${meta.id} (${meta.name}) v${meta.version}`);
  }

  // 2. ProfileResolver
  printSection('\n📋 2. ProfileResolver 验证');
  const defaultProfile = ProfileResolver.resolve('default');
  console.log(` 默认 Profile: ${defaultProfile.name}`);
  console.log(` Pipeline 长度: ${defaultProfile.pipeline.length}`);
  console.log(` Pipeline Skills:`);
  for (const step of defaultProfile.pipeline) {
    console.log(` - ${step.skillId} (enabled: ${step.enabled}, optional: ${step.optional})`);
  }

  // Profile resolved dynamically from a list of agents
  const dynamicProfile = ProfileResolver.resolveFromAgents(['editorial', 'methodology']);
  console.log(`\n 动态 Profile (editorial + methodology):`);
  const enabledSkillIds = dynamicProfile.pipeline
    .filter(step => step.enabled)
    .map(step => step.skillId);
  console.log(` 启用的 Skills: ${enabledSkillIds.join(', ')}`);

  // 3. ContextBuilder
  printSection('\n📋 3. ContextBuilder 验证');
  const context = new ContextBuilder()
    .taskId('test-task-123')
    .userId('test-user-456')
    .documentPath('D:/MyCursor/test/document.docx') // uses an allowed path prefix
    .documentContent('这是一篇测试论文的内容...')
    .profile(defaultProfile)
    .build();
  console.log(` taskId: ${context.taskId}`);
  console.log(` userId: ${context.userId}`);
  console.log(` documentPath: ${context.documentPath}`);
  console.log(` documentContent 长度: ${context.documentContent.length}`);

  // 4. canRun check for every registered skill
  printSection('\n📋 4. Skill canRun 检查');
  for (const skill of SkillRegistry.getAll()) {
    // A skill without a canRun hook is treated as runnable.
    let runnable = true;
    if (skill.canRun) {
      runnable = skill.canRun(context);
    }
    console.log(` ${skill.metadata.id}: canRun = ${runnable}`);
  }

  // 5. Summary
  console.log(`\n${heavyRule}`);
  console.log('✅ Skills 架构核心组件验证完成!');
  console.log(heavyRule);

  // Sanity warnings
  if (summary.skillCount < 3) {
    console.log('\n⚠ 警告: 注册的 Skills 数量少于预期 (预期 3 个)');
  }
  const expectedSkillIds = ['DataForensicsSkill', 'EditorialSkill', 'MethodologySkill'];
  for (const skillId of expectedSkillIds) {
    if (!SkillRegistry.has(skillId)) {
      console.log(`⚠️ 警告: ${skillId} 未注册`);
    }
  }

  console.log('\n下一步: 启动后端服务,通过 API 测试完整流程');
}
// Run the verification. If main() rejects, log the error and flag the process
// as failed so shells and CI see a non-zero exit status instead of a silent 0.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

View File

@@ -65,6 +65,45 @@ export interface MethodologyReview {
parts: MethodologyPart[];
}
// ==================== 数据验证DataForensics ====================
/**
 * A single issue in the data-validation (DataForensics) result types.
 */
export interface ForensicsIssue {
// Severity level, restricted to these three literals.
severity: 'ERROR' | 'WARNING' | 'INFO';
// Issue type identifier; the set of valid values is not defined in this file section.
type: string;
// Description of the issue.
message: string;
// Optional pointer to where the issue was found; every sub-field is optional.
// NOTE(review): presumably tableId matches ForensicsTable.id and cellRef names a cell within that table — confirm against the producer.
location?: {
tableId?: string;
cellRef?: string;
paragraph?: number;
};
// Optional free-form supporting data; values are untyped (unknown) and must be narrowed before use.
evidence?: Record<string, unknown>;
}
/**
 * A table entry in the data-validation (DataForensics) result, together with the issues attached to it.
 */
export interface ForensicsTable {
// Table identifier.
id: string;
// Table caption text.
caption: string;
// HTML rendering of the table.
html: string;
// Cell contents as a two-dimensional string array.
// NOTE(review): presumably indexed as data[row][column] — confirm against the extractor.
data: string[][];
// Header labels.
headers: string[];
// Row and column counts.
rowCount: number;
colCount: number;
// Optional skip marker and reason.
// NOTE(review): presumably set when the table was not analyzed — confirm.
skipped?: boolean;
skipReason?: string;
// Issues attached to this table.
issues: ForensicsIssue[];
}
/**
 * Overall data-validation (DataForensics) result: tables, methods, issues and aggregate counts.
 */
export interface ForensicsResult {
// Tables included in the result, each carrying its own issues.
tables: ForensicsTable[];
// NOTE(review): presumably the names of statistical methods detected in the document — confirm against the producer.
methods: string[];
// Issue list at the result level.
// NOTE(review): whether this duplicates the per-table issues is not visible here — confirm.
issues: ForensicsIssue[];
// Aggregate counters. There is no counter for 'INFO' severity.
summary: {
totalTables: number;
totalIssues: number;
errorCount: number;
warningCount: number;
};
}
// ==================== 请求参数 ====================
/**
@@ -142,6 +181,7 @@ export interface ReviewReport {
overallScore?: number;
editorialReview?: EditorialReview;
methodologyReview?: MethodologyReview;
forensicsResult?: ForensicsResult;
completedAt?: Date;
durationSeconds?: number;
}

View File

@@ -65,16 +65,50 @@ function ensureSkillsInitialized() {
}
}
/**
 * Marks review tasks left in an in-progress state as failed (called on startup).
 *
 * When the service restarts, tasks that were executing at that moment stay
 * stuck in one of the 'reviewing*' statuses. This updates all such rows to
 * 'failed' with an explanatory error message and reports how many were touched.
 * Any failure is logged and not rethrown.
 */
async function cleanupStuckTasks(): Promise<void> {
  try {
    const { count } = await prisma.reviewTask.updateMany({
      where: {
        status: {
          in: ['reviewing', 'reviewing_editorial', 'reviewing_methodology'],
        },
      },
      data: {
        status: 'failed',
        errorMessage: '服务重启导致任务中断,请重新提交',
      },
    });

    // Only report when something was actually cleaned up.
    if (count > 0) {
      logger.warn('[reviewWorker] Cleaned up stuck tasks on startup', { count });
      console.log(`⚠️ 启动时清理了 ${count} 个卡住的任务`);
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error('[reviewWorker] Failed to cleanup stuck tasks', { error: reason });
  }
}
/**
* 注册审查 Worker 到队列
*
* 此函数应在应用启动时调用index.ts
*/
export function registerReviewWorker() {
export async function registerReviewWorker() {
logger.info('[reviewWorker] Registering reviewWorker', {
useSkillsArchitecture: USE_SKILLS_ARCHITECTURE,
});
// 清理卡住的任务
await cleanupStuckTasks();
// 初始化 Skills
ensureSkillsInitialized();
@@ -113,6 +147,15 @@ export function registerReviewWorker() {
},
});
// 调试日志:检查 filePath
logger.info('[reviewWorker] Task info from DB', {
taskId,
filePath: existingTask?.filePath || '(empty)',
fileName: existingTask?.fileName,
fileSize: existingTask?.fileSize,
});
console.log(` 📁 filePath: ${existingTask?.filePath || '(空)'}`);
if (existingTask?.status === 'completed' && existingTask.completedAt) {
logger.warn('[reviewWorker] ⚠️ Task already completed, skipping', {
jobId: job.id,
@@ -223,8 +266,7 @@ export function registerReviewWorker() {
// ========================================
logger.info('[reviewWorker] Updating task result', { taskId });
// 构建 Skills 执行摘要V2.0 新增,存储到 picoExtract 字段)
// 注意picoExtract 字段暂时复用,未来迁移后移到专用字段
// 构建 Skills 执行摘要V2.0 新增,存储到专用 contextData 字段)
const skillsContext = USE_SKILLS_ARCHITECTURE && skillsSummary
? {
version: '2.0',
@@ -246,7 +288,7 @@ export function registerReviewWorker() {
status: 'completed',
editorialReview: editorialResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
methodologyReview: methodologyResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
picoExtract: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
contextData: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
overallScore,
editorialScore: editorialScore,
methodologyScore: methodologyScore,