From 9f256c4a021169301ffd7e5beaa9f7cf869542dc Mon Sep 17 00:00:00 2001 From: HaHafeng Date: Wed, 18 Feb 2026 10:09:40 +0800 Subject: [PATCH] feat(rvw): implement Skills architecture (Day 7-10) - Add Skills core framework (types, registry, executor, profile, context) - Implement DataForensicsSkill with DI, path security, graceful degradation - Implement EditorialSkill and MethodologySkill wrapping existing services - Extend ExtractionClient with IExtractionClient interface and analyzeDocx - Refactor reviewWorker to support V1/V2 architecture switching - Add Zod config validation and generic type support - Update development docs and module status Day 7: Skills core framework (~700 lines) Day 8: DataForensicsSkill + ExtractionClient extension (~400 lines) Day 9: EditorialSkill + MethodologySkill (~350 lines) Day 10: ReviewWorker integration (~280 lines) Co-authored-by: Cursor --- .../src/common/document/ExtractionClient.ts | 131 +- .../src/modules/rvw/skills/core/context.ts | 224 ++ .../src/modules/rvw/skills/core/executor.ts | 333 +++ backend/src/modules/rvw/skills/core/index.ts | 31 + .../src/modules/rvw/skills/core/profile.ts | 258 ++ .../src/modules/rvw/skills/core/registry.ts | 163 ++ backend/src/modules/rvw/skills/core/types.ts | 333 +++ backend/src/modules/rvw/skills/index.ts | 12 + .../modules/rvw/skills/library/BaseSkill.ts | 154 + .../rvw/skills/library/DataForensicsSkill.ts | 241 ++ .../rvw/skills/library/EditorialSkill.ts | 187 ++ .../rvw/skills/library/MethodologySkill.ts | 191 ++ .../src/modules/rvw/skills/library/index.ts | 54 + .../src/modules/rvw/workers/reviewWorker.ts | 267 +- .../00-系统当前状态与开发指南.md | 21 +- .../RVW-稿件审查系统/00-模块当前状态与开发指南.md | 19 +- .../04-开发计划/RVW V2.0 Skills 架构技术设计文档.md | 2601 +++++++++++++++++ ...围术期大量输血的术前危险因素分析及输血策略2月27 - 副本.docx | Bin 162 -> 0 bytes .../2026-02-18-Day7-10-Skills架构开发记录.md | 331 +++ .../06-开发记录/RVW V2.0 Skills 架构深度审查报告.md | 124 + 20 files changed, 5603 insertions(+), 72 deletions(-) create mode 100644 backend/src/modules/rvw/skills/core/context.ts create mode 100644 backend/src/modules/rvw/skills/core/executor.ts create mode 100644 backend/src/modules/rvw/skills/core/index.ts create mode 100644 backend/src/modules/rvw/skills/core/profile.ts create mode 100644 backend/src/modules/rvw/skills/core/registry.ts create mode 100644 backend/src/modules/rvw/skills/core/types.ts create mode 100644 backend/src/modules/rvw/skills/index.ts create mode 100644 backend/src/modules/rvw/skills/library/BaseSkill.ts create mode 100644 backend/src/modules/rvw/skills/library/DataForensicsSkill.ts create mode 100644 backend/src/modules/rvw/skills/library/EditorialSkill.ts create mode 100644 backend/src/modules/rvw/skills/library/MethodologySkill.ts create mode 100644 backend/src/modules/rvw/skills/library/index.ts create mode 100644 docs/03-业务模块/RVW-稿件审查系统/04-开发计划/RVW V2.0 Skills 架构技术设计文档.md delete mode 100644 docs/03-业务模块/RVW-稿件审查系统/05-测试文档/~$瘤患者围术期大量输血的术前危险因素分析及输血策略2月27 - 副本.docx create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/2026-02-18-Day7-10-Skills架构开发记录.md create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 Skills 架构深度审查报告.md diff --git a/backend/src/common/document/ExtractionClient.ts b/backend/src/common/document/ExtractionClient.ts index 2cba6deb..ab2bfd07 100644 --- a/backend/src/common/document/ExtractionClient.ts +++ b/backend/src/common/document/ExtractionClient.ts @@ -1,9 +1,13 @@ import FormData from 'form-data'; import axios from 'axios'; +import * as fs from 'fs'; +import * as path from 'path'; /** * Extraction Service Client * 调用Python微服务进行文档提取 + * + * @version 2.0.0 - 新增数据侦探 API (analyzeDocx) */ const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000'; @@ -22,12 +26,78 @@ export interface ExtractionResult { file_size?: number; page_count?: number; has_tables?: boolean; - [key: string]: any; + [key: string]: unknown; }; error?: string; } -class ExtractionClient { +/** + * 数据侦探结果(Python 返回) + */ +export interface ForensicsResult { + tables: ForensicsTable[]; + methods: string[]; + issues: ForensicsIssue[]; + summary: { + totalTables: number; + totalIssues: number; + errorCount: number; + warningCount: number; + }; +} + +export interface ForensicsTable { + id: string; + caption: string; + data: string[][]; + html?: string; + headers?: string[]; + rowCount: number; + colCount: number; +} + +export interface ForensicsIssue { + severity: 'ERROR' | 'WARNING' | 'INFO'; + type: string; + message: string; + location?: { + tableId?: string; + cellRef?: string; + paragraph?: number; + lineRange?: [number, number]; + }; + evidence?: { + expected?: string | number; + actual?: string | number; + formula?: string; + [key: string]: unknown; + }; +} + +/** + * 数据侦探配置 + */ +export interface ForensicsConfig { + checkLevel: 'L1' | 'L1_L2' | 'L1_L2_L25'; + tolerancePercent: number; +} + +/** + * IExtractionClient 接口 + * 用于依赖注入,便于测试 Mock + */ +export interface IExtractionClient { + health(): Promise<{ status: string; checks: unknown; timestamp: string }>; + extractDocument(file: Buffer, filename: string): Promise; + extractPdf(file: Buffer, filename: string, method?: 'auto' | 'nougat' | 'pymupdf'): Promise; + extractDocx(file: Buffer, filename: string): Promise; + extractTxt(file: Buffer, filename: string): Promise; + detectLanguage(file: Buffer, filename: string): Promise<{ language: string; chinese_ratio: number; chinese_chars: number; total_chars: number }>; + getPdfStrategy(file: Buffer, filename: string): Promise<{ detected_language: string; recommended_method: string; reason: string; nougat_available: boolean }>; + analyzeDocx(filePath: string, config: ForensicsConfig): Promise; +} + +class ExtractionClient implements IExtractionClient { private baseUrl: string; constructor(baseUrl: string = EXTRACTION_SERVICE_URL) { @@ -260,6 +330,63 @@ class ExtractionClient { throw new Error('Get PDF strategy failed'); } } + + /** + * 🆕 数据侦探 API - 分析 Word 文档 + * 提取表格并进行数据验证(L1 算术 + L2 统计 + L2.5 一致性) + * + * @param filePath 文件路径(服务端路径) + * @param config 侦探配置 + * @returns 侦探结果 + */ + async analyzeDocx( + filePath: string, + config: ForensicsConfig + ): Promise { + try { + // 读取文件 + const file = fs.readFileSync(filePath); + const filename = path.basename(filePath); + + const formData = new FormData(); + formData.append('file', file, filename); + formData.append('check_level', config.checkLevel); + formData.append('tolerance_percent', config.tolerancePercent.toString()); + + const response = await axios.post( + `${this.baseUrl}/api/v1/forensics/analyze`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 60000, // 60 秒超时 + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Forensics analysis failed:', error); + + if (axios.isAxiosError(error)) { + if (error.code === 'ECONNREFUSED') { + const err = new Error('Forensics service unavailable'); + (err as NodeJS.ErrnoException).code = 'ECONNREFUSED'; + throw err; + } + if (error.code === 'ETIMEDOUT') { + const err = new Error('Forensics service timeout'); + (err as NodeJS.ErrnoException).code = 'ETIMEDOUT'; + throw err; + } + if (error.response) { + throw new Error(`Forensics analysis failed: ${error.response.data.detail || error.message}`); + } + } + + throw new Error('Forensics analysis failed'); + } + } } // 导出类和单例 diff --git a/backend/src/modules/rvw/skills/core/context.ts b/backend/src/modules/rvw/skills/core/context.ts new file mode 100644 index 00000000..68feab49 --- /dev/null +++ b/backend/src/modules/rvw/skills/core/context.ts @@ -0,0 +1,224 @@ +/** + * RVW V2.0 Skills 架构 - 上下文管理 + * + * 提供上下文构建和管理功能 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { + SkillContext, + SkillResult, + TableData, + JournalProfile, + DocumentMeta, +} from './types.js'; + +/** + * 上下文构建器 + * 辅助创建和管理 SkillContext + */ +export class ContextBuilder { + private context: Partial; + + constructor() { + this.context = { + previousResults: [], + }; + } + + /** + * 设置任务 ID + */ + taskId(taskId: string): this { + this.context.taskId = taskId; + return this; + } + + /** + * 设置用户 ID + */ + userId(userId?: string): this { + this.context.userId = userId; + return this; + } + + /** + * 设置文档路径 + */ + documentPath(path: string): this { + this.context.documentPath = path; + return this; + } + + /** + * 设置文档内容 + */ + documentContent(content: string): this { + this.context.documentContent = content; + return this; + } + + /** + * 设置文档元信息 + */ + documentMeta(meta: DocumentMeta): this { + this.context.documentMeta = meta; + return this; + } + + /** + * 设置 Profile + */ + profile(profile: JournalProfile): this { + this.context.profile = profile; + return this; + } + + /** + * 设置表格数据(通常由 DataForensicsSkill 填充) + */ + tables(tables: TableData[]): this { + this.context.tables = tables; + return this; + } + + /** + * 设置检测到的统计方法 + */ + methods(methods: string[]): this { + this.context.methods = methods; + return this; + } + + /** + * 添加前置结果 + */ + addPreviousResult(result: SkillResult): this { + if (!this.context.previousResults) { + this.context.previousResults = []; + } + this.context.previousResults.push(result); + return this; + } + + /** + * 设置前置结果列表 + */ + previousResults(results: SkillResult[]): this { + this.context.previousResults = results; + return this; + } + + /** + * 构建上下文 + */ + build(): SkillContext { + // 验证必填字段 + const errors: string[] = []; + + if (!this.context.taskId) { + errors.push('taskId is required'); + } + if (!this.context.documentPath) { + errors.push('documentPath is required'); + } + if (this.context.documentContent === undefined) { + errors.push('documentContent is required'); + } + if (!this.context.profile) { + errors.push('profile is required'); + } + + if (errors.length > 0) { + throw new Error(`ContextBuilder validation failed: ${errors.join(', ')}`); + } + + return this.context as SkillContext; + } + + /** + * 构建部分上下文(用于 Executor) + */ + buildPartial(): Omit { + const errors: string[] = []; + + if (!this.context.taskId) { + errors.push('taskId is required'); + } + if (!this.context.documentPath) { + errors.push('documentPath is required'); + } + if (this.context.documentContent === undefined) { + errors.push('documentContent is required'); + } + + if (errors.length > 0) { + throw new Error(`ContextBuilder validation failed: ${errors.join(', ')}`); + } + + const { profile, previousResults, ...partial } = this.context; + return partial as Omit; + } + + /** + * 重置构建器 + */ + reset(): this { + this.context = { + previousResults: [], + }; + return this; + } +} + +/** + * 任务数据接口(用于 createContextFromTask) + */ +export interface TaskData { + id: string; + userId: string; + filePath: string; + content?: string | null; + fileName: string; // 对应数据库 file_name 字段 + fileSize?: number | null; +} + +/** + * 从数据库任务记录创建上下文 + */ +export function createContextFromTask( + task: TaskData, + profile: JournalProfile +): SkillContext { + return new ContextBuilder() + .taskId(task.id) + .userId(task.userId) + .documentPath(task.filePath) + .documentContent(task.content || '') + .documentMeta({ + filename: task.fileName, + fileSize: task.fileSize || 0, + }) + .profile(profile) + .build(); +} + +/** + * 从数据库任务记录创建部分上下文(用于 Executor) + */ +export function createPartialContextFromTask( + task: TaskData +): Omit { + return new ContextBuilder() + .taskId(task.id) + .userId(task.userId) + .documentPath(task.filePath) + .documentContent(task.content || '') + .documentMeta({ + filename: task.fileName, + fileSize: task.fileSize || 0, + }) + .buildPartial(); +} diff --git a/backend/src/modules/rvw/skills/core/executor.ts b/backend/src/modules/rvw/skills/core/executor.ts new file mode 100644 index 00000000..def2e72e --- /dev/null +++ b/backend/src/modules/rvw/skills/core/executor.ts @@ -0,0 +1,333 @@ +/** + * RVW V2.0 Skills 架构 - Skill 执行引擎 + * + * 负责按 Profile 配置顺序执行 Skills,支持超时熔断和故障隔离 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { + Skill, + SkillContext, + SkillResult, + SkillConfig, + ExecutorConfig, + ExecutionSummary, + PipelineItem, + JournalProfile, + BaseSkillContext, + SkillErrorCodes, +} from './types.js'; +import { SkillRegistry } from './registry.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * 默认执行器配置 + */ +const DEFAULT_EXECUTOR_CONFIG: ExecutorConfig = { + defaultTimeout: 30000, // 30 秒 + maxRetries: 0, + retryDelay: 1000, + continueOnError: true, + logLevel: 'info', +}; + +/** + * Skill 执行引擎 + */ +export class SkillExecutor { + private config: ExecutorConfig; + + constructor(config?: Partial>) { + this.config = { ...DEFAULT_EXECUTOR_CONFIG, ...config } as ExecutorConfig; + } + + /** + * 执行 Pipeline + */ + async execute( + profile: JournalProfile, + initialContext: Omit + ): Promise { + const startTime = Date.now(); + const results: SkillResult[] = []; + + // 构建完整上下文 + const context = { + ...initialContext, + profile, + previousResults: [], + } as TContext; + + logger.info({ + taskId: context.taskId, + profileId: profile.id, + pipelineLength: profile.pipeline.length, + }, '[SkillExecutor] Starting pipeline execution'); + + // 遍历 Pipeline + for (const item of profile.pipeline) { + // 跳过禁用的 Skill + if (!item.enabled) { + logger.debug({ skillId: item.skillId }, '[SkillExecutor] Skill disabled, skipping'); + results.push(this.createSkippedResult(item.skillId, 'Skill disabled in profile')); + continue; + } + + // 获取 Skill + const skill = SkillRegistry.get(item.skillId); + if (!skill) { + logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill not found in registry'); + results.push(this.createSkippedResult(item.skillId, 'Skill not found')); + continue; + } + + // 前置检查 + if (skill.canRun && !skill.canRun(context as SkillContext)) { + logger.info({ skillId: item.skillId }, '[SkillExecutor] Skill pre-check failed, skipping'); + results.push(this.createSkippedResult(item.skillId, 'Pre-check failed')); + continue; + } + + // 执行 Skill + const result = await this.executeSkill(skill, context as SkillContext, item, profile); + results.push(result); + + // 调用完成回调(V2.1 扩展点) + if (this.config.onSkillComplete) { + try { + await this.config.onSkillComplete(item.skillId, result, context); + } catch (callbackError: unknown) { + const errorMessage = callbackError instanceof Error ? callbackError.message : String(callbackError); + logger.error({ skillId: item.skillId, error: errorMessage }, '[SkillExecutor] onSkillComplete callback failed'); + } + } + + // 更新上下文(传递给后续 Skills) + context.previousResults.push(result); + + // 更新共享数据 + this.updateContextWithResult(context, skill, result); + + // 检查是否需要中断 + if (result.status === 'error' && !this.shouldContinue(item, profile)) { + logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill failed and continueOnError=false, stopping'); + break; + } + } + + // 生成汇总 + const summary = this.buildSummary(context.taskId, profile.id, results, startTime); + + logger.info({ + taskId: context.taskId, + overallStatus: summary.overallStatus, + totalTime: summary.totalExecutionTime, + successCount: summary.successCount, + errorCount: summary.errorCount, + }, '[SkillExecutor] Pipeline execution completed'); + + return summary; + } + + /** + * 执行单个 Skill(带超时和重试) + */ + private async executeSkill( + skill: Skill, + context: SkillContext, + item: PipelineItem, + profile: JournalProfile + ): Promise { + const startedAt = new Date(); + const timeoutMultiplier = profile.globalConfig?.timeoutMultiplier ?? 1; + const timeout = Math.round((item.timeout ?? skill.metadata.defaultTimeout ?? this.config.defaultTimeout) * timeoutMultiplier); + + logger.info({ + skillId: skill.metadata.id, + taskId: context.taskId, + timeout, + }, '[SkillExecutor] Executing skill'); + + try { + // 带超时执行 + const result = await this.executeWithTimeout(skill, context, item.config, timeout); + + logger.info({ + skillId: skill.metadata.id, + taskId: context.taskId, + status: result.status, + executionTime: result.executionTime, + issueCount: result.issues.length, + }, '[SkillExecutor] Skill execution completed'); + + return result; + } catch (error: unknown) { + const executionTime = Date.now() - startedAt.getTime(); + const errorMessage = error instanceof Error ? error.message : String(error); + + // 判断是否超时 + if (errorMessage === 'SKILL_TIMEOUT') { + logger.warn({ + skillId: skill.metadata.id, + taskId: context.taskId, + timeout, + }, '[SkillExecutor] Skill execution timed out'); + + return { + skillId: skill.metadata.id, + skillName: skill.metadata.name, + status: 'timeout', + issues: [{ + severity: 'WARNING', + type: SkillErrorCodes.SKILL_TIMEOUT, + message: `${skill.metadata.name} 执行超时 (${timeout}ms),已跳过`, + }], + executionTime: timeout, + timedOut: true, + startedAt, + completedAt: new Date(), + }; + } + + // 其他错误 + logger.error({ + skillId: skill.metadata.id, + taskId: context.taskId, + error: errorMessage, + }, '[SkillExecutor] Skill execution failed'); + + return { + skillId: skill.metadata.id, + skillName: skill.metadata.name, + status: 'error', + issues: [{ + severity: 'ERROR', + type: SkillErrorCodes.SKILL_EXECUTION_ERROR, + message: `${skill.metadata.name} 执行失败: ${errorMessage}`, + }], + executionTime, + error: errorMessage, + startedAt, + completedAt: new Date(), + }; + } + } + + /** + * 带超时执行 + */ + private async executeWithTimeout( + skill: Skill, + context: SkillContext, + config: SkillConfig | undefined, + timeout: number + ): Promise { + return Promise.race([ + skill.run(context, config), + new Promise((_, reject) => + setTimeout(() => reject(new Error('SKILL_TIMEOUT')), timeout) + ), + ]); + } + + /** + * 根据 Skill 结果更新上下文 + */ + private updateContextWithResult(context: TContext, skill: Skill, result: SkillResult): void { + // DataForensicsSkill 的特殊处理 + if (skill.metadata.id === 'DataForensicsSkill' && result.status !== 'error') { + const rvwContext = context as unknown as SkillContext; + const data = result.data as { + tables?: unknown[]; + methods?: string[]; + } | undefined; + + if (data) { + if (data.tables) { + rvwContext.tables = data.tables as SkillContext['tables']; + } + if (data.methods) { + rvwContext.methods = data.methods; + } + rvwContext.forensicsResult = data as SkillContext['forensicsResult']; + } + } + } + + /** + * 创建跳过结果 + */ + private createSkippedResult(skillId: string, reason: string): SkillResult { + const now = new Date(); + return { + skillId, + skillName: skillId, + status: 'skipped', + issues: [{ + severity: 'INFO', + type: 'SKILL_SKIPPED', + message: reason, + }], + executionTime: 0, + startedAt: now, + completedAt: now, + }; + } + + /** + * 判断是否继续执行 + */ + private shouldContinue(item: PipelineItem, profile: JournalProfile): boolean { + if (item.optional) return true; + return profile.globalConfig?.continueOnError ?? this.config.continueOnError; + } + + /** + * 构建执行汇总 + */ + private buildSummary( + taskId: string, + profileId: string, + results: SkillResult[], + startTime: number + ): ExecutionSummary { + const completedAt = new Date(); + const totalExecutionTime = Date.now() - startTime; + + const successCount = results.filter(r => r.status === 'success').length; + const warningCount = results.filter(r => r.status === 'warning').length; + const errorCount = results.filter(r => r.status === 'error').length; + const skippedCount = results.filter(r => r.status === 'skipped').length; + const timeoutCount = results.filter(r => r.status === 'timeout').length; + + let overallStatus: 'success' | 'partial' | 'failed'; + if (errorCount === 0 && timeoutCount === 0) { + overallStatus = 'success'; + } else if (successCount > 0) { + overallStatus = 'partial'; + } else { + overallStatus = 'failed'; + } + + return { + taskId, + profileId, + overallStatus, + totalSkills: results.length, + successCount, + warningCount, + errorCount, + skippedCount, + timeoutCount, + results, + totalExecutionTime, + startedAt: new Date(startTime), + completedAt, + }; + } +} + +// 导出默认实例 +export const defaultExecutor = new SkillExecutor(); diff --git a/backend/src/modules/rvw/skills/core/index.ts b/backend/src/modules/rvw/skills/core/index.ts new file mode 100644 index 00000000..24364fc5 --- /dev/null +++ b/backend/src/modules/rvw/skills/core/index.ts @@ -0,0 +1,31 @@ +/** + * RVW V2.0 Skills 架构 - Core 模块统一导出 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +// 类型定义 +export * from './types.js'; + +// 注册表 +export { SkillRegistry } from './registry.js'; + +// 执行器 +export { SkillExecutor, defaultExecutor } from './executor.js'; + +// Profile 解析器 +export { + ProfileResolver, + DEFAULT_PROFILE, + CHINESE_CORE_PROFILE, + QUICK_FORENSICS_PROFILE, +} from './profile.js'; + +// 上下文管理 +export { + ContextBuilder, + createContextFromTask, + createPartialContextFromTask, +} from './context.js'; +export type { TaskData } from './context.js'; diff --git a/backend/src/modules/rvw/skills/core/profile.ts b/backend/src/modules/rvw/skills/core/profile.ts new file mode 100644 index 00000000..61aedd80 --- /dev/null +++ b/backend/src/modules/rvw/skills/core/profile.ts @@ -0,0 +1,258 @@ +/** + * RVW V2.0 Skills 架构 - Profile 配置解析器 + * + * MVP 阶段使用硬编码配置,V2.1 将支持数据库存储 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { JournalProfile, PipelineItem } from './types.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * 默认 Profile 配置 + */ +export const DEFAULT_PROFILE: JournalProfile = { + id: 'default', + name: '通用期刊配置', + description: 'RVW V2.0 默认审稿配置,适用于大多数期刊', + version: '1.0.0', + + pipeline: [ + { + skillId: 'DataForensicsSkill', + enabled: true, + optional: true, // 数据侦探失败不影响其他审稿 + config: { + checkLevel: 'L1_L2_L25', + tolerancePercent: 0.1, + }, + timeout: 60000, // 60 秒(需要调用 Python) + }, + { + skillId: 'EditorialSkill', + enabled: true, + optional: false, + timeout: 45000, + }, + { + skillId: 'MethodologySkill', + enabled: true, + optional: false, + timeout: 45000, + }, + ], + + globalConfig: { + strictness: 'STANDARD', + continueOnError: true, + timeoutMultiplier: 1.0, + }, +}; + +/** + * 中文核心期刊 Profile + */ +export const CHINESE_CORE_PROFILE: JournalProfile = { + id: 'chinese-core', + name: '中文核心期刊配置', + description: '适用于中文核心期刊,对数据准确性要求更高', + version: '1.0.0', + + pipeline: [ + { + skillId: 'DataForensicsSkill', + enabled: true, + optional: false, // 中文核心对数据准确性要求高 + config: { + checkLevel: 'L1_L2_L25', + tolerancePercent: 0.05, // 更严格的容错 + }, + timeout: 60000, + }, + { + skillId: 'EditorialSkill', + enabled: true, + optional: false, + config: { + standard: 'chinese-core', + }, + timeout: 45000, + }, + { + skillId: 'MethodologySkill', + enabled: true, + optional: false, + timeout: 45000, + }, + ], + + globalConfig: { + strictness: 'STRICT', + continueOnError: false, // 严格模式,失败即停止 + }, +}; + +/** + * 快速预览 Profile(仅数据侦探) + */ +export const QUICK_FORENSICS_PROFILE: JournalProfile = { + id: 'quick-forensics', + name: '快速数据侦探', + description: '仅执行数据侦探,用于快速预览表格验证结果', + version: '1.0.0', + + pipeline: [ + { + skillId: 'DataForensicsSkill', + enabled: true, + optional: false, + config: { + checkLevel: 'L1_L2_L25', + tolerancePercent: 0.1, + }, + timeout: 60000, + }, + ], + + globalConfig: { + strictness: 'STANDARD', + continueOnError: true, + }, +}; + +/** + * 所有预定义 Profiles + */ +const PROFILES: Map = new Map([ + ['default', DEFAULT_PROFILE], + ['chinese-core', CHINESE_CORE_PROFILE], + ['quick-forensics', QUICK_FORENSICS_PROFILE], +]); + +/** + * V1.0 Agent 到 V2.0 Skill 的映射 + */ +const AGENT_TO_SKILL_MAP: Record = { + 'editorial': 'EditorialSkill', + 'methodology': 'MethodologySkill', + 'forensics': 'DataForensicsSkill', +}; + +/** + * Profile 解析器 + */ +export class ProfileResolver { + /** + * 获取 Profile + * MVP 阶段:从内存 Map 获取 + * V2.1 阶段:从数据库获取 + */ + static resolve(profileId?: string): JournalProfile { + const id = profileId || 'default'; + const profile = PROFILES.get(id); + + if (!profile) { + logger.warn({ profileId: id }, '[ProfileResolver] Profile not found, using default'); + return DEFAULT_PROFILE; + } + + logger.debug({ profileId: id }, '[ProfileResolver] Profile resolved'); + return profile; + } + + /** + * 根据用户选择的 Agents 动态构建 Profile + * 兼容 V1.0 的 selectedAgents 参数 + */ + static resolveFromAgents(selectedAgents?: string[]): JournalProfile { + const baseProfile = JSON.parse(JSON.stringify(DEFAULT_PROFILE)) as JournalProfile; + baseProfile.id = 'dynamic'; + baseProfile.name = '动态生成配置'; + + if (!selectedAgents || selectedAgents.length === 0) { + logger.debug('[ProfileResolver] No agents selected, using full default profile'); + return baseProfile; + } + + // 收集需要启用的 Skills + const enabledSkills = new Set(); + + // DataForensicsSkill 始终启用 + enabledSkills.add('DataForensicsSkill'); + + // 根据 V1.0 Agent 选择映射到 V2.0 Skills + for (const agent of selectedAgents) { + const skillId = AGENT_TO_SKILL_MAP[agent]; + if (skillId) { + enabledSkills.add(skillId); + } + } + + // 更新 Pipeline + baseProfile.pipeline = baseProfile.pipeline.map(item => ({ + ...item, + enabled: enabledSkills.has(item.skillId), + })); + + logger.debug({ + selectedAgents, + enabledSkills: Array.from(enabledSkills), + }, '[ProfileResolver] Profile built from agents'); + + return baseProfile; + } + + /** + * 获取所有可用 Profiles(用于 UI) + */ + static getAllProfiles(): JournalProfile[] { + return Array.from(PROFILES.values()); + } + + /** + * 获取 Profile ID 列表 + */ + static getProfileIds(): string[] { + return Array.from(PROFILES.keys()); + } + + /** + * 注册新 Profile(V2.1 支持动态添加) + */ + static register(profile: JournalProfile): void { + PROFILES.set(profile.id, profile); + logger.info({ profileId: profile.id }, '[ProfileResolver] Profile registered'); + } + + /** + * 验证 Profile 配置 + */ + static validate(profile: JournalProfile): { valid: boolean; errors: string[] } { + const errors: string[] = []; + + if (!profile.id) { + errors.push('Profile ID is required'); + } + + if (!profile.name) { + errors.push('Profile name is required'); + } + + if (!profile.pipeline || profile.pipeline.length === 0) { + errors.push('Pipeline must have at least one skill'); + } + + for (const item of profile.pipeline || []) { + if (!item.skillId) { + errors.push('Pipeline item must have skillId'); + } + } + + return { + valid: errors.length === 0, + errors, + }; + } +} diff --git a/backend/src/modules/rvw/skills/core/registry.ts b/backend/src/modules/rvw/skills/core/registry.ts new file mode 100644 index 00000000..f0796166 --- /dev/null +++ b/backend/src/modules/rvw/skills/core/registry.ts @@ -0,0 +1,163 @@ +/** + * RVW V2.0 Skills 架构 - Skill 注册表 + * + * 单例模式,管理所有已注册的 Skills + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { Skill, SkillMetadata, SkillCategory } from './types.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * Skill 注册表类 + */ +class SkillRegistryClass { + private skills: Map = new Map(); + private initialized: boolean = false; + + /** + * 注册 Skill + */ + register(skill: Skill): void { + const { id, version } = skill.metadata; + + if (this.skills.has(id)) { + logger.warn({ skillId: id }, '[SkillRegistry] Skill already registered, overwriting'); + } + + this.skills.set(id, skill); + logger.info({ skillId: id, version }, '[SkillRegistry] Skill registered'); + } + + /** + * 批量注册 + */ + registerAll(skills: Skill[]): void { + for (const skill of skills) { + this.register(skill); + } + } + + /** + * 获取 Skill + */ + get(id: string): Skill | undefined { + return this.skills.get(id); + } + + /** + * 获取 Skill(必须存在) + */ + getRequired(id: string): Skill { + const skill = this.skills.get(id); + if (!skill) { + throw new Error(`Skill not found: ${id}`); + } + return skill; + } + + /** + * 检查 Skill 是否存在 + */ + has(id: string): boolean { + return this.skills.has(id); + } + + /** + * 获取所有已注册的 Skill + */ + getAll(): Skill[] { + return Array.from(this.skills.values()); + } + + /** + * 获取所有 Skill 元数据(用于 UI 展示) + */ + getAllMetadata(): SkillMetadata[] { + return this.getAll().map(skill => skill.metadata); + } + + /** + * 按分类获取 Skills + */ + getByCategory(category: SkillCategory): Skill[] { + return this.getAll().filter(skill => skill.metadata.category === category); + } + + /** + * 注销 Skill + */ + unregister(id: string): boolean { + const result = this.skills.delete(id); + if (result) { + logger.info({ skillId: id }, '[SkillRegistry] Skill unregistered'); + } + return result; + } + + /** + * 清空所有 Skills(测试用) + */ + clear(): void { + this.skills.clear(); + this.initialized = false; + logger.debug('[SkillRegistry] All skills cleared'); + } + + /** + * 获取注册的 Skill 数量 + */ + get size(): number { + return this.skills.size; + } + + /** + * 标记为已初始化 + */ + markInitialized(): void { + this.initialized = true; + logger.info({ skillCount: this.size }, '[SkillRegistry] Registry initialized'); + } + + /** + * 检查是否已初始化 + */ + isInitialized(): boolean { + return this.initialized; + } + + /** + * 获取注册表状态摘要 + */ + getSummary(): { + initialized: boolean; + skillCount: number; + categories: Record; + } { + const categories: Record = { + forensics: 0, + editorial: 0, + methodology: 0, + guardrail: 0, + knowledge: 0, + }; + + for (const skill of this.skills.values()) { + const category = skill.metadata.category; + if (category in categories) { + categories[category]++; + } + } + + return { + initialized: this.initialized, + skillCount: this.size, + categories, + }; + } +} + +// 导出单例 +export const SkillRegistry = new SkillRegistryClass(); diff --git a/backend/src/modules/rvw/skills/core/types.ts b/backend/src/modules/rvw/skills/core/types.ts new file mode 100644 index 00000000..ff27ccab --- /dev/null +++ b/backend/src/modules/rvw/skills/core/types.ts @@ -0,0 +1,333 @@ +/** + * RVW V2.0 Skills 架构 - 核心类型定义 + * + * ⚠️ 注意:此文件未来将移动到 common/skills/core/types.ts + * ⚠️ 禁止在此文件中 import modules/rvw 下的业务代码 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { z } from 'zod'; + +// ========================================== +// Skill 基础类型定义(通用) +// ========================================== + +/** + * 问题严重程度 + */ +export type IssueSeverity = 'ERROR' | 'WARNING' | 'INFO'; + +/** + * Skill 执行状态 + */ +export type SkillStatus = 'success' | 'warning' | 'error' | 'timeout' | 'skipped'; + +/** + * Skill 分类 + */ +export type SkillCategory = 'forensics' | 'editorial' | 'methodology' | 'guardrail' | 'knowledge'; + +/** + * 问题定位信息 + */ +export interface IssueLocation { + tableId?: string; // 表格 ID + cellRef?: string; // R1C1 坐标,如 "R3C4" + paragraph?: number; // 段落编号 + lineRange?: [number, number]; // 行范围 +} + +/** + * 问题详情 + */ +export interface Issue { + severity: IssueSeverity; + type: string; // 问题类型代码(如 ARITHMETIC_SUM_MISMATCH) + message: string; // 人类可读描述 + location?: IssueLocation; + evidence?: { + expected?: string | number; + actual?: string | number; + formula?: string; + [key: string]: unknown; + }; +} + +/** + * 表格数据结构 + */ +export interface TableData { + id: string; + caption: string; + data: string[][]; + html?: string; + headers?: string[]; + rowCount: number; + colCount: number; +} + +// ========================================== +// Skill Context(共享上下文)- 泛型设计 +// ========================================== + +/** + * 基础上下文接口(通用) + * 使用泛型,支持不同业务模块扩展 + */ +export interface BaseSkillContext { + taskId: string; + userId?: string; + previousResults: SkillResult[]; + profile: TProfile; +} + +/** + * 文档元信息 + */ +export interface DocumentMeta { + filename: string; + fileSize: number; + pageCount?: number; +} + +/** + * 数据侦探结果(Python 返回) + */ +export interface ForensicsResult { + tables: TableData[]; + methods: string[]; + issues: Issue[]; + summary: { + totalTables: number; + totalIssues: number; + errorCount: number; + warningCount: number; + }; +} + +/** + * RVW 模块扩展字段 + */ +export interface RvwContextExtras { + documentPath: string; + documentContent: string; + documentMeta?: DocumentMeta; + tables?: TableData[]; + methods?: string[]; + forensicsResult?: ForensicsResult; +} + +/** + * RVW Skill 执行上下文(组合类型) + */ +export interface SkillContext extends BaseSkillContext, RvwContextExtras {} + +// ========================================== +// Skill Result(执行结果) +// ========================================== + +/** + * Skill 执行结果 + */ +export interface SkillResult { + skillId: string; + skillName: string; + status: SkillStatus; + score?: number; + scoreLabel?: string; + issues: Issue[]; + data?: unknown; + executionTime: number; + timedOut?: boolean; + error?: string; + startedAt: Date; + completedAt: Date; +} + +// ========================================== +// Skill 接口定义 +// ========================================== + +/** + * Skill 元数据(用于注册和 UI 展示) + */ +export interface SkillMetadata { + id: string; + name: string; + description: string; + version: string; + category: SkillCategory; + inputs: string[]; + outputs: string[]; + configSchema?: z.ZodSchema; + defaultTimeout: number; + retryable: boolean; + icon?: string; + color?: string; +} + +/** + * Skill 配置(运行时) + */ +export interface SkillConfig { + [key: string]: unknown; +} + +// ========================================== +// Zod 配置 Schema 定义 +// ========================================== + +/** + * DataForensicsSkill 配置 Schema + */ +export const DataForensicsConfigSchema = z.object({ + checkLevel: z.enum(['L1', 'L1_L2', 'L1_L2_L25']).default('L1_L2_L25'), + tolerancePercent: z.number().min(0).max(1).default(0.1), +}); +export type DataForensicsConfig = z.infer; + +/** + * EditorialSkill 配置 Schema + */ +export const EditorialConfigSchema = z.object({ + standard: z.enum(['default', 'chinese-core', 'international']).default('default'), + maxContentLength: z.number().default(100000), +}); +export type EditorialConfig = z.infer; + +/** + * MethodologySkill 配置 Schema + */ +export const MethodologyConfigSchema = z.object({ + focusAreas: z.array(z.string()).default(['design', 'statistics', 'reporting']), + maxContentLength: z.number().default(100000), +}); +export type MethodologyConfig = z.infer; + +// ========================================== +// Skill 接口(通用泛型) +// ========================================== + +/** + * Skill 接口 + * 使用泛型支持不同上下文和配置类型 + */ +export interface Skill< + TContext extends BaseSkillContext = SkillContext, + TConfig extends SkillConfig = SkillConfig +> { + readonly metadata: SkillMetadata; + readonly configSchema?: z.ZodSchema; + + run(context: TContext, config?: TConfig): Promise; + validateConfig?(config: unknown): TConfig; + canRun?(context: TContext): boolean; +} + +// ========================================== +// Profile 配置 +// ========================================== + +/** + * Pipeline 中的 Skill 配置项 + */ +export interface PipelineItem { + skillId: string; + enabled: boolean; + config?: SkillConfig; + timeout?: number; + optional?: boolean; +} + +/** + * 严格程度 + */ +export type Strictness = 'STRICT' | 'STANDARD' | 'LENIENT'; + +/** + * 全局配置 + */ +export interface GlobalConfig { + strictness: Strictness; + timeoutMultiplier?: number; + continueOnError?: boolean; + maxConcurrency?: number; +} + +/** + * 期刊 Profile 配置 + */ +export interface JournalProfile { + id: string; + name: string; + description?: string; + pipeline: PipelineItem[]; + globalConfig?: GlobalConfig; + version: string; + createdAt?: Date; + updatedAt?: Date; + createdBy?: string; +} + +// ========================================== +// Executor 配置 +// ========================================== + +/** + * Skill 执行器配置 + */ +export interface ExecutorConfig { + defaultTimeout: number; + maxRetries: number; + retryDelay: number; + continueOnError: boolean; + logLevel: 'debug' | 'info' | 'warn' | 'error'; + + /** + * Skill 执行完成回调(V2.1 扩展点) + * 可用于:增量持久化、实时状态推送、监控上报 + */ + onSkillComplete?: ( + skillId: string, + result: SkillResult, + context: TContext + ) => Promise; +} + +/** + * 执行结果汇总 + */ +export interface ExecutionSummary { + taskId: string; + profileId: string; + overallStatus: 'success' | 'partial' | 'failed'; + totalSkills: number; + successCount: number; + warningCount: number; + errorCount: number; + skippedCount: number; + timeoutCount: number; + results: SkillResult[]; + totalExecutionTime: number; + startedAt: Date; + completedAt: Date; +} + +// ========================================== +// 错误码定义 +// ========================================== + +export const SkillErrorCodes = { + SKILL_NOT_FOUND: 'SKILL_NOT_FOUND', + SKILL_TIMEOUT: 'SKILL_TIMEOUT', + SKILL_EXECUTION_ERROR: 'SKILL_EXECUTION_ERROR', + CONFIG_VALIDATION_ERROR: 'CONFIG_VALIDATION_ERROR', + PROFILE_NOT_FOUND: 'PROFILE_NOT_FOUND', + CONTEXT_INVALID: 'CONTEXT_INVALID', + SECURITY_PATH_VIOLATION: 'SECURITY_PATH_VIOLATION', + RESOURCE_LIMIT_EXCEEDED: 'RESOURCE_LIMIT_EXCEEDED', +} as const; + +export type SkillErrorCode = typeof SkillErrorCodes[keyof typeof SkillErrorCodes]; diff --git a/backend/src/modules/rvw/skills/index.ts b/backend/src/modules/rvw/skills/index.ts new file mode 100644 index 00000000..3eb08bb2 --- /dev/null +++ b/backend/src/modules/rvw/skills/index.ts @@ -0,0 +1,12 @@ +/** + * RVW V2.0 Skills 架构 - 模块主入口 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +// 核心框架 +export * from './core/index.js'; + +// Skills Library +export * from './library/index.js'; diff --git a/backend/src/modules/rvw/skills/library/BaseSkill.ts b/backend/src/modules/rvw/skills/library/BaseSkill.ts new file mode 100644 index 00000000..8da0cbaa --- /dev/null +++ b/backend/src/modules/rvw/skills/library/BaseSkill.ts @@ -0,0 +1,154 @@ +/** + * RVW V2.0 Skills 架构 - Skill 基类 + * + * 提供通用功能,简化 Skill 实现 + * 内置 Zod 配置验证 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { z } from 'zod'; +import { + Skill, + SkillMetadata, + SkillContext, + SkillResult, + SkillConfig, + BaseSkillContext, + SkillErrorCodes, +} from '../core/types.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * Skill 基类 + * 使用泛型支持不同上下文和配置类型 + * 内置 Zod 配置验证 + */ +export abstract class BaseSkill< + TContext extends BaseSkillContext = SkillContext, + TConfig extends SkillConfig = SkillConfig +> implements Skill { + + abstract readonly metadata: SkillMetadata; + + /** + * 配置 Schema(子类定义) + */ + readonly configSchema?: z.ZodSchema; + + /** + * 子类实现具体逻辑 + */ + abstract execute( + context: TContext, + config?: TConfig + ): Promise>; + + /** + * 执行入口(统一处理日志、计时、配置验证等) + */ + async run(context: TContext, config?: TConfig): Promise { + const startedAt = new Date(); + const startTime = Date.now(); + + logger.info({ + skillId: this.metadata.id, + taskId: context.taskId, + }, `[${this.metadata.id}] Starting execution`); + + try { + // 配置验证(使用 Zod) + const validatedConfig = this.validateConfig(config); + + const result = await this.execute(context, validatedConfig); + const executionTime = Date.now() - startTime; + + logger.info({ + skillId: this.metadata.id, + taskId: context.taskId, + status: result.status, + executionTime, + issueCount: result.issues.length, + }, `[${this.metadata.id}] Execution completed`); + + return { + ...result, + skillId: this.metadata.id, + skillName: this.metadata.name, + executionTime, + startedAt, + completedAt: new Date(), + }; + } catch (error: unknown) { + const executionTime = Date.now() - startTime; + + // 区分 Zod 验证错误和执行错误 + const isValidationError = error instanceof z.ZodError; + const errorType = isValidationError + ? SkillErrorCodes.CONFIG_VALIDATION_ERROR + : SkillErrorCodes.SKILL_EXECUTION_ERROR; + + const errorMessage = isValidationError + ? `配置验证失败: ${(error as z.ZodError).errors.map(e => e.message).join(', ')}` + : `执行失败: ${error instanceof Error ? error.message : String(error)}`; + + logger.error({ + skillId: this.metadata.id, + taskId: context.taskId, + error: error instanceof Error ? error.message : String(error), + errorType, + }, `[${this.metadata.id}] Execution failed`); + + return { + skillId: this.metadata.id, + skillName: this.metadata.name, + status: 'error', + issues: [{ + severity: 'ERROR', + type: errorType, + message: errorMessage, + }], + error: error instanceof Error ? error.message : String(error), + executionTime, + startedAt, + completedAt: new Date(), + }; + } + } + + /** + * 配置验证(使用 Zod Schema) + * 子类可覆盖以实现自定义验证 + */ + validateConfig(config: unknown): TConfig { + if (this.configSchema) { + return this.configSchema.parse(config ?? {}); + } + return (config ?? {}) as TConfig; + } + + /** + * 默认前置检查(子类可覆盖) + */ + canRun(_context: TContext): boolean { + return true; + } + + /** + * 辅助方法:从上下文获取前置 Skill 结果 + */ + protected getPreviousResult(context: TContext, skillId: string): SkillResult | undefined { + return context.previousResults.find(r => r.skillId === skillId); + } + + /** + * 辅助方法:获取评分标签 + */ + protected getScoreLabel(score: number): string { + if (score >= 90) return '优秀'; + if (score >= 80) return '良好'; + if (score >= 60) return '合格'; + return '需改进'; + } +} diff --git a/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts b/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts new file mode 100644 index 00000000..b5f606fc --- /dev/null +++ b/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts @@ -0,0 +1,241 @@ +/** + * RVW V2.0 Skills 架构 - 数据侦探 Skill + * + * 调用 Python 服务进行表格提取和数据验证 + * 特性:依赖注入、路径安全检查、优雅降级 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { BaseSkill } from './BaseSkill.js'; +import { + SkillMetadata, + SkillContext, + SkillResult, + DataForensicsConfigSchema, + DataForensicsConfig, + ForensicsResult, + Issue, +} from '../core/types.js'; +import { + extractionClient, + IExtractionClient, + ForensicsResult as ClientForensicsResult, +} from '../../../../common/document/ExtractionClient.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * 安全:允许的文件存储路径前缀 + */ +const ALLOWED_PATH_PREFIXES = [ + '/app/uploads/', // Docker 容器内路径 + 'D:\\MyCursor\\', // 开发环境 Windows + 'D:/MyCursor/', // 开发环境 Windows (forward slash) + '/tmp/rvw-uploads/', // 临时目录 + 'C:\\Users\\', // Windows 用户目录 + '/home/', // Linux 用户目录 +]; + +/** + * 数据侦探 Skill + * 依赖注入:ExtractionClient 可在测试中 Mock + */ +export class DataForensicsSkill extends BaseSkill { + /** + * 依赖注入:ExtractionClient + */ + private readonly extractionClient: IExtractionClient; + + constructor(client?: IExtractionClient) { + super(); + this.extractionClient = client || extractionClient; + } + + /** + * Zod 配置 Schema + */ + readonly configSchema = DataForensicsConfigSchema; + + readonly metadata: SkillMetadata = { + id: 'DataForensicsSkill', + name: '数据侦探', + description: '提取 Word 文档表格,验证数据算术正确性和统计学一致性', + version: '2.0.0', + category: 'forensics', + + inputs: ['documentPath'], + outputs: ['tables', 'methods', 'forensicsResult'], + + defaultTimeout: 60000, // 60 秒 + retryable: true, + + icon: '🐍', + color: '#3776ab', + }; + + /** + * 前置检查 + * 增加路径安全验证(防止路径遍历攻击) + */ + canRun(context: SkillContext): boolean { + if (!context.documentPath) { + logger.warn({ taskId: context.taskId }, '[DataForensicsSkill] No document path'); + return false; + } + + if (!context.documentPath.toLowerCase().endsWith('.docx')) { + logger.info({ taskId: context.taskId }, '[DataForensicsSkill] Not a .docx file, skipping'); + return false; + } + + // 安全检查:路径白名单 + const normalizedPath = context.documentPath.replace(/\\/g, '/'); + const isPathAllowed = ALLOWED_PATH_PREFIXES.some(prefix => { + const normalizedPrefix = prefix.replace(/\\/g, '/'); + return normalizedPath.startsWith(normalizedPrefix); + }); + + if (!isPathAllowed) { + logger.error({ + taskId: context.taskId, + documentPath: '[REDACTED]', // 不记录完整路径 + }, '[DataForensicsSkill] Document path not in allowed prefixes (security check)'); + return false; + } + + // 检查是否包含路径遍历 + if (context.documentPath.includes('..')) { + logger.error({ + taskId: context.taskId, + }, '[DataForensicsSkill] Path traversal detected (security check)'); + return false; + } + + return true; + } + + /** + * 执行数据侦探 + */ + async execute( + context: SkillContext, + config?: DataForensicsConfig + ): Promise> { + const checkLevel = config?.checkLevel || 'L1_L2_L25'; + const tolerancePercent = config?.tolerancePercent || 0.1; + + logger.info({ + taskId: context.taskId, + checkLevel, + tolerancePercent, + }, '[DataForensicsSkill] Starting analysis'); + + try { + // 使用依赖注入的 client + const result = await this.extractionClient.analyzeDocx(context.documentPath, { + checkLevel, + tolerancePercent, + }); + + // 转换为内部格式 + const forensicsResult = this.convertResult(result); + + // 计算状态和评分 + const hasErrors = forensicsResult.summary.errorCount > 0; + const hasWarnings = forensicsResult.summary.warningCount > 0; + + let status: 'success' | 'warning' | 'error'; + let score: number; + + if (hasErrors) { + status = 'error'; + score = Math.max(0, 100 - forensicsResult.summary.errorCount * 20); + } else if (hasWarnings) { + status = 'warning'; + score = Math.max(60, 100 - forensicsResult.summary.warningCount * 5); + } else { + status = 'success'; + score = 100; + } + + logger.info({ + taskId: context.taskId, + tableCount: forensicsResult.summary.totalTables, + issueCount: forensicsResult.summary.totalIssues, + errorCount: forensicsResult.summary.errorCount, + warningCount: forensicsResult.summary.warningCount, + }, '[DataForensicsSkill] Analysis completed'); + + return { + status, + score, + scoreLabel: this.getScoreLabel(score), + issues: forensicsResult.issues, + data: forensicsResult, + }; + } catch (error: unknown) { + // 特殊处理:Python 服务不可用时的优雅降级 + const errorObj = error as NodeJS.ErrnoException; + if (errorObj.code === 'ECONNREFUSED' || errorObj.code === 'ETIMEDOUT') { + logger.warn({ + taskId: context.taskId, + error: errorObj.message, + }, '[DataForensicsSkill] Python service unavailable, degrading gracefully'); + + return { + status: 'warning', + issues: [{ + severity: 'WARNING', + type: 'SERVICE_UNAVAILABLE', + message: '数据验证服务暂不可用,已跳过表格验证。建议稍后重试。', + }], + data: { + tables: [], + methods: [], + issues: [], + summary: { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 1 }, + } as ForensicsResult, + }; + } + + throw error; + } + } + + /** + * 转换 Python 返回的结果为内部格式 + */ + private convertResult(result: ClientForensicsResult): ForensicsResult { + const issues: Issue[] = result.issues.map(issue => ({ + severity: issue.severity, + type: issue.type, + message: issue.message, + location: issue.location, + evidence: issue.evidence, + })); + + return { + tables: result.tables.map(t => ({ + id: t.id, + caption: t.caption, + data: t.data, + html: t.html, + headers: t.headers, + rowCount: t.rowCount, + colCount: t.colCount, + })), + methods: result.methods, + issues, + summary: { + totalTables: result.summary.totalTables, + totalIssues: result.summary.totalIssues, + errorCount: result.summary.errorCount, + warningCount: result.summary.warningCount, + }, + }; + } +} + +// 导出单例 +export const dataForensicsSkill = new DataForensicsSkill(); diff --git a/backend/src/modules/rvw/skills/library/EditorialSkill.ts b/backend/src/modules/rvw/skills/library/EditorialSkill.ts new file mode 100644 index 00000000..4085b785 --- /dev/null +++ b/backend/src/modules/rvw/skills/library/EditorialSkill.ts @@ -0,0 +1,187 @@ +/** + * RVW V2.0 Skills 架构 - 稿约规范性评估 Skill + * + * 封装现有的 editorialService + * 特性:资源限制检查、LLM 调用 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { BaseSkill } from './BaseSkill.js'; +import { + SkillMetadata, + SkillContext, + SkillResult, + EditorialConfigSchema, + EditorialConfig, + Issue, +} from '../core/types.js'; +import { reviewEditorialStandards } from '../../services/editorialService.js'; +import { EditorialReview, EditorialItem } from '../../types/index.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * 默认最大内容长度 + */ +const DEFAULT_MAX_CONTENT_LENGTH = 100000; + +/** + * 稿约规范性评估 Skill + */ +export class EditorialSkill extends BaseSkill { + /** + * Zod 配置 Schema + */ + readonly configSchema = EditorialConfigSchema; + + readonly metadata: SkillMetadata = { + id: 'EditorialSkill', + name: '稿约规范性评估', + description: '评估稿件是否符合期刊稿约规范(11项标准)', + version: '2.0.0', + category: 'editorial', + + inputs: ['documentContent'], + outputs: ['editorialResult'], + + defaultTimeout: 45000, // 45 秒 + retryable: true, + + icon: '📋', + color: '#52c41a', + }; + + /** + * 前置检查 + */ + canRun(context: SkillContext): boolean { + if (!context.documentContent || context.documentContent.trim().length === 0) { + logger.warn({ taskId: context.taskId }, '[EditorialSkill] No document content'); + return false; + } + + // 资源限制检查 + const maxLength = DEFAULT_MAX_CONTENT_LENGTH; + if (context.documentContent.length > maxLength) { + logger.warn({ + taskId: context.taskId, + contentLength: context.documentContent.length, + limit: maxLength, + }, '[EditorialSkill] Content too long'); + return false; + } + + return true; + } + + /** + * 执行稿约规范性评估 + */ + async execute( + context: SkillContext, + config?: EditorialConfig + ): Promise> { + const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH; + + logger.info({ + taskId: context.taskId, + contentLength: context.documentContent.length, + }, '[EditorialSkill] Starting evaluation'); + + // 截断过长内容 + let content = context.documentContent; + if (content.length > maxContentLength) { + content = content.substring(0, maxContentLength); + logger.warn({ + taskId: context.taskId, + originalLength: context.documentContent.length, + truncatedLength: maxContentLength, + }, '[EditorialSkill] Content truncated'); + } + + // 调用现有 editorialService + const result = await reviewEditorialStandards(content, 'deepseek-v3', context.userId); + + // 转换为 SkillResult 格式 + const issues = this.convertToIssues(result); + + // 计算状态 + const errorCount = issues.filter(i => i.severity === 'ERROR').length; + const warningCount = issues.filter(i => i.severity === 'WARNING').length; + + let status: 'success' | 'warning' | 'error'; + if (errorCount > 0) { + status = 'error'; + } else if (warningCount > 0) { + status = 'warning'; + } else { + status = 'success'; + } + + logger.info({ + taskId: context.taskId, + score: result.overall_score, + itemCount: result.items.length, + errorCount, + warningCount, + }, '[EditorialSkill] Evaluation completed'); + + return { + status, + score: result.overall_score, + scoreLabel: this.getScoreLabel(result.overall_score), + issues, + data: result, + }; + } + + /** + * 将 EditorialReview 转换为 Issue 列表 + */ + private convertToIssues(result: EditorialReview): Issue[] { + const issues: Issue[] = []; + + for (const item of result.items) { + if (item.status === 'fail') { + issues.push({ + severity: 'ERROR', + type: `EDITORIAL_${this.normalizeType(item.criterion)}`, + message: item.issues.join('; ') || item.criterion, + evidence: { + criterion: item.criterion, + score: item.score, + suggestions: item.suggestions, + }, + }); + } else if (item.status === 'warning') { + issues.push({ + severity: 'WARNING', + type: `EDITORIAL_${this.normalizeType(item.criterion)}`, + message: item.issues.join('; ') || item.criterion, + evidence: { + criterion: item.criterion, + score: item.score, + suggestions: item.suggestions, + }, + }); + } + } + + return issues; + } + + /** + * 规范化类型名称 + */ + private normalizeType(criterion: string): string { + return criterion + .toUpperCase() + .replace(/\s+/g, '_') + .replace(/[^A-Z0-9_]/g, '') + .substring(0, 30); + } +} + +// 导出单例 +export const editorialSkill = new EditorialSkill(); diff --git a/backend/src/modules/rvw/skills/library/MethodologySkill.ts b/backend/src/modules/rvw/skills/library/MethodologySkill.ts new file mode 100644 index 00000000..b05d97bb --- /dev/null +++ b/backend/src/modules/rvw/skills/library/MethodologySkill.ts @@ -0,0 +1,191 @@ +/** + * RVW V2.0 Skills 架构 - 方法学评估 Skill + * + * 封装现有的 methodologyService + * 特性:资源限制检查、可利用前置 Skill 的统计方法检测结果 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { BaseSkill } from './BaseSkill.js'; +import { + SkillMetadata, + SkillContext, + SkillResult, + MethodologyConfigSchema, + MethodologyConfig, + Issue, +} from '../core/types.js'; +import { reviewMethodology } from '../../services/methodologyService.js'; +import { MethodologyReview, MethodologyIssue } from '../../types/index.js'; +import { logger } from '../../../../common/logging/index.js'; + +/** + * 默认最大内容长度 + */ +const DEFAULT_MAX_CONTENT_LENGTH = 100000; + +/** + * 方法学评估 Skill + */ +export class MethodologySkill extends BaseSkill { + /** + * Zod 配置 Schema + */ + readonly configSchema = MethodologyConfigSchema; + + readonly metadata: SkillMetadata = { + id: 'MethodologySkill', + name: '方法学评估', + description: '评估研究设计、统计方法和结果报告的科学性(20个检查点)', + version: '2.0.0', + category: 'methodology', + + inputs: ['documentContent', 'methods'], + outputs: ['methodologyResult'], + + defaultTimeout: 45000, // 45 秒 + retryable: true, + + icon: '🔬', + color: '#722ed1', + }; + + /** + * 前置检查 + */ + canRun(context: SkillContext): boolean { + if (!context.documentContent || context.documentContent.trim().length === 0) { + logger.warn({ taskId: context.taskId }, '[MethodologySkill] No document content'); + return false; + } + + // 资源限制检查 + const maxLength = DEFAULT_MAX_CONTENT_LENGTH; + if (context.documentContent.length > maxLength) { + logger.warn({ + taskId: context.taskId, + contentLength: context.documentContent.length, + limit: maxLength, + }, '[MethodologySkill] Content too long'); + return false; + } + + return true; + } + + /** + * 执行方法学评估 + */ + async execute( + context: SkillContext, + config?: MethodologyConfig + ): Promise> { + const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH; + + logger.info({ + taskId: context.taskId, + contentLength: context.documentContent.length, + detectedMethods: context.methods?.length || 0, + }, '[MethodologySkill] Starting evaluation'); + + // 截断过长内容 + let content = context.documentContent; + if (content.length > maxContentLength) { + content = content.substring(0, maxContentLength); + logger.warn({ + taskId: context.taskId, + originalLength: context.documentContent.length, + truncatedLength: maxContentLength, + }, '[MethodologySkill] Content truncated'); + } + + // 如果 DataForensicsSkill 提取了统计方法,可以添加到 prompt 中 + // 目前 reviewMethodology 不支持此参数,留作未来扩展 + const methodsHint = context.methods?.join(', ') || ''; + if (methodsHint) { + logger.debug({ + taskId: context.taskId, + methodsHint, + }, '[MethodologySkill] Using detected methods as hint'); + } + + // 调用现有 methodologyService + const result = await reviewMethodology(content, 'deepseek-v3', context.userId); + + // 转换为 SkillResult 格式 + const issues = this.convertToIssues(result); + + // 计算状态 + const errorCount = issues.filter(i => i.severity === 'ERROR').length; + const warningCount = issues.filter(i => i.severity === 'WARNING').length; + + let status: 'success' | 'warning' | 'error'; + if (errorCount > 0) { + status = 'error'; + } else if (warningCount > 0) { + status = 'warning'; + } else { + status = 'success'; + } + + logger.info({ + taskId: context.taskId, + score: result.overall_score, + partCount: result.parts.length, + errorCount, + warningCount, + }, '[MethodologySkill] Evaluation completed'); + + return { + status, + score: result.overall_score, + scoreLabel: this.getScoreLabel(result.overall_score), + issues, + data: result, + }; + } + + /** + * 将 MethodologyReview 转换为 Issue 列表 + */ + private convertToIssues(result: MethodologyReview): Issue[] { + const issues: Issue[] = []; + + for (const part of result.parts) { + for (const issue of part.issues) { + issues.push({ + severity: issue.severity === 'major' ? 'ERROR' : 'WARNING', + type: `METHODOLOGY_${this.normalizeType(issue.type)}`, + message: issue.description, + location: { + paragraph: undefined, // 可以根据 issue.location 解析 + }, + evidence: { + part: part.part, + issueType: issue.type, + location: issue.location, + suggestion: issue.suggestion, + }, + }); + } + } + + return issues; + } + + /** + * 规范化类型名称 + */ + private normalizeType(type: string): string { + return type + .toUpperCase() + .replace(/\s+/g, '_') + .replace(/[^A-Z0-9_]/g, '') + .substring(0, 30); + } +} + +// 导出单例 +export const methodologySkill = new MethodologySkill(); diff --git a/backend/src/modules/rvw/skills/library/index.ts b/backend/src/modules/rvw/skills/library/index.ts new file mode 100644 index 00000000..acc49a83 --- /dev/null +++ b/backend/src/modules/rvw/skills/library/index.ts @@ -0,0 +1,54 @@ +/** + * RVW V2.0 Skills 架构 - Skills Library 统一导出 + * + * 提供 Skills 注册入口和统一导出 + * + * @version 2.0.0 + * @since 2026-02-18 + */ + +import { SkillRegistry } from '../core/registry.js'; +import { dataForensicsSkill, DataForensicsSkill } from './DataForensicsSkill.js'; +import { editorialSkill, EditorialSkill } from './EditorialSkill.js'; +import { methodologySkill, MethodologySkill } from './MethodologySkill.js'; + +/** + * 注册所有内置 Skills + */ +export function registerBuiltinSkills(): void { + SkillRegistry.registerAll([ + dataForensicsSkill, + editorialSkill, + methodologySkill, + ]); + + SkillRegistry.markInitialized(); +} + +/** + * 获取所有内置 Skills(用于测试) + */ +export function getBuiltinSkills() { + return [ + dataForensicsSkill, + editorialSkill, + methodologySkill, + ]; +} + +// 导出 Skill 类(用于类型引用和测试) +export { + DataForensicsSkill, + EditorialSkill, + MethodologySkill, +}; + +// 导出单例(用于直接调用) +export { + dataForensicsSkill, + editorialSkill, + methodologySkill, +}; + +// 导出基类 +export { BaseSkill } from './BaseSkill.js'; diff --git a/backend/src/modules/rvw/workers/reviewWorker.ts b/backend/src/modules/rvw/workers/reviewWorker.ts index 80bb98cd..e79d7c0f 100644 --- a/backend/src/modules/rvw/workers/reviewWorker.ts +++ b/backend/src/modules/rvw/workers/reviewWorker.ts @@ -1,15 +1,18 @@ /** * RVW稿件审查 Worker(Platform-Only架构) * + * V2.0 Skills 架构改造: + * - 使用 SkillExecutor 执行 Skills Pipeline + * - 支持 Profile 配置的审稿策略 + * - 保留向后兼容(通过环境变量控制) + * * ✅ Platform-Only架构: * - 使用 pg-boss 队列处理审查任务 * - 任务状态存储在 job.state (pg-boss管理) * - 审查结果更新到 ReviewTask表(业务信息) * - * 任务流程: - * 1. 获取任务信息和提取的文本 - * 2. 根据选择的智能体执行审查 - * 3. 更新任务状态和结果 + * @version 2.0.0 + * @since 2026-02-18 */ import { prisma } from '../../../config/database.js'; @@ -24,6 +27,21 @@ import { calculateOverallScore, getMethodologyStatus } from '../services/utils.j import type { AgentType, EditorialReview, MethodologyReview } from '../types/index.js'; import { activityService } from '../../../common/services/activity.service.js'; +// V2.0 Skills 架构导入 +import { + SkillExecutor, + ProfileResolver, + createPartialContextFromTask, + registerBuiltinSkills, + ExecutionSummary, +} from '../skills/index.js'; + +/** + * 是否使用 V2.0 Skills 架构 + * 通过环境变量控制,默认开启(MVP 阶段可随时回滚) + */ +const USE_SKILLS_ARCHITECTURE = process.env.RVW_USE_SKILLS !== 'false'; + /** * 审查任务数据结构 */ @@ -35,15 +53,32 @@ interface ReviewJob { modelType: ModelType; } +/** + * 初始化 Skills(仅执行一次) + */ +let skillsInitialized = false; +function ensureSkillsInitialized() { + if (!skillsInitialized && USE_SKILLS_ARCHITECTURE) { + registerBuiltinSkills(); + skillsInitialized = true; + logger.info('[reviewWorker] Skills architecture initialized'); + } +} + /** * 注册审查 Worker 到队列 * * 此函数应在应用启动时调用(index.ts) */ export function registerReviewWorker() { - logger.info('[reviewWorker] Registering reviewWorker'); + logger.info('[reviewWorker] Registering reviewWorker', { + useSkillsArchitecture: USE_SKILLS_ARCHITECTURE, + }); - // 注册审查Worker(队列名使用下划线,不用冒号) + // 初始化 Skills + ensureSkillsInitialized(); + + // 注册审查Worker jobQueue.process('rvw_review_task', async (job: Job) => { const { taskId, userId, agents, extractedText, modelType } = job.data; const startTime = Date.now(); @@ -54,6 +89,7 @@ export function registerReviewWorker() { userId, agents, textLength: extractedText.length, + useSkillsArchitecture: USE_SKILLS_ARCHITECTURE, }); console.log(`\n📝 处理审查任务`); @@ -61,12 +97,20 @@ export function registerReviewWorker() { console.log(` Task ID: ${taskId}`); console.log(` 智能体: ${agents.join(', ')}`); console.log(` 文本长度: ${extractedText.length} 字符`); + console.log(` 架构: ${USE_SKILLS_ARCHITECTURE ? 'V2.0 Skills' : 'V1.0 Legacy'}`); try { // ✅ 检查任务是否已经完成(防止重复处理) const existingTask = await prisma.reviewTask.findUnique({ where: { id: taskId }, - select: { status: true, completedAt: true, overallScore: true }, + select: { + status: true, + completedAt: true, + overallScore: true, + filePath: true, + fileName: true, + fileSize: true, + }, }); if (existingTask?.status === 'completed' && existingTask.completedAt) { @@ -77,83 +121,132 @@ export function registerReviewWorker() { overallScore: existingTask.overallScore, }); console.log(`\n⚠️ 任务已完成,跳过重复处理`); - console.log(` Task ID: ${taskId}`); - console.log(` 完成时间: ${existingTask.completedAt}`); - console.log(` 得分: ${existingTask.overallScore}`); return { taskId, skipped: true, reason: 'Task already completed', }; } + // ======================================== - // 1. 运行选中的智能体 + // 根据架构选择执行路径 // ======================================== let editorialResult: EditorialReview | null = null; let methodologyResult: MethodologyReview | null = null; + let skillsSummary: ExecutionSummary | null = null; - if (agents.includes('editorial')) { - // 更新进度状态 - await prisma.reviewTask.update({ - where: { id: taskId }, - data: { status: 'reviewing_editorial' }, - }); - - logger.info('[reviewWorker] Running editorial review', { taskId }); - console.log(' 🔍 运行稿约规范性智能体...'); - - // ✅ Phase 3.5.5: 传递 userId 支持灰度预览 - editorialResult = await reviewEditorialStandards(extractedText, modelType, userId); - - logger.info('[reviewWorker] Editorial review completed', { + if (USE_SKILLS_ARCHITECTURE) { + // ======================================== + // V2.0 Skills 架构 + // ======================================== + skillsSummary = await executeWithSkills( taskId, - score: editorialResult?.overall_score, - }); - console.log(` ✅ 稿约规范性完成,得分: ${editorialResult?.overall_score}`); - } + userId, + agents, + extractedText, + existingTask?.filePath || '', + existingTask?.fileName || 'unknown.docx', + existingTask?.fileSize || 0 + ); - if (agents.includes('methodology')) { - // 更新进度状态 - await prisma.reviewTask.update({ - where: { id: taskId }, - data: { status: 'reviewing_methodology' }, - }); + // 从 Skills 结果中提取兼容数据 + const editorialSkillResult = skillsSummary.results.find(r => r.skillId === 'EditorialSkill'); + const methodologySkillResult = skillsSummary.results.find(r => r.skillId === 'MethodologySkill'); - logger.info('[reviewWorker] Running methodology review', { taskId }); - console.log(' 🔬 运行方法学智能体...'); - - // ✅ Phase 3.5.5: 传递 userId 支持灰度预览 - methodologyResult = await reviewMethodology(extractedText, modelType, userId); - - logger.info('[reviewWorker] Methodology review completed', { + if (editorialSkillResult?.status !== 'skipped' && editorialSkillResult?.data) { + editorialResult = editorialSkillResult.data as EditorialReview; + } + if (methodologySkillResult?.status !== 'skipped' && methodologySkillResult?.data) { + methodologyResult = methodologySkillResult.data as MethodologyReview; + } + + logger.info('[reviewWorker] Skills execution completed', { taskId, - score: methodologyResult?.overall_score, + overallStatus: skillsSummary.overallStatus, + skillCount: skillsSummary.totalSkills, + successCount: skillsSummary.successCount, + errorCount: skillsSummary.errorCount, }); - console.log(` ✅ 方法学评估完成,得分: ${methodologyResult?.overall_score}`); + } else { + // ======================================== + // V1.0 Legacy 架构 + // ======================================== + if (agents.includes('editorial')) { + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { status: 'reviewing_editorial' }, + }); + + logger.info('[reviewWorker] Running editorial review (legacy)', { taskId }); + console.log(' 🔍 运行稿约规范性智能体...'); + + editorialResult = await reviewEditorialStandards(extractedText, modelType, userId); + + logger.info('[reviewWorker] Editorial review completed', { + taskId, + score: editorialResult?.overall_score, + }); + console.log(` ✅ 稿约规范性完成,得分: ${editorialResult?.overall_score}`); + } + + if (agents.includes('methodology')) { + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { status: 'reviewing_methodology' }, + }); + + logger.info('[reviewWorker] Running methodology review (legacy)', { taskId }); + console.log(' 🔬 运行方法学智能体...'); + + methodologyResult = await reviewMethodology(extractedText, modelType, userId); + + logger.info('[reviewWorker] Methodology review completed', { + taskId, + score: methodologyResult?.overall_score, + }); + console.log(` ✅ 方法学评估完成,得分: ${methodologyResult?.overall_score}`); + } } // ======================================== - // 2. 计算综合分数 + // 计算综合分数 // ======================================== const editorialScore = editorialResult?.overall_score ?? null; const methodologyScore = methodologyResult?.overall_score ?? null; const overallScore = calculateOverallScore(editorialScore, methodologyScore, agents); - // 计算耗时 const endTime = Date.now(); const durationSeconds = Math.floor((endTime - startTime) / 1000); // ======================================== - // 3. 更新任务结果 + // 更新任务结果 // ======================================== logger.info('[reviewWorker] Updating task result', { taskId }); + // 构建 Skills 执行摘要(V2.0 新增,存储到 picoExtract 字段) + // 注意:picoExtract 字段暂时复用,未来迁移后移到专用字段 + const skillsContext = USE_SKILLS_ARCHITECTURE && skillsSummary + ? { + version: '2.0', + executedAt: new Date().toISOString(), + summary: { + overallStatus: skillsSummary.overallStatus, + totalSkills: skillsSummary.totalSkills, + successCount: skillsSummary.successCount, + errorCount: skillsSummary.errorCount, + totalExecutionTime: skillsSummary.totalExecutionTime, + }, + forensicsResult: skillsSummary.results.find(r => r.skillId === 'DataForensicsSkill')?.data, + } + : null; + await prisma.reviewTask.update({ where: { id: taskId }, data: { status: 'completed', editorialReview: editorialResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, methodologyReview: methodologyResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, + picoExtract: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, overallScore, editorialScore: editorialScore, methodologyScore: methodologyScore, @@ -171,6 +264,7 @@ export function registerReviewWorker() { methodologyScore, overallScore, durationSeconds, + architecture: USE_SKILLS_ARCHITECTURE ? 'skills' : 'legacy', }); console.log('\n✅ 审查完成:'); @@ -179,7 +273,7 @@ export function registerReviewWorker() { console.log(` 耗时: ${durationSeconds}秒`); // ======================================== - // 4. 埋点:记录审查完成 + // 埋点:记录审查完成 // ======================================== try { const user = await prisma.user.findUnique({ @@ -201,7 +295,6 @@ export function registerReviewWorker() { ); } } catch (e) { - // 埋点失败不影响主业务 logger.warn('[reviewWorker] 埋点失败', { error: e }); } @@ -212,23 +305,27 @@ export function registerReviewWorker() { methodologyScore, durationSeconds, success: true, + architecture: USE_SKILLS_ARCHITECTURE ? 'skills' : 'legacy', }; - } catch (error: any) { + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + const errorStack = error instanceof Error ? error.stack : undefined; + logger.error('[reviewWorker] ❌ Review failed', { jobId: job.id, taskId, - error: error.message, - stack: error.stack, + error: errorMessage, + stack: errorStack, }); - console.error(`\n❌ 审查失败: ${error.message}`); + console.error(`\n❌ 审查失败: ${errorMessage}`); // 更新任务状态为失败 await prisma.reviewTask.update({ where: { id: taskId }, data: { status: 'failed', - errorMessage: error.message || 'Review failed', + errorMessage: errorMessage || 'Review failed', }, }); @@ -240,4 +337,66 @@ export function registerReviewWorker() { logger.info('[reviewWorker] ✅ Worker registered: rvw_review_task'); } +/** + * 使用 V2.0 Skills 架构执行审查 + */ +async function executeWithSkills( + taskId: string, + userId: string, + agents: AgentType[], + extractedText: string, + filePath: string, + fileName: string, + fileSize: number +): Promise { + // 更新状态 + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { status: 'reviewing' }, + }); + // 构建 Profile + const profile = ProfileResolver.resolveFromAgents(agents); + + logger.info('[reviewWorker] Using Skills architecture', { + taskId, + profileId: profile.id, + pipelineLength: profile.pipeline.length, + }); + + console.log(` 🚀 使用 V2.0 Skills 架构`); + console.log(` Profile: ${profile.name}`); + console.log(` Pipeline: ${profile.pipeline.map(p => p.skillId).join(' → ')}`); + + // 构建上下文 + const partialContext = createPartialContextFromTask({ + id: taskId, + userId, + filePath, + content: extractedText, + fileName, + fileSize, + }); + + // 执行 Pipeline + const executor = new SkillExecutor(); + const summary = await executor.execute(profile, partialContext); + + // 输出执行结果 + console.log(`\n 📊 Skills 执行结果:`); + for (const result of summary.results) { + const statusIcon = result.status === 'success' ? '✅' : + result.status === 'warning' ? '⚠️' : + result.status === 'error' ? '❌' : + result.status === 'skipped' ? '⏭️' : '⏱️'; + console.log(` ${statusIcon} ${result.skillName}: ${result.status} (${result.executionTime}ms)`); + if (result.score !== undefined) { + console.log(` 得分: ${result.score}`); + } + if (result.issues.length > 0) { + console.log(` 问题: ${result.issues.length} 个`); + } + } + + return summary; +} diff --git a/docs/00-系统总体设计/00-系统当前状态与开发指南.md b/docs/00-系统总体设计/00-系统当前状态与开发指南.md index b6bcae03..9f79ca73 100644 --- a/docs/00-系统总体设计/00-系统当前状态与开发指南.md +++ b/docs/00-系统总体设计/00-系统当前状态与开发指南.md @@ -1,10 +1,11 @@ # AIclinicalresearch 系统当前状态与开发指南 -> **文档版本:** v5.0 +> **文档版本:** v5.1 > **创建日期:** 2025-11-28 > **维护者:** 开发团队 -> **最后更新:** 2026-02-17 +> **最后更新:** 2026-02-18 > **🎉 重大里程碑:** +> - **2026-02-18:RVW V2.0 Skills 架构完成!** Skills 核心框架 + 3个 Skill 实现 + ReviewWorker 改造 > - **2026-02-17:RVW V2.0 "数据侦探" Day 6 完成!** L2统计验证器 + L2.5一致性取证(SE三角验证、SD>Mean) > - **2026-02-08:IIT 事件级质控 V3.1 开发完成!** record+event 独立质控 + 规则动态过滤 + 报告去重 + AI对话增强 > - **2026-02-08:IIT 质控驾驶舱 UI 完成!** XML 临床切片格式 + 质控驾驶舱 + 热力图 + 详情抽屉 @@ -17,13 +18,13 @@ > - **2026-01-24:Protocol Agent 框架完成!** 可复用Agent框架+5阶段对话流程 > - **2026-01-22:OSS 存储集成完成!** 阿里云 OSS 正式接入平台基础层 > -> **最新进展(RVW V2.0 "数据侦探" 2026-02-17):** -> - ✅ **L2 统计验证器**:CI↔P值一致性检查、T检验逆向验证 -> - ✅ **L2.5 一致性取证**:SE三角验证(Logistic/Cox回归)、SD>Mean检查 -> - ✅ **Error/Warning 分级**:容错阈值配置,避免"狼来了"效应 -> - ✅ **多格式 CI 解析**:支持5+种医学文献常见CI格式 -> - ✅ **单元测试通过**:4/4 功能模块测试全部通过 -> - ✅ **真实文档验证**:5篇测试稿件处理成功,2个合理WARNING +> **最新进展(RVW V2.0 Skills 架构 2026-02-18):** +> - ✅ **Skills 核心框架**:types、registry、executor、profile、context +> - ✅ **Zod 配置验证**:运行时类型安全 +> - ✅ **DataForensicsSkill**:依赖注入 + 路径安全 + 优雅降级 +> - ✅ **EditorialSkill + MethodologySkill**:封装现有服务 +> - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 架构切换 +> - ✅ **12 个新文件**:约 1735 行代码 > > **部署状态:** ✅ 生产环境运行中 | 公网地址:http://8.140.53.236/ > **REDCap 状态:** ✅ 生产环境运行中 | 地址:https://redcap.xunzhengyixue.com/ @@ -66,7 +67,7 @@ | **IIT** | IIT Manager Agent | AI驱动IIT研究助手 - 双脑架构+REDCap集成 | ⭐⭐⭐⭐⭐ | 🎉 **事件级质控V3.1完成(设计100%,代码60%)** | **P0** | | **SSA** | 智能统计分析 | 队列/预测模型/RCT分析 | ⭐⭐⭐⭐⭐ | 📋 规划中 | P2 | | **ST** | 统计分析工具 | 100+轻量化统计工具 | ⭐⭐⭐⭐ | 📋 规划中 | P2 | -| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0开发中(Week2 Day6完成)** - 统计验证器+一致性取证 | P1 | +| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Skills架构 + Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0开发中(Week2 Day10完成)** - Skills核心框架+Skill实现+Worker改造 | P1 | | **ADMIN** | 运营管理端 | Prompt管理、租户管理、用户管理、运营监控、系统知识库 | ⭐⭐⭐⭐⭐ | 🎉 **Phase 4.6完成(88%)** - Prompt知识库集成+动态注入 | **P0** | --- diff --git a/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md b/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md index 07415172..85f99920 100644 --- a/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md +++ b/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md @@ -1,18 +1,21 @@ # RVW稿件审查模块 - 当前状态与开发指南 -> **文档版本:** v4.0 +> **文档版本:** v5.0 > **创建日期:** 2026-01-07 -> **最后更新:** 2026-02-17 +> **最后更新:** 2026-02-18 > **维护者:** 开发团队 -> **当前状态:** 🚀 **V2.0 "数据侦探" 开发中(Week 2 Day 6 完成)** +> **当前状态:** 🚀 **V2.0 "数据侦探" 开发中(Week 2 Day 10 完成)** > **文档目的:** 快速了解RVW模块状态,为新AI助手提供上下文 > -> **🎉 V2.0 进展(2026-02-17):** +> **🎉 V2.0 进展(2026-02-18):** > - ✅ **L1 算术验证器**:行列加总、百分比验证(Day 3) > - ✅ **L2 统计验证器**:CI↔P 值一致性、T检验逆向验证(Day 6) > - ✅ **L2.5 一致性取证**:SE三角验证、SD>Mean检查(Day 6 终审提权) > - ✅ **Word 文档解析**:python-docx 表格提取(Day 2) -> - ⏳ **Skills 框架**:Day 7-10 计划 +> - ✅ **Skills 核心框架**:types、registry、executor、profile、context(Day 7) +> - ✅ **DataForensicsSkill**:依赖注入、路径安全、优雅降级(Day 8) +> - ✅ **EditorialSkill + MethodologySkill**:封装现有服务(Day 9) +> - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 切换(Day 10) --- @@ -85,7 +88,11 @@ backend/src/modules/rvw/ │ ├── methodologyService.ts # 方法学评估 │ └── utils.ts # 工具函数 ├── workers/ -│ └── reviewWorker.ts # pg-boss异步任务处理 +│ └── reviewWorker.ts # pg-boss异步任务处理(V2.0 Skills集成) +├── skills/ # 🆕 V2.0 Skills 架构 +│ ├── core/ # 核心框架(types, registry, executor等) +│ ├── library/ # Skill 实现(Forensics, Editorial, Methodology) +│ └── index.ts # 模块入口 ├── types/index.ts # TypeScript类型定义 └── __tests__/ # API测试脚本 diff --git a/docs/03-业务模块/RVW-稿件审查系统/04-开发计划/RVW V2.0 Skills 架构技术设计文档.md b/docs/03-业务模块/RVW-稿件审查系统/04-开发计划/RVW V2.0 Skills 架构技术设计文档.md new file mode 100644 index 00000000..e35ea015 --- /dev/null +++ b/docs/03-业务模块/RVW-稿件审查系统/04-开发计划/RVW V2.0 Skills 架构技术设计文档.md @@ -0,0 +1,2601 @@ +# RVW V2.0 Skills 架构技术设计文档 + +> **文档版本:** v1.1 (含审查意见) +> **创建日期:** 2026-02-17 +> **最后更新:** 2026-02-17 +> **维护者:** 开发团队 +> **状态:** ✅ 审查通过,准许开发 +> **关联开发:** Day 7-10(Week 2) +> **审查报告:** [RVW V2.0 Skills 架构深度审查报告](../06-开发记录/RVW%20V2.0%20Skills%20架构深度审查报告.md) + +--- + +## 📋 目录 + +1. [设计目标](#1-设计目标) +2. [架构总览](#2-架构总览) +3. [核心概念定义](#3-核心概念定义) +4. [核心组件设计](#4-核心组件设计) +5. [Skill 实现规范](#5-skill-实现规范) +6. [Profile 配置系统](#6-profile-配置系统) +7. [执行引擎设计](#7-执行引擎设计) +8. [与现有系统集成](#8-与现有系统集成) +9. [安全性设计](#9-安全性设计) 🆕 +10. [系统演进战略](#10-系统演进战略) 🆕 +11. [演进路线图](#11-演进路线图) +12. [开发计划](#12-开发计划) +13. [附录](#13-附录) + +--- + +## 1. 设计目标 + +### 1.1 核心理念 + +**认知依赖注入 (Cognitive Dependency Injection)** —— 将审稿能力封装为原子化的 Skills,通过配置引擎(Profile)动态注入给审稿流程。 + +### 1.2 设计原则 + +| 原则 | 说明 | 实践 | +|------|------|------| +| **配置优于代码** | 审稿策略通过配置定义,非硬编码 | Profile 配置文件/数据库 | +| **渐进式复杂度** | 简单场景用默认配置,复杂场景可深度定制 | 分层配置体系 | +| **故障隔离** | 单个 Skill 失败不影响整体流程 | 超时熔断 + 降级策略 | +| **可观测性** | 每个 Skill 执行有完整日志和指标 | 结构化日志 + 执行时间追踪 | +| **向后兼容** | 新架构兼容现有 V1.0 代码 | 渐进式迁移 | +| 🆕 **核心解耦** | skills/core 必须与业务代码解耦 | 泛型设计 + 禁止反向依赖 | +| 🆕 **类型安全** | 配置参数运行时验证 | Zod Schema 验证 | +| 🆕 **可测试性** | 外部依赖可 Mock | 依赖注入模式 | + +### 1.3 🆕 架构红线(审查意见) + +> ⚠️ **核心框架解耦红线**:`skills/core` 目录下的代码必须是**纯粹通用**的,严禁依赖业务代码。 + +``` +✅ 允许的依赖方向: + skills/library/* → skills/core/* + skills/library/* → common/* + modules/rvw/workers/* → skills/* + +❌ 禁止的依赖方向(红线): + skills/core/* → modules/rvw/* ❌ 禁止 + skills/core/* → skills/library/* ❌ 禁止 +``` + +**原因**:Skills 框架未来将下沉到 `common/skills`,成为全系统(IIT、AIA、ASL)的通用能力底座。如果 core 耦合了 RVW 业务代码,将无法复用。 + +### 1.3 关键指标 + +| 指标 | MVP 目标 | V2.1 目标 | +|------|---------|-----------| +| Skill 执行超时熔断 | 30s | 可配置 | +| 单个 Skill 失败后继续执行 | ✅ 支持 | ✅ 支持 | +| Profile 配置方式 | 代码硬编码 | 数据库 + UI | +| 新增 Skill 方式 | 开发部署 | 热加载插件 | + +--- + +## 2. 架构总览 + +### 2.1 系统架构图 + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 输入层 (Input Layer) │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ 稿件文件 │ │ 用户选择 │ │ 期刊配置 │ │ +│ │ (Word/PDF) │ │ (Agents) │ │ (Profile) │ │ +│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ +└────────────┼─────────────────────┼─────────────────────┼────────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 编排引擎 (Orchestration Engine) │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Profile │───▶│ Skill │───▶│ Skill │ │ +│ │ Resolver │ │ Router │ │ Executor │ │ +│ │ 配置解析器 │ │ 路由分发 │ │ 执行引擎 │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌──────────────┐ │ │ +│ │ │ Skill │ │ │ +│ └──────────▶│ Registry │◀──────────┘ │ +│ │ 技能注册表 │ │ +│ └──────────────┘ │ +│ │ │ +└─────────────────────────────┼──────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 原子能力库 (Skills Library) │ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ 🐍 DataForensics│ │ 📋 Editorial │ │ 🔬 Methodology │ │ +│ │ Skill │ │ Skill │ │ Skill │ │ +│ │ 数据侦探 │ │ 稿约规范性 │ │ 方法学评估 │ │ +│ │ (Python调用) │ │ (LLM调用) │ │ (LLM调用) │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ 🛡️ Political │ │ 🧠 MedicalLogic │ │ 📊 Benchmark │ V2.1+ │ +│ │ Guardrail │ │ Skill │ │ Skill │ │ +│ │ 政治审查 │ │ 医学常识校验 │ │ 竞品对标 │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 共享上下文 (Shared Context) │ +│ ┌──────────────────────────────────────────────────────────────────┐ │ +│ │ taskId | documentContent | tables[] | methods[] | skillResults[] │ │ +│ └──────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 输出层 (Output Layer) │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ 📊 ReviewTask.contextData → 前端报告渲染 → Word 导出 │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### 2.2 数据流 + +``` +用户上传 .docx + │ + ▼ +ReviewService.createTask() + │ + ├─── 文本提取 (ExtractionClient → Python) + │ + ▼ +pg-boss 队列 (rvw_review_task) + │ + ▼ +ReviewWorker.process() + │ + ├─── ProfileResolver.resolve(taskId) → JournalProfile + │ + ├─── SkillRouter.buildPipeline(profile) → Skill[] + │ + ├─── SkillExecutor.execute(skills, context) + │ │ + │ ├─── DataForensicsSkill.run() → Python API + │ │ └─── 返回 tables[], methods[], issues[] + │ │ + │ ├─── EditorialSkill.run() → LLM + │ │ └─── 返回 editorialResult + │ │ + │ └─── MethodologySkill.run() → LLM + │ └─── 返回 methodologyResult + │ + └─── 汇总结果 → ReviewTask.contextData +``` + +### 2.3 目录结构 + +``` +backend/src/modules/rvw/ +├── routes/ +│ └── index.ts # API 路由 +├── controllers/ +│ └── reviewController.ts # 控制器 +├── services/ +│ ├── reviewService.ts # 核心服务(改造) +│ ├── editorialService.ts # → 迁移到 EditorialSkill +│ └── methodologyService.ts # → 迁移到 MethodologySkill +├── workers/ +│ └── reviewWorker.ts # pg-boss Worker(改造) +├── skills/ # 🆕 Skills 模块 +│ ├── core/ # 核心框架 +│ │ ├── types.ts # 类型定义 +│ │ ├── registry.ts # SkillRegistry +│ │ ├── executor.ts # SkillExecutor +│ │ ├── context.ts # SharedContext +│ │ └── profile.ts # ProfileResolver +│ ├── library/ # Skill 实现 +│ │ ├── DataForensicsSkill.ts +│ │ ├── EditorialSkill.ts +│ │ ├── MethodologySkill.ts +│ │ └── index.ts # 统一导出 +│ ├── profiles/ # Profile 配置 +│ │ ├── default.ts # 默认配置 +│ │ └── index.ts +│ └── index.ts # 模块入口 +└── types/ + └── index.ts +``` + +--- + +## 3. 核心概念定义 + +### 3.1 Skill(技能) + +Skill 是 RVW 系统中最小的可复用审稿能力单元。它封装了特定的审稿逻辑,可以是: +- **原生计算**:调用 Python 进行统计验证 +- **LLM 调用**:调用大模型进行文本分析 +- **知识检索**:调用向量数据库进行知识查询 +- **外部服务**:调用第三方 API + +**Skill 的三要素**: + +| 要素 | 说明 | 示例 | +|------|------|------| +| **语义接口** | 描述 Skill 的功能,供 LLM 理解 | "验证表格数据的算术正确性" | +| **数据契约** | 严格定义的输入输出 Schema | `input: TableData[], output: Issue[]` | +| **原生函数** | 实际执行逻辑的代码 | `DataForensicsSkill.run()` | + +### 3.2 Profile(配置档案) + +Profile 定义了特定期刊/场景的审稿策略,包括: +- 启用哪些 Skills +- 每个 Skill 的配置参数 +- 执行顺序和依赖关系 +- 严格程度等级 + +### 3.3 Pipeline(执行管线) + +Pipeline 是根据 Profile 生成的 Skill 执行序列,支持: +- 顺序执行 +- 并行执行(V2.1) +- 条件分支(V2.2) + +### 3.4 Context(共享上下文) + +Context 是 Skills 之间传递数据的载体,包含: +- 任务基础信息 +- 文档内容 +- 提取的表格数据 +- 检测到的统计方法 +- 前置 Skill 的执行结果 + +--- + +## 4. 核心组件设计 + +### 4.1 类型定义 (types.ts) + +> 🆕 **审查意见采纳**: +> - 使用泛型 `TContext` 和 `TConfig` 确保框架通用性 +> - 引入 Zod 进行运行时配置验证 +> - 核心类型不包含 RVW 业务特定代码 + +```typescript +// backend/src/modules/rvw/skills/core/types.ts +// ⚠️ 注意:此文件未来将移动到 common/skills/core/types.ts +// ⚠️ 禁止在此文件中 import modules/rvw 下的业务代码 + +import { z } from 'zod'; + +/** + * ========================================== + * Skill 基础类型定义(通用) + * ========================================== + */ + +/** + * 问题严重程度 + */ +export type IssueSeverity = 'ERROR' | 'WARNING' | 'INFO'; + +/** + * Skill 执行状态 + */ +export type SkillStatus = 'success' | 'warning' | 'error' | 'timeout' | 'skipped'; + +/** + * Skill 分类 + */ +export type SkillCategory = 'forensics' | 'editorial' | 'methodology' | 'guardrail' | 'knowledge'; + +/** + * 问题定位信息 + */ +export interface IssueLocation { + tableId?: string; // 表格 ID + cellRef?: string; // R1C1 坐标,如 "R3C4" + paragraph?: number; // 段落编号 + lineRange?: [number, number]; // 行范围 +} + +/** + * 问题详情 + */ +export interface Issue { + severity: IssueSeverity; // 严重程度 + type: string; // 问题类型代码(如 ARITHMETIC_SUM_MISMATCH) + message: string; // 人类可读描述 + location?: IssueLocation; // 问题位置 + evidence?: { // 证据数据 + expected?: string | number; + actual?: string | number; + formula?: string; + [key: string]: any; + }; +} + +/** + * 表格数据结构 + */ +export interface TableData { + id: string; // 表格唯一标识 + caption: string; // 表格标题 + data: string[][]; // 二维数组数据 + html?: string; // 预渲染的 HTML 片段 + headers?: string[]; // 表头行 + rowCount: number; + colCount: number; +} + +/** + * ========================================== + * Skill Context(共享上下文)- 🆕 泛型设计 + * ========================================== + */ + +/** + * 基础上下文接口(通用) + * 🆕 使用泛型,支持不同业务模块扩展 + */ +export interface BaseSkillContext { + // 基础信息(通用) + taskId: string; + userId?: string; + + // 前置 Skill 结果(由 Executor 自动填充) + previousResults: SkillResult[]; + + // Profile 配置(泛型) + profile: TProfile; + + // 业务扩展字段(泛型) + extras?: TExtras; +} + +/** + * RVW 模块扩展字段 + * 🆕 业务特定字段放在 extras 或独立接口中 + */ +export interface RvwContextExtras { + documentPath: string; // 原始文件路径(用于 Python 处理) + documentContent: string; // Markdown 格式的文档内容 + documentMeta?: { + filename: string; + fileSize: number; + pageCount?: number; + }; + + // 数据侦探产出(DataForensicsSkill 填充) + tables?: TableData[]; + methods?: string[]; + forensicsResult?: ForensicsResult; +} + +/** + * RVW Skill 执行上下文(组合类型) + * 🆕 通过组合基础接口 + 业务扩展实现 + */ +export type SkillContext = BaseSkillContext & RvwContextExtras; + +/** + * 数据侦探结果(Python 返回) + */ +export interface ForensicsResult { + tables: TableData[]; + methods: string[]; + issues: Issue[]; + summary: { + totalTables: number; + totalIssues: number; + errorCount: number; + warningCount: number; + }; +} + +/** + * ========================================== + * Skill Result(执行结果) + * ========================================== + */ + +/** + * Skill 执行结果 + */ +export interface SkillResult { + skillId: string; // Skill 标识 + skillName: string; // Skill 名称 + status: SkillStatus; // 执行状态 + + // 评分(可选) + score?: number; // 0-100 评分 + scoreLabel?: string; // 评分标签(如 "优秀"、"良好") + + // 问题列表 + issues: Issue[]; + + // 结构化数据(Skill 特定) + data?: any; + + // 执行元信息 + executionTime: number; // 执行耗时 (ms) + timedOut?: boolean; // 是否超时 + error?: string; // 错误信息 + + // 时间戳 + startedAt: Date; + completedAt: Date; +} + +/** + * ========================================== + * Skill 接口定义 + * ========================================== + */ + +/** + * Skill 元数据(用于注册和 UI 展示) + */ +export interface SkillMetadata { + id: string; // 唯一标识(如 DataForensicsSkill) + name: string; // 显示名称 + description: string; // 功能描述 + version: string; // 版本号(语义化版本) + category: SkillCategory; // 分类 + + // 依赖声明 + inputs: string[]; // 需要的上下文字段 + outputs: string[]; // 产出的上下文字段 + + // 配置 Schema(JSON Schema 格式,用于 UI 生成配置表单) + configSchema?: object; + + // 执行配置 + defaultTimeout: number; // 默认超时时间 (ms) + retryable: boolean; // 是否可重试 + + // UI 相关 + icon?: string; // 图标 + color?: string; // 主题色 +} + +/** + * Skill 配置(运行时) + * 🆕 使用 Zod Schema 进行运行时验证 + */ +export interface SkillConfig { + [key: string]: unknown; +} + +/** + * 🆕 Zod 配置验证示例 + * 每个 Skill 应定义自己的配置 Schema + */ +// 示例:DataForensicsSkill 配置 Schema +export const DataForensicsConfigSchema = z.object({ + checkLevel: z.enum(['L1', 'L1_L2', 'L1_L2_L25']).default('L1_L2_L25'), + tolerancePercent: z.number().min(0).max(1).default(0.1), +}); +export type DataForensicsConfig = z.infer; + +// 示例:EditorialSkill 配置 Schema +export const EditorialConfigSchema = z.object({ + standard: z.enum(['default', 'chinese-core', 'international']).default('default'), + maxContentLength: z.number().default(100000), // 🆕 安全:资源限制 +}); +export type EditorialConfig = z.infer; + +/** + * Skill 接口(通用) + * 🆕 使用泛型支持不同上下文和配置类型 + * 所有 Skill 必须实现此接口 + */ +export interface Skill< + TContext extends BaseSkillContext = SkillContext, + TConfig extends SkillConfig = SkillConfig +> { + /** + * Skill 元数据 + */ + readonly metadata: SkillMetadata; + + /** + * 🆕 配置 Schema(Zod)- 用于运行时验证 + */ + readonly configSchema?: z.ZodSchema; + + /** + * 执行 Skill + * @param context 共享上下文 + * @param config 运行时配置(已验证) + * @returns 执行结果 + */ + run(context: TContext, config?: TConfig): Promise; + + /** + * 验证配置是否合法 + * 🆕 默认使用 configSchema 验证,可覆盖 + */ + validateConfig?(config: unknown): TConfig; + + /** + * 预检查 + * 在执行前检查前置条件,返回 false 则跳过执行 + * 🆕 可用于安全检查(如文档长度限制) + */ + canRun?(context: TContext): boolean; +} + +/** + * ========================================== + * Profile 配置 + * ========================================== + */ + +/** + * Pipeline 中的 Skill 配置项 + */ +export interface PipelineItem { + skillId: string; // Skill ID + enabled: boolean; // 是否启用 + config?: SkillConfig; // Skill 配置 + timeout?: number; // 自定义超时(覆盖默认值) + optional?: boolean; // 是否可选(失败不影响整体) +} + +/** + * 期刊 Profile 配置 + */ +export interface JournalProfile { + id: string; // Profile ID + name: string; // 期刊名称 + description?: string; // 描述 + + // 执行管线 + pipeline: PipelineItem[]; + + // 全局配置 + globalConfig?: { + strictness: 'STRICT' | 'STANDARD' | 'LENIENT'; + timeoutMultiplier?: number; // 超时系数 + continueOnError?: boolean; // 单个失败是否继续 + maxConcurrency?: number; // 最大并发数(V2.1) + }; + + // 元信息 + version: string; + createdAt?: Date; + updatedAt?: Date; + createdBy?: string; +} + +/** + * ========================================== + * Executor 配置 + * ========================================== + */ + +/** + * Skill 执行器配置 + * 🆕 新增 onSkillComplete 回调(审查建议:预留扩展点,V2.1 实现增量持久化) + */ +export interface ExecutorConfig { + defaultTimeout: number; // 默认超时 (ms),30000 + maxRetries: number; // 最大重试次数,0 + retryDelay: number; // 重试延迟 (ms),1000 + continueOnError: boolean; // 失败继续执行,true + logLevel: 'debug' | 'info' | 'warn' | 'error'; + + /** + * 🆕 Skill 执行完成回调(V2.1 扩展点) + * 可用于:增量持久化、实时状态推送、监控上报 + * MVP 阶段不实现,仅预留接口 + */ + onSkillComplete?: ( + skillId: string, + result: SkillResult, + context: TContext + ) => Promise; +} + +/** + * 执行结果汇总 + */ +export interface ExecutionSummary { + taskId: string; + profileId: string; + + // 状态 + overallStatus: 'success' | 'partial' | 'failed'; + + // 统计 + totalSkills: number; + successCount: number; + warningCount: number; + errorCount: number; + skippedCount: number; + timeoutCount: number; + + // 详细结果 + results: SkillResult[]; + + // 时间 + totalExecutionTime: number; + startedAt: Date; + completedAt: Date; +} +``` + +### 4.2 Skill 注册表 (registry.ts) + +```typescript +// backend/src/modules/rvw/skills/core/registry.ts + +import { Skill, SkillMetadata } from './types'; +import { logger } from '@/common/logging'; + +/** + * Skill 注册表 + * 单例模式,管理所有已注册的 Skills + */ +class SkillRegistryClass { + private skills: Map = new Map(); + private initialized: boolean = false; + + /** + * 注册 Skill + */ + register(skill: Skill): void { + const { id } = skill.metadata; + + if (this.skills.has(id)) { + logger.warn({ skillId: id }, 'Skill already registered, overwriting'); + } + + this.skills.set(id, skill); + logger.info({ skillId: id, version: skill.metadata.version }, 'Skill registered'); + } + + /** + * 批量注册 + */ + registerAll(skills: Skill[]): void { + skills.forEach(skill => this.register(skill)); + } + + /** + * 获取 Skill + */ + get(id: string): Skill | undefined { + return this.skills.get(id); + } + + /** + * 获取 Skill(必须存在) + */ + getRequired(id: string): Skill { + const skill = this.skills.get(id); + if (!skill) { + throw new Error(`Skill not found: ${id}`); + } + return skill; + } + + /** + * 检查 Skill 是否存在 + */ + has(id: string): boolean { + return this.skills.has(id); + } + + /** + * 获取所有已注册的 Skill + */ + getAll(): Skill[] { + return Array.from(this.skills.values()); + } + + /** + * 获取所有 Skill 元数据(用于 UI 展示) + */ + getAllMetadata(): SkillMetadata[] { + return this.getAll().map(skill => skill.metadata); + } + + /** + * 按分类获取 Skills + */ + getByCategory(category: string): Skill[] { + return this.getAll().filter(skill => skill.metadata.category === category); + } + + /** + * 注销 Skill + */ + unregister(id: string): boolean { + const result = this.skills.delete(id); + if (result) { + logger.info({ skillId: id }, 'Skill unregistered'); + } + return result; + } + + /** + * 清空所有 Skills(测试用) + */ + clear(): void { + this.skills.clear(); + this.initialized = false; + } + + /** + * 获取注册的 Skill 数量 + */ + get size(): number { + return this.skills.size; + } + + /** + * 标记为已初始化 + */ + markInitialized(): void { + this.initialized = true; + logger.info({ skillCount: this.size }, 'SkillRegistry initialized'); + } + + /** + * 检查是否已初始化 + */ + isInitialized(): boolean { + return this.initialized; + } +} + +// 导出单例 +export const SkillRegistry = new SkillRegistryClass(); +``` + +### 4.3 执行引擎 (executor.ts) + +```typescript +// backend/src/modules/rvw/skills/core/executor.ts + +import { + Skill, + SkillContext, + SkillResult, + SkillConfig, + ExecutorConfig, + ExecutionSummary, + PipelineItem, + JournalProfile, +} from './types'; +import { SkillRegistry } from './registry'; +import { logger } from '@/common/logging'; + +/** + * 默认执行器配置 + */ +const DEFAULT_EXECUTOR_CONFIG: ExecutorConfig = { + defaultTimeout: 30000, // 30 秒 + maxRetries: 0, + retryDelay: 1000, + continueOnError: true, + logLevel: 'info', +}; + +/** + * Skill 执行引擎 + * 负责按 Profile 配置顺序执行 Skills + */ +export class SkillExecutor { + private config: ExecutorConfig; + + constructor(config?: Partial) { + this.config = { ...DEFAULT_EXECUTOR_CONFIG, ...config }; + } + + /** + * 执行 Pipeline + * @param profile 期刊配置 + * @param initialContext 初始上下文 + * @returns 执行汇总 + */ + async execute(profile: JournalProfile, initialContext: Omit): Promise { + const startTime = Date.now(); + const results: SkillResult[] = []; + + // 构建完整上下文 + const context: SkillContext = { + ...initialContext, + profile, + previousResults: [], + }; + + logger.info({ + taskId: context.taskId, + profileId: profile.id, + pipelineLength: profile.pipeline.length, + }, 'Starting skill pipeline execution'); + + // 遍历 Pipeline + for (const item of profile.pipeline) { + // 跳过禁用的 Skill + if (!item.enabled) { + logger.debug({ skillId: item.skillId }, 'Skill disabled, skipping'); + results.push(this.createSkippedResult(item.skillId, 'Skill disabled in profile')); + continue; + } + + // 获取 Skill + const skill = SkillRegistry.get(item.skillId); + if (!skill) { + logger.warn({ skillId: item.skillId }, 'Skill not found in registry'); + results.push(this.createSkippedResult(item.skillId, 'Skill not found')); + continue; + } + + // 前置检查 + if (skill.canRun && !skill.canRun(context)) { + logger.info({ skillId: item.skillId }, 'Skill pre-check failed, skipping'); + results.push(this.createSkippedResult(item.skillId, 'Pre-check failed')); + continue; + } + + // 执行 Skill + const result = await this.executeSkill(skill, context, item); + results.push(result); + + // 更新上下文(传递给后续 Skills) + context.previousResults.push(result); + + // 更新共享数据(如 DataForensicsSkill 的输出) + this.updateContextWithResult(context, skill, result); + + // 检查是否需要中断 + if (result.status === 'error' && !this.shouldContinue(item, profile)) { + logger.warn({ skillId: item.skillId }, 'Skill failed and continueOnError=false, stopping pipeline'); + break; + } + } + + // 生成汇总 + const summary = this.buildSummary(context.taskId, profile.id, results, startTime); + + logger.info({ + taskId: context.taskId, + overallStatus: summary.overallStatus, + totalTime: summary.totalExecutionTime, + }, 'Skill pipeline execution completed'); + + return summary; + } + + /** + * 执行单个 Skill(带超时和重试) + */ + private async executeSkill( + skill: Skill, + context: SkillContext, + item: PipelineItem + ): Promise { + const startedAt = new Date(); + const timeout = item.timeout || skill.metadata.defaultTimeout || this.config.defaultTimeout; + + logger.info({ + skillId: skill.metadata.id, + timeout, + }, 'Executing skill'); + + try { + // 带超时执行 + const result = await this.executeWithTimeout(skill, context, item.config, timeout); + + logger.info({ + skillId: skill.metadata.id, + status: result.status, + executionTime: result.executionTime, + issueCount: result.issues.length, + }, 'Skill execution completed'); + + return result; + } catch (error: any) { + const executionTime = Date.now() - startedAt.getTime(); + + // 判断是否超时 + if (error.message === 'SKILL_TIMEOUT') { + logger.warn({ + skillId: skill.metadata.id, + timeout, + }, 'Skill execution timed out'); + + return { + skillId: skill.metadata.id, + skillName: skill.metadata.name, + status: 'timeout', + issues: [{ + severity: 'WARNING', + type: 'SKILL_TIMEOUT', + message: `${skill.metadata.name} 执行超时 (${timeout}ms),已跳过`, + }], + executionTime: timeout, + timedOut: true, + startedAt, + completedAt: new Date(), + }; + } + + // 其他错误 + logger.error({ + skillId: skill.metadata.id, + error: error.message, + stack: error.stack, + }, 'Skill execution failed'); + + return { + skillId: skill.metadata.id, + skillName: skill.metadata.name, + status: 'error', + issues: [{ + severity: 'ERROR', + type: 'SKILL_EXECUTION_ERROR', + message: `${skill.metadata.name} 执行失败: ${error.message}`, + }], + executionTime, + error: error.message, + startedAt, + completedAt: new Date(), + }; + } + } + + /** + * 带超时执行 + */ + private async executeWithTimeout( + skill: Skill, + context: SkillContext, + config: SkillConfig | undefined, + timeout: number + ): Promise { + return Promise.race([ + skill.run(context, config), + new Promise((_, reject) => + setTimeout(() => reject(new Error('SKILL_TIMEOUT')), timeout) + ), + ]); + } + + /** + * 根据 Skill 结果更新上下文 + */ + private updateContextWithResult(context: SkillContext, skill: Skill, result: SkillResult): void { + // DataForensicsSkill 的特殊处理 + if (skill.metadata.id === 'DataForensicsSkill' && result.status === 'success') { + const data = result.data as any; + if (data) { + context.tables = data.tables; + context.methods = data.methods; + context.forensicsResult = data; + } + } + + // 其他 Skills 可以在此添加特殊处理 + } + + /** + * 创建跳过结果 + */ + private createSkippedResult(skillId: string, reason: string): SkillResult { + return { + skillId, + skillName: skillId, + status: 'skipped', + issues: [{ + severity: 'INFO', + type: 'SKILL_SKIPPED', + message: reason, + }], + executionTime: 0, + startedAt: new Date(), + completedAt: new Date(), + }; + } + + /** + * 判断是否继续执行 + */ + private shouldContinue(item: PipelineItem, profile: JournalProfile): boolean { + // 显式标记为 optional 的 Skill 失败后继续 + if (item.optional) return true; + + // 检查全局配置 + return profile.globalConfig?.continueOnError ?? this.config.continueOnError; + } + + /** + * 构建执行汇总 + */ + private buildSummary( + taskId: string, + profileId: string, + results: SkillResult[], + startTime: number + ): ExecutionSummary { + const completedAt = new Date(); + const totalExecutionTime = Date.now() - startTime; + + const successCount = results.filter(r => r.status === 'success').length; + const warningCount = results.filter(r => r.status === 'warning').length; + const errorCount = results.filter(r => r.status === 'error').length; + const skippedCount = results.filter(r => r.status === 'skipped').length; + const timeoutCount = results.filter(r => r.status === 'timeout').length; + + // 计算整体状态 + let overallStatus: 'success' | 'partial' | 'failed'; + if (errorCount === 0 && timeoutCount === 0) { + overallStatus = 'success'; + } else if (successCount > 0) { + overallStatus = 'partial'; + } else { + overallStatus = 'failed'; + } + + return { + taskId, + profileId, + overallStatus, + totalSkills: results.length, + successCount, + warningCount, + errorCount, + skippedCount, + timeoutCount, + results, + totalExecutionTime, + startedAt: new Date(startTime), + completedAt, + }; + } +} + +// 导出默认实例 +export const defaultExecutor = new SkillExecutor(); +``` + +### 4.4 Profile 解析器 (profile.ts) + +```typescript +// backend/src/modules/rvw/skills/core/profile.ts + +import { JournalProfile, PipelineItem } from './types'; +import { logger } from '@/common/logging'; + +/** + * 默认 Profile 配置 + */ +export const DEFAULT_PROFILE: JournalProfile = { + id: 'default', + name: '通用期刊配置', + description: 'RVW V2.0 默认审稿配置,适用于大多数期刊', + version: '1.0.0', + + pipeline: [ + { + skillId: 'DataForensicsSkill', + enabled: true, + optional: true, // 数据侦探失败不影响其他审稿 + config: { + checkLevel: 'L1_L2_L25', + tolerancePercent: 0.1, + }, + timeout: 60000, // 60 秒(需要调用 Python) + }, + { + skillId: 'EditorialSkill', + enabled: true, + optional: false, + timeout: 45000, + }, + { + skillId: 'MethodologySkill', + enabled: true, + optional: false, + timeout: 45000, + }, + ], + + globalConfig: { + strictness: 'STANDARD', + continueOnError: true, + timeoutMultiplier: 1.0, + }, +}; + +/** + * 中文核心期刊 Profile + */ +export const CHINESE_CORE_PROFILE: JournalProfile = { + id: 'chinese-core', + name: '中文核心期刊配置', + description: '适用于中文核心期刊,包含政治审查(V2.1)', + version: '1.0.0', + + pipeline: [ + // V2.1: { skillId: 'PoliticalGuardrailSkill', enabled: true, optional: false }, + { + skillId: 'DataForensicsSkill', + enabled: true, + optional: false, // 中文核心对数据准确性要求高 + config: { + checkLevel: 'L1_L2_L25', + tolerancePercent: 0.05, // 更严格的容错 + }, + timeout: 60000, + }, + { + skillId: 'EditorialSkill', + enabled: true, + optional: false, + config: { + standard: 'chinese-core', + }, + }, + { + skillId: 'MethodologySkill', + enabled: true, + optional: false, + }, + ], + + globalConfig: { + strictness: 'STRICT', + continueOnError: false, // 严格模式,失败即停止 + }, +}; + +/** + * 所有预定义 Profiles + */ +const PROFILES: Map = new Map([ + ['default', DEFAULT_PROFILE], + ['chinese-core', CHINESE_CORE_PROFILE], +]); + +/** + * Profile 解析器 + */ +export class ProfileResolver { + /** + * 获取 Profile + * MVP 阶段:从内存 Map 获取 + * V2.1 阶段:从数据库获取 + */ + static resolve(profileId?: string): JournalProfile { + const id = profileId || 'default'; + const profile = PROFILES.get(id); + + if (!profile) { + logger.warn({ profileId: id }, 'Profile not found, using default'); + return DEFAULT_PROFILE; + } + + return profile; + } + + /** + * 根据用户选择的 Agents 动态构建 Profile + * 兼容 V1.0 的 selectedAgents 参数 + */ + static resolveFromAgents(selectedAgents?: string[]): JournalProfile { + const baseProfile = { ...DEFAULT_PROFILE }; + + if (!selectedAgents || selectedAgents.length === 0) { + return baseProfile; + } + + // 根据选择的 Agents 调整 Pipeline + const enabledSkills = new Set(); + + // 数据侦探始终启用(如果有表格验证需求) + enabledSkills.add('DataForensicsSkill'); + + if (selectedAgents.includes('editorial')) { + enabledSkills.add('EditorialSkill'); + } + if (selectedAgents.includes('methodology')) { + enabledSkills.add('MethodologySkill'); + } + + // 如果都没选,启用全部 + if (!selectedAgents.includes('editorial') && !selectedAgents.includes('methodology')) { + enabledSkills.add('EditorialSkill'); + enabledSkills.add('MethodologySkill'); + } + + // 更新 Pipeline + baseProfile.pipeline = baseProfile.pipeline.map(item => ({ + ...item, + enabled: enabledSkills.has(item.skillId), + })); + + return baseProfile; + } + + /** + * 获取所有可用 Profiles(用于 UI) + */ + static getAllProfiles(): JournalProfile[] { + return Array.from(PROFILES.values()); + } + + /** + * 注册新 Profile(V2.1 支持动态添加) + */ + static register(profile: JournalProfile): void { + PROFILES.set(profile.id, profile); + logger.info({ profileId: profile.id }, 'Profile registered'); + } +} +``` + +### 4.5 共享上下文管理 (context.ts) + +```typescript +// backend/src/modules/rvw/skills/core/context.ts + +import { SkillContext, SkillResult, TableData, JournalProfile } from './types'; + +/** + * 上下文构建器 + * 辅助创建和管理 SkillContext + */ +export class ContextBuilder { + private context: Partial; + + constructor() { + this.context = { + previousResults: [], + }; + } + + /** + * 设置任务 ID + */ + taskId(taskId: string): this { + this.context.taskId = taskId; + return this; + } + + /** + * 设置用户 ID + */ + userId(userId?: string): this { + this.context.userId = userId; + return this; + } + + /** + * 设置文档路径 + */ + documentPath(path: string): this { + this.context.documentPath = path; + return this; + } + + /** + * 设置文档内容 + */ + documentContent(content: string): this { + this.context.documentContent = content; + return this; + } + + /** + * 设置文档元信息 + */ + documentMeta(meta: { filename: string; fileSize: number; pageCount?: number }): this { + this.context.documentMeta = meta; + return this; + } + + /** + * 设置 Profile + */ + profile(profile: JournalProfile): this { + this.context.profile = profile; + return this; + } + + /** + * 设置表格数据(通常由 DataForensicsSkill 填充) + */ + tables(tables: TableData[]): this { + this.context.tables = tables; + return this; + } + + /** + * 设置检测到的统计方法 + */ + methods(methods: string[]): this { + this.context.methods = methods; + return this; + } + + /** + * 添加前置结果 + */ + addPreviousResult(result: SkillResult): this { + this.context.previousResults = this.context.previousResults || []; + this.context.previousResults.push(result); + return this; + } + + /** + * 构建上下文 + */ + build(): SkillContext { + // 验证必填字段 + if (!this.context.taskId) { + throw new Error('taskId is required'); + } + if (!this.context.documentPath) { + throw new Error('documentPath is required'); + } + if (!this.context.documentContent) { + throw new Error('documentContent is required'); + } + if (!this.context.profile) { + throw new Error('profile is required'); + } + + return this.context as SkillContext; + } +} + +/** + * 从数据库任务记录创建上下文 + */ +export function createContextFromTask( + task: { + id: string; + userId: string; + filePath: string; + content?: string; + originalName: string; + fileSize?: number; + }, + profile: JournalProfile +): Omit { + return { + taskId: task.id, + userId: task.userId, + documentPath: task.filePath, + documentContent: task.content || '', + documentMeta: { + filename: task.originalName, + fileSize: task.fileSize || 0, + }, + }; +} +``` + +--- + +## 5. Skill 实现规范 + +### 5.1 Skill 基类 + +> 🆕 **审查意见采纳**: +> - 使用 Zod 进行配置验证 +> - 泛型设计支持不同上下文和配置类型 + +```typescript +// backend/src/modules/rvw/skills/library/BaseSkill.ts + +import { z } from 'zod'; +import { + Skill, + SkillMetadata, + SkillContext, + SkillResult, + SkillConfig, + BaseSkillContext +} from '../core/types'; +import { logger } from '@/common/logging'; + +/** + * Skill 基类 + * 🆕 使用泛型支持不同上下文和配置类型 + * 🆕 内置 Zod 配置验证 + */ +export abstract class BaseSkill< + TContext extends BaseSkillContext = SkillContext, + TConfig extends SkillConfig = SkillConfig +> implements Skill { + + abstract readonly metadata: SkillMetadata; + + /** + * 🆕 配置 Schema(子类定义) + */ + readonly configSchema?: z.ZodSchema; + + /** + * 子类实现具体逻辑 + */ + abstract execute( + context: TContext, + config?: TConfig + ): Promise>; + + /** + * 执行入口(统一处理日志、计时、配置验证等) + */ + async run(context: TContext, config?: TConfig): Promise { + const startedAt = new Date(); + const startTime = Date.now(); + + logger.info({ + skillId: this.metadata.id, + taskId: context.taskId, + }, `[${this.metadata.id}] Starting execution`); + + try { + // 🆕 配置验证(使用 Zod) + const validatedConfig = this.validateConfig(config); + + const result = await this.execute(context, validatedConfig); + const executionTime = Date.now() - startTime; + + logger.info({ + skillId: this.metadata.id, + taskId: context.taskId, + status: result.status, + executionTime, + issueCount: result.issues.length, + }, `[${this.metadata.id}] Execution completed`); + + return { + ...result, + skillId: this.metadata.id, + skillName: this.metadata.name, + executionTime, + startedAt, + completedAt: new Date(), + }; + } catch (error: any) { + const executionTime = Date.now() - startTime; + + // 🆕 区分 Zod 验证错误和执行错误 + const isValidationError = error instanceof z.ZodError; + const errorType = isValidationError ? 'CONFIG_VALIDATION_ERROR' : 'EXECUTION_ERROR'; + const errorMessage = isValidationError + ? `配置验证失败: ${error.errors.map(e => e.message).join(', ')}` + : `执行失败: ${error.message}`; + + logger.error({ + skillId: this.metadata.id, + taskId: context.taskId, + error: error.message, + stack: error.stack, + errorType, + }, `[${this.metadata.id}] Execution failed`); + + return { + skillId: this.metadata.id, + skillName: this.metadata.name, + status: 'error', + issues: [{ + severity: 'ERROR', + type: errorType, + message: errorMessage, + }], + error: error.message, + executionTime, + startedAt, + completedAt: new Date(), + }; + } + } + + /** + * 🆕 配置验证(使用 Zod Schema) + * 子类可覆盖以实现自定义验证 + */ + validateConfig(config: unknown): TConfig { + if (this.configSchema) { + return this.configSchema.parse(config); + } + return (config || {}) as TConfig; + } + + /** + * 默认前置检查(子类可覆盖) + */ + canRun(context: TContext): boolean { + return true; + } + + /** + * 辅助方法:从上下文获取前置 Skill 结果 + */ + protected getPreviousResult(context: TContext, skillId: string): SkillResult | undefined { + return context.previousResults.find(r => r.skillId === skillId); + } +} +``` + +### 5.2 DataForensicsSkill 实现 + +> 🆕 **审查意见采纳**: +> - 使用依赖注入(ExtractionClient 可 Mock) +> - Zod Schema 配置验证 +> - 安全检查(路径白名单) + +```typescript +// backend/src/modules/rvw/skills/library/DataForensicsSkill.ts + +import { z } from 'zod'; +import { BaseSkill } from './BaseSkill'; +import { + SkillMetadata, + SkillContext, + SkillResult, + Issue, + ForensicsResult, + DataForensicsConfigSchema, + DataForensicsConfig, +} from '../core/types'; +import { ExtractionClient, IExtractionClient } from '@/common/extraction/client'; +import { logger } from '@/common/logging'; + +/** + * 🆕 安全:允许的文件存储路径前缀 + */ +const ALLOWED_PATH_PREFIXES = [ + '/app/uploads/', // Docker 容器内路径 + 'D:\\MyCursor\\', // 开发环境 + '/tmp/rvw-uploads/', // 临时目录 +]; + +/** + * 数据侦探 Skill + * 🆕 依赖注入:ExtractionClient 可在测试中 Mock + */ +export class DataForensicsSkill extends BaseSkill { + + /** + * 🆕 依赖注入:ExtractionClient + */ + private readonly extractionClient: IExtractionClient; + + constructor(extractionClient?: IExtractionClient) { + super(); + this.extractionClient = extractionClient || ExtractionClient; + } + + /** + * 🆕 Zod 配置 Schema + */ + readonly configSchema = DataForensicsConfigSchema; + + readonly metadata: SkillMetadata = { + id: 'DataForensicsSkill', + name: '数据侦探', + description: '提取 Word 文档表格,验证数据算术正确性和统计学一致性', + version: '2.0.0', + category: 'forensics', + + inputs: ['documentPath'], + outputs: ['tables', 'methods', 'forensicsResult'], + + configSchema: DataForensicsConfigSchema, + + defaultTimeout: 60000, // 60 秒 + retryable: true, + + icon: '🐍', + color: '#3776ab', + }; + + /** + * 前置检查 + * 🆕 增加路径安全验证(防止路径遍历攻击) + */ + canRun(context: SkillContext): boolean { + if (!context.documentPath) { + logger.warn({ taskId: context.taskId }, 'DataForensicsSkill: No document path'); + return false; + } + + if (!context.documentPath.toLowerCase().endsWith('.docx')) { + logger.info({ taskId: context.taskId }, 'DataForensicsSkill: Not a .docx file, skipping'); + return false; + } + + // 🆕 安全检查:路径白名单 + const isPathAllowed = ALLOWED_PATH_PREFIXES.some(prefix => + context.documentPath.startsWith(prefix) + ); + if (!isPathAllowed) { + logger.error({ + taskId: context.taskId, + documentPath: context.documentPath, + }, 'DataForensicsSkill: Document path not in allowed prefixes (security check)'); + return false; + } + + return true; + } + + /** + * 执行数据侦探 + */ + async execute( + context: SkillContext, + config?: DataForensicsConfig + ): Promise> { + // 🆕 配置已通过 Zod 验证,类型安全 + const checkLevel = config?.checkLevel || 'L1_L2_L25'; + const tolerancePercent = config?.tolerancePercent || 0.1; + + logger.info({ + taskId: context.taskId, + checkLevel, + tolerancePercent, + }, 'DataForensicsSkill: Starting analysis'); + + try { + // 🆕 使用依赖注入的 client(便于测试 Mock) + const result = await this.extractionClient.analyzeDocx(context.documentPath, { + checkLevel, + tolerancePercent, + }); + + // 解析结果 + const forensicsResult: ForensicsResult = { + tables: result.tables || [], + methods: result.methods || [], + issues: result.issues || [], + summary: result.summary || { + totalTables: result.tables?.length || 0, + totalIssues: result.issues?.length || 0, + errorCount: result.issues?.filter((i: Issue) => i.severity === 'ERROR').length || 0, + warningCount: result.issues?.filter((i: Issue) => i.severity === 'WARNING').length || 0, + }, + }; + + // 计算状态和评分 + const hasErrors = forensicsResult.summary.errorCount > 0; + const hasWarnings = forensicsResult.summary.warningCount > 0; + + let status: 'success' | 'warning' | 'error'; + let score: number; + + if (hasErrors) { + status = 'error'; + score = Math.max(0, 100 - forensicsResult.summary.errorCount * 20); + } else if (hasWarnings) { + status = 'warning'; + score = Math.max(60, 100 - forensicsResult.summary.warningCount * 5); + } else { + status = 'success'; + score = 100; + } + + return { + status, + score, + scoreLabel: this.getScoreLabel(score), + issues: forensicsResult.issues, + data: forensicsResult, + }; + } catch (error: any) { + // 特殊处理:Python 服务不可用时的降级 + if (error.code === 'ECONNREFUSED' || error.code === 'ETIMEDOUT') { + logger.warn({ + taskId: context.taskId, + error: error.message, + }, 'DataForensicsSkill: Python service unavailable, degrading gracefully'); + + return { + status: 'warning', + issues: [{ + severity: 'WARNING', + type: 'SERVICE_UNAVAILABLE', + message: '数据验证服务暂不可用,已跳过表格验证。建议稍后重试。', + }], + data: { + tables: [], + methods: [], + issues: [], + summary: { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 1 }, + }, + }; + } + + throw error; + } + } + + private getScoreLabel(score: number): string { + if (score >= 90) return '优秀'; + if (score >= 80) return '良好'; + if (score >= 60) return '合格'; + return '需改进'; + } +} + +// 导出单例 +export const dataForensicsSkill = new DataForensicsSkill(); +``` + +### 5.3 EditorialSkill 实现 + +```typescript +// backend/src/modules/rvw/skills/library/EditorialSkill.ts + +import { BaseSkill } from './BaseSkill'; +import { SkillMetadata, SkillContext, SkillResult, SkillConfig, Issue } from '../core/types'; +import { editorialService } from '@/modules/rvw/services/editorialService'; +import { logger } from '@/common/logging'; + +/** + * 稿约规范性评估 Skill + * 封装现有的 editorialService + */ +export class EditorialSkill extends BaseSkill { + readonly metadata: SkillMetadata = { + id: 'EditorialSkill', + name: '稿约规范性评估', + description: '评估稿件是否符合期刊稿约规范(11项标准)', + version: '2.0.0', + category: 'editorial', + + inputs: ['documentContent'], + outputs: ['editorialResult'], + + configSchema: { + type: 'object', + properties: { + standard: { + type: 'string', + enum: ['default', 'chinese-core', 'international'], + default: 'default', + description: '稿约标准类型', + }, + }, + }, + + defaultTimeout: 45000, // 45 秒 + retryable: true, + + icon: '📋', + color: '#52c41a', + }; + + /** + * 前置检查 + */ + canRun(context: SkillContext): boolean { + if (!context.documentContent || context.documentContent.trim().length === 0) { + logger.warn({ taskId: context.taskId }, 'EditorialSkill: No document content'); + return false; + } + return true; + } + + /** + * 执行稿约规范性评估 + */ + async execute(context: SkillContext, config?: SkillConfig): Promise> { + logger.info({ + taskId: context.taskId, + contentLength: context.documentContent.length, + }, 'EditorialSkill: Starting evaluation'); + + // 调用现有 editorialService + const result = await editorialService.evaluate(context.taskId, context.documentContent); + + // 转换为 SkillResult 格式 + const issues: Issue[] = []; + + // 解析 LLM 返回的结构化结果 + if (result.result?.checkItems) { + for (const item of result.result.checkItems) { + if (item.status === 'error' || item.status === '不符合') { + issues.push({ + severity: 'ERROR', + type: `EDITORIAL_${item.id}`, + message: item.suggestion || item.name, + evidence: { detail: item.detail }, + }); + } else if (item.status === 'warning' || item.status === '部分符合') { + issues.push({ + severity: 'WARNING', + type: `EDITORIAL_${item.id}`, + message: item.suggestion || item.name, + evidence: { detail: item.detail }, + }); + } + } + } + + // 计算状态 + const errorCount = issues.filter(i => i.severity === 'ERROR').length; + const warningCount = issues.filter(i => i.severity === 'WARNING').length; + + let status: 'success' | 'warning' | 'error'; + if (errorCount > 0) { + status = 'error'; + } else if (warningCount > 0) { + status = 'warning'; + } else { + status = 'success'; + } + + return { + status, + score: result.result?.score, + scoreLabel: result.result?.scoreLabel, + issues, + data: result.result, + }; + } +} + +// 导出单例 +export const editorialSkill = new EditorialSkill(); +``` + +### 5.4 MethodologySkill 实现 + +```typescript +// backend/src/modules/rvw/skills/library/MethodologySkill.ts + +import { BaseSkill } from './BaseSkill'; +import { SkillMetadata, SkillContext, SkillResult, SkillConfig, Issue } from '../core/types'; +import { methodologyService } from '@/modules/rvw/services/methodologyService'; +import { logger } from '@/common/logging'; + +/** + * 方法学评估 Skill + * 封装现有的 methodologyService + */ +export class MethodologySkill extends BaseSkill { + readonly metadata: SkillMetadata = { + id: 'MethodologySkill', + name: '方法学评估', + description: '评估研究设计、统计方法和结果报告的科学性(20个检查点)', + version: '2.0.0', + category: 'methodology', + + inputs: ['documentContent', 'methods'], + outputs: ['methodologyResult'], + + configSchema: { + type: 'object', + properties: { + focusAreas: { + type: 'array', + items: { type: 'string' }, + default: ['design', 'statistics', 'reporting'], + description: '重点关注领域', + }, + }, + }, + + defaultTimeout: 45000, // 45 秒 + retryable: true, + + icon: '🔬', + color: '#722ed1', + }; + + /** + * 前置检查 + */ + canRun(context: SkillContext): boolean { + if (!context.documentContent || context.documentContent.trim().length === 0) { + logger.warn({ taskId: context.taskId }, 'MethodologySkill: No document content'); + return false; + } + return true; + } + + /** + * 执行方法学评估 + */ + async execute(context: SkillContext, config?: SkillConfig): Promise> { + logger.info({ + taskId: context.taskId, + contentLength: context.documentContent.length, + detectedMethods: context.methods?.length || 0, + }, 'MethodologySkill: Starting evaluation'); + + // 如果 DataForensicsSkill 提取了统计方法,传递给方法学评估 + const methodsHint = context.methods?.join(', ') || ''; + + // 调用现有 methodologyService + const result = await methodologyService.evaluate(context.taskId, context.documentContent, methodsHint); + + // 转换为 SkillResult 格式 + const issues: Issue[] = []; + + // 解析 LLM 返回的结构化结果 + if (result.result?.sections) { + for (const section of result.result.sections) { + for (const item of section.items || []) { + if (item.status === 'error' || item.status === '🔴错误') { + issues.push({ + severity: 'ERROR', + type: `METHODOLOGY_${section.id}_${item.id}`, + message: item.suggestion || item.description, + evidence: { section: section.name, detail: item.detail }, + }); + } else if (item.status === 'warning' || item.status === '🟡存疑') { + issues.push({ + severity: 'WARNING', + type: `METHODOLOGY_${section.id}_${item.id}`, + message: item.suggestion || item.description, + evidence: { section: section.name, detail: item.detail }, + }); + } + } + } + } + + // 计算状态 + const errorCount = issues.filter(i => i.severity === 'ERROR').length; + const warningCount = issues.filter(i => i.severity === 'WARNING').length; + + let status: 'success' | 'warning' | 'error'; + if (errorCount > 0) { + status = 'error'; + } else if (warningCount > 0) { + status = 'warning'; + } else { + status = 'success'; + } + + return { + status, + score: result.result?.score, + scoreLabel: result.result?.overallStatus, + issues, + data: result.result, + }; + } +} + +// 导出单例 +export const methodologySkill = new MethodologySkill(); +``` + +### 5.5 Skill 注册入口 + +```typescript +// backend/src/modules/rvw/skills/library/index.ts + +import { SkillRegistry } from '../core/registry'; +import { dataForensicsSkill, DataForensicsSkill } from './DataForensicsSkill'; +import { editorialSkill, EditorialSkill } from './EditorialSkill'; +import { methodologySkill, MethodologySkill } from './MethodologySkill'; + +/** + * 注册所有内置 Skills + */ +export function registerBuiltinSkills(): void { + SkillRegistry.registerAll([ + dataForensicsSkill, + editorialSkill, + methodologySkill, + ]); + + SkillRegistry.markInitialized(); +} + +// 导出 Skill 类(用于类型引用) +export { + DataForensicsSkill, + EditorialSkill, + MethodologySkill, +}; + +// 导出单例(用于直接调用) +export { + dataForensicsSkill, + editorialSkill, + methodologySkill, +}; +``` + +--- + +## 6. Profile 配置系统 + +### 6.1 MVP 阶段:代码硬编码 + +如第 4.4 节所示,MVP 阶段 Profile 以 TypeScript 常量形式存在于代码中。 + +### 6.2 V2.1 阶段:数据库存储 + UI 管理 + +```typescript +// 数据库 Schema(Prisma) +model JournalProfile { + id String @id @default(uuid()) + name String + description String? + version String + pipeline Json // PipelineItem[] + globalConfig Json? // GlobalConfig + isDefault Boolean @default(false) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + createdBy String? + + @@schema("rvw_schema") +} +``` + +**运营管理端 UI 功能**: + +| 功能 | 说明 | +|------|------| +| Profile 列表 | 查看、搜索、筛选所有 Profile | +| Profile 编辑 | 可视化编辑 Pipeline,拖拽调整顺序 | +| Skill 配置 | 配置每个 Skill 的参数(基于 configSchema 生成表单) | +| 版本管理 | 保存 Profile 历史版本,支持回滚 | +| 复制创建 | 从现有 Profile 复制创建新 Profile | + +### 6.3 V2.2+ 阶段:Skill 插件化 + +``` +开发者工作流: +1. 开发新 Skill(遵循 Skill 接口规范) +2. 打包为独立 npm 包或 .js 文件 +3. 上传到管理端 +4. 系统热加载 Skill +5. 在 Profile 中引用新 Skill +``` + +**安全考虑**: +- Skill 代码沙箱执行 +- 资源使用限制(CPU、内存、网络) +- 代码签名验证 + +--- + +## 7. 执行引擎设计 + +### 7.1 顺序执行(MVP) + +``` +DataForensicsSkill → EditorialSkill → MethodologySkill + ↓ ↓ ↓ + tables/methods editorialResult methodologyResult + └───────────────────┴──────────────────┘ + ↓ + contextData (汇总) +``` + +### 7.2 并行执行(V2.1) + +``` + ┌─── EditorialSkill ───┐ +DataForensicsSkill ─┤ ├─→ 汇总 + └─ MethodologySkill ───┘ +``` + +**实现思路**: + +```typescript +// V2.1: 支持并行执行组 +interface PipelineItem { + skillId: string; + enabled: boolean; + config?: SkillConfig; + + // 🆕 并行执行配置 + parallel?: boolean; // 是否可与其他 parallel=true 的 Skill 并行 + dependsOn?: string[]; // 依赖的 Skill ID(需等待这些完成) +} +``` + +### 7.3 条件分支(V2.2) + +``` +DataForensicsSkill + ↓ + [有统计表格?] + ├── 是 → StatisticalValidationSkill + └── 否 → 跳过 + ↓ + [是英文稿件?] + ├── 是 → EnglishEditorialSkill + └── 否 → ChineseEditorialSkill +``` + +--- + +## 8. 与现有系统集成 + +### 8.1 ReviewWorker 改造 + +```typescript +// backend/src/modules/rvw/workers/reviewWorker.ts + +import { SkillExecutor } from '../skills/core/executor'; +import { ProfileResolver } from '../skills/core/profile'; +import { createContextFromTask } from '../skills/core/context'; +import { registerBuiltinSkills } from '../skills/library'; + +// 初始化时注册 Skills +registerBuiltinSkills(); + +/** + * 处理审稿任务 + */ +async function processReviewTask(job: Job): Promise { + const { taskId, selectedAgents } = job.data; + + // 获取任务详情 + const task = await prisma.reviewTask.findUnique({ where: { id: taskId } }); + if (!task) throw new Error(`Task not found: ${taskId}`); + + // 解析 Profile + const profile = ProfileResolver.resolveFromAgents(selectedAgents); + + // 构建上下文 + const context = createContextFromTask(task, profile); + + // 执行 Skills Pipeline + const executor = new SkillExecutor(); + const summary = await executor.execute(profile, context); + + // 保存结果 + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { + status: mapSummaryToStatus(summary), + contextData: summary, + editorialScore: extractEditorialScore(summary), + methodologyScore: extractMethodologyScore(summary), + completedAt: new Date(), + }, + }); +} +``` + +### 8.2 ExtractionClient 扩展 + +```typescript +// backend/src/common/extraction/client.ts + +/** + * 🆕 新增:数据侦探 API + */ +async function analyzeDocx(filePath: string, config: { + checkLevel: string; + tolerancePercent: number; +}): Promise { + const response = await fetch(`${PYTHON_SERVICE_URL}/api/v1/forensics/analyze_docx`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + file_path: filePath, + check_level: config.checkLevel, + tolerance_percent: config.tolerancePercent, + }), + timeout: 60000, // 60 秒超时 + }); + + if (!response.ok) { + throw new Error(`Forensics API error: ${response.status}`); + } + + return response.json(); +} +``` + +### 8.3 前端适配 + +```typescript +// frontend-v2/src/modules/rvw/types/index.ts + +/** + * 更新 TaskDetail 类型以支持 Skills 架构 + */ +interface ReviewTask { + id: string; + // ... 现有字段 + + // 🆕 Skills 执行结果 + contextData?: { + overallStatus: 'success' | 'partial' | 'failed'; + results: SkillResult[]; + totalExecutionTime: number; + }; + + // 🆕 数据侦探结果 + forensicsResult?: { + tables: TableData[]; + issues: Issue[]; + summary: { ... }; + }; +} +``` + +--- + +## 9. 安全性设计 🆕 + +> 此章节根据审查意见新增,涵盖路径安全和资源保护。 + +### 9.1 路径遍历攻击防护 + +**风险场景**:`documentPath` 如果由用户可控,可能导致路径遍历攻击(如 `../../../etc/passwd`)。 + +**防护措施**: + +```typescript +// 1. 路径白名单验证(在 Skill 中实现) +const ALLOWED_PATH_PREFIXES = [ + '/app/uploads/', // 生产环境 + 'D:\\MyCursor\\', // 开发环境 + '/tmp/rvw-uploads/', // 临时目录 +]; + +function validatePath(path: string): boolean { + // 规范化路径 + const normalizedPath = path.normalize(path); + + // 检查是否包含路径遍历 + if (normalizedPath.includes('..')) { + return false; + } + + // 检查白名单 + return ALLOWED_PATH_PREFIXES.some(prefix => + normalizedPath.startsWith(prefix) + ); +} + +// 2. 在 canRun() 中进行检查 +canRun(context: SkillContext): boolean { + if (!validatePath(context.documentPath)) { + logger.error({ path: context.documentPath }, 'Security: Path validation failed'); + return false; + } + return true; +} +``` + +### 9.2 资源耗尽防护 + +**风险场景**:`documentContent` 如果过大,可能导致 LLM 调用超长、内存耗尽。 + +**防护措施**: + +```typescript +// 在 EditorialSkill.canRun() 中检查 +const MAX_CONTENT_LENGTH = 100000; // 10万字符 ≈ 5万中文字 + +canRun(context: SkillContext): boolean { + if (context.documentContent.length > MAX_CONTENT_LENGTH) { + logger.warn({ + taskId: context.taskId, + contentLength: context.documentContent.length, + limit: MAX_CONTENT_LENGTH, + }, 'EditorialSkill: Content too long, will be truncated or rejected'); + + // 策略选择:拒绝 / 截断 / 分块处理 + return false; // MVP: 拒绝 + } + return true; +} +``` + +### 9.3 超时熔断 + +**风险场景**:Skill 执行时间过长,阻塞整个 Pipeline。 + +**防护措施**(已在 SkillExecutor 中实现): + +```typescript +// 默认 30 秒超时 +const DEFAULT_TIMEOUT = 30000; + +// 每个 Skill 可配置独立超时 +pipeline: [ + { skillId: 'DataForensicsSkill', timeout: 60000 }, // 60s + { skillId: 'EditorialSkill', timeout: 45000 }, // 45s +] +``` + +### 9.4 敏感信息保护 + +**最佳实践**: + +| 场景 | 措施 | +|------|------| +| 日志中的文件路径 | 脱敏处理,仅记录文件名 | +| LLM 调用日志 | 不记录完整 documentContent | +| 错误堆栈 | 生产环境过滤敏感路径 | + +--- + +## 10. 系统演进战略 🆕 + +> 此章节根据审查意见新增,涵盖 Skills 框架的长期演进方向。 + +### 10.1 演进路径 + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Skills 框架演进路径 │ +│ │ +│ Phase 1: RVW 孵化 Phase 2: 下沉共享 Phase 3: 全系统统一 │ +│ ───────────────── ───────────────── ───────────────── │ +│ modules/rvw/skills/ → common/skills/ → 所有模块复用 │ +│ - 仅 RVW 使用 - 通用框架 - IIT: SkillRunner │ +│ - 快速迭代验证 - 模块独立实现 - AIA: 多 Agent Skills│ +│ - 紧耦合可接受 - 松耦合要求 - ASL: 知识库 Skills │ +│ │ +│ Timeline: MVP (Week 2) Timeline: V2.1 Timeline: V2.2+ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### 10.2 Phase 1: RVW 孵化(当前) + +**策略**:"先试点,后下沉" + +| 原则 | 说明 | +|------|------| +| 快速验证 | 在 RVW 中验证架构可行性 | +| 适度耦合 | 允许 RVW 特定代码存在于 library/ | +| 核心解耦 | **skills/core/ 必须通用** | + +**核心解耦红线**(再次强调): + +```typescript +// ❌ 禁止:core 依赖业务代码 +// skills/core/types.ts +import { ReviewTask } from '@/modules/rvw/types'; // ❌ + +// ✅ 正确:使用泛型 +export interface BaseSkillContext { + taskId: string; + profile: TProfile; + // ... +} +``` + +### 10.3 Phase 2: 下沉共享 + +**触发条件**:RVW Skills 架构稳定 + 其他模块需要类似能力。 + +**迁移步骤**: + +1. 将 `modules/rvw/skills/core/` → `common/skills/core/` +2. RVW 改为从 `common/skills` 导入 +3. `modules/rvw/skills/library/` 保留(RVW 特定 Skills) + +**目录结构变化**: + +``` +common/ +├── skills/ # 🆕 通用 Skills 框架 +│ ├── core/ +│ │ ├── types.ts # 泛型类型定义 +│ │ ├── registry.ts +│ │ ├── executor.ts +│ │ └── context.ts +│ └── index.ts + +modules/ +├── rvw/ +│ └── skills/ +│ ├── library/ # RVW 特定 Skills +│ │ ├── DataForensicsSkill.ts +│ │ ├── EditorialSkill.ts +│ │ └── MethodologySkill.ts +│ ├── profiles/ +│ │ └── rvw-profiles.ts +│ └── index.ts # 导出 RVW Context 类型 +│ +├── iit/ +│ └── skills/ +│ └── library/ +│ ├── HardRuleSkill.ts +│ └── SoftRuleSkill.ts +``` + +### 10.4 Phase 3: 全系统统一 + +**愿景**:所有业务模块共享 Skills 框架,但各自实现业务 Skills。 + +| 模块 | Skills 示例 | +|------|-------------| +| **RVW** | DataForensicsSkill, EditorialSkill, MethodologySkill | +| **IIT** | HardRuleSkill, SoftRuleSkill, RedcapSyncSkill | +| **AIA** | ChatSkill, KnowledgeRetrievalSkill, ToolCallSkill | +| **ASL** | DocumentParseSkill, KnowledgeIndexSkill | + +### 10.5 V2.1 增量持久化(预留) + +> 审查建议保留为 V2.1 功能,MVP 仅预留接口。 + +**场景**:Pipeline 执行到一半中断(如服务重启),需要支持断点续跑。 + +**实现方案**(V2.1): + +```typescript +// ExecutorConfig 中已预留 onSkillComplete 回调 +interface ExecutorConfig { + // ... + onSkillComplete?: (skillId: string, result: SkillResult, context: SkillContext) => Promise; +} + +// V2.1 实现 +const executor = new SkillExecutor({ + onSkillComplete: async (skillId, result, context) => { + // 增量保存到数据库 + await prisma.skillExecutionLog.create({ + data: { + taskId: context.taskId, + skillId, + result: JSON.stringify(result), + completedAt: new Date(), + }, + }); + }, +}); +``` + +--- + +## 11. 演进路线图 + +### 11.1 MVP (V2.0) - 当前目标 + +| 能力 | 状态 | +|------|------| +| Skill 接口定义(含 Zod 验证) | 📋 Day 7 | +| SkillRegistry | 📋 Day 7 | +| SkillExecutor(顺序执行 + 超时熔断 + onSkillComplete 预留) | 📋 Day 7 | +| DataForensicsSkill(含依赖注入 + 安全检查) | 📋 Day 8 | +| EditorialSkill(封装现有) | 📋 Day 9 | +| MethodologySkill(封装现有) | 📋 Day 9 | +| ReviewWorker 改造 | 📋 Day 10 | +| Profile 硬编码 | 📋 Day 10 | + +### 11.2 V2.1 - Profile 配置化 + 增量持久化 + +| 能力 | 说明 | +|------|------| +| Profile 数据库存储 | JournalProfile 表 | +| Profile 管理 UI | 运营管理端 | +| 并行执行支持 | Pipeline 优化 | +| 🆕 增量持久化 | onSkillComplete 回调实现 | +| 🆕 Skills 框架下沉 | 迁移到 common/skills | +| 政治审查 Skill | PoliticalGuardrailSkill | +| 医学常识 Skill | MedicalLogicSkill | + +### 11.3 V2.2+ - Skill 插件化 + +| 能力 | 说明 | +|------|------| +| Skill 热加载 | 动态加载 .js/.ts | +| Skill 市场 | 内置 + 第三方 | +| 条件分支执行 | Pipeline DSL | +| AI 原生 Skill | Prompt-as-Skill | +| 全系统统一 | IIT/AIA/ASL 复用 Skills 框架 | + +--- + +## 12. 开发计划 + +### 12.1 Day 7:Skills 核心框架 + +| 任务 | 产出物 | 预估时间 | +|------|--------|---------| +| 创建 skills 目录结构 | 目录 + index.ts | 15min | +| 实现 types.ts(含泛型 + Zod Schema) | 所有类型定义 | 1.5h | +| 实现 registry.ts | SkillRegistry | 30min | +| 实现 executor.ts(含 onSkillComplete 预留) | SkillExecutor(含超时) | 1.5h | +| 实现 profile.ts | ProfileResolver + 默认配置 | 30min | +| 实现 context.ts | ContextBuilder | 30min | +| 单元测试 | 核心逻辑测试 | 1h | + +### 12.2 Day 8:DataForensicsSkill + +| 任务 | 产出物 | 预估时间 | +|------|--------|---------| +| 实现 BaseSkill(含 Zod 验证) | 基类 | 45min | +| 实现 DataForensicsSkill(含依赖注入 + 路径安全) | 调用 Python API | 2h | +| 扩展 ExtractionClient(含接口定义) | IExtractionClient + analyzeDocx | 45min | +| 集成测试 | 端到端测试 | 1h | + +### 12.3 Day 9:EditorialSkill + MethodologySkill + +| 任务 | 产出物 | 预估时间 | +|------|--------|---------| +| 实现 EditorialSkill(含资源限制) | 封装 editorialService | 1h | +| 实现 MethodologySkill | 封装 methodologyService | 1h | +| 注册入口 | library/index.ts | 15min | +| 集成测试 | 全流程测试 | 1h | + +### 12.4 Day 10:ReviewService 改造 + +| 任务 | 产出物 | 预估时间 | +|------|--------|---------| +| 改造 reviewWorker | 使用 SkillExecutor | 2h | +| 更新 contextData 存储 | 适配新结构 | 1h | +| 前端 API 兼容性验证 | 接口测试 | 1h | +| 文档更新 | 更新开发指南 | 30min | + +--- + +## 13. 附录 + +### 13.1 错误码定义 + +| 错误码 | 说明 | +|--------|------| +| `SKILL_NOT_FOUND` | Skill 未注册 | +| `SKILL_TIMEOUT` | Skill 执行超时 | +| `SKILL_EXECUTION_ERROR` | Skill 执行异常 | +| `CONFIG_VALIDATION_ERROR` | 🆕 Zod 配置验证失败 | +| `PROFILE_NOT_FOUND` | Profile 不存在 | +| `CONTEXT_INVALID` | 上下文数据不完整 | +| 🆕 `SECURITY_PATH_VIOLATION` | 路径安全检查失败 | +| 🆕 `RESOURCE_LIMIT_EXCEEDED` | 资源限制超出(如内容过长) | + +### 13.2 日志规范 + +```typescript +// 标准日志格式 +logger.info({ + skillId: 'DataForensicsSkill', + taskId: 'xxx-xxx', + action: 'execute', + duration: 1234, + status: 'success', +}, 'Skill execution completed'); + +// 🆕 安全事件日志(需要告警) +logger.error({ + skillId: 'DataForensicsSkill', + taskId: 'xxx-xxx', + documentPath: '[REDACTED]', // 不记录完整路径 + errorType: 'SECURITY_PATH_VIOLATION', +}, 'Security: Path validation failed'); +``` + +### 13.3 监控指标 + +| 指标 | 类型 | 说明 | +|------|------|------| +| `rvw.skill.execution.duration` | Histogram | Skill 执行时间 | +| `rvw.skill.execution.status` | Counter | 执行状态计数 | +| `rvw.skill.timeout.count` | Counter | 超时次数 | +| `rvw.pipeline.execution.duration` | Histogram | Pipeline 总执行时间 | +| 🆕 `rvw.skill.config_validation.failure` | Counter | 配置验证失败次数 | +| 🆕 `rvw.skill.security.path_violation` | Counter | 路径安全拦截次数 | + +--- + +*文档结束* + +**变更记录**: +| 版本 | 日期 | 变更内容 | +|------|------|---------| +| v1.0 | 2026-02-17 | 初始版本 | +| v1.1 | 2026-02-17 | 根据审查意见更新:
- 新增 1.3 架构红线(解耦原则)
- types.ts 使用泛型 + Zod 验证
- DataForensicsSkill 依赖注入
- 新增第 9 章安全性设计
- 新增第 10 章系统演进战略
- ExecutorConfig 预留 onSkillComplete | diff --git a/docs/03-业务模块/RVW-稿件审查系统/05-测试文档/~$瘤患者围术期大量输血的术前危险因素分析及输血策略2月27 - 副本.docx b/docs/03-业务模块/RVW-稿件审查系统/05-测试文档/~$瘤患者围术期大量输血的术前危险因素分析及输血策略2月27 - 副本.docx deleted file mode 100644 index 19613f386749dd14a05e979b9c6341d3729bb32e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 162 gcmZRr%E(O0XCM) { + taskId: string; + userId?: string; + previousResults: SkillResult[]; + profile: TProfile; +} + +// RVW 扩展字段 +interface RvwContextExtras { + documentPath: string; + documentContent: string; + tables?: TableData[]; + methods?: string[]; + forensicsResult?: ForensicsResult; +} + +// RVW 完整上下文 +interface SkillContext extends BaseSkillContext, RvwContextExtras {} +``` + +#### 3.1.2 Zod 配置 Schema + +```typescript +const DataForensicsConfigSchema = z.object({ + checkLevel: z.enum(['L1', 'L1_L2', 'L1_L2_L25']).default('L1_L2_L25'), + tolerancePercent: z.number().min(0).max(1).default(0.1), +}); + +const EditorialConfigSchema = z.object({ + standard: z.enum(['default', 'chinese-core', 'international']).default('default'), + maxContentLength: z.number().default(100000), +}); +``` + +### 3.2 执行引擎 (executor.ts) + +#### 3.2.1 核心功能 + +- **Pipeline 执行**:按 Profile 配置顺序执行 Skills +- **超时熔断**:可配置超时时间,默认 30 秒 +- **故障隔离**:单个 Skill 失败不影响整体(可配置) +- **上下文传递**:前置 Skill 结果自动注入后续上下文 +- **回调扩展点**:`onSkillComplete` 预留增量持久化 + +#### 3.2.2 执行流程 + +``` +Profile.pipeline.forEach(item => { + 1. 检查 enabled 状态 + 2. 获取 Skill 实例 + 3. 执行 canRun 前置检查 + 4. 带超时执行 skill.run() + 5. 调用 onSkillComplete 回调(V2.1) + 6. 更新上下文 previousResults + 7. 检查 continueOnError 策略 +}) +``` + +### 3.3 Profile 配置 (profile.ts) + +#### 3.3.1 预设 Profiles + +| Profile ID | 名称 | Pipeline | 特点 | +|------------|------|----------|------| +| `default` | 通用期刊配置 | Forensics → Editorial → Methodology | 标准模式 | +| `chinese-core` | 中文核心期刊 | 同上 | 严格模式,失败即停止 | +| `quick-forensics` | 快速数据侦探 | 仅 Forensics | 快速预览 | + +#### 3.3.2 V1 兼容 + +```typescript +// 将 V1.0 的 selectedAgents 映射到 V2.0 Skills +const AGENT_TO_SKILL_MAP = { + 'editorial': 'EditorialSkill', + 'methodology': 'MethodologySkill', + 'forensics': 'DataForensicsSkill', +}; + +// ProfileResolver.resolveFromAgents(['editorial', 'methodology']) +// → 动态生成包含这些 Skills 的 Profile +``` + +### 3.4 DataForensicsSkill 安全设计 + +#### 3.4.1 路径白名单 + +```typescript +const ALLOWED_PATH_PREFIXES = [ + '/app/uploads/', // Docker 容器 + 'D:\\MyCursor\\', // 开发环境 + '/tmp/rvw-uploads/', // 临时目录 +]; + +canRun(context: SkillContext): boolean { + // 安全检查:路径白名单 + const isPathAllowed = ALLOWED_PATH_PREFIXES.some(prefix => + context.documentPath.startsWith(prefix) + ); + + // 检查路径遍历 + if (context.documentPath.includes('..')) { + return false; // 拒绝 + } +} +``` + +#### 3.4.2 依赖注入 + +```typescript +class DataForensicsSkill { + private readonly extractionClient: IExtractionClient; + + constructor(client?: IExtractionClient) { + this.extractionClient = client || extractionClient; + } +} + +// 测试时可注入 Mock +const mockClient: IExtractionClient = { ... }; +const skill = new DataForensicsSkill(mockClient); +``` + +### 3.5 ReviewWorker 改造 + +#### 3.5.1 架构切换 + +```typescript +// 环境变量控制 +const USE_SKILLS_ARCHITECTURE = process.env.RVW_USE_SKILLS !== 'false'; + +// 运行时自动选择 +if (USE_SKILLS_ARCHITECTURE) { + // V2.0 Skills 架构 + const profile = ProfileResolver.resolveFromAgents(agents); + const summary = await executor.execute(profile, context); +} else { + // V1.0 Legacy 架构 + editorialResult = await reviewEditorialStandards(...); + methodologyResult = await reviewMethodology(...); +} +``` + +#### 3.5.2 结果存储 + +```typescript +// Skills 执行摘要存储到 picoExtract 字段(暂时复用) +const skillsContext = { + version: '2.0', + executedAt: new Date().toISOString(), + summary: { + overallStatus: skillsSummary.overallStatus, + totalSkills: skillsSummary.totalSkills, + successCount: skillsSummary.successCount, + errorCount: skillsSummary.errorCount, + }, + forensicsResult: skillsSummary.results.find(r => r.skillId === 'DataForensicsSkill')?.data, +}; +``` + +--- + +## 4. 已知问题 + +### 4.1 数据库迁移阻塞 + +**问题**:尝试添加 `contextData` 字段时,Prisma migrate 报错(历史迁移问题) + +**临时方案**:将 Skills 执行摘要存储到现有的 `picoExtract` JSON 字段 + +**后续计划**:修复历史迁移后,添加专用 `context_data` 字段 + +### 4.2 Python Forensics API + +**状态**:`analyzeDocx` 方法已添加到 `ExtractionClient`,但 Python 端 API (`/api/v1/forensics/analyze`) 尚未实现 + +**后续计划**:Week 3 实现 Python 端完整 API + +--- + +## 5. 测试状态 + +### 5.1 TypeScript 编译 + +✅ 无 Lint 错误 + +### 5.2 集成测试 + +⏳ 待 Python API 完成后进行端到端测试 + +--- + +## 6. 后续计划 + +### 6.1 Week 3 计划 + +| 任务 | 优先级 | 说明 | +|------|--------|------| +| Python Forensics API | P0 | 实现 `/api/v1/forensics/analyze` | +| 前端表格渲染 | P1 | TaskDetail 页面展示提取的表格 | +| 问题高亮 | P1 | 根据 R1C1 坐标高亮问题单元格 | +| 端到端测试 | P1 | 完整流程测试 | + +### 6.2 V2.1 规划 + +| 功能 | 说明 | +|------|------| +| `contextData` 专用字段 | 修复迁移后添加 | +| 增量持久化 | 实现 `onSkillComplete` 回调 | +| Profile 数据库存储 | 支持用户自定义 Profile | +| 更多 Skills | 如引用格式检查、图表检查等 | + +--- + +## 7. 变更日志 + +| 时间 | 变更内容 | +|------|---------| +| 2026-02-18 09:00 | 开始 Day 7 开发 | +| 2026-02-18 09:30 | 创建 skills 目录结构 | +| 2026-02-18 10:00 | 完成 types.ts(含 Zod Schema) | +| 2026-02-18 10:30 | 完成 registry.ts | +| 2026-02-18 11:00 | 完成 executor.ts(含超时熔断) | +| 2026-02-18 11:30 | 完成 profile.ts(含 3 个预设 Profile) | +| 2026-02-18 12:00 | 完成 context.ts | +| 2026-02-18 13:00 | 完成 BaseSkill.ts | +| 2026-02-18 13:30 | 扩展 ExtractionClient(IExtractionClient + analyzeDocx) | +| 2026-02-18 14:00 | 完成 DataForensicsSkill(含路径安全) | +| 2026-02-18 14:30 | 完成 EditorialSkill | +| 2026-02-18 15:00 | 完成 MethodologySkill | +| 2026-02-18 15:30 | 完成 Skills 注册入口 | +| 2026-02-18 16:00 | 改造 reviewWorker | +| 2026-02-18 16:30 | 修复 Lint 错误,处理数据库迁移问题 | +| 2026-02-18 17:00 | 更新开发文档 | + +--- + +*开发记录生成时间: 2026-02-18* +*RVW V2.0 Skills 架构* diff --git a/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 Skills 架构深度审查报告.md b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 Skills 架构深度审查报告.md new file mode 100644 index 00000000..c34dbf3f --- /dev/null +++ b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 Skills 架构深度审查报告.md @@ -0,0 +1,124 @@ +# **RVW V2.0 Skills 架构深度审查报告** + +**审查对象:** RVW V2.0 Skills 架构技术设计文档 (v1.0) + +**审查日期:** 2026-02-17 + +**审查结论:** ✅ **架构设计通过 (Approved)** + +**核心评价:** 结构清晰,扩展性强。**不仅满足 V2.0 需求,更确立了全系统 Agentic AI 的演进基石。** + +## **1\. 🟢 架构亮点 (Strengths)** + +这份设计文档在以下几个方面表现卓越,值得团队坚持: + +### **1.1 "优雅降级" 的教科书级示范** + +在 DataForensicsSkill.ts 的设计中(第 5.2 节),当 Python 服务不可用(ECONNREFUSED)时,系统没有直接抛出 500 错误,而是返回 status: 'warning' 并提示用户“服务暂不可用,已跳过验证”。 + +* **价值**:这是极佳的用户体验设计。它保证了即使高级功能挂了,基础的审稿流程(LLM 部分)依然能跑通,系统韧性极强。 + +### **1.2 上下文设计的 "轻重分离"** + +在 SkillContext 设计中(第 4.1 节),同时保留了 documentPath(用于 Python 读取文件)和 documentContent(用于 LLM 读取文本)。 + +* **价值**:这避免了将巨大的二进制文件加载到 Node.js 内存中,只传递路径给 Python 服务处理,符合“云原生”的高效原则。 + +### **1.3 Profile 的 "硬编码" 策略** + +在 MVP 阶段选择将 Profile 硬编码在 profile.ts 中(第 4.4 节),而不是直接上数据库表。 + +* **价值**:极其务实。这避免了在 Week 2 开发繁琐的 CRUD 管理界面,让团队能聚焦于核心逻辑,同时代码结构又预留了未来切换到数据库的能力。 + +## **2\. 🟡 潜在风险与改进建议 (Risks & Improvements)** + +尽管大框架完美,但在工程细节上,有以下优化建议: + +### **2.1 配置验证的类型安全 (Type Safety in Config)** + +* **问题**:目前 SkillConfig 定义为 any。 +* **建议**:引入 **Zod** 库进行运行时 Schema 验证。 + // 示例:在 DataForensicsSkill 中 + import { z } from 'zod'; + const ConfigSchema \= z.object({ + checkLevel: z.enum(\['L1', 'L1\_L2'\]).default('L1\_L2'), + tolerancePercent: z.number().min(0).max(1).default(0.1) + }); + // 在 run 方法开头: const safeConfig \= ConfigSchema.parse(config); + +### **2.2 状态持久化的时机 (State Persistence Timing)** + +* **问题**:目前的 ReviewWorker 是在所有 Skill 执行完毕后一次性更新数据库。 +* **建议**:在 SkillExecutor 中增加 onSkillComplete 回调,**每执行完一个 Skill 就更新一次数据库**(增量更新),实现应用层断点续传。 + +### **2.3 测试的可模拟性 (Mockability)** + +* **建议**:采用**依赖注入**。在 Skill 的构造函数中注入 ExtractionClient 实例,确保单元测试可以 Mock Python 服务。 + +### **2.4 ⚠️ 核心框架的耦合风险 (Coupling Risk)** + +* **问题**:skills/core 目录下的代码(Registry, Executor)如果引入了 modules/rvw 下的业务类型,会导致未来无法提取为通用模块。 +* **红线**:skills/core 下的文件 **严禁 import** modules/rvw/services 或 modules/rvw/types 中的业务特定代码。它必须是纯粹的、通用的。 + +## **3\. 🛡️ 安全性审查 (Security Review)** + +* **路径遍历风险**:确保 documentPath 来源于系统可信的存储服务生成,防止恶意读取。 +* **资源耗尽风险**:在 EditorialSkill 前置检查中增加文本长度限制(如 \>10万字截断)。 + +## **4\. 🏛️ 系统架构演进战略 (System Architecture Evolution Strategy)** + +**本章节至关重要。请开发团队在编码时时刻铭记:你们不仅仅是在做 RVW 模块,你们是在为全公司搭建 Skills 基础设施。** + +### **4.1 战略定位:RVW 作为“架构试验田”** + +Skills 架构不仅仅服务于 RVW,未来将上升为 IIT、AIA、ASL 等所有模块的通用底座。 + +* **现状**:各模块(IIT, AIA)都在重复造“工具调用”的轮子。 +* **目标**:通过 RVW V2.0 项目,孵化出一套标准的 Skills 框架。 + +### **4.2 演进路线图 (The Roadmap)** + +我们采用 **"先试点,后下沉"** 的稳健策略: + +1. **阶段一:孵化 (Incubation) \- 当前 (Week 2\)** + * **开发位置**:backend/src/modules/rvw/skills/\* + * **任务**:在此目录下完整实现 Registry, Executor, SkillInterface。 + * **要求**:虽然代码在 RVW 目录下,但**设计必须通用**。不要在 Executor 里写死 "ReviewTask" 这样的字眼,要用泛型 TContext。 +2. **阶段二:下沉 (Extraction) \- V2.x (1-2个月后)** + * **动作**:将 modules/rvw/skills/core 剪切移动到 common/skills/core。 + * **动作**:将 DataForensicsSkill 改造为通用 ForensicsSkill,放入 common/skills/library。 + * **验证**:如果阶段一的代码写得足够解耦,这个移动过程应该是零痛感的。 +3. **阶段三:统一 (Unification) \- V3.0** + * **动作**:IIT 模块重构,弃用内部的 ToolsService,改为调用 common/skills。 + +### **4.3 对 IIT 模块的协同建议** + +* 虽然 IIT 目前不重构,但新开发的 Tool **接口定义(Input/Output Schema)应尽量与 RVW 的 Skill 标准保持一致**,以便未来无缝迁移。 + +## **5\. 📝 最终执行建议 (Action Plan)** + +### **Day 7: 核心框架开发 (The Foundation)** + +* **目标**:搭建 skills/core。 +* **关键指令**: + * 编写 types.ts 和 registry.ts 时,**忘掉 RVW 这个业务**,假设你是在写一个开源的 agent-skill-engine 库。 + * 引入 Zod 做配置验证。 + +### **Day 8: 技能实现 (The Implementation)** + +* **目标**:开发 DataForensicsSkill。 +* **关键指令**: + * 将 Python 调用逻辑封装严密。 + * 确保 documentPath 的处理逻辑是安全的。 + +### **Day 9: 业务迁移 (The Migration)** + +* **目标**:将 editorialService 封装为 Skill。 +* **关键指令**: + * 这是一次“搬家”。将原有逻辑原封不动地搬进 run() 方法,不要搞破坏性重构。 + +**结论:** + +**RVW V2.0 是全系统迈向 Agentic AI 的第一步。** 请开发团队以“编写通用框架”的高标准来要求 skills/core 的代码质量,但以“快速交付业务”的务实态度来实现具体的 library/\* 技能。 + +**架构设计通过,准许启动开发。** \ No newline at end of file