feat(rvw): implement Skills architecture (Day 7-10)

- Add Skills core framework (types, registry, executor, profile, context)
- Implement DataForensicsSkill with DI, path security, graceful degradation
- Implement EditorialSkill and MethodologySkill wrapping existing services
- Extend ExtractionClient with IExtractionClient interface and analyzeDocx
- Refactor reviewWorker to support V1/V2 architecture switching
- Add Zod config validation and generic type support
- Update development docs and module status

Day 7: Skills core framework (~700 lines)
Day 8: DataForensicsSkill + ExtractionClient extension (~400 lines)
Day 9: EditorialSkill + MethodologySkill (~350 lines)
Day 10: ReviewWorker integration (~280 lines)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-18 10:09:40 +08:00
parent e785969e54
commit 9f256c4a02
20 changed files with 5603 additions and 72 deletions

View File

@@ -1,9 +1,13 @@
import FormData from 'form-data';
import axios from 'axios';
import * as fs from 'fs';
import * as path from 'path';
/**
* Extraction Service Client
* 调用Python微服务进行文档提取
*
* @version 2.0.0 - 新增数据侦探 API (analyzeDocx)
*/
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
@@ -22,12 +26,78 @@ export interface ExtractionResult {
file_size?: number;
page_count?: number;
has_tables?: boolean;
[key: string]: any;
[key: string]: unknown;
};
error?: string;
}
class ExtractionClient {
/**
 * Forensics ("data detective") result, as returned by the Python service.
 */
export interface ForensicsResult {
  /** Aggregate counters reported alongside the detailed lists. */
  summary: {
    totalTables: number;
    totalIssues: number;
    errorCount: number;
    warningCount: number;
  };

  /** Tables reported for the analyzed document. */
  tables: ForensicsTable[];

  /** Issues reported by the analysis. */
  issues: ForensicsIssue[];

  /** NOTE(review): presumably the methods detected in the document — confirm against the service. */
  methods: string[];
}
/**
 * A single table entry inside a {@link ForensicsResult}.
 */
export interface ForensicsTable {
  /** Table identifier. */
  id: string;

  /** Table caption text. */
  caption: string;

  /** NOTE(review): presumably the dimensions of `data` — confirm against the service. */
  rowCount: number;
  colCount: number;

  /** Optional header cells. */
  headers?: string[];

  /** Cell contents as an array of string arrays. */
  data: string[][];

  /** Optional HTML rendering of the table. */
  html?: string;
}
/**
 * A single finding inside a {@link ForensicsResult}.
 */
export interface ForensicsIssue {
  /** Issue category, as a free-form string. */
  type: string;

  /** Severity level of the finding. */
  severity: 'ERROR' | 'WARNING' | 'INFO';

  /** Human-readable description of the finding. */
  message: string;

  /** Optional pointer to where the issue was found; every field is optional. */
  location?: {
    tableId?: string;
    cellRef?: string;
    paragraph?: number;
    lineRange?: [number, number];
  };

  /**
   * Optional supporting data. Besides the named fields, arbitrary extra keys
   * are allowed and must be narrowed before use.
   */
  evidence?: {
    expected?: string | number;
    actual?: string | number;
    formula?: string;
    [key: string]: unknown;
  };
}
/**
 * Forensics ("data detective") configuration.
 */
export interface ForensicsConfig {
  /**
   * Which validation levels to run. Per the analyzeDocx notes the levels are
   * L1 arithmetic, L2 statistics and L2.5 consistency.
   */
  checkLevel: 'L1' | 'L1_L2' | 'L1_L2_L25';

  /** NOTE(review): presumably a percentage tolerance for numeric comparisons — confirm units with the service. */
  tolerancePercent: number;
}
/**
 * IExtractionClient interface.
 *
 * Exists for dependency injection, so tests can substitute a mock for the
 * real client.
 */
export interface IExtractionClient {
  /** Resolves with the service's status, checks and timestamp. */
  health(): Promise<{
    status: string;
    checks: unknown;
    timestamp: string;
  }>;

  /** Extraction entry point taking the file contents and its name. */
  extractDocument(file: Buffer, filename: string): Promise<ExtractionResult>;

  /** PDF extraction; `method` optionally selects 'auto', 'nougat' or 'pymupdf'. */
  extractPdf(
    file: Buffer,
    filename: string,
    method?: 'auto' | 'nougat' | 'pymupdf'
  ): Promise<ExtractionResult>;

  /** DOCX extraction. */
  extractDocx(file: Buffer, filename: string): Promise<ExtractionResult>;

  /** TXT extraction. */
  extractTxt(file: Buffer, filename: string): Promise<ExtractionResult>;

  /** Resolves with the detected language and Chinese/total character statistics. */
  detectLanguage(
    file: Buffer,
    filename: string
  ): Promise<{
    language: string;
    chinese_ratio: number;
    chinese_chars: number;
    total_chars: number;
  }>;

  /** Resolves with the recommended PDF extraction method and the reason for it. */
  getPdfStrategy(
    file: Buffer,
    filename: string
  ): Promise<{
    detected_language: string;
    recommended_method: string;
    reason: string;
    nougat_available: boolean;
  }>;

  /** Forensics analysis of the Word document located at `filePath`. */
  analyzeDocx(
    filePath: string,
    config: ForensicsConfig
  ): Promise<ForensicsResult>;
}
class ExtractionClient implements IExtractionClient {
private baseUrl: string;
constructor(baseUrl: string = EXTRACTION_SERVICE_URL) {
@@ -260,6 +330,63 @@ class ExtractionClient {
throw new Error('Get PDF strategy failed');
}
}
/**
 * Forensics ("data detective") API: analyze a Word document.
 *
 * Reads the file at `filePath` and uploads it as multipart form data to
 * `POST {baseUrl}/api/v1/forensics/analyze`, together with the check level and
 * tolerance, then returns the response body. The service extracts tables and
 * validates the data (L1 arithmetic + L2 statistics + L2.5 consistency).
 *
 * @param filePath Path of the document on the machine running this client.
 * @param config Forensics configuration (check level and tolerance).
 * @returns The forensics result returned by the service.
 * @throws Error with `code === 'ECONNREFUSED'` when the connection is refused.
 * @throws Error with `code === 'ETIMEDOUT'` when the request times out.
 * @throws Error `Forensics analysis failed: <detail>` when the service answers
 *   with an error response, and a plain `Forensics analysis failed` for any
 *   other failure (including failure to read the file).
 */
async analyzeDocx(
  filePath: string,
  config: ForensicsConfig
): Promise<ForensicsResult> {
  try {
    // Read asynchronously: a synchronous read inside an async method would
    // block the event loop for the duration of the disk I/O.
    const file = await fs.promises.readFile(filePath);
    const filename = path.basename(filePath);

    const formData = new FormData();
    formData.append('file', file, filename);
    formData.append('check_level', config.checkLevel);
    formData.append('tolerance_percent', config.tolerancePercent.toString());

    const response = await axios.post<ForensicsResult>(
      `${this.baseUrl}/api/v1/forensics/analyze`,
      formData,
      {
        headers: {
          ...formData.getHeaders(),
        },
        timeout: 60000, // 60-second timeout
      }
    );

    return response.data;
  } catch (error) {
    console.error('[ExtractionClient] Forensics analysis failed:', error);

    if (axios.isAxiosError(error)) {
      if (error.code === 'ECONNREFUSED') {
        const err = new Error('Forensics service unavailable');
        (err as NodeJS.ErrnoException).code = 'ECONNREFUSED';
        throw err;
      }

      // axios reports expiry of its own `timeout` option as ECONNABORTED by
      // default (ETIMEDOUT only with `transitional.clarifyTimeoutError`), so
      // both codes are treated as a timeout. The rethrown error keeps the
      // ETIMEDOUT code so existing callers do not need to change.
      if (error.code === 'ETIMEDOUT' || error.code === 'ECONNABORTED') {
        const err = new Error('Forensics service timeout');
        (err as NodeJS.ErrnoException).code = 'ETIMEDOUT';
        throw err;
      }

      if (error.response) {
        // The error body may be empty or not an object; never dereference it
        // blindly, or the handler itself would throw a TypeError.
        const body: unknown = error.response.data;
        const detail =
          typeof body === 'object' && body !== null
            ? (body as { detail?: unknown }).detail
            : undefined;

        // Fall back to the axios message when there is no usable detail, and
        // serialize a structured detail instead of printing "[object Object]".
        let reason: string = error.message;
        if (detail) {
          reason = typeof detail === 'string' ? detail : JSON.stringify(detail);
        }
        throw new Error(`Forensics analysis failed: ${reason}`);
      }
    }

    throw new Error('Forensics analysis failed');
  }
}
}
// 导出类和单例