feat(rvw): Complete V2.0 Week 3 - Statistical validation extension and UX improvements
Week 3 Development Summary: - Implement negative sign normalization (6 Unicode variants) - Enhance T-test validation with smart sample size extraction - Enhance SE triangle and CI-P consistency validation with subrow support - Add precise sub-cell highlighting for P-values in multi-line cells - Add frontend issue type Chinese translations (6 new types) - Add file format tips for PDF/DOC uploads Technical improvements: - Add _clean_statistical_text() in extractor.py - Add _safe_float() wrapper in validator.py - Add ForensicsReport.tsx component - Update ISSUE_TYPE_LABELS translations Documentation: - Add 2026-02-18 development record - Update RVW module status (v5.1) - Update system status (v5.2) Status: Week 3 complete, ready for Week 4 testing Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -54,6 +54,7 @@ export interface ForensicsTable {
|
||||
headers?: string[];
|
||||
rowCount: number;
|
||||
colCount: number;
|
||||
issues?: ForensicsIssue[]; // 每个表格的问题列表
|
||||
}
|
||||
|
||||
export interface ForensicsIssue {
|
||||
@@ -354,7 +355,7 @@ class ExtractionClient implements IExtractionClient {
|
||||
formData.append('tolerance_percent', config.tolerancePercent.toString());
|
||||
|
||||
const response = await axios.post<ForensicsResult>(
|
||||
`${this.baseUrl}/api/v1/forensics/analyze`,
|
||||
`${this.baseUrl}/api/v1/forensics/analyze_docx`,
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
|
||||
@@ -27,6 +27,17 @@ function getUserId(request: FastifyRequest): string {
|
||||
return userId;
|
||||
}
|
||||
|
||||
/**
 * Resolves the tenant ID of the caller from the request.
 *
 * Reads `request.user.tenantId`. The original note states this value comes
 * from the JWT token; the code that attaches `user` to the request is not
 * visible here, so treat that as an assumption to confirm.
 *
 * The value is later interpolated into storage keys, so anything other than a
 * non-blank string is rejected instead of being returned under a `string`
 * return type.
 *
 * @param request Incoming Fastify request.
 * @returns The tenant ID as a non-blank string.
 * @throws Error with message 'Tenant not found' when the request carries no
 *   usable tenant ID (missing user, missing/blank/non-string tenantId).
 */
function getTenantId(request: FastifyRequest): string {
  // Read through Reflect.get + `unknown` so no `any` leaks into the checks below.
  const user: unknown = Reflect.get(request, 'user');
  const tenantId: unknown =
    typeof user === 'object' && user !== null ? Reflect.get(user, 'tenantId') : undefined;

  if (typeof tenantId !== 'string' || tenantId.trim() === '') {
    throw new Error('Tenant not found');
  }
  return tenantId;
}
|
||||
|
||||
// ==================== 任务创建 ====================
|
||||
|
||||
/**
|
||||
@@ -43,7 +54,8 @@ export async function createTask(
|
||||
) {
|
||||
try {
|
||||
const userId = getUserId(request);
|
||||
logger.info('[RVW:Controller] 上传稿件', { userId });
|
||||
const tenantId = getTenantId(request);
|
||||
logger.info('[RVW:Controller] 上传稿件', { userId, tenantId });
|
||||
|
||||
// 获取上传的文件
|
||||
const data = await request.file();
|
||||
@@ -105,7 +117,7 @@ export async function createTask(
|
||||
}
|
||||
|
||||
// 创建任务
|
||||
const task = await reviewService.createTask(file, filename, userId, modelType);
|
||||
const task = await reviewService.createTask(file, filename, userId, tenantId, modelType);
|
||||
|
||||
logger.info('[RVW:Controller] 任务已创建', { taskId: task.id });
|
||||
|
||||
|
||||
@@ -17,6 +17,24 @@ import { ModelType } from '../../../common/llm/adapters/types.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { jobQueue } from '../../../common/jobs/index.js';
|
||||
import { Prisma } from '@prisma/client';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
import { randomUUID } from 'crypto';
|
||||
import path from 'path';
|
||||
|
||||
/**
 * Builds the object-storage key under which an uploaded RVW manuscript is saved.
 *
 * Key layout: tenants/{tenantId}/users/{userId}/rvw/{taskId}/{random16hex}{ext}
 *
 * The original file name is never embedded in the key. The object name is the
 * first 16 hex characters of a random UUID, and only the file's extension is
 * carried over, lower-cased.
 *
 * @param tenantId Tenant that owns the upload.
 * @param userId   User that owns the upload.
 * @param taskId   Review task the file belongs to.
 * @param filename Original (user-supplied) file name; only its extension is used.
 * @returns The storage key string.
 */
function generateRvwStorageKey(
  tenantId: string,
  userId: string,
  taskId: string,
  filename: string
): string {
  const objectName = randomUUID().replace(/-/g, '').substring(0, 16);

  // The file name comes from the upload, so keep only [a-z0-9] from its
  // extension; characters such as '?', spaces or '\' must not reach the key.
  const extChars = path
    .extname(filename)
    .toLowerCase()
    .replace(/[^a-z0-9]/g, '');
  const ext = extChars ? `.${extChars}` : '';

  return `tenants/${tenantId}/users/${userId}/rvw/${taskId}/${objectName}${ext}`;
}
|
||||
import {
|
||||
AgentType,
|
||||
TaskStatus,
|
||||
@@ -44,6 +62,7 @@ import {
|
||||
* @param file 文件Buffer
|
||||
* @param filename 文件名
|
||||
* @param userId 用户ID
|
||||
* @param tenantId 租户ID
|
||||
* @param modelType 模型类型
|
||||
* @returns 创建的任务
|
||||
*/
|
||||
@@ -51,11 +70,12 @@ export async function createTask(
|
||||
file: Buffer,
|
||||
filename: string,
|
||||
userId: string,
|
||||
tenantId: string,
|
||||
modelType: ModelType = 'deepseek-v3'
|
||||
) {
|
||||
logger.info('[RVW] 创建审查任务', { filename, userId, modelType });
|
||||
logger.info('[RVW] 创建审查任务', { filename, userId, tenantId, modelType });
|
||||
|
||||
// 创建任务记录(状态为pending,等待用户选择智能体后运行)
|
||||
// 1. 先创建任务记录获取 taskId
|
||||
const task = await prisma.reviewTask.create({
|
||||
data: {
|
||||
userId,
|
||||
@@ -70,12 +90,37 @@ export async function createTask(
|
||||
|
||||
logger.info('[RVW] 任务已创建', { taskId: task.id, status: task.status });
|
||||
|
||||
// 异步提取文档文本(预处理,不运行评估)
|
||||
// 2. 生成 OSS 存储 Key 并上传文件
|
||||
const storageKey = generateRvwStorageKey(tenantId, userId, task.id, filename);
|
||||
let updatedTask = task;
|
||||
|
||||
try {
|
||||
logger.info('[RVW] 开始上传文件到存储', { taskId: task.id, storageKey });
|
||||
await storage.upload(storageKey, file);
|
||||
logger.info('[RVW] 文件已上传到存储', { taskId: task.id, storageKey });
|
||||
|
||||
// 3. 更新任务的 filePath 字段
|
||||
updatedTask = await prisma.reviewTask.update({
|
||||
where: { id: task.id },
|
||||
data: { filePath: storageKey },
|
||||
});
|
||||
logger.info('[RVW] 任务 filePath 已更新', { taskId: task.id, filePath: storageKey });
|
||||
} catch (uploadError) {
|
||||
logger.error('[RVW] 文件上传失败', {
|
||||
taskId: task.id,
|
||||
storageKey,
|
||||
error: uploadError instanceof Error ? uploadError.message : 'Unknown error',
|
||||
stack: uploadError instanceof Error ? uploadError.stack : undefined,
|
||||
});
|
||||
// 上传失败不阻塞任务创建,DataForensicsSkill 会优雅降级
|
||||
}
|
||||
|
||||
// 4. 异步提取文档文本(预处理,不运行评估)
|
||||
extractDocumentAsync(task.id, file, filename).catch(error => {
|
||||
logger.error('[RVW] 文档提取失败', { taskId: task.id, error: error.message });
|
||||
});
|
||||
|
||||
return task;
|
||||
return updatedTask;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -191,6 +236,7 @@ export async function runReview(params: RunReviewParams): Promise<{ jobId: strin
|
||||
agents,
|
||||
extractedText: task.extractedText,
|
||||
modelType: (task.modelUsed || 'deepseek-v3') as ModelType,
|
||||
__expireInSeconds: 10 * 60, // 10分钟超时(审稿任务通常2-3分钟完成)
|
||||
});
|
||||
|
||||
logger.info('[RVW] 审查任务已推送到队列', {
|
||||
@@ -364,6 +410,10 @@ export async function getTaskReport(userId: string, taskId: string): Promise<Rev
|
||||
throw new Error(`报告尚未完成,当前状态: ${task.status}`);
|
||||
}
|
||||
|
||||
// 从 contextData 中提取 forensicsResult(V2.0 Skills 架构)
|
||||
const contextData = task.contextData as { forensicsResult?: unknown } | null;
|
||||
const forensicsResult = contextData?.forensicsResult ?? undefined;
|
||||
|
||||
return {
|
||||
taskId: task.id,
|
||||
fileName: task.fileName,
|
||||
@@ -374,6 +424,7 @@ export async function getTaskReport(userId: string, taskId: string): Promise<Rev
|
||||
overallScore: task.overallScore ?? undefined,
|
||||
editorialReview: task.editorialReview as unknown as EditorialReview | undefined,
|
||||
methodologyReview: task.methodologyReview as unknown as MethodologyReview | undefined,
|
||||
forensicsResult: forensicsResult as ReviewReport['forensicsResult'],
|
||||
completedAt: task.completedAt ?? undefined,
|
||||
durationSeconds: task.durationSeconds ?? undefined,
|
||||
};
|
||||
|
||||
@@ -68,7 +68,7 @@ export function getMethodologyStatus(review: MethodologyReview | null | undefine
|
||||
* @param editorialScore 稿约规范性分数
|
||||
* @param methodologyScore 方法学分数
|
||||
* @param agents 选择的智能体
|
||||
* @returns 综合分数
|
||||
* @returns 综合分数(保留1位小数)
|
||||
*/
|
||||
export function calculateOverallScore(
|
||||
editorialScore: number | null | undefined,
|
||||
@@ -78,18 +78,21 @@ export function calculateOverallScore(
|
||||
const hasEditorial = agents.includes('editorial') && editorialScore != null;
|
||||
const hasMethodology = agents.includes('methodology') && methodologyScore != null;
|
||||
|
||||
let score: number | null = null;
|
||||
|
||||
if (hasEditorial && hasMethodology) {
|
||||
// 两个都选:稿约40% + 方法学60%
|
||||
return editorialScore! * 0.4 + methodologyScore! * 0.6;
|
||||
score = editorialScore! * 0.4 + methodologyScore! * 0.6;
|
||||
} else if (hasEditorial) {
|
||||
// 只选规范性
|
||||
return editorialScore!;
|
||||
score = editorialScore!;
|
||||
} else if (hasMethodology) {
|
||||
// 只选方法学
|
||||
return methodologyScore!;
|
||||
score = methodologyScore!;
|
||||
}
|
||||
|
||||
return null;
|
||||
// 修复浮点数精度问题:保留1位小数
|
||||
return score !== null ? Math.round(score * 10) / 10 : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -121,9 +121,8 @@ export class ContextBuilder {
|
||||
if (!this.context.taskId) {
|
||||
errors.push('taskId is required');
|
||||
}
|
||||
if (!this.context.documentPath) {
|
||||
errors.push('documentPath is required');
|
||||
}
|
||||
// documentPath 是可选的:DataForensicsSkill 需要,但 Editorial/Methodology 不需要
|
||||
// DataForensicsSkill.canRun() 会检查 documentPath,没有就跳过
|
||||
if (this.context.documentContent === undefined) {
|
||||
errors.push('documentContent is required');
|
||||
}
|
||||
@@ -147,9 +146,8 @@ export class ContextBuilder {
|
||||
if (!this.context.taskId) {
|
||||
errors.push('taskId is required');
|
||||
}
|
||||
if (!this.context.documentPath) {
|
||||
errors.push('documentPath is required');
|
||||
}
|
||||
// documentPath 是可选的:DataForensicsSkill 需要,但 Editorial/Methodology 不需要
|
||||
// DataForensicsSkill.canRun() 会检查 documentPath,没有就跳过
|
||||
if (this.context.documentContent === undefined) {
|
||||
errors.push('documentContent is required');
|
||||
}
|
||||
|
||||
@@ -58,19 +58,19 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
...initialContext,
|
||||
profile,
|
||||
previousResults: [],
|
||||
} as TContext;
|
||||
} as unknown as TContext;
|
||||
|
||||
logger.info({
|
||||
logger.info('[SkillExecutor] Starting pipeline execution', {
|
||||
taskId: context.taskId,
|
||||
profileId: profile.id,
|
||||
pipelineLength: profile.pipeline.length,
|
||||
}, '[SkillExecutor] Starting pipeline execution');
|
||||
});
|
||||
|
||||
// 遍历 Pipeline
|
||||
for (const item of profile.pipeline) {
|
||||
// 跳过禁用的 Skill
|
||||
if (!item.enabled) {
|
||||
logger.debug({ skillId: item.skillId }, '[SkillExecutor] Skill disabled, skipping');
|
||||
logger.debug('[SkillExecutor] Skill disabled, skipping', { skillId: item.skillId });
|
||||
results.push(this.createSkippedResult(item.skillId, 'Skill disabled in profile'));
|
||||
continue;
|
||||
}
|
||||
@@ -78,20 +78,20 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
// 获取 Skill
|
||||
const skill = SkillRegistry.get(item.skillId);
|
||||
if (!skill) {
|
||||
logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill not found in registry');
|
||||
logger.warn('[SkillExecutor] Skill not found in registry', { skillId: item.skillId });
|
||||
results.push(this.createSkippedResult(item.skillId, 'Skill not found'));
|
||||
continue;
|
||||
}
|
||||
|
||||
// 前置检查
|
||||
if (skill.canRun && !skill.canRun(context as SkillContext)) {
|
||||
logger.info({ skillId: item.skillId }, '[SkillExecutor] Skill pre-check failed, skipping');
|
||||
if (skill.canRun && !skill.canRun(context as unknown as SkillContext)) {
|
||||
logger.info('[SkillExecutor] Skill pre-check failed, skipping', { skillId: item.skillId });
|
||||
results.push(this.createSkippedResult(item.skillId, 'Pre-check failed'));
|
||||
continue;
|
||||
}
|
||||
|
||||
// 执行 Skill
|
||||
const result = await this.executeSkill(skill, context as SkillContext, item, profile);
|
||||
const result = await this.executeSkill(skill, context as unknown as SkillContext, item, profile);
|
||||
results.push(result);
|
||||
|
||||
// 调用完成回调(V2.1 扩展点)
|
||||
@@ -100,7 +100,7 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
await this.config.onSkillComplete(item.skillId, result, context);
|
||||
} catch (callbackError: unknown) {
|
||||
const errorMessage = callbackError instanceof Error ? callbackError.message : String(callbackError);
|
||||
logger.error({ skillId: item.skillId, error: errorMessage }, '[SkillExecutor] onSkillComplete callback failed');
|
||||
logger.error('[SkillExecutor] onSkillComplete callback failed', { skillId: item.skillId, error: errorMessage });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
|
||||
// 检查是否需要中断
|
||||
if (result.status === 'error' && !this.shouldContinue(item, profile)) {
|
||||
logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill failed and continueOnError=false, stopping');
|
||||
logger.warn('[SkillExecutor] Skill failed and continueOnError=false, stopping', { skillId: item.skillId });
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -120,13 +120,13 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
// 生成汇总
|
||||
const summary = this.buildSummary(context.taskId, profile.id, results, startTime);
|
||||
|
||||
logger.info({
|
||||
logger.info('[SkillExecutor] Pipeline execution completed', {
|
||||
taskId: context.taskId,
|
||||
overallStatus: summary.overallStatus,
|
||||
totalTime: summary.totalExecutionTime,
|
||||
successCount: summary.successCount,
|
||||
errorCount: summary.errorCount,
|
||||
}, '[SkillExecutor] Pipeline execution completed');
|
||||
});
|
||||
|
||||
return summary;
|
||||
}
|
||||
@@ -144,23 +144,23 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
const timeoutMultiplier = profile.globalConfig?.timeoutMultiplier ?? 1;
|
||||
const timeout = Math.round((item.timeout ?? skill.metadata.defaultTimeout ?? this.config.defaultTimeout) * timeoutMultiplier);
|
||||
|
||||
logger.info({
|
||||
logger.info('[SkillExecutor] Executing skill', {
|
||||
skillId: skill.metadata.id,
|
||||
taskId: context.taskId,
|
||||
timeout,
|
||||
}, '[SkillExecutor] Executing skill');
|
||||
});
|
||||
|
||||
try {
|
||||
// 带超时执行
|
||||
const result = await this.executeWithTimeout(skill, context, item.config, timeout);
|
||||
|
||||
logger.info({
|
||||
logger.info('[SkillExecutor] Skill execution completed', {
|
||||
skillId: skill.metadata.id,
|
||||
taskId: context.taskId,
|
||||
status: result.status,
|
||||
executionTime: result.executionTime,
|
||||
issueCount: result.issues.length,
|
||||
}, '[SkillExecutor] Skill execution completed');
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error: unknown) {
|
||||
@@ -169,11 +169,11 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
|
||||
// 判断是否超时
|
||||
if (errorMessage === 'SKILL_TIMEOUT') {
|
||||
logger.warn({
|
||||
logger.warn('[SkillExecutor] Skill execution timed out', {
|
||||
skillId: skill.metadata.id,
|
||||
taskId: context.taskId,
|
||||
timeout,
|
||||
}, '[SkillExecutor] Skill execution timed out');
|
||||
});
|
||||
|
||||
return {
|
||||
skillId: skill.metadata.id,
|
||||
@@ -192,11 +192,11 @@ export class SkillExecutor<TContext extends BaseSkillContext = SkillContext> {
|
||||
}
|
||||
|
||||
// 其他错误
|
||||
logger.error({
|
||||
logger.error('[SkillExecutor] Skill execution failed', {
|
||||
skillId: skill.metadata.id,
|
||||
taskId: context.taskId,
|
||||
error: errorMessage,
|
||||
}, '[SkillExecutor] Skill execution failed');
|
||||
});
|
||||
|
||||
return {
|
||||
skillId: skill.metadata.id,
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* @since 2026-02-18
|
||||
*/
|
||||
|
||||
import { JournalProfile, PipelineItem } from './types.js';
|
||||
import { JournalProfile } from './types.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
|
||||
@@ -34,13 +34,13 @@ export const DEFAULT_PROFILE: JournalProfile = {
|
||||
skillId: 'EditorialSkill',
|
||||
enabled: true,
|
||||
optional: false,
|
||||
timeout: 45000,
|
||||
timeout: 180000, // 180 秒
|
||||
},
|
||||
{
|
||||
skillId: 'MethodologySkill',
|
||||
enabled: true,
|
||||
optional: false,
|
||||
timeout: 45000,
|
||||
timeout: 180000, // 180 秒
|
||||
},
|
||||
],
|
||||
|
||||
@@ -78,13 +78,13 @@ export const CHINESE_CORE_PROFILE: JournalProfile = {
|
||||
config: {
|
||||
standard: 'chinese-core',
|
||||
},
|
||||
timeout: 45000,
|
||||
timeout: 180000, // 180 秒
|
||||
},
|
||||
{
|
||||
skillId: 'MethodologySkill',
|
||||
enabled: true,
|
||||
optional: false,
|
||||
timeout: 45000,
|
||||
timeout: 180000, // 180 秒
|
||||
},
|
||||
],
|
||||
|
||||
@@ -154,11 +154,11 @@ export class ProfileResolver {
|
||||
const profile = PROFILES.get(id);
|
||||
|
||||
if (!profile) {
|
||||
logger.warn({ profileId: id }, '[ProfileResolver] Profile not found, using default');
|
||||
logger.warn('[ProfileResolver] Profile not found, using default', { profileId: id });
|
||||
return DEFAULT_PROFILE;
|
||||
}
|
||||
|
||||
logger.debug({ profileId: id }, '[ProfileResolver] Profile resolved');
|
||||
logger.debug('[ProfileResolver] Profile resolved', { profileId: id });
|
||||
return profile;
|
||||
}
|
||||
|
||||
@@ -196,10 +196,10 @@ export class ProfileResolver {
|
||||
enabled: enabledSkills.has(item.skillId),
|
||||
}));
|
||||
|
||||
logger.debug({
|
||||
logger.debug('[ProfileResolver] Profile built from agents', {
|
||||
selectedAgents,
|
||||
enabledSkills: Array.from(enabledSkills),
|
||||
}, '[ProfileResolver] Profile built from agents');
|
||||
});
|
||||
|
||||
return baseProfile;
|
||||
}
|
||||
@@ -223,7 +223,7 @@ export class ProfileResolver {
|
||||
*/
|
||||
static register(profile: JournalProfile): void {
|
||||
PROFILES.set(profile.id, profile);
|
||||
logger.info({ profileId: profile.id }, '[ProfileResolver] Profile registered');
|
||||
logger.info('[ProfileResolver] Profile registered', { profileId: profile.id });
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -24,11 +24,11 @@ class SkillRegistryClass {
|
||||
const { id, version } = skill.metadata;
|
||||
|
||||
if (this.skills.has(id)) {
|
||||
logger.warn({ skillId: id }, '[SkillRegistry] Skill already registered, overwriting');
|
||||
logger.warn('[SkillRegistry] Skill already registered, overwriting', { skillId: id });
|
||||
}
|
||||
|
||||
this.skills.set(id, skill);
|
||||
logger.info({ skillId: id, version }, '[SkillRegistry] Skill registered');
|
||||
logger.info('[SkillRegistry] Skill registered', { skillId: id, version });
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -92,7 +92,7 @@ class SkillRegistryClass {
|
||||
unregister(id: string): boolean {
|
||||
const result = this.skills.delete(id);
|
||||
if (result) {
|
||||
logger.info({ skillId: id }, '[SkillRegistry] Skill unregistered');
|
||||
logger.info('[SkillRegistry] Skill unregistered', { skillId: id });
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -118,7 +118,7 @@ class SkillRegistryClass {
|
||||
*/
|
||||
markInitialized(): void {
|
||||
this.initialized = true;
|
||||
logger.info({ skillCount: this.size }, '[SkillRegistry] Registry initialized');
|
||||
logger.info('[SkillRegistry] Registry initialized', { skillCount: this.size });
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -111,7 +111,7 @@ export interface ForensicsResult {
|
||||
* RVW 模块扩展字段
|
||||
*/
|
||||
export interface RvwContextExtras {
|
||||
documentPath: string;
|
||||
documentPath?: string; // 可选:DataForensicsSkill 需要,Editorial/Methodology 不需要
|
||||
documentContent: string;
|
||||
documentMeta?: DocumentMeta;
|
||||
tables?: TableData[];
|
||||
|
||||
@@ -20,6 +20,20 @@ import {
|
||||
} from '../core/types.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
|
||||
* Return type of a skill's execute method.
|
||||
* It does not need to include skillId, skillName, startedAt, completedAt, executionTime.
|
||||
* Per the original note, those fields are filled in automatically by BaseSkill.run().
|
||||
*/
|
||||
export type ExecuteResult = {
|
||||
status: 'success' | 'warning' | 'error';
|
||||
score?: number;
|
||||
scoreLabel?: string;
|
||||
issues: SkillResult['issues'];
|
||||
data?: unknown;
|
||||
error?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Skill 基类
|
||||
* 使用泛型支持不同上下文和配置类型
|
||||
@@ -39,11 +53,12 @@ export abstract class BaseSkill<
|
||||
|
||||
/**
|
||||
* 子类实现具体逻辑
|
||||
* 返回值不需要包含 skillId, skillName, startedAt, completedAt, executionTime
|
||||
*/
|
||||
abstract execute(
|
||||
context: TContext,
|
||||
config?: TConfig
|
||||
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>>;
|
||||
): Promise<ExecuteResult>;
|
||||
|
||||
/**
|
||||
* 执行入口(统一处理日志、计时、配置验证等)
|
||||
@@ -52,10 +67,10 @@ export abstract class BaseSkill<
|
||||
const startedAt = new Date();
|
||||
const startTime = Date.now();
|
||||
|
||||
logger.info({
|
||||
logger.info(`[${this.metadata.id}] Starting execution`, {
|
||||
skillId: this.metadata.id,
|
||||
taskId: context.taskId,
|
||||
}, `[${this.metadata.id}] Starting execution`);
|
||||
});
|
||||
|
||||
try {
|
||||
// 配置验证(使用 Zod)
|
||||
@@ -64,13 +79,13 @@ export abstract class BaseSkill<
|
||||
const result = await this.execute(context, validatedConfig);
|
||||
const executionTime = Date.now() - startTime;
|
||||
|
||||
logger.info({
|
||||
logger.info(`[${this.metadata.id}] Execution completed`, {
|
||||
skillId: this.metadata.id,
|
||||
taskId: context.taskId,
|
||||
status: result.status,
|
||||
executionTime,
|
||||
issueCount: result.issues.length,
|
||||
}, `[${this.metadata.id}] Execution completed`);
|
||||
});
|
||||
|
||||
return {
|
||||
...result,
|
||||
@@ -90,15 +105,15 @@ export abstract class BaseSkill<
|
||||
: SkillErrorCodes.SKILL_EXECUTION_ERROR;
|
||||
|
||||
const errorMessage = isValidationError
|
||||
? `配置验证失败: ${(error as z.ZodError).errors.map(e => e.message).join(', ')}`
|
||||
? `配置验证失败: ${(error as z.ZodError).issues.map((e: z.ZodIssue) => e.message).join(', ')}`
|
||||
: `执行失败: ${error instanceof Error ? error.message : String(error)}`;
|
||||
|
||||
logger.error({
|
||||
logger.error(`[${this.metadata.id}] Execution failed`, {
|
||||
skillId: this.metadata.id,
|
||||
taskId: context.taskId,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
errorType,
|
||||
}, `[${this.metadata.id}] Execution failed`);
|
||||
});
|
||||
|
||||
return {
|
||||
skillId: this.metadata.id,
|
||||
|
||||
@@ -8,11 +8,10 @@
|
||||
* @since 2026-02-18
|
||||
*/
|
||||
|
||||
import { BaseSkill } from './BaseSkill.js';
|
||||
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
|
||||
import {
|
||||
SkillMetadata,
|
||||
SkillContext,
|
||||
SkillResult,
|
||||
DataForensicsConfigSchema,
|
||||
DataForensicsConfig,
|
||||
ForensicsResult,
|
||||
@@ -23,19 +22,12 @@ import {
|
||||
IExtractionClient,
|
||||
ForensicsResult as ClientForensicsResult,
|
||||
} from '../../../../common/document/ExtractionClient.js';
|
||||
import { storage } from '../../../../common/storage/index.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
|
||||
* 安全:允许的文件存储路径前缀
|
||||
*/
|
||||
const ALLOWED_PATH_PREFIXES = [
|
||||
'/app/uploads/', // Docker 容器内路径
|
||||
'D:\\MyCursor\\', // 开发环境 Windows
|
||||
'D:/MyCursor/', // 开发环境 Windows (forward slash)
|
||||
'/tmp/rvw-uploads/', // 临时目录
|
||||
'C:\\Users\\', // Windows 用户目录
|
||||
'/home/', // Linux 用户目录
|
||||
];
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { randomUUID } from 'crypto';
|
||||
|
||||
/**
|
||||
* 数据侦探 Skill
|
||||
@@ -76,39 +68,35 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
|
||||
|
||||
/**
|
||||
* 前置检查
|
||||
* 增加路径安全验证(防止路径遍历攻击)
|
||||
* 验证 documentPath(OSS storage key)格式
|
||||
*/
|
||||
canRun(context: SkillContext): boolean {
|
||||
if (!context.documentPath) {
|
||||
logger.warn({ taskId: context.taskId }, '[DataForensicsSkill] No document path');
|
||||
logger.warn('[DataForensicsSkill] No document path (storageKey)', { taskId: context.taskId });
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!context.documentPath.toLowerCase().endsWith('.docx')) {
|
||||
logger.info({ taskId: context.taskId }, '[DataForensicsSkill] Not a .docx file, skipping');
|
||||
logger.info('[DataForensicsSkill] Not a .docx file, skipping', { taskId: context.taskId });
|
||||
return false;
|
||||
}
|
||||
|
||||
// 安全检查:路径白名单
|
||||
const normalizedPath = context.documentPath.replace(/\\/g, '/');
|
||||
const isPathAllowed = ALLOWED_PATH_PREFIXES.some(prefix => {
|
||||
const normalizedPrefix = prefix.replace(/\\/g, '/');
|
||||
return normalizedPath.startsWith(normalizedPrefix);
|
||||
});
|
||||
|
||||
if (!isPathAllowed) {
|
||||
logger.error({
|
||||
// 安全检查:OSS key 格式验证(tenants/xxx/users/xxx/rvw/xxx/xxx.docx)
|
||||
const isOssKey = context.documentPath.startsWith('tenants/') ||
|
||||
context.documentPath.startsWith('temp/');
|
||||
|
||||
if (!isOssKey) {
|
||||
logger.warn('[DataForensicsSkill] Invalid storage key format', {
|
||||
taskId: context.taskId,
|
||||
documentPath: '[REDACTED]', // 不记录完整路径
|
||||
}, '[DataForensicsSkill] Document path not in allowed prefixes (security check)');
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
// 检查是否包含路径遍历
|
||||
if (context.documentPath.includes('..')) {
|
||||
logger.error({
|
||||
logger.error('[DataForensicsSkill] Path traversal detected (security check)', {
|
||||
taskId: context.taskId,
|
||||
}, '[DataForensicsSkill] Path traversal detected (security check)');
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -117,23 +105,46 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
|
||||
|
||||
/**
|
||||
* 执行数据侦探
|
||||
* 从 OSS 下载文件到临时目录,然后调用 Python 服务分析
|
||||
*/
|
||||
async execute(
|
||||
context: SkillContext,
|
||||
config?: DataForensicsConfig
|
||||
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
|
||||
): Promise<ExecuteResult> {
|
||||
const checkLevel = config?.checkLevel || 'L1_L2_L25';
|
||||
const tolerancePercent = config?.tolerancePercent || 0.1;
|
||||
const storageKey = context.documentPath!;
|
||||
|
||||
logger.info({
|
||||
logger.info('[DataForensicsSkill] Starting analysis', {
|
||||
taskId: context.taskId,
|
||||
storageKey,
|
||||
checkLevel,
|
||||
tolerancePercent,
|
||||
}, '[DataForensicsSkill] Starting analysis');
|
||||
});
|
||||
|
||||
// 创建临时文件路径
|
||||
const tempDir = os.tmpdir();
|
||||
const tempFilename = `rvw-${randomUUID()}.docx`;
|
||||
const tempFilePath = path.join(tempDir, tempFilename);
|
||||
|
||||
try {
|
||||
// 使用依赖注入的 client
|
||||
const result = await this.extractionClient.analyzeDocx(context.documentPath, {
|
||||
// 1. 从 OSS 下载文件到临时目录
|
||||
logger.info('[DataForensicsSkill] Downloading file from storage', {
|
||||
taskId: context.taskId,
|
||||
storageKey,
|
||||
tempFilePath,
|
||||
});
|
||||
|
||||
const fileBuffer = await storage.download(storageKey);
|
||||
await fs.writeFile(tempFilePath, fileBuffer);
|
||||
|
||||
logger.info('[DataForensicsSkill] File downloaded successfully', {
|
||||
taskId: context.taskId,
|
||||
fileSize: fileBuffer.length,
|
||||
});
|
||||
|
||||
// 2. 调用 Python 服务分析临时文件
|
||||
const result = await this.extractionClient.analyzeDocx(tempFilePath, {
|
||||
checkLevel,
|
||||
tolerancePercent,
|
||||
});
|
||||
@@ -159,13 +170,13 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
|
||||
score = 100;
|
||||
}
|
||||
|
||||
logger.info({
|
||||
logger.info('[DataForensicsSkill] Analysis completed', {
|
||||
taskId: context.taskId,
|
||||
tableCount: forensicsResult.summary.totalTables,
|
||||
issueCount: forensicsResult.summary.totalIssues,
|
||||
errorCount: forensicsResult.summary.errorCount,
|
||||
warningCount: forensicsResult.summary.warningCount,
|
||||
}, '[DataForensicsSkill] Analysis completed');
|
||||
});
|
||||
|
||||
return {
|
||||
status,
|
||||
@@ -178,10 +189,10 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
|
||||
// 特殊处理:Python 服务不可用时的优雅降级
|
||||
const errorObj = error as NodeJS.ErrnoException;
|
||||
if (errorObj.code === 'ECONNREFUSED' || errorObj.code === 'ETIMEDOUT') {
|
||||
logger.warn({
|
||||
logger.warn('[DataForensicsSkill] Python service unavailable, degrading gracefully', {
|
||||
taskId: context.taskId,
|
||||
error: errorObj.message,
|
||||
}, '[DataForensicsSkill] Python service unavailable, degrading gracefully');
|
||||
});
|
||||
|
||||
return {
|
||||
status: 'warning',
|
||||
@@ -199,39 +210,132 @@ export class DataForensicsSkill extends BaseSkill<SkillContext, DataForensicsCon
|
||||
};
|
||||
}
|
||||
|
||||
// 存储服务错误的优雅降级
|
||||
if (errorObj.message?.includes('storage') || errorObj.message?.includes('OSS')) {
|
||||
logger.warn('[DataForensicsSkill] Storage service error, degrading gracefully', {
|
||||
taskId: context.taskId,
|
||||
error: errorObj.message,
|
||||
});
|
||||
|
||||
return {
|
||||
status: 'warning',
|
||||
issues: [{
|
||||
severity: 'WARNING',
|
||||
type: 'STORAGE_ERROR',
|
||||
message: '文件存储服务暂不可用,已跳过表格验证。',
|
||||
}],
|
||||
data: {
|
||||
tables: [],
|
||||
methods: [],
|
||||
issues: [],
|
||||
summary: { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 1 },
|
||||
} as ForensicsResult,
|
||||
};
|
||||
}
|
||||
|
||||
throw error;
|
||||
} finally {
|
||||
// 3. 清理临时文件
|
||||
try {
|
||||
await fs.unlink(tempFilePath);
|
||||
logger.debug('[DataForensicsSkill] Temp file cleaned up', { tempFilePath });
|
||||
} catch {
|
||||
// 忽略清理错误
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 转换 Python 返回的结果为内部格式
|
||||
*
|
||||
* Python 返回格式:
|
||||
* {
|
||||
* success: boolean,
|
||||
* methodsFound: string[],
|
||||
* tables: [{ id, issues: [...], ... }],
|
||||
* totalIssues: number,
|
||||
* errorCount: number,
|
||||
* warningCount: number
|
||||
* }
|
||||
*
|
||||
* 转换为内部格式:
|
||||
* {
|
||||
* tables: [...],
|
||||
* methods: [...],
|
||||
* issues: [...], // 从 tables[].issues 收集
|
||||
* summary: { totalTables, totalIssues, errorCount, warningCount }
|
||||
* }
|
||||
*/
|
||||
private convertResult(result: ClientForensicsResult): ForensicsResult {
|
||||
const issues: Issue[] = result.issues.map(issue => ({
|
||||
severity: issue.severity,
|
||||
type: issue.type,
|
||||
message: issue.message,
|
||||
location: issue.location,
|
||||
evidence: issue.evidence,
|
||||
}));
|
||||
// 防御性检查
|
||||
const rawTables = result.tables || [];
|
||||
|
||||
// Python 返回的是 methodsFound(驼峰),也可能是 methods
|
||||
const rawMethods = (result as any).methodsFound || result.methods || [];
|
||||
|
||||
// 从 tables[].issues 中收集所有 issues
|
||||
const allIssues: Issue[] = [];
|
||||
for (const table of rawTables) {
|
||||
const tableIssues = (table as any).issues || [];
|
||||
for (const issue of tableIssues) {
|
||||
allIssues.push({
|
||||
severity: issue.severity,
|
||||
type: issue.type,
|
||||
message: issue.message,
|
||||
location: issue.location,
|
||||
evidence: issue.evidence,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 也检查顶层的 issues(兼容旧格式)
|
||||
const topLevelIssues = result.issues || [];
|
||||
for (const issue of topLevelIssues) {
|
||||
allIssues.push({
|
||||
severity: issue.severity,
|
||||
type: issue.type,
|
||||
message: issue.message,
|
||||
location: issue.location,
|
||||
evidence: issue.evidence,
|
||||
});
|
||||
}
|
||||
|
||||
// 构建 summary(从 Python 返回的顶层字段或 summary 对象)
|
||||
const pyResult = result as any;
|
||||
const summary = result.summary || {
|
||||
totalTables: pyResult.totalTables ?? rawTables.length,
|
||||
totalIssues: pyResult.totalIssues ?? allIssues.length,
|
||||
errorCount: pyResult.errorCount ?? allIssues.filter(i => i.severity === 'ERROR').length,
|
||||
warningCount: pyResult.warningCount ?? allIssues.filter(i => i.severity === 'WARNING').length,
|
||||
};
|
||||
|
||||
return {
|
||||
tables: result.tables.map(t => ({
|
||||
id: t.id,
|
||||
caption: t.caption,
|
||||
data: t.data,
|
||||
html: t.html,
|
||||
headers: t.headers,
|
||||
rowCount: t.rowCount,
|
||||
colCount: t.colCount,
|
||||
})),
|
||||
methods: result.methods,
|
||||
issues,
|
||||
tables: rawTables.map(t => {
|
||||
const tableIssues = ((t as any).issues || []).map((issue: any) => ({
|
||||
severity: issue.severity,
|
||||
type: issue.type,
|
||||
message: issue.message,
|
||||
location: issue.location,
|
||||
evidence: issue.evidence,
|
||||
}));
|
||||
return {
|
||||
id: t.id || '',
|
||||
caption: t.caption || '',
|
||||
data: t.data || [],
|
||||
html: t.html || '',
|
||||
headers: t.headers || [],
|
||||
rowCount: t.rowCount || 0,
|
||||
colCount: t.colCount || 0,
|
||||
issues: tableIssues, // 保留每个表格的 issues
|
||||
};
|
||||
}),
|
||||
methods: rawMethods,
|
||||
issues: allIssues,
|
||||
summary: {
|
||||
totalTables: result.summary.totalTables,
|
||||
totalIssues: result.summary.totalIssues,
|
||||
errorCount: result.summary.errorCount,
|
||||
warningCount: result.summary.warningCount,
|
||||
totalTables: summary.totalTables ?? rawTables.length,
|
||||
totalIssues: summary.totalIssues ?? allIssues.length,
|
||||
errorCount: summary.errorCount ?? 0,
|
||||
warningCount: summary.warningCount ?? 0,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -8,17 +8,16 @@
|
||||
* @since 2026-02-18
|
||||
*/
|
||||
|
||||
import { BaseSkill } from './BaseSkill.js';
|
||||
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
|
||||
import {
|
||||
SkillMetadata,
|
||||
SkillContext,
|
||||
SkillResult,
|
||||
EditorialConfigSchema,
|
||||
EditorialConfig,
|
||||
Issue,
|
||||
} from '../core/types.js';
|
||||
import { reviewEditorialStandards } from '../../services/editorialService.js';
|
||||
import { EditorialReview, EditorialItem } from '../../types/index.js';
|
||||
import { EditorialReview } from '../../types/index.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
|
||||
@@ -45,7 +44,7 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
|
||||
inputs: ['documentContent'],
|
||||
outputs: ['editorialResult'],
|
||||
|
||||
defaultTimeout: 45000, // 45 秒
|
||||
defaultTimeout: 180000, // 180 秒(LLM 调用可能较慢)
|
||||
retryable: true,
|
||||
|
||||
icon: '📋',
|
||||
@@ -57,18 +56,18 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
|
||||
*/
|
||||
canRun(context: SkillContext): boolean {
|
||||
if (!context.documentContent || context.documentContent.trim().length === 0) {
|
||||
logger.warn({ taskId: context.taskId }, '[EditorialSkill] No document content');
|
||||
logger.warn('[EditorialSkill] No document content', { taskId: context.taskId });
|
||||
return false;
|
||||
}
|
||||
|
||||
// 资源限制检查
|
||||
const maxLength = DEFAULT_MAX_CONTENT_LENGTH;
|
||||
if (context.documentContent.length > maxLength) {
|
||||
logger.warn({
|
||||
logger.warn('[EditorialSkill] Content too long', {
|
||||
taskId: context.taskId,
|
||||
contentLength: context.documentContent.length,
|
||||
limit: maxLength,
|
||||
}, '[EditorialSkill] Content too long');
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -81,23 +80,23 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
|
||||
async execute(
|
||||
context: SkillContext,
|
||||
config?: EditorialConfig
|
||||
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
|
||||
): Promise<ExecuteResult> {
|
||||
const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH;
|
||||
|
||||
logger.info({
|
||||
logger.info('[EditorialSkill] Starting evaluation', {
|
||||
taskId: context.taskId,
|
||||
contentLength: context.documentContent.length,
|
||||
}, '[EditorialSkill] Starting evaluation');
|
||||
});
|
||||
|
||||
// 截断过长内容
|
||||
let content = context.documentContent;
|
||||
if (content.length > maxContentLength) {
|
||||
content = content.substring(0, maxContentLength);
|
||||
logger.warn({
|
||||
logger.warn('[EditorialSkill] Content truncated', {
|
||||
taskId: context.taskId,
|
||||
originalLength: context.documentContent.length,
|
||||
truncatedLength: maxContentLength,
|
||||
}, '[EditorialSkill] Content truncated');
|
||||
});
|
||||
}
|
||||
|
||||
// 调用现有 editorialService
|
||||
@@ -119,13 +118,13 @@ export class EditorialSkill extends BaseSkill<SkillContext, EditorialConfig> {
|
||||
status = 'success';
|
||||
}
|
||||
|
||||
logger.info({
|
||||
logger.info('[EditorialSkill] Evaluation completed', {
|
||||
taskId: context.taskId,
|
||||
score: result.overall_score,
|
||||
itemCount: result.items.length,
|
||||
errorCount,
|
||||
warningCount,
|
||||
}, '[EditorialSkill] Evaluation completed');
|
||||
});
|
||||
|
||||
return {
|
||||
status,
|
||||
|
||||
@@ -8,17 +8,16 @@
|
||||
* @since 2026-02-18
|
||||
*/
|
||||
|
||||
import { BaseSkill } from './BaseSkill.js';
|
||||
import { BaseSkill, ExecuteResult } from './BaseSkill.js';
|
||||
import {
|
||||
SkillMetadata,
|
||||
SkillContext,
|
||||
SkillResult,
|
||||
MethodologyConfigSchema,
|
||||
MethodologyConfig,
|
||||
Issue,
|
||||
} from '../core/types.js';
|
||||
import { reviewMethodology } from '../../services/methodologyService.js';
|
||||
import { MethodologyReview, MethodologyIssue } from '../../types/index.js';
|
||||
import { MethodologyReview } from '../../types/index.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
|
||||
@@ -45,7 +44,7 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
|
||||
inputs: ['documentContent', 'methods'],
|
||||
outputs: ['methodologyResult'],
|
||||
|
||||
defaultTimeout: 45000, // 45 秒
|
||||
defaultTimeout: 180000, // 180 秒(方法学分析需要更长时间)
|
||||
retryable: true,
|
||||
|
||||
icon: '🔬',
|
||||
@@ -57,18 +56,18 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
|
||||
*/
|
||||
canRun(context: SkillContext): boolean {
|
||||
if (!context.documentContent || context.documentContent.trim().length === 0) {
|
||||
logger.warn({ taskId: context.taskId }, '[MethodologySkill] No document content');
|
||||
logger.warn('[MethodologySkill] No document content', { taskId: context.taskId });
|
||||
return false;
|
||||
}
|
||||
|
||||
// 资源限制检查
|
||||
const maxLength = DEFAULT_MAX_CONTENT_LENGTH;
|
||||
if (context.documentContent.length > maxLength) {
|
||||
logger.warn({
|
||||
logger.warn('[MethodologySkill] Content too long', {
|
||||
taskId: context.taskId,
|
||||
contentLength: context.documentContent.length,
|
||||
limit: maxLength,
|
||||
}, '[MethodologySkill] Content too long');
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -81,34 +80,34 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
|
||||
async execute(
|
||||
context: SkillContext,
|
||||
config?: MethodologyConfig
|
||||
): Promise<Omit<SkillResult, 'skillId' | 'skillName' | 'startedAt' | 'completedAt'>> {
|
||||
): Promise<ExecuteResult> {
|
||||
const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH;
|
||||
|
||||
logger.info({
|
||||
logger.info('[MethodologySkill] Starting evaluation', {
|
||||
taskId: context.taskId,
|
||||
contentLength: context.documentContent.length,
|
||||
detectedMethods: context.methods?.length || 0,
|
||||
}, '[MethodologySkill] Starting evaluation');
|
||||
});
|
||||
|
||||
// 截断过长内容
|
||||
let content = context.documentContent;
|
||||
if (content.length > maxContentLength) {
|
||||
content = content.substring(0, maxContentLength);
|
||||
logger.warn({
|
||||
logger.warn('[MethodologySkill] Content truncated', {
|
||||
taskId: context.taskId,
|
||||
originalLength: context.documentContent.length,
|
||||
truncatedLength: maxContentLength,
|
||||
}, '[MethodologySkill] Content truncated');
|
||||
});
|
||||
}
|
||||
|
||||
// 如果 DataForensicsSkill 提取了统计方法,可以添加到 prompt 中
|
||||
// 目前 reviewMethodology 不支持此参数,留作未来扩展
|
||||
const methodsHint = context.methods?.join(', ') || '';
|
||||
if (methodsHint) {
|
||||
logger.debug({
|
||||
logger.debug('[MethodologySkill] Using detected methods as hint', {
|
||||
taskId: context.taskId,
|
||||
methodsHint,
|
||||
}, '[MethodologySkill] Using detected methods as hint');
|
||||
});
|
||||
}
|
||||
|
||||
// 调用现有 methodologyService
|
||||
@@ -130,13 +129,13 @@ export class MethodologySkill extends BaseSkill<SkillContext, MethodologyConfig>
|
||||
status = 'success';
|
||||
}
|
||||
|
||||
logger.info({
|
||||
logger.info('[MethodologySkill] Evaluation completed', {
|
||||
taskId: context.taskId,
|
||||
score: result.overall_score,
|
||||
partCount: result.parts.length,
|
||||
errorCount,
|
||||
warningCount,
|
||||
}, '[MethodologySkill] Evaluation completed');
|
||||
});
|
||||
|
||||
return {
|
||||
status,
|
||||
|
||||
103
backend/src/modules/rvw/skills/test-skills.ts
Normal file
103
backend/src/modules/rvw/skills/test-skills.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* RVW Skills 架构 - 快速验证脚本
|
||||
*
|
||||
* 运行方式: npx tsx src/modules/rvw/skills/test-skills.ts
|
||||
*/
|
||||
|
||||
import { SkillRegistry } from './core/registry.js';
|
||||
import { ProfileResolver, DEFAULT_PROFILE } from './core/profile.js';
|
||||
import { ContextBuilder } from './core/context.js';
|
||||
import { SkillExecutor } from './core/executor.js';
|
||||
import { registerBuiltinSkills } from './library/index.js';
|
||||
|
||||
// Register the built-in skills at module load, before main() queries SkillRegistry.
|
||||
registerBuiltinSkills();
|
||||
|
||||
/**
 * Smoke-checks the core pieces of the RVW Skills architecture and prints the
 * findings to stdout: registry contents, profile resolution, context building
 * and every registered skill's `canRun` verdict for a sample context.
 *
 * Nothing is asserted; missing skills are only reported as console warnings.
 */
async function main() {
  const banner = '='.repeat(60);
  const divider = '-'.repeat(40);

  console.log(banner);
  console.log('RVW Skills V2.0 架构验证');
  console.log(banner);

  // 1. SkillRegistry: summary figures, then one line per registered skill.
  console.log('\n📋 1. SkillRegistry 验证');
  console.log(divider);

  const registrySummary = SkillRegistry.getSummary();
  console.log(` 已初始化: ${registrySummary.initialized}`);
  console.log(` 注册 Skills 数量: ${registrySummary.skillCount}`);
  console.log(` 分类统计:`, registrySummary.categories);

  const registeredMetadata = SkillRegistry.getAllMetadata();
  console.log('\n 已注册的 Skills:');
  for (const { id, name, version } of registeredMetadata) {
    console.log(` - ${id} (${name}) v${version}`);
  }

  // 2. ProfileResolver: the default profile, then one built from agent names.
  console.log('\n📋 2. ProfileResolver 验证');
  console.log(divider);

  const defaultProfile = ProfileResolver.resolve('default');
  console.log(` 默认 Profile: ${defaultProfile.name}`);
  console.log(` Pipeline 长度: ${defaultProfile.pipeline.length}`);
  console.log(` Pipeline Skills:`);
  for (const { skillId, enabled, optional } of defaultProfile.pipeline) {
    console.log(` - ${skillId} (enabled: ${enabled}, optional: ${optional})`);
  }

  const dynamicProfile = ProfileResolver.resolveFromAgents(['editorial', 'methodology']);
  console.log(`\n 动态 Profile (editorial + methodology):`);
  const enabledSkillIds = dynamicProfile.pipeline
    .filter(step => step.enabled)
    .map(step => step.skillId)
    .join(', ');
  console.log(` 启用的 Skills: ${enabledSkillIds}`);

  // 3. ContextBuilder: assemble a sample context and echo its fields.
  console.log('\n📋 3. ContextBuilder 验证');
  console.log(divider);

  const sampleContext = new ContextBuilder()
    .taskId('test-task-123')
    .userId('test-user-456')
    .documentPath('D:/MyCursor/test/document.docx') // uses an allowed path prefix
    .documentContent('这是一篇测试论文的内容...')
    .profile(defaultProfile)
    .build();

  console.log(` taskId: ${sampleContext.taskId}`);
  console.log(` userId: ${sampleContext.userId}`);
  console.log(` documentPath: ${sampleContext.documentPath}`);
  console.log(` documentContent 长度: ${sampleContext.documentContent.length}`);

  // 4. canRun: a skill that does not define canRun counts as runnable.
  console.log('\n📋 4. Skill canRun 检查');
  console.log(divider);

  for (const skill of SkillRegistry.getAll()) {
    let runnable = true;
    if (skill.canRun) {
      runnable = skill.canRun(sampleContext);
    }
    console.log(` ${skill.metadata.id}: canRun = ${runnable}`);
  }

  // 5. Wrap-up banner followed by warnings about anything missing.
  console.log(`\n${banner}`);
  console.log('✅ Skills 架构核心组件验证完成!');
  console.log(banner);

  if (registrySummary.skillCount < 3) {
    console.log('\n⚠️ 警告: 注册的 Skills 数量少于预期 (预期 3 个)');
  }

  for (const expectedSkill of ['DataForensicsSkill', 'EditorialSkill', 'MethodologySkill']) {
    if (!SkillRegistry.has(expectedSkill)) {
      console.log(`⚠️ 警告: ${expectedSkill} 未注册`);
    }
  }

  console.log('\n下一步: 启动后端服务,通过 API 测试完整流程');
}
|
||||
|
||||
// Run the verification. A failure is printed to stderr AND reflected in the
// process exit code, so a shell or CI step invoking this script can detect it
// (logging alone would still exit with status 0).
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -65,6 +65,45 @@ export interface MethodologyReview {
|
||||
parts: MethodologyPart[];
|
||||
}
|
||||
|
||||
// ==================== 数据验证(DataForensics) ====================
|
||||
|
||||
/** A single finding reported by the data-forensics (DataForensics) validation. */
export interface ForensicsIssue {
|
||||
/** Severity level of the finding. */
severity: 'ERROR' | 'WARNING' | 'INFO';
|
||||
/** Issue type code (presumably values such as `ARITHMETIC_TOTAL` — TODO confirm the full set). */
type: string;
|
||||
/** Description of the finding. */
message: string;
|
||||
/** Optional pointer to where the finding applies; every field inside is optional too. */
location?: {
|
||||
tableId?: string;
|
||||
// NOTE(review): the reference format is not defined here — confirm against the producer.
cellRef?: string;
|
||||
paragraph?: number;
|
||||
};
|
||||
/** Optional free-form key/value data attached to the finding. */
evidence?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/** One table of a forensics result, together with the issues attached to it. */
export interface ForensicsTable {
|
||||
id: string;
|
||||
caption: string;
|
||||
/** Table markup as an HTML string. */
html: string;
|
||||
/** Cell texts as a 2-D array (presumably row-major — TODO confirm). */
data: string[][];
|
||||
headers: string[];
|
||||
rowCount: number;
|
||||
colCount: number;
|
||||
/** Optional flag; presumably set when the table was not validated — TODO confirm. */
skipped?: boolean;
|
||||
/** Optional explanation that accompanies `skipped`. */
skipReason?: string;
|
||||
/** Issues that belong to this table. */
issues: ForensicsIssue[];
|
||||
}
|
||||
|
||||
/** Complete data-forensics result: tables, methods, issues and summary counters. */
export interface ForensicsResult {
|
||||
tables: ForensicsTable[];
|
||||
/** Names of the methods reported by the analysis (presumably statistical methods — TODO confirm). */
methods: string[];
|
||||
/** Result-level list of issues (each table additionally carries its own `issues`). */
issues: ForensicsIssue[];
|
||||
/** Aggregate counters for the result. */
summary: {
|
||||
totalTables: number;
|
||||
totalIssues: number;
|
||||
// presumably the number of issues with severity 'ERROR' — verify against the producer
errorCount: number;
|
||||
// presumably the number of issues with severity 'WARNING' — verify against the producer
warningCount: number;
|
||||
};
|
||||
}
|
||||
|
||||
// ==================== 请求参数 ====================
|
||||
|
||||
/**
|
||||
@@ -142,6 +181,7 @@ export interface ReviewReport {
|
||||
overallScore?: number;
|
||||
editorialReview?: EditorialReview;
|
||||
methodologyReview?: MethodologyReview;
|
||||
forensicsResult?: ForensicsResult;
|
||||
completedAt?: Date;
|
||||
durationSeconds?: number;
|
||||
}
|
||||
|
||||
@@ -65,16 +65,50 @@ function ensureSkillsInitialized() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fails review tasks that were left in an in-progress status.
 *
 * Invoked on startup during worker registration. A task that was executing
 * when the service restarted keeps its `reviewing*` status, so every such task
 * is switched to `failed` with a message asking the user to resubmit.
 *
 * Best-effort: any error is logged and not rethrown.
 *
 * NOTE(review): the filter matches on status only and is not scoped to this
 * process — confirm that no other worker instance can be running tasks when
 * this executes.
 */
async function cleanupStuckTasks(): Promise<void> {
  try {
    const { count } = await prisma.reviewTask.updateMany({
      where: {
        status: { in: ['reviewing', 'reviewing_editorial', 'reviewing_methodology'] },
      },
      data: {
        status: 'failed',
        errorMessage: '服务重启导致任务中断,请重新提交',
      },
    });

    // Stay silent when nothing had to be repaired.
    if (!(count > 0)) {
      return;
    }

    logger.warn('[reviewWorker] Cleaned up stuck tasks on startup', { count });
    console.log(`⚠️ 启动时清理了 ${count} 个卡住的任务`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error('[reviewWorker] Failed to cleanup stuck tasks', { error: reason });
  }
}
|
||||
|
||||
/**
|
||||
* 注册审查 Worker 到队列
|
||||
*
|
||||
* 此函数应在应用启动时调用(index.ts)
|
||||
*/
|
||||
export function registerReviewWorker() {
|
||||
export async function registerReviewWorker() {
|
||||
logger.info('[reviewWorker] Registering reviewWorker', {
|
||||
useSkillsArchitecture: USE_SKILLS_ARCHITECTURE,
|
||||
});
|
||||
|
||||
// 清理卡住的任务
|
||||
await cleanupStuckTasks();
|
||||
|
||||
// 初始化 Skills
|
||||
ensureSkillsInitialized();
|
||||
|
||||
@@ -113,6 +147,15 @@ export function registerReviewWorker() {
|
||||
},
|
||||
});
|
||||
|
||||
// 调试日志:检查 filePath
|
||||
logger.info('[reviewWorker] Task info from DB', {
|
||||
taskId,
|
||||
filePath: existingTask?.filePath || '(empty)',
|
||||
fileName: existingTask?.fileName,
|
||||
fileSize: existingTask?.fileSize,
|
||||
});
|
||||
console.log(` 📁 filePath: ${existingTask?.filePath || '(空)'}`);
|
||||
|
||||
if (existingTask?.status === 'completed' && existingTask.completedAt) {
|
||||
logger.warn('[reviewWorker] ⚠️ Task already completed, skipping', {
|
||||
jobId: job.id,
|
||||
@@ -223,8 +266,7 @@ export function registerReviewWorker() {
|
||||
// ========================================
|
||||
logger.info('[reviewWorker] Updating task result', { taskId });
|
||||
|
||||
// 构建 Skills 执行摘要(V2.0 新增,存储到 picoExtract 字段)
|
||||
// 注意:picoExtract 字段暂时复用,未来迁移后移到专用字段
|
||||
// 构建 Skills 执行摘要(V2.0 新增,存储到专用 contextData 字段)
|
||||
const skillsContext = USE_SKILLS_ARCHITECTURE && skillsSummary
|
||||
? {
|
||||
version: '2.0',
|
||||
@@ -246,7 +288,7 @@ export function registerReviewWorker() {
|
||||
status: 'completed',
|
||||
editorialReview: editorialResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
|
||||
methodologyReview: methodologyResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
|
||||
picoExtract: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
|
||||
contextData: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull,
|
||||
overallScore,
|
||||
editorialScore: editorialScore,
|
||||
methodologyScore: methodologyScore,
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
# AIclinicalresearch 系统当前状态与开发指南
|
||||
|
||||
> **文档版本:** v5.1
|
||||
> **文档版本:** v5.2
|
||||
> **创建日期:** 2025-11-28
|
||||
> **维护者:** 开发团队
|
||||
> **最后更新:** 2026-02-18
|
||||
> **🎉 重大里程碑:**
|
||||
> - **2026-02-18:RVW V2.0 Week 3 完成!** 统计验证扩展 + 负号归一化 + 文件格式提示 + 用户体验优化
|
||||
> - **2026-02-18:RVW V2.0 Skills 架构完成!** Skills 核心框架 + 3个 Skill 实现 + ReviewWorker 改造
|
||||
> - **2026-02-17:RVW V2.0 "数据侦探" Day 6 完成!** L2统计验证器 + L2.5一致性取证(SE三角验证、SD>Mean)
|
||||
> - **2026-02-08:IIT 事件级质控 V3.1 开发完成!** record+event 独立质控 + 规则动态过滤 + 报告去重 + AI对话增强
|
||||
@@ -18,13 +19,14 @@
|
||||
> - **2026-01-24:Protocol Agent 框架完成!** 可复用Agent框架+5阶段对话流程
|
||||
> - **2026-01-22:OSS 存储集成完成!** 阿里云 OSS 正式接入平台基础层
|
||||
>
|
||||
> **最新进展(RVW V2.0 Skills 架构 2026-02-18):**
|
||||
> **最新进展(RVW V2.0 Week 3 完成 2026-02-18):**
|
||||
> - ✅ **负号归一化**:6 种 Unicode 负号变体支持,防止 float() 崩溃
|
||||
> - ✅ **T 检验验证增强**:智能样本量提取 + subrow 精确高亮
|
||||
> - ✅ **SE 三角/CI-P 验证增强**:多行单元格 subrow 支持
|
||||
> - ✅ **前端翻译映射更新**:6 种新 IssueType 中文翻译
|
||||
> - ✅ **文件格式提示**:PDF/.doc 上传时提示无法数据验证
|
||||
> - ✅ **Skills 核心框架**:types、registry、executor、profile、context
|
||||
> - ✅ **Zod 配置验证**:运行时类型安全
|
||||
> - ✅ **DataForensicsSkill**:依赖注入 + 路径安全 + 优雅降级
|
||||
> - ✅ **EditorialSkill + MethodologySkill**:封装现有服务
|
||||
> - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 架构切换
|
||||
> - ✅ **12 个新文件**:约 1735 行代码
|
||||
> - ✅ **3 个 Skill 实现**:DataForensics、Editorial、Methodology
|
||||
>
|
||||
> **部署状态:** ✅ 生产环境运行中 | 公网地址:http://8.140.53.236/
|
||||
> **REDCap 状态:** ✅ 生产环境运行中 | 地址:https://redcap.xunzhengyixue.com/
|
||||
@@ -67,7 +69,7 @@
|
||||
| **IIT** | IIT Manager Agent | AI驱动IIT研究助手 - 双脑架构+REDCap集成 | ⭐⭐⭐⭐⭐ | 🎉 **事件级质控V3.1完成(设计100%,代码60%)** | **P0** |
|
||||
| **SSA** | 智能统计分析 | 队列/预测模型/RCT分析 | ⭐⭐⭐⭐⭐ | 📋 规划中 | P2 |
|
||||
| **ST** | 统计分析工具 | 100+轻量化统计工具 | ⭐⭐⭐⭐ | 📋 规划中 | P2 |
|
||||
| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Skills架构 + Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0开发中(Week2 Day10完成)** - Skills核心框架+Skill实现+Worker改造 | P1 |
|
||||
| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Skills架构 + Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0 Week3完成(85%)** - 统计验证扩展+负号归一化+文件格式提示+用户体验优化 | P1 |
|
||||
| **ADMIN** | 运营管理端 | Prompt管理、租户管理、用户管理、运营监控、系统知识库 | ⭐⭐⭐⭐⭐ | 🎉 **Phase 4.6完成(88%)** - Prompt知识库集成+动态注入 | **P0** |
|
||||
|
||||
---
|
||||
|
||||
@@ -1,21 +1,30 @@
|
||||
# RVW稿件审查模块 - 当前状态与开发指南
|
||||
|
||||
> **文档版本:** v5.0
|
||||
> **文档版本:** v5.1
|
||||
> **创建日期:** 2026-01-07
|
||||
> **最后更新:** 2026-02-18
|
||||
> **维护者:** 开发团队
|
||||
> **当前状态:** 🚀 **V2.0 "数据侦探" 开发中(Week 2 Day 10 完成)**
|
||||
> **当前状态:** 🚀 **V2.0 "数据侦探" Week 3 完成(统计验证扩展+用户体验优化)**
|
||||
> **文档目的:** 快速了解RVW模块状态,为新AI助手提供上下文
|
||||
>
|
||||
> **🎉 V2.0 进展(2026-02-18):**
|
||||
> **🎉 V2.0 进展(2026-02-18 Week 3):**
|
||||
> - ✅ **负号归一化**:防止 float() 崩溃,覆盖 6 种负号变体
|
||||
> - ✅ **T 检验验证增强**:智能样本量提取 + subrow 精确高亮
|
||||
> - ✅ **SE 三角验证增强**:多行单元格 subrow 支持
|
||||
> - ✅ **CI vs P 值验证增强**:subrow 支持 + 灵活 P 值解析
|
||||
> - ✅ **前端翻译映射**:新增 6 种 IssueType 中文翻译
|
||||
> - ✅ **文件格式提示**:PDF/.doc 上传时提示无法数据验证
|
||||
>
|
||||
> **🎉 V2.0 进展(Week 1-2):**
|
||||
> - ✅ **L1 算术验证器**:行列加总、百分比验证(Day 3)
|
||||
> - ✅ **L2 统计验证器**:CI↔P 值一致性、T检验逆向验证(Day 6)
|
||||
> - ✅ **L2 统计验证器**:CI↔P 值一致性、卡方检验逆向验证(Day 6)
|
||||
> - ✅ **L2.5 一致性取证**:SE三角验证、SD>Mean检查(Day 6 终审提权)
|
||||
> - ✅ **Word 文档解析**:python-docx 表格提取(Day 2)
|
||||
> - ✅ **Word 文档解析**:python-docx 表格提取 + 特殊符号提取(Day 2)
|
||||
> - ✅ **Skills 核心框架**:types、registry、executor、profile、context(Day 7)
|
||||
> - ✅ **DataForensicsSkill**:依赖注入、路径安全、优雅降级(Day 8)
|
||||
> - ✅ **DataForensicsSkill**:OSS 集成、依赖注入、优雅降级(Day 8)
|
||||
> - ✅ **EditorialSkill + MethodologySkill**:封装现有服务(Day 9)
|
||||
> - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 切换(Day 10)
|
||||
> - ✅ **前端数据验证 Tab**:ForensicsReport 组件、精确单元格高亮(Week 3)
|
||||
|
||||
---
|
||||
|
||||
@@ -377,37 +386,50 @@ Content-Type: multipart/form-data
|
||||
| 阶段 | 任务 | 状态 | 完成日期 |
|
||||
|------|------|------|---------|
|
||||
| Week 1 Day 1 | Python 服务搭建 | ✅ 已完成 | 2026-02-12 |
|
||||
| Week 1 Day 2 | Word 表格提取 | ✅ 已完成 | 2026-02-13 |
|
||||
| Week 1 Day 2 | Word 表格提取 + 特殊符号 | ✅ 已完成 | 2026-02-13 |
|
||||
| Week 1 Day 3 | L1 算术验证器 | ✅ 已完成 | 2026-02-14 |
|
||||
| Week 1 Day 4 | 数据结构设计 | ✅ 已完成 | 2026-02-15 |
|
||||
| Week 1 Day 5 | API 集成 | ✅ 已完成 | 2026-02-16 |
|
||||
| **Week 2 Day 6** | **L2 统计验证器 + L2.5 一致性取证** | **✅ 已完成** | **2026-02-17** |
|
||||
| Week 2 Day 7 | Skills 核心框架 | 📋 待开发 | - |
|
||||
| Week 2 Day 8 | DataForensicsSkill | 📋 待开发 | - |
|
||||
| Week 2 Day 9 | EditorialSkill 封装 | 📋 待开发 | - |
|
||||
| Week 2 Day 10 | ReviewService 改造 | 📋 待开发 | - |
|
||||
| Week 2 Day 6 | L2 统计验证器 + L2.5 一致性取证 | ✅ 已完成 | 2026-02-17 |
|
||||
| Week 2 Day 7 | Skills 核心框架 | ✅ 已完成 | 2026-02-18 |
|
||||
| Week 2 Day 8 | DataForensicsSkill | ✅ 已完成 | 2026-02-18 |
|
||||
| Week 2 Day 9 | EditorialSkill 封装 | ✅ 已完成 | 2026-02-18 |
|
||||
| Week 2 Day 10 | ReviewWorker 改造 | ✅ 已完成 | 2026-02-18 |
|
||||
| **Week 3** | **统计验证扩展 + 用户体验优化** | **✅ 已完成** | **2026-02-18** |
|
||||
| Week 4 | 功能测试 + Bug 修复 | 📋 待开始 | - |
|
||||
|
||||
**V2.0 核心功能**:
|
||||
**V2.0 核心功能(已完成)**:
|
||||
- **L1 算术验证**:行列加总、百分比验证
|
||||
- **L2 统计验证**:CI↔P 一致性、T检验逆向、卡方检验
|
||||
- **L2 统计验证**:CI↔P 一致性、T检验逆向、卡方检验(含 subrow 精确高亮)
|
||||
- **L2.5 一致性取证**(终审提权):SE三角验证、SD>Mean检查
|
||||
- **Skills 架构**:Skill Registry、Skill Executor、Journal Profiles
|
||||
- **负号归一化**:6 种 Unicode 负号变体支持
|
||||
- **文件格式提示**:PDF/.doc 无法数据验证的用户提示
|
||||
|
||||
**Week 3 完成内容(2026-02-18)**:
|
||||
- ✅ 负号归一化(防止 float() 崩溃)
|
||||
- ✅ T 检验验证增强(智能样本量提取)
|
||||
- ✅ SE 三角验证增强(subrow 支持)
|
||||
- ✅ CI vs P 值验证增强(subrow 支持)
|
||||
- ✅ 前端翻译映射更新(6 种新 IssueType)
|
||||
- ✅ 文件格式提示(Header、ReportDetail、TaskDetail)
|
||||
|
||||
### 后续版本(V2.1+)
|
||||
|
||||
- [ ] Week 4 功能测试和 Bug 修复
|
||||
- [ ] ANOVA 验证(多组比较)
|
||||
- [ ] 配对 T 检验验证
|
||||
- [ ] 非参数检验(Mann-Whitney、Wilcoxon)
|
||||
- [ ] .doc 格式支持(Pandoc 方案评估)
|
||||
- [ ] Profile 管理 UI(期刊配置界面)
|
||||
- [ ] PDF报告导出优化
|
||||
- [ ] PICO卡片UI实现
|
||||
- [ ] 历史归档UI实现
|
||||
- [ ] L3 高级逻辑推理验证
|
||||
- [ ] 登录页面(独立产品时)
|
||||
- [ ] 审稿人管理系统
|
||||
- [ ] 多轮审稿流程
|
||||
- [ ] 期刊库管理
|
||||
- [ ] 独立产品打包
|
||||
|
||||
---
|
||||
|
||||
**文档版本:** v3.2
|
||||
**最后更新:** 2026-01-10
|
||||
**当前状态:** ✅ Phase 1-6 完成,模块95%可用,Schema已隔离
|
||||
**下一步:** 生产环境部署测试
|
||||
**文档版本:** v5.1
|
||||
**最后更新:** 2026-02-18
|
||||
**当前状态:** 🚀 V2.0 "数据侦探" Week 3 完成,Skills 架构 + 统计验证 + 用户体验优化
|
||||
**下一步:** Week 4 功能测试和 Bug 修复
|
||||
|
||||
Binary file not shown.
184
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/2026-02-18 统计验证扩展与用户体验优化.md
Normal file
184
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/2026-02-18 统计验证扩展与用户体验优化.md
Normal file
@@ -0,0 +1,184 @@
|
||||
# RVW V2.0 开发记录 - 2026-02-18
|
||||
|
||||
> **日期:** 2026-02-18
|
||||
> **阶段:** Week 3 - 统计验证扩展与用户体验优化
|
||||
> **开发者:** AI Assistant
|
||||
> **状态:** ✅ 完成
|
||||
|
||||
---
|
||||
|
||||
## 📋 今日完成内容
|
||||
|
||||
### 1. 负号归一化功能 ✅
|
||||
|
||||
**问题背景:**
|
||||
- Word 文档中的负号可能是多种 Unicode 字符(数学减号 `\u2212`、En Dash `\u2013`、Em Dash `\u2014` 等)
|
||||
- Python 的 `float()` 无法解析这些特殊字符,导致验证失败
|
||||
|
||||
**实现内容:**
|
||||
|
||||
| 文件 | 修改 |
|
||||
|------|------|
|
||||
| `extraction_service/forensics/extractor.py` | 新增 `_clean_statistical_text()` 方法,在提取单元格时自动清洗 |
|
||||
| `extraction_service/forensics/validator.py` | 新增 `_clean_number_string()` 和 `_safe_float()` 辅助函数 |
|
||||
|
||||
**覆盖的特殊字符:**
|
||||
|
||||
| Unicode | 字符 | 名称 | 清洗为 |
|
||||
|---------|------|------|--------|
|
||||
| `\u2212` | − | 数学减号 | `-` |
|
||||
| `\u2013` | – | En Dash | `-` |
|
||||
| `\u2014` | — | Em Dash | `-` |
|
||||
| `\u2264` | ≤ | 小于等于 | `<=` |
|
||||
| `\u2265` | ≥ | 大于等于 | `>=` |
|
||||
| `\u00d7` | × | 乘号 | `x` |
|
||||
| `\u200b` | | Zero-Width Space | (删除) |
|
||||
|
||||
---
|
||||
|
||||
### 2. 统计验证方法扩展 ✅
|
||||
|
||||
#### 2.1 T 检验验证增强
|
||||
|
||||
**改进点:**
|
||||
- 智能样本量提取:支持 `(n=50)`、`n=50`、`(50例)` 等多种格式
|
||||
- 新增 `_extract_sample_sizes_from_header()` 和 `_extract_sample_sizes_from_row()` 方法
|
||||
- 支持括号格式的 SD:`45.2 (12.3)`
|
||||
- 支持多行单元格 subrow 精确高亮
|
||||
|
||||
#### 2.2 SE 三角验证增强
|
||||
|
||||
**改进点:**
|
||||
- 支持多行单元格的 subrow 精确定位
|
||||
- 遍历 P 值列每一行,分别验证
|
||||
- 显示友好的行描述(如变量名)
|
||||
|
||||
#### 2.3 CI vs P 值一致性验证增强
|
||||
|
||||
**改进点:**
|
||||
- 支持多行单元格 subrow 精确定位
|
||||
- 支持多个 CI/P 值对的验证
|
||||
- 使用 `_parse_pvalue_flexible` 灵活解析
|
||||
|
||||
---
|
||||
|
||||
### 3. 前端翻译映射更新 ✅
|
||||
|
||||
**文件:** `frontend-v2/src/modules/rvw/components/ForensicsReport.tsx`
|
||||
|
||||
新增/完善的问题类型中文翻译:
|
||||
|
||||
| 代码 | 中文描述 |
|
||||
|------|----------|
|
||||
| `ARITHMETIC_TOTAL` | 总计行错误 |
|
||||
| `STAT_CI_PVALUE_CONFLICT` | CI 与 P 值矛盾 |
|
||||
| `STAT_SD_GREATER_MEAN` | SD 大于均值 |
|
||||
| `STAT_REGRESSION_CI_P` | 回归 CI-P 不一致 |
|
||||
| `EXTRACTION_WARNING` | 提取警告 |
|
||||
| `TABLE_SKIPPED` | 表格跳过 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 文件格式提示功能 ✅
|
||||
|
||||
**用户反馈:** 上传 PDF 文件后没有数据验证 Tab,需要提示用户
|
||||
|
||||
**实现内容:**
|
||||
|
||||
| 文件 | 修改 |
|
||||
|------|------|
|
||||
| `Header.tsx` | 上传按钮下方添加蓝色提示框,推荐 .docx 格式 |
|
||||
| `ReportDetail.tsx` | 非 docx 文件时显示黄色警告,解释为什么没有数据验证 |
|
||||
| `TaskDetail.tsx` | 同上 |
|
||||
|
||||
**提示内容:**
|
||||
- **上传时:** "推荐上传 .docx 格式文件,可获得完整的数据验证功能。PDF 和 .doc 格式仅支持稿约和方法学评审。"
|
||||
- **查看报告时:** "当前文件为 PDF/.doc 格式,无法进行数据验证。如需数据验证功能,请上传 .docx 格式文件。"
|
||||
|
||||
---
|
||||
|
||||
## 📊 当前统计验证能力总览
|
||||
|
||||
| 验证类型 | 方法 | 状态 |
|
||||
|----------|------|------|
|
||||
| **L1 算术** | 百分比 n(%) | ✅ |
|
||||
| **L1 算术** | Sum/Total 校验 | ✅ |
|
||||
| **L2 统计** | 卡方检验 P 值逆向验证 | ✅ + subrow |
|
||||
| **L2 统计** | T 检验 P 值逆向验证 | ✅ + subrow |
|
||||
| **L2 统计** | CI vs P 值逻辑一致性 | ✅ + subrow |
|
||||
| **L2.5 取证** | SE 三角验证 | ✅ + subrow |
|
||||
| **L2.5 取证** | SD > Mean 检查 | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## 📁 修改的文件清单
|
||||
|
||||
### Python 后端
|
||||
- `extraction_service/forensics/extractor.py` - 负号归一化
|
||||
- `extraction_service/forensics/validator.py` - 统计验证扩展
|
||||
|
||||
### Node.js 后端
|
||||
- (无修改)
|
||||
|
||||
### 前端
|
||||
- `frontend-v2/src/modules/rvw/components/ForensicsReport.tsx` - 翻译映射
|
||||
- `frontend-v2/src/modules/rvw/components/Header.tsx` - 上传提示
|
||||
- `frontend-v2/src/modules/rvw/components/ReportDetail.tsx` - 格式提示
|
||||
- `frontend-v2/src/modules/rvw/components/TaskDetail.tsx` - 格式提示
|
||||
|
||||
---
|
||||
|
||||
## 📋 待完成工作
|
||||
|
||||
### V2.0 MVP 剩余任务
|
||||
|
||||
| 任务 | 优先级 | 状态 |
|
||||
|------|--------|------|
|
||||
| Week 4 功能测试 | P0 | 📋 待开始 |
|
||||
| Week 4 性能测试 | P1 | 📋 待开始 |
|
||||
| Week 4 Bug 修复 | P0 | 📋 待开始 |
|
||||
| Week 4 文档更新 | P1 | 📋 待开始 |
|
||||
|
||||
### V2.1 待开发功能
|
||||
|
||||
| 功能 | 说明 |
|
||||
|------|------|
|
||||
| ANOVA 验证 | 多组比较 P 值验证 |
|
||||
| 配对 T 检验 | 配对样本验证 |
|
||||
| 非参数检验 | Mann-Whitney, Wilcoxon |
|
||||
| .doc 格式支持 | 评估 Pandoc 替代方案 |
|
||||
| Profile 管理 UI | 期刊配置界面 |
|
||||
|
||||
---
|
||||
|
||||
## 💡 技术要点
|
||||
|
||||
### 负号归一化的重要性
|
||||
|
||||
```python
|
||||
# 未清洗时 float() 会崩溃
|
||||
float('−1.5') # ValueError: could not convert string to float
|
||||
|
||||
# 清洗后正常工作
|
||||
float('-1.5') # -1.5
|
||||
```
|
||||
|
||||
### Subrow 高亮原理
|
||||
|
||||
Word 表格中一个单元格可能包含多行数据(用换行符分隔),例如:
|
||||
|
||||
```
|
||||
| 变量 | P值 |
|
||||
|------|-----|
|
||||
| 年龄 | 0.82
|
||||
性别 0.01 <- 问题在这里
|
||||
BMI 0.95 |
|
||||
```
|
||||
|
||||
通过 `data-subcoord="R2C2S2"` 属性可以精确定位到第 2 行第 2 列的第 2 个子行。
|
||||
|
||||
---
|
||||
|
||||
**文档版本:** v1.0
|
||||
**创建日期:** 2026-02-18
|
||||
**下次更新:** Week 4 测试完成后
|
||||
137
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md
Normal file
137
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# **RVW V2.0 表格提取疑难杂症专项解决方案**
|
||||
|
||||
**问题焦点:** Word 表格“假行”现象(单元格内多段落)导致的提取错位
|
||||
|
||||
**核心策略:** 从“视觉模型”回归“DOM 深度解析”
|
||||
|
||||
**技术栈:** Python (python-docx)
|
||||
|
||||
## **1\. 核心判断:为什么不建议全量上视觉模型?**
|
||||
|
||||
您提到用视觉模型(Vision Model,如 GPT-4V, Qwen-VL)来识别,这听起来很诱人(所见即所得),但在**数据侦探**场景下有致命缺陷:
|
||||
|
||||
| 维度 | 视觉模型 (VLM/OCR) | 原生解析 (python-docx) | 结论 |
|
||||
| :---- | :---- | :---- | :---- |
|
||||
| **数值准确性** | **95%\~99%** (存在幻觉风险) | **100%** (直接读取 XML) | ❌ 审计场景不能有 1% 的误差 |
|
||||
| **小数点敏感度** | 可能漏读小数点 (0.05 \-\> 005\) | 绝对精准 | ❌ P 值验证的核心 |
|
||||
| **对齐能力** | 强 (能看懂视觉对齐) | 弱 (需算法辅助) | ✅ 视觉模型优势 |
|
||||
| **成本/速度** | 高/慢 (需 GPU 推理) | 极低/极快 (CPU 解析) | ❌ 影响并发性能 |
|
||||
|
||||
**决策:**
|
||||
|
||||
**“数据”必须信赖 XML(代码),“结构”可以用算法还原。** 我们不需要视觉模型来看数字,我们只需要一段更聪明的 Python 代码来拆解段落。
|
||||
|
||||
## **2\. 现象诊断:什么是“隐性多行”?**
|
||||
|
||||
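在您的截图中,Word 表格的一行(Row)内部,用户使用了 **回车键 (Enter)** 或 **软回车 (Shift+Enter)** 进行了换行。需要注意两者在底层 XML 中并不相同:回车键会产生一个新的段落(w:p),而软回车只是在同一段落内插入换行符(w:br,python-docx 读取为 \\n)。因此仅按“段落数量”拆分无法覆盖软回车的情况,还需要对每个段落的文本再按 \\n 拆分。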
|
||||
|
||||
**python-docx 的默认行为:**
|
||||
|
||||
cell.text 会把这些段落拼接成一个字符串,例如 "DNT时间段\\n\<45 min\\n45\~60 min"。前端 HTML 渲染时,如果没有处理 \\n,或者对应列的行数不匹配,就会导致错位。
|
||||
|
||||
## **3\. 解决方案:行分裂算法 (Row Explosion)**
|
||||
|
||||
我们需要在提取阶段,检测这种情况,并将“逻辑上的一行”分裂成“视觉上的多行”。
|
||||
|
||||
### **3.1 算法逻辑**
|
||||
|
||||
1. **扫描 (Scan)**:遍历表格的每一行。
|
||||
2. **检测 (Detect)**:检查该行每一列的 **段落数量 (Paragraph Count)**。
|
||||
* 例如:Col 1 有 4 个段落,Col 2 有 4 个段落,Col 3 只有 1 个段落(如 P 值)。
|
||||
3. **分裂 (Explode)**:
|
||||
* 取最大段落数 max\_para (如 4)。
|
||||
* 如果 max\_para \> 1,则将此行**分裂**为 4 个新行。
|
||||
4. **填充 (Fill)**:
|
||||
* 对于原本有多段落的列:按顺序填充到新行。
|
||||
* 对于只有 1 个段落的列(如 P 值 0.001):
|
||||
* *策略 A(重复)*:每行都填 0.001。
|
||||
* *策略 B(首行/合并)*:只填第一行,后面留空(前端处理为合并单元格)。
|
||||
|
||||
### **3.2 代码实现 Demo**
|
||||
|
||||
请让 Python 工程师在 DocxTableExtractor 中加入以下逻辑:
|
||||
|
||||
from docx import Document
|
||||
import pandas as pd
|
||||
|
||||
def explode\_word\_table\_rows(table):
|
||||
"""
|
||||
高级表格提取:处理单元格内的多段落(隐性多行)
|
||||
"""
|
||||
structured\_data \= \[\]
|
||||
|
||||
for row in table.rows:
|
||||
\# 1\. 获取该行每一列的段落内容列表
|
||||
\# cells\_content 结构: \[ \['DNT时间段', '\<45min', ...\], \['1299', '881', ...\], \['X2=..'\] \]
|
||||
cells\_content \= \[\]
|
||||
for cell in row.cells:
|
||||
\# 过滤掉空段落,获取真实文本行
|
||||
paras \= \[p.text.strip() for p in cell.paragraphs if p.text.strip()\]
|
||||
if not paras:
|
||||
paras \= \[""\] \# 保持占位
|
||||
cells\_content.append(paras)
|
||||
|
||||
\# 2\. 计算该行“分裂”的最大高度
|
||||
max\_height \= max(len(c) for c in cells\_content)
|
||||
|
||||
\# 3\. 如果是标准单行,直接添加
|
||||
if max\_height \<= 1:
|
||||
flat\_row \= \[c\[0\] if c else "" for c in cells\_content\]
|
||||
structured\_data.append(flat\_row)
|
||||
continue
|
||||
|
||||
\# 4\. 执行分裂 (Row Explosion)
|
||||
\# 针对每一层(visual\_row\_index),构建一行数据
|
||||
for i in range(max\_height):
|
||||
new\_row \= \[\]
|
||||
for col\_idx, cell\_paras in enumerate(cells\_content):
|
||||
\# 策略:如何填充?
|
||||
if len(cell\_paras) \> 1:
|
||||
\# 情况 A:该列有多行,按顺序取
|
||||
\# 如果当前层级超过了该列的行数,填空(或填最后一行)
|
||||
val \= cell\_paras\[i\] if i \< len(cell\_paras) else ""
|
||||
else:
|
||||
\# 情况 B:该列只有一行(通常是统计值 P值)
|
||||
\# 只有第一行填值,模拟“合并单元格”的视觉效果
|
||||
\# 或者:val \= cell\_paras\[0\] (全部重复填充) \-\> 方便后续计算
|
||||
val \= cell\_paras\[0\] if i \== 0 else ""
|
||||
|
||||
new\_row.append(val)
|
||||
structured\_data.append(new\_row)
|
||||
|
||||
return pd.DataFrame(structured\_data)
|
||||
|
||||
\# 使用示例
|
||||
\# doc \= Document("sample.docx")
|
||||
\# df \= explode\_word\_table\_rows(doc.tables\[0\])
|
||||
\# print(df)
|
||||
|
||||
## **4\. 前端渲染的配合**
|
||||
|
||||
为了让“数据侦探”的高亮定位准确,后端返回的数据结构必须包含**分裂后的坐标映射**。
|
||||
|
||||
**推荐的数据结构升级:**
|
||||
|
||||
{
|
||||
"row\_id": "r4\_exploded\_0", // 原始第4行,分裂后的第0子行
|
||||
"is\_virtual": true, // 标记这是分裂出来的行
|
||||
"cells": \[
|
||||
{ "text": "\<45 min", "source\_cell": "R4C1", "paragraph\_index": 1 },
|
||||
{ "text": "881 (46.59)", "source\_cell": "R4C2", "paragraph\_index": 1 },
|
||||
{ "text": "", "source\_cell": "R4C3", "is\_merged\_placeholder": true } // P值列留空
|
||||
\]
|
||||
}
|
||||
|
||||
**前端展示逻辑:**
|
||||
|
||||
* 当后端返回 is\_merged\_placeholder: true 时,前端渲染时不显示内容,或者通过 CSS 渲染为合并单元格的样式(即不画上边框)。
|
||||
|
||||
## **5\. 总结**
|
||||
|
||||
1. **别用视觉模型**:准确率风险太大,得不偿失。
|
||||
2. **用代码“分裂”段落**:Word 的 cell.paragraphs 是您的救星。
|
||||
3. **对齐策略**:通常临床表格中,如果一列有多行,另一列只有一行(如 P 值),那一行 P 值通常是对齐第一行或者居中的。在做\*\*数据验证(L1/L2)\*\*时,我们需要编写逻辑:*“如果检测到分裂行,且 P 值列为空,自动向上寻找最近的一个 P 值作为本行的验证依据。”*
|
||||
|
||||
**实施建议:**
|
||||
|
||||
请 Python 工程师立即测试上述 explode\_word\_table\_rows 逻辑。这能解决您 90% 的“HTML 只有一行”的问题。
|
||||
201
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md
Normal file
201
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md
Normal file
@@ -0,0 +1,201 @@
|
||||
# **临床统计特殊符号提取白皮书**
|
||||
|
||||
**用途:** 指导 Python (python-docx) 在提取 Word 表格时进行字符清洗和标准化。
|
||||
|
||||
**核心痛点:** 同一个数学含义,可能由多种不同的编码方式表示。
|
||||
|
||||
## **1\. 希腊字母类 (Greek Letters)**
|
||||
|
||||
这是最容易出现乱码或识别错误的重灾区。
|
||||
|
||||
|
|
||||
|
||||
| **符号** | **含义** | **常见 Unicode** | **Word 中的潜在坑 (Legacy Fonts)** | **处理建议** |
|
||||
|
||||
| ![][image1] | **卡方检验** | \\u03c7 (χ) \+ \\u00b2 (²) | 1\. 字体设为 "Symbol" 的 'c' 2\. 公式编辑器对象 | **正则匹配**:\[\\u03c7\\u03a7\]2? **关键词**:chi-square, chi |
|
||||
|
||||
| ![][image2] | 显著性水平 | \\u03b1 | 字体设为 "Symbol" 的 'a' | 替换为 alpha |
|
||||
|
||||
| ![][image3] | 回归系数/功效 | \\u03b2 | 字体设为 "Symbol" 的 'b' | 替换为 beta |
|
||||
|
||||
| ![][image4] | 总体均值 | \\u03bc | 字体设为 "Symbol" 的 'm' | 替换为 mu 或 mean |
|
||||
|
||||
| ![][image5] | 总体标准差 | \\u03c3 | 字体设为 "Symbol" 的 's' | 替换为 std |
|
||||
|
||||
| ![][image6] | 变化量/差值 | \\u0394 (大写) | 字体设为 "Symbol" 的 'D' | 替换为 delta |
|
||||
|
||||
| ![][image7] | 相关系数 | \\u03c1 | 字体设为 "Symbol" 的 'r' | 替换为 rho |
|
||||
|
||||
**⚠️ 提取陷阱:** 很多老旧的 Word 文档(特别是中文期刊投稿)喜欢用 **Symbol 字体**。在 python-docx 提取 text 时,你可能会读到一个普通的英文字母 c,但用户看到的是 ![][image8]。
|
||||
|
||||
* **解决方案**:检查 run.font.name。如果字体是 Symbol,需要建立映射表(c \-\> χ, a \-\> α)。
|
||||
|
||||
## **2\. 数学运算符类 (Operators)**
|
||||
|
||||
| **符号** | **含义** | **常见 Unicode** | **Word 变体** | **处理建议** |
|
||||
|
||||
| ![][image9] | **加减/标准差** | \\u00b1 | \+/-, \+ / \- | 统一标准化为 \\u00b1 |
|
||||
|
||||
| ![][image10] | 小于等于 | \\u2264 | \<=, \=\< | 统一为 \<= |
|
||||
|
||||
| ![][image11] | 大于等于 | \\u2265 | \>= | 统一为 \>= |
|
||||
|
||||
| ![][image12] | 不等于 | \\u2260 | \!=, \<\>, /= | 统一为 \!= |
|
||||
|
||||
| ![][image13] | 约等于 | \\u2248 | \~, \= | 统一为 \~= |
|
||||
|
||||
| ![][image14] | **负号/减号** | \\u2212 (Minus) | \\u002d (Hyphen), \\u2013 (En Dash) | **极高危!** 必须统一替换为标准连字符 \- (\\u002d),否则 float() 转换会报错 |
|
||||
|
||||
| ![][image15] | 乘号/交互项 | \\u00d7 | x, X, \* | 统一为 x |
|
||||
|
||||
**⚠️ 提取陷阱:** **“负号”是数据清洗中最大的坑**。Word 会自动把连字符(Hyphen)转成破折号(Dash)或数学减号(Minus)。
|
||||
|
||||
* python 代码:value.replace('\\u2212', '-').replace('\\u2013', '-')
|
||||
|
||||
## **3\. 统计学专用标记 (Statistical Notations)**
|
||||
|
||||
| **符号** | **含义** | **形式** | **提取难点** |
|
||||
|
||||
| ![][image16] | **样本均值** | x 上加横线 | 通常是 **Word 公式对象 (OMML)** 或 **域代码 (EQ)**,python-docx 的 .text **读不出来横线**,只能读到 x。 |
|
||||
|
||||
| ![][image17] | 样本率 | p 上加尖帽 | 同上。 |
|
||||
|
||||
| ![][image18] | 决定系数 | R \+ 上标 2 | python-docx 默认读成 R2。**这通常可以接受**。 |
|
||||
|
||||
| ![][image19] | 下标 (如 ![][image20]) | 文本 \+ 下标 | python-docx 默认读成 Xsub。需要识别 font.subscript 属性。 |
|
||||
|
||||
**⚠️ 提取陷阱:** 对于 ![][image16] 这种带修饰符的字符,python-docx 可能只能提取到底座字符 x。
|
||||
|
||||
* **策略**:对于数据侦探来说,通常我们关注的是表头里的 Mean 或 Average 关键词,而不是符号。如果表头只有 ![][image16],可能需要结合上下文推断。
|
||||
|
||||
## **4\. 拉丁字母的特殊含义 (Latin Context)**
|
||||
|
||||
虽然是普通字母,但在统计学上下文中具有特殊含义,通常以**斜体 (Italic)** 出现。
|
||||
|
||||
| **符号** | **含义** | **易混淆点** |
|
||||
|
||||
| ![][image21] | t 检验统计量 | 容易混淆为时间单位 t (time) 或 吨 (ton) |
|
||||
|
||||
| ![][image22] | F 检验统计量 | 女性 (Female) |
|
||||
|
||||
| ![][image23] | Z 检验统计量 | \- |
|
||||
|
||||
| ![][image24] | P 值 (概率) | 磷 (Phosphorus) |
|
||||
|
||||
| ![][image25] | 样本量 | 牛顿 (Newton) |
|
||||
|
||||
| ![][image26] | 相关系数 | 半径 (radius) |
|
||||
|
||||
| ![][image27] | 回归系数 | \- |
|
||||
|
||||
| ![][image28] | 优势比 | 手术室 (Operating Room), 或者 (or) |
|
||||
|
||||
| ![][image29] | 风险比 | 心率 (Heart Rate) |
|
||||
|
||||
| ![][image30] | 置信区间 | 心脏指数 (Cardiac Index) |
|
||||
|
||||
**⚠️ 提取策略:** 不能只看字符,要看**组合**。
|
||||
|
||||
* P 单独出现且数值在 0-1 之间 \-\> P 值。
|
||||
* t 单独出现且数值 \> 0 \-\> t 值。
|
||||
* CI 后面跟着括号 (1.2-3.4) \-\> 置信区间。
|
||||
|
||||
## **5\. Python 字符串清洗工具箱 (Cleaner Utils)**
|
||||
|
||||
建议在 DocxTableExtractor 中集成以下清洗函数:
|
||||
|
||||
import re
|
||||
|
||||
def clean\_statistical\_text(text):
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
\# 1\. 归一化负号 (CRITICAL)
|
||||
text \= text.replace('\\u2212', '-').replace('\\u2013', '-').replace('\\u2014', '-')
|
||||
|
||||
\# 2\. 归一化卡方 (Chi-square)
|
||||
\# 处理 Symbol 字体的 'c'2 (需配合 run.font 检查,此处仅处理 Unicode)
|
||||
text \= text.replace('\\u03c72', 'chi-square')
|
||||
text \= text.replace('\\u03c7\\u00b2', 'chi-square')
|
||||
text \= re.sub(r'\[Xxχ\]\\^?2', 'chi-square', text) \# 正则匹配常见变体
|
||||
|
||||
\# 3\. 归一化加减号
|
||||
text \= text.replace('+/-', '\\u00b1')
|
||||
|
||||
\# 4\. 归一化比较符
|
||||
text \= text.replace('≤', '\<=').replace('≥', '\>=')
|
||||
|
||||
\# 5\. 去除不可见字符 (Zero-width space 等)
|
||||
text \= re.sub(r'\[\\u200b\\u200c\\u200d\\ufeff\]', '', text)
|
||||
|
||||
return text.strip()
|
||||
|
||||
## **6\. 总结**
|
||||
|
||||
在 Word 提取中,最大的“鬼怪”不是复杂的 ![][image1],而是:
|
||||
|
||||
1. **假的负号**(导致 float() 崩溃)。
|
||||
2. **Symbol 字体**(导致 ![][image2] 变成 a)。
|
||||
3. **多段落换行**(上一节已解决)。
|
||||
|
||||
只要处理好这三点,99% 的统计表格都能被正确解析。
|
||||
|
||||
[image1]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAYCAYAAAD6S912AAABVElEQVR4Xu2Tu0rFQBCGIygICqeKwSL3VCKCBAR9DjsbSwttPKDYaWFnYyt2FoL6CBaWwmlObSW+gLUIR7+R2RAHi2wEKz8YdjLz75+9JEHwTxfSNL0mPjRWbd+LLMuOq6oKJS+KYiCmZVkuWF1nMLgn3lrPkyRJDtuaX6HbXrf1XmA0ZHVPtt6LPM/xS0e23gu9jCv3TL7f7gcse4l44QbPvzVaMGlRxrquZ8jH6Dc19oi6EcrbMLpRoRxwc4MO2Z47K/oPqmsiDMN5O+cL+cZEEMfxsqvp9l7bOh+m9a2P8qCrfrciLzB4FlPSKcaJjFbjBSZb7mxkhbbvTRRFc2p4anu9wGikhr0vogGTW2KDuBNTbn3WajqDwRkG25LzzRW6yl0j64ZMxOzI1MRQbtkP/oI1Jl7YOi/YEVNWe0CekV9azY9guGJrDj3PMX4ntvdnfALEtFcZX4GOowAAAABJRU5ErkJggg==>
|
||||
|
||||
[image2]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAZCAYAAAAFbs/PAAAAvUlEQVR4XmNgGAWjgDCQl5dfB8T/gXgWiJaTk1uBrgYOgJJxIEXS0tIyID6QbQXii4qK8kDlJ4iLi3ODFQMlgqGKhZHMAIn/BuITUPYvmDgj1BmnEUohACQGkpOVldVRUFBogAlGQ93rgqocLHcAatgnuCBQ50KQINx9SACmQVFR0QxZsBUkiKQODoDiSzHkZGRkOEGCQFoFWRxo8ymg+BaoBkagk/OQJSWgboXh7SBFIDkg+zVUrAeuYaQBAHSNOPrqSBJoAAAAAElFTkSuQmCC>
|
||||
|
||||
[image3]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAYCAYAAADOMhxqAAABD0lEQVR4Xu2QPYrCUBSFI6PgoIVNjJCYl5BCrANWU86AC7BxI4L7cANWNrZiYyG6ATsrl2BpI/hzzuMF8+4kVlPOgUuS7557ct9znH+9lCRJVyl1QG08z2vIvqUoinaosfn8wNDD9/3AMmWCcRrH8SDPMLBFLfNMKwiCTzYlBzsjaC45GzOkewJXuRLO1BZcD1z45L5hGI6Q2gI74X0ivRSTVvhDj4mm7qibNGoh5Qv1LTkGjqiF5GwssUJdch42W9VSIXQ03xf1uP9GQsqcZWVB7q4K7h/8hwOu6zatBhN4G2ma1jKG3TsmvZ/3agFeaebTmFhrtCrSy6T6rx3fifsX3X+pVMn9lwrmoWR/qifuh0EraUB3jQAAAABJRU5ErkJggg==>
|
||||
|
||||
[image4]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAXCAYAAAA/ZK6/AAAA0klEQVR4XmNgGAUjEMjLy09TUFAwQBYTFxfnBopLIouBAVAhB1DiPxB7IosD+b+BeCuyGEyiFaQBWUxGRkYaJCYnJ6eELA4GQInnIIwmVoVuCBxAnTMJTewJEL9FFgMDkJUgDYqKivrI4siGAOnDyBJg90tLS8sgiQXDDAHSikBDy5E1gNwPMu00iA8MsQogexZIDKjQFYhvAYUZkTWAFLcCFSoAJUOMjY1ZoVKMQKEAIM0MVwxzP9agwwZg7kcXxwnkIUGHEv54AShJMCB7iAAAALSHNqYCnl/cAAAAAElFTkSuQmCC>
|
||||
|
||||
[image5]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAXCAYAAAA/ZK6/AAAAlklEQVR4XmNgGAUjFMjKykrJycmFoGMpKSkRFIXy8vKaQPwfD+6FK1ZQUDgFFPiKpB9kQDRIIbIYGCgqKoqDJIBO0UEWB4oFYdUAFJyETQIothWbOEjiABB/wiIOcvcUdHGQ+xuAEm+RxYChUgYU+4sshgJAkkBFMUAmI5CeDuR/BLHR1aEAoEJtoG0BxsbGrOhyww0AAPUbLAw2jOhAAAAAAElFTkSuQmCC>
|
||||
|
||||
[image6]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAXCAYAAAAC9s/ZAAAA2klEQVR4XmNgGAW0BfLy8v/l5OS00cWJAkDNOSADgPg1uhxRAKr5H4iWkZFRQZfHCxQUFDKAuAGII6AGPUFXgxeANCGzoVgRWQ1OALW1GcYHBmIM1IDbyOpwAqjtjOhiIKyoqCiOLI4BgIqCgDZOwCIOi5Hr6HIoAKjgHwOa7TAAc4WysrIYuhwYAG12BSqYgi4OA0D5cpABwDA6hS4HBkDJn0CKBV0cGcBcoaSkxI8uYQnEy1EEsQBQ2oAacgBFAmQ7zHRiMTB1coI1A6NGH12SSLwexRUjGAAAzuRaDctwFcUAAAAASUVORK5CYII=>
|
||||
|
||||
[image7]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAYCAYAAADDLGwtAAAAv0lEQVR4XmNgGAVDCMjJyYUA8SMFBYVGdDkwMDY2ZpWXl/8PVOQK4gMVVgL5P9HVMUAVxcD46urqvCAxRUVFfbgioO4GkCBcgAGsUROq2RdZ8B8QH0AoA4tVoZgoLi7ODRKQlZX1Q1P4HMUWkAKQgKioKA9MTEZGhhMkBsTFcIUgK6Em2iKJvQe6+xBcEVQQpBOk+BiU/R+oyB9FEcg6qGko7sMAQEWe6O7DCoCKtoIUootjAKCiJ0A8BV2cbAAA6UU1lUA45VQAAAAASUVORK5CYII=>
|
||||
|
||||
[image8]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAYCAYAAADOMhxqAAAA6UlEQVR4XmNgGAUjCMjJyWkB8SMFBYWJ6HIwIC8vLwlmKCkp8QMVrjI2NmYFCv4H4p9oahkUFRXlgQbeQhdnAGpsAGmSlZXVgYmBDASKvUdWhwxYoLacAHGgtv5GV4QCgAoegDQBmYxA+h+IRleDAoCKoqG2/AfZgC6PAcTFxbmhGlrR5bACoMLTUA04PQoHQEWrgdgKiNeANAFDjQNdDRwAFfQAFSSA2MAwV4LakoOmDAJAEkDFlWhiIA2gUEIFwFg0A0rMQhcHGpAB0gS0rQzIVgCy54AlgBr00NTCAdQ/F4DqG9HliAYAE5c3TLQwFisAAAAASUVORK5CYII=>
|
||||
|
||||
[image9]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAASUlEQVR4XmNgGAWDACgqKorLycmVo4sTBeTl5SVHNWMB4uLi3ECF/0nBCgoK8ejmoAB5YmzGBYazZmDIcaCHJiEMNDQM3ZyRAgBYPjDZl5qDigAAAABJRU5ErkJggg==>
|
||||
|
||||
[image10]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAApUlEQVR4XmNgGAWDGKirq/OiixEE8vLyTkD8X05OLhddDicAakgCaVJQUPBHl8MJgBpqoJrM0eVwAqCz5oM0AbEiuhwuwAhUfByIf0lLSwujS+IFMH+BAgVdjmgA9F8E1J8R6HJEA1AgQaMlH12OaAAKNKh3pqLLEQ1UVFT4gAZ8A+J16HJEAxkZGU6gAVfRxQcIiIqK8gCdI0kMVlZWFqOeZnIBAHeIKM/15BGyAAAAAElFTkSuQmCC>
|
||||
|
||||
[image11]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAuElEQVR4XmNgGAV0BlJSUlzoYkQDRUVFdXl5+f9A3I0uRzSQlpYWBhrwC4i3A7mM6PJEAXFxcW6gAR+B+DyQy4wuTxQwNjZmBRrwEIhfycjIcKLLEwsYgQYcB7kG5Cp0SYJARUWFD6j5GxBvQ5fDCYCKFUExIScnNx9dDicARp8eNPpq0OVwAgUFBX+oTXHocjgBUEMS1CYndDm8AGjLAlAqQxcf5EBUVJQH6FdJYrCysrIY9TSTCwAyxylNVwCyZAAAAABJRU5ErkJggg==>
|
||||
|
||||
[image12]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAwElEQVR4Xu2TLQ7CQBSESwICjUDsPxa5GknCETgCN+AaaIJB4TBcgBNU4TgECUkFwTBFEDKhdAyOLxnzZee9Npstij+/I4RwZScRY5wjM/YS2HpnJ4GNC+/9lL0Ett7YSWDj0jk3YS+BrRW7dzo4sG5IiezZG2MsD2HqoReWEvjXFTJmzzR9dvXBPYOhIx7yAgc2SGKv0EXxzFICxV1Kaci+lZxzD+UTewkUD7jDAftWrLV9lI/sJfBytvUA9t94AERtLzqe3MJWAAAAAElFTkSuQmCC>
|
||||
|
||||
[image13]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAyElEQVR4XmNgGAWjgBSgqKhoJisrq4Mujg6kpKRE4BygBhN5efn/QPwOiv/LycmVIamHAwUFBQ6gfDRcAMh5jyQPE5sGMkRaWloGTfyukpISP5gjKirKA7TZFFkBDMjIyHACFf+CugqMgWr90NVRBoCmHoeZDvTXKXR5ZACUT4dzgBquA3EykmQC1JBIuCIoAAakKzxGoKGXg6YGDEAuABkC1BAD9L8qkL1fHjlwQZqB8SuPpAcFgAINqHkHUNNrIC5Glx8FJAAAcQAtugT4oBQAAAAASUVORK5CYII=>
|
||||
|
||||
[image14]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAJUlEQVR4XmNgGAWjYBRgB6Kiojzy8vKSxGBlZWUx6mkeBcMeAAA77grNb59DWgAAAABJRU5ErkJggg==>
|
||||
|
||||
[image15]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAYElEQVR4XmNgGAWjgFggJyenJC4uzo0ujgzk5eWz0MXgACj5V0lJiR9dHASAcl9xycEAIzYDQBplZGSEkMVwARQDSNEIA2ADyNEIAmRrJtvZ5AcYNo0wgDeqKE4kIwQAALb2HpmNGUynAAAAAElFTkSuQmCC>
|
||||
|
||||
[image16]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAsAAAAXCAYAAADduLXGAAAAwUlEQVR4XmNgGAWUAgUFBQc5ObkQbBgozUK+YlIBI9CUBfLy8idkZGQ4QQJA9lQQH8hkhqtSUlLiByp8BGMDFfwH4q8gTUB6NhA/hysGScA5ED5IcauoqCgPlH0dLgnyHJJCTZACaWlpGbgCXACocBJIMbo4VgBU+BaIn6CLgwHUEyB3TQJyGWHuhckDnRgB5FuCOcBQ8AUpANFAHANVHA2SMzY2ZgWy38M0ggDItF9QRd1Am4Sg7P9AU08hKxyKAAAX4zkcMt6ZpQAAAABJRU5ErkJggg==>
|
||||
|
||||
[image17]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAWCAYAAAD5Jg1dAAAA8klEQVR4XuVROwrCQBRMQEGxEEGM5P+xsV7sxAtIbmCvtSew8BiClzJlGlsLK0vRmbC7bDaNvQND9s2bt2/IOs4/QAjRt7UOkiR5gB/P80Z2TwOGJ7gNw3BIc57nY9vTrINhYUhuHMelUf+INE3nmLxnWbZhXRRFhLU1eNYmrApgPIEDZmJG1cOwoIZjj8UVnERRtKLo+/5UGVEvqXFAac2AnNbAln3HCOEFVoaPWqVXS7gUeIPho5GZb1pQofFdKw1DB2qtp1T5wDcbqHes+TraRMh8tRwq8R9nLYNEkw882o0WmEuuCexeC/I2xYvdV/gC3FlC59JuS4oAAAAASUVORK5CYII=>
|
||||
|
||||
[image18]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABcAAAAYCAYAAAARfGZ1AAABZklEQVR4Xu1TsUrEQBCNoIWoZYhFkk1SGK5SCFj4B4dgIRYnWFgINn6F32BlY+FPWFikt7W0UcRCK0EsDg59T2d1bi7KRe28B0N23pu8nd1MgmCCv0Ycx7POuQHiBXENasrW/BgwvPLrNE1PkD9o/Vdgx1mW9WTdYW5rPgDxEB3cyDEZd5Lfeg5m2/Y9IkmSjW/NPbyR5bHRDnk+rQZ+kOf5quWHEIbhvJjXVnNydPf+8TR/jhOtaK4RKOyKQbdBO5DOzxR3DONFrvHc/6xuALugQRRFcw3a29hVVTXDHGa7yPew2RbDnmgE0vXQfeMuI3B9xGOgZtnXqvjanN1KUZ9TIpPyRK4oiiVb3wp+nJy5b+RH5PlHar4VYFDTxN43P5Rsuqb5VhCDkfkGd0Eed79stbFQluWCmNdW85vyw1ptLMhY0WTTat6cDUj+bGsagcJT/7IOSNOqZl34S8Q9oqMsJvgveAWoLXtEEfJkvQAAAABJRU5ErkJggg==>
|
||||
|
||||
[image19]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACUAAAAYCAYAAAB9ejRwAAAB6ElEQVR4Xu1VPUsDQRC9oIKiYCHhilyySbBULA4LxdJCBLEVbOwEsTKo2Ig/wEoEQQRLEaytLPwHWiiCICQSAglosLAKfryXzCabVQvJmVjcg+Fm583Ovd2d23OcECFC/AMkk8n5RCLxqJT6MCynefjPFndtzv9TxOPxEXlp3oyn0+lBxN7gdpnxtkHvBoVIKILxO59mXluBo9wRYfscc4dc1+2389oKiOrVuwUrx2Ixz87pCCDmTkTN2lzHADEXIiprcx0BhOzietjQR+h5Xp+d0wIiaI8l1rWJHyETDsVvaviggAVPoeatHf8WSJzksemx2fBmXqtAvTPUXrbjX4AjGsYK7u24ajT8hM0RcqmW5LjPGcNzFS89RexA0njP1RcGvwK7gWX4F9HxJoDY5iR8+kM2h+IzIqpgcwTiT3xiUaPwi05NAEVx3iU51PdV47i6RWD1IoafS6VSY8JVAw/ywrrpZOHLNg87qReo5RQlXnKa5+b5y6IPUcf6uBAbV0Y/wa+An9bjIMD/YISXK4q/wI40QaGG/4rdcMXfg60JVd21QL9uFsQOLIi/qKTvotHoAHeAvu/7PVogdmSTgvCc4xhzt5R86YEBBVdgBbF1i+Plm4cdqVpTZ51GH7EtrmAZc06I3+AT7/CY0r/wI/8AAAAASUVORK5CYII=>
|
||||
|
||||
[image20]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB8AAAAYCAYAAAACqyaBAAABdElEQVR4Xu2UvUrEQBSFN6gg+IdoCCQmk4RASotoZ6eNjYUICj6Aj2DjE1iJrZWFiA9gYSfa+AI+g6VYWCnGc+UOTK6z2SyONu4Hw84992ROspNJrzdixL9EKXWBUXcZ8lpntAWkabqL3qvUnRCG4SKH38oe4fv+NHr3UncCnuyAwuM43jJkj0JpkiTJPDznRs8dCH6kcB1GIGwPoWtcjqGe1D2nyP0uimKW63HD5h5jv+X4kF7n2PY7iqIlaNemj/UF6C9SN8E6q8YDnMl+A8X7XZbljKFtYr83hO8Y2rpqOXJ5ns+hf6Rr8uKaE9PTQN+l1G1goaotPMuyZVqLboJqzE/7+gedb8mgcAmti/Eg9S+w2CGHb8uejWHC+cNUV1U10WhgkRsOleOyYRQMEw7f27fgn9A1HJ4n/Hg8d/NZtoXTEYX2rGsc3Tv4dvRQHd+nVsT21PS+kI7wFdTvQRBM8c1Zfb8GQval9ld4+KuvpNiPT1p0hAlGoqcTAAAAAElFTkSuQmCC>
|
||||
|
||||
[image21]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAcAAAAYCAYAAAA20uedAAAAkElEQVR4XmNgGJ5AXl7+LRCfQBcHAUagxH8gLkKXYJCTkzMGSSoqKoojCzoDcQhQYh9IEsQGYbCkgoJCAFTyDxA/BLFBYnDdDFD7gILpyIJgALNPSkpKBF0OJDkfJIkuDgZAia9AfBVdHATQ/cciDwsIUVFRHqgXjEF8IH3L2NiYFaYTbicIi4uLc8MlBhsAAJtgJhSnxjfGAAAAAElFTkSuQmCC>
|
||||
|
||||
[image22]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAYCAYAAAAlBadpAAAAxElEQVR4XmNgGAUDBOTl5ZcC8X9iMLpeOMCnQEFBIRwo9xVdHAykpKREoJoPoMuBgKioKA9Q7jC6OBgATU4HaZaVlfVDEmYEaQIx5OTkBIFqFiLJIQBQ41WQZphiEAAqjgBqsoFymYF8DpgcCkD3r4qKCh+Uz4KkDBMg+Rcd/0NXiwGw+VdaWloGKLYVWR1WIA/1r7q6Oi+SmCfQvy7I6rACmDPRxQkCQvGLFwCdVg7VHIQuhxMANe2EORcNL0dXOwooBACXVke8Lnk3PAAAAABJRU5ErkJggg==>
|
||||
|
||||
[image23]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAXCAYAAADUUxW8AAAAyklEQVR4XmNgGAUDCOTl5U8D8X8o/iInJ/cIhpHEDdH1gQFIUkFBoRGL+B6oxmB0OTAASmgCNR5CFweKNUANrUSXgwOggjVKSkr8aGLBUBv3IItjABkZGSFkvqKiohlII9C/t5DFCQKQQVAb36PL4QUgv0H92IAuhxfg8iPQC+JAsSnIYigAnx+BYvOBcoro4mAACmWojV/R5YCABSSHLggGxsbGrFCNIAWMaNLMQPFfQLwVTRwCYBqB+BmWpAjGsrKyOuj6RgGJAAC82ER8WRO91wAAAABJRU5ErkJggg==>
|
||||
|
||||
[image24]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAYCAYAAAAlBadpAAAAwUlEQVR4XmNgGAUDBOTl5Zvk5OQeAen/UPwMxIeK/YKKPUXXhwJgmtHFZWRkOKFyt9HlwEBUVJQHquAAuhwI4DIYDIASniBJWVlZP3Q5oPMFCWneA5IEuQCLXDNITkFBIQNdDgxwmQwU04TKLUaXAwNxcXFumGYs+Ju0tLQwuh44APkTpBDorHR0OYIAqPEASLOUlJQIuhxBAHMiujhBoK6uzgvVfBpdjiAA+jOBZP8CNcxBClE4VlJS4kdXOwooBACwSUl+C0KXaQAAAABJRU5ErkJggg==>
|
||||
|
||||
[image25]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACYAAAAYCAYAAACWTY9zAAAB5klEQVR4Xu1VPUsDQRA9MYKiIIpHIOSyuesCgsiBRdDOQhv/gH9Ba1stLLUIloJ2gqSwCVhYWAoWNoKViDaCQQQxjWL0TZy9TJaNJuKRIHkw7O6bz+zMbRynhx566GIopbYzmcwd1g8S7I+lPpvNDoJ/0HrIC2xWpU2sQMKqTo5jn0VfQEGhyccKJBxD4hJuZ52LWzFtwN07loJjBZIuo7hZbBNcWNViUzG52IGk51gSvD+j4jzPm9R6FB1A9iKHNoAubFBMbPvpjH0eco14+85PHYDhm9j7fGtXgtv8zXzBZ833/Rn4FyFPkDLFJx0KPlSWzkTg+SpKDucKFRcEwSifab7aBvxeeT2V8QhUNHF1awOqPl8R8GsWyAlywDbPUt8q4JfnlWKdGLpasZJrgBLzZfAUjN61AOuuqW8VruuOcKxFyTNXklwDVJM+g99h5xvMyZSpbxX4iJYoTjKZHNYczjmOnZO2EVKp1ESzqtPp9BA7N79up5akIJOaUJaW0b+L5mhscJ6XenK64M/WCugvIY8mr6EHWH3zxrG+4ceTveaUeBEo4BE7ROJY3hT19XRsmbxGGIYD0L+zvxUcf1pyaO+czkudkbo/BRKUTa7j4FmsPStdBRR1Sy01+Y4DNzZucv8On3wvkQo23MoxAAAAAElFTkSuQmCC>
|
||||
|
||||
[image26]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAkAAAAZCAYAAADjRwSLAAAAi0lEQVR4XmNgGAWDFcjLy/8G4htycnK3gLQ3EH8D4gNAvAesQFlZWQwoOV1WVtYUKPgfpEFFRYVPQUGhA8j+BzNlK1CAA0hHgxTJyMioSElJcUE1vIUp8obSh+E6cQGozq3o4nAgKirKA1IEdJsLuhwcAB3tB1IEchu6HBwAFewhxj2fQMGALj7kAQCeXiVXaN2b2wAAAABJRU5ErkJggg==>
|
||||
|
||||
[image27]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACQAAAAYCAYAAACSuF9OAAACA0lEQVR4Xu2VvUsDQRDFIygoCiIYY0KSzQcKqdNaiFjYayfYKv4DVoIIFrapA2JnEWwkYGFhby1YaBEJFkIIFhZqNL53mTuWyV24SFII+cFwyXu7s3N7s3eRyIgRIwZDLpdbNsY8ICrFYnFC+w4wj9Pp9DOubYkX/het7uqxWGxaz+0H5HhCrPF3MpmcYs5oNDqjx3nIwl9adycjfrQXFtzcWTabNbaGfDXEia158O5l0Yr2iHhtrYchn88vYG5Z67xBFHqgdQcY61wQ1xXtWTv0ob0wIOc1c9gaemmWObXuAbMqOzDu4zXpMYn2woC5DV6x+BKK22LfSM5NPdaD24doIeJW7LMQxKUeHxYUMIf5pUwmsyq5GFyrqcd6YPCkDLyyionzjnD9RDxi2JieFwbM3UYUfPQm1j3SukOv/gHjUuy7NsKAeXdaI9BvETWtO5ge/UOkoD+dMCP9ozGd91tgQXymQe8Yd4f6Lkj6p+u4E8lZ0rr9/qlqj0C/Eb/rREA7TKVSea27oEd2Eec++p7cYPcT4UI02Ue2Lo3+Jt6O7RFoRSk0cOfg3ZvOznsHAv8LnIP8i9ZQxyi7CQPiG5NOIz1OF8a8IlpB3yN4DesJOOG3YwPFBDzqRCIxb/x6ZNhg0brWCPvH+Lx/hgr66EJ/wV1MwPtnqAQVQ7BDG1r7V/wC9ySnHtFM7nIAAAAASUVORK5CYII=>
|
||||
|
||||
[image28]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAYCAYAAADtaU2/AAABw0lEQVR4Xu2UP0vDUBTFXwdB8d8gJUJCmoRCR4dMBcHVj+Dgp1BQcHLxCxTBxVVEcHbq5Oji6mBBRdxEECtUqHpOemNfbpJOiksOPNqc3733vXfzXoypVOk/5Pv+ZqPRuOEIgmBd818XJrnCZB/4DcSqifeF32UrNBG8aSzygTmMwRjyWTw+c9yyjs5N5DjOrAQda0alk5uSAmB75JgwVqgmdQfKNyaO4ymBXc1SoWAkhXc1o8DuyhYG/5OsXq/PadAvS0rFJFncvWaUsFftm/GOs/XRwg0Bp+PYvFqt1rzE9TULw9ARlntN8LbJ0KkdDQYETM4AJSSuSvHcjuFtSY0Vy64h54g+Nrdm+aMTKcXYholCzLnEdgpYjyw9zdaJvtCxidIDg/GsmVa6wCiKFktY5v16njcj/qHtJ3Jd1xOYa58t8DbjeKU0wwSu1Ch6v/SftG/f3Yk7Bh8yzhScel4vMn1/rW72bP9HAC9lRSmwLjm7oxkF9liUD+9AJi7+NqDgUlkAvGsydkYzUXpH3zWA1xF2wmd/9O1vZ4Kk5Uk75US+8T/veCZQ1Gw2F6RoZiB+3wrjopKa8M8wLi1WqdLf6Bvl9K3mDKixXwAAAABJRU5ErkJggg==>
|
||||
|
||||
[image29]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACEAAAAYCAYAAAB0kZQKAAABsElEQVR4Xu1VsUoDQRS0iJWKiIaThGSTFAZBSHEQ8A9MYyF2dtZWdnbiJ1jnCyxEW0kRSKet/oCFnU0QCwV1JsyFdy+XmMLC4gaW42Zm597uvmwWFnLkyDEHQggX1Wr1Gc9vjTe+QypEUbQk7SPR9X5sMxqNxlZWhuOu7ZxMJGbPE+C7KiD2mgU8A/qKxeKy5cvl8jr5Wq12b/kUEL6mIvpeI8C/TivQ4peFTNVGgHhEQ6VS2fcaoYCh5y14dPL1vFav1yNpX14bA+IDTX4bCQS0FND1mgX0jnwdr+EY7qghq+21MTSZZ37oBwJuFdDy8yzg6dHHHUm4OI4X2Qfk8dy0/hRMPzz6AjjAf1L38zzMQpJfxYs+fua9Ewh/0A88RvlS/YDMHRWyZ/kJBPVDs9lcydC2FT6zH7gA+VL9UCqVNlTEleUnoMmZ2w3+ktoc/dCnzzd20C7PXERSKcbAawT44bQCLaYtJGiXMU69NgbEE5kOvMbOlvbuNYvkNgwZCwH3ZPN5LLjiV0ciuvdGYmpAKpiLJTUQcG7yeX+0vUdjN/Hwg+L439P3GTly/Dv8APAywR01SopIAAAAAElFTkSuQmCC>
|
||||
|
||||
[image30]: <data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABkAAAAYCAYAAAAPtVbGAAABWklEQVR4Xu2TPUoEQRCFe0FBMVNhcGaYHzAxbhAEQ8ELGHgDT+AJvIGYG5l4BT2BGGpmoghG60YauIi+N1SvvW/HxYmdD5pl6qvuru7adq6nRymKYq8sy2v8PmEchDhiO0mSrMS5namq6gYLfWFcZVm2ZrF9fH/Wdb1Np3MQu8V4tnkcrywOamEqkdVZwhs+B1PSNSfz5u/VBcyPNd7gvV+0hJG6GObgVEcaJ3Bb9CjmXF0Dq2dCnufL6mKYk6bpusYJ3Klt4tXxvg+twkt1CvM0FoAbmp+5asqxnSJT1wWuwRvReNzsXyv8C+W8fuAvmtsmQ3VdKOf1A2LDNnlUp7RehcEi7TZm+wEGtsmLihj0axM5JxoP2BrvGp8AecckvhV1Bgtpf2Dupx8YF+omRA/xw8lxcYJV26D1Ggj8GeejH7vqFFb7YJvF41gTA3Cjlny+tyXN7fmHfANv5XKpcwl5NgAAAABJRU5ErkJggg==>
|
||||
149
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md
Normal file
149
docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md
Normal file
@@ -0,0 +1,149 @@
|
||||
# **RVW V2.0 表格提取疑难杂症专项解决方案 (v1.1 务实版)**
|
||||
|
||||
**问题焦点:** Word 表格“隐性多行”(单元格内多段落)导致的提取与验证错位 **核心策略:** **提取层保持原貌,验证层“懒分裂” (Lazy Split)** **技术栈:** Python (python-docx, pandas)
|
||||
|
||||
## **1\. 核心判断:技术选型定调**
|
||||
|
||||
| 维度 | 方案 A: 视觉模型 (VLM) | 方案 B: 结构重组 (预分裂) | 方案 C: 懒分裂 (推荐) |
|
||||
| :---- | :---- | :---- | :---- |
|
||||
| **原理** | 用 GPT-4V 截图识别 | 提取时把 Table 拆成 N 倍行 | **提取保持 \\n,验证时 split** |
|
||||
| **准确性** | 低 (幻觉/小数点风险) | 中 (容易破坏合并单元格结构) | **高 (数据无损,逻辑灵活)** |
|
||||
| **复杂度** | 高 (GPU/Prompt) | 高 (重构 DataFrame 结构) | **低 (仅在 Validator 中处理)** |
|
||||
| **前端适配** | 难 (无法定位) | 难 (需定制虚拟行渲染) | **易 (原生 HTML \<br\>)** |
|
||||
|
||||
**最终决策:**
|
||||
|
||||
1. **坚决不用视觉模型**:数值准确性是底线。
|
||||
2. **放弃“预分裂”**:不在提取阶段破坏表格的物理结构(Row/Span),避免引入元数据丢失风险。
|
||||
3. **采用“懒分裂”**:在验证逻辑中,针对特定单元格内容进行 split('\\n'),实现细粒度验证。
|
||||
|
||||
## **2\. 提取层规范 (Extractor Layer)**
|
||||
|
||||
**目标**:忠实还原 Word 文档的物理结构,不自作聪明地拆行。
|
||||
|
||||
### **2.1 Python 实现逻辑**
|
||||
|
||||
在 DocxTableExtractor 中,对于单元格内的多段落,直接使用换行符 \\n 连接。
|
||||
|
||||
def extract\_cell\_text(cell):
|
||||
"""
|
||||
提取单元格文本,保留段落结构
|
||||
"""
|
||||
\# 过滤掉完全空白的段落,保留有内容的段落
|
||||
paragraphs \= \[p.text.strip() for p in cell.paragraphs if p.text.strip()\]
|
||||
return "\\n".join(paragraphs)
|
||||
|
||||
**输出数据结构示例 (JSON)**:
|
||||
|
||||
{
|
||||
"row\_index": 3,
|
||||
"cells": \[
|
||||
{ "text": "并发症\\n颅内出血\\n牙龈出血" }, // Col 0
|
||||
{ "text": "277 (14.65)\\n85 (4.49)\\n94 (4.97)" }, // Col 1
|
||||
{ "text": "χ²=5.687\\nχ²=0.003\\nχ²=13.745" }, // Col 3 (统计值)
|
||||
{ "text": "0.017\\n0.01\\n\<0.001" } // Col 4 (P值)
|
||||
\]
|
||||
}
|
||||
|
||||
## **3\. 验证层规范 (Validator Layer)**
|
||||
|
||||
**核心逻辑:** 验证器在读取数据时,动态检测是否存在多行内容。如果存在,则在内存中“临时分裂”并逐一验证。
|
||||
|
||||
### **3.1 懒分裂验证算法 (Lazy Verification Logic)**
|
||||
|
||||
def verify\_row\_statistics(row\_data, col\_map):
|
||||
"""
|
||||
验证单行数据的统计逻辑(支持隐性多行)
|
||||
"""
|
||||
issues \= \[\]
|
||||
|
||||
\# 1\. 获取目标单元格的原始文本
|
||||
\# 假设我们要验证 Col 1 (Group A) vs Col 2 (Group B) \-\> P Value
|
||||
cell\_a\_text \= row\_data\[col\_map\['group\_a'\]\]
|
||||
cell\_b\_text \= row\_data\[col\_map\['group\_b'\]\]
|
||||
cell\_p\_text \= row\_data\[col\_map\['p\_value'\]\]
|
||||
|
||||
\# 2\. 懒分裂 (Lazy Split)
|
||||
lines\_a \= cell\_a\_text.split('\\n')
|
||||
lines\_b \= cell\_b\_text.split('\\n')
|
||||
lines\_p \= cell\_p\_text.split('\\n')
|
||||
|
||||
\# 3\. 确定对齐基准(取最大行数)
|
||||
max\_lines \= max(len(lines\_a), len(lines\_b), len(lines\_p))
|
||||
|
||||
\# 4\. 逐行验证 (Line-by-Line Validation)
|
||||
for i in range(max\_lines):
|
||||
\# 安全获取当前行的数据(处理长度不一致情况)
|
||||
val\_a \= lines\_a\[i\] if i \< len(lines\_a) else ""
|
||||
val\_b \= lines\_b\[i\] if i \< len(lines\_b) else ""
|
||||
|
||||
\# P 值匹配策略:
|
||||
\# 如果 P 值列只有 1 行,但数据有 N 行 \-\> 广播机制 (Broadcast)
|
||||
\# 如果 P 值列有 N 行 \-\> 一一对应 (One-to-One)
|
||||
if len(lines\_p) \== 1 and max\_lines \> 1:
|
||||
val\_p \= lines\_p\[0\] \# 策略 A: 共享 P 值
|
||||
else:
|
||||
val\_p \= lines\_p\[i\] if i \< len(lines\_p) else "" \# 策略 B: 独立 P 值
|
||||
|
||||
\# 跳过空行
|
||||
if not val\_a or not val\_b or not val\_p:
|
||||
continue
|
||||
|
||||
\# 执行具体的统计验证
|
||||
\# 传入 line\_index=i 以便报错时定位
|
||||
error \= validate\_single\_line(val\_a, val\_b, val\_p, line\_index=i)
|
||||
if error:
|
||||
issues.append(error)
|
||||
|
||||
return issues
|
||||
|
||||
### **3.2 优势分析**
|
||||
|
||||
1. **兼容性强**:完美支持您截图中的 颅内出血 | 85 | 90 | P=0.01 这种每行独立 P 值的场景。
|
||||
2. **鲁棒性**:如果只有第一行有 P 值(合并单元格视觉效果),代码中的 Broadcast 逻辑也能兜底。
|
||||
3. **定位精准**:报错信息可以包含 line\_index,告诉前端是单元格里的第几行出错了。
|
||||
|
||||
## **4\. 前端渲染规范 (Frontend Layer)**
|
||||
|
||||
**目标**:使用最简单的 Web 技术还原 Word 样式,避免过度设计。
|
||||
|
||||
### **4.1 HTML 渲染策略**
|
||||
|
||||
后端返回的 html 字段中,直接将 \\n 替换为 \<br\>。
|
||||
|
||||
**Python 端处理:**
|
||||
|
||||
def generate\_html\_cell(text):
|
||||
\# 转义 HTML 特殊字符,并将换行转为 \<br\>
|
||||
safe\_text \= html.escape(text)
|
||||
return safe\_text.replace("\\n", "\<br\>")
|
||||
|
||||
**前端展示效果:**
|
||||
|
||||
\<td\>
|
||||
277 (14.65)\<br\>
|
||||
85 (4.49)\<br\>
|
||||
94 (4.97)
|
||||
\</td\>
|
||||
|
||||
### **4.2 错误高亮策略**
|
||||
|
||||
由于我们不再拆分表格行(DOM 结构),高亮的最小单位是 **Cell(单元格)**。
|
||||
|
||||
* **交互设计**:
|
||||
* 当发现第 2 行子数据错误时,**高亮整个单元格**。
|
||||
* **Tooltip 提示**:鼠标悬停时,显示具体错误信息:“第 2 行数据 P 值校验不通过”。
|
||||
* **进阶优化(V2.1 可选)**:
|
||||
* 如果确实需要高亮某一行,Python 生成 HTML 时可以用 \<span\> 包裹每一行: \<span id="r3c2\_L0"\>277 (14.65)\</span\>\<br\>\<span id="r3c2\_L1"\>85 (4.49)\</span\>
|
||||
* 但 MVP 阶段建议**只高亮单元格**,性价比最高。
|
||||
|
||||
## **5\. 总结**
|
||||
|
||||
| 模块 | 核心动作 | 复杂度 |
|
||||
| :---- | :---- | :---- |
|
||||
| **Python 提取** | 保持 \\n,不拆行,输出标准 JSON | ⭐ (低) |
|
||||
| **Python 验证** | split('\\n'),循环对齐,独立计算 | ⭐⭐ (中) |
|
||||
| **前端渲染** | 使用 \<br\> 换行,CSS 控制对齐 | ⭐ (低) |
|
||||
| **前端高亮** | 高亮整个单元格,Tooltip 说明行号 | ⭐ (低) |
|
||||
|
||||
**这是目前最务实、风险最低的实施路径。** 请开发团队以此为准。
|
||||
@@ -173,7 +173,7 @@ async def analyze_docx(
|
||||
f"耗时: {execution_time_ms}ms"
|
||||
)
|
||||
|
||||
return JSONResponse(content=result.model_dump())
|
||||
return JSONResponse(content=result.model_dump(by_alias=True))
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
|
||||
@@ -44,6 +44,12 @@ EFFECT_SIZE_PATTERN = re.compile(
|
||||
re.IGNORECASE
|
||||
)
|
||||
|
||||
# Chi-square statistic, e.g. "χ²=57.519", "χ2=57.519", "x2=57.519", or a bare
# "2=57.519" (what remains when a Symbol-font χ is lost during extraction;
# "c2=57.519" is the same glyph read back as a plain 'c').
# The Latin/bare forms must not be preceded by a letter, digit or dot, so that
# "R2=0.85", "n2=48" or "12=5" are not mistaken for a chi-square value.
# Group 1 captures the numeric value.
CHI_SQUARE_PATTERN = re.compile(
    r"(?:χ[²2]|(?<![A-Za-z0-9.])[xc]?2)\s*[=:]\s*(\d+\.?\d*)",
    re.IGNORECASE
)
|
||||
|
||||
|
||||
# ==================== 统计方法检测 ====================
|
||||
|
||||
|
||||
@@ -225,8 +225,8 @@ class DocxTableExtractor:
|
||||
if col_idx >= num_cols:
|
||||
break
|
||||
|
||||
# 获取单元格文本
|
||||
cell_text = self._get_cell_text(cell)
|
||||
# 获取单元格文本(保留换行符用于 HTML 显示)
|
||||
cell_text = self._get_cell_text(cell, use_newline=True)
|
||||
|
||||
# 检测合并范围
|
||||
# python-docx 中合并单元格会重复出现同一个 cell 对象
|
||||
@@ -253,13 +253,123 @@ class DocxTableExtractor:
|
||||
|
||||
return data
|
||||
|
||||
def _get_cell_text(self, cell: _Cell) -> str:
|
||||
# Symbol 字体字符映射表(Word 使用 Symbol 字体表示希腊字母等)
|
||||
SYMBOL_CHAR_MAP = {
|
||||
'F063': 'χ', # chi
|
||||
'F032': '²', # superscript 2
|
||||
'F061': 'α', # alpha
|
||||
'F062': 'β', # beta
|
||||
'F067': 'γ', # gamma
|
||||
'F064': 'δ', # delta
|
||||
'F065': 'ε', # epsilon
|
||||
'F06D': 'μ', # mu
|
||||
'F073': 'σ', # sigma
|
||||
'F070': 'π', # pi
|
||||
'F0B2': '²', # another superscript 2 encoding
|
||||
}
|
||||
|
||||
def _clean_statistical_text(self, text: str) -> str:
|
||||
"""
|
||||
清洗统计学文本中的特殊字符
|
||||
|
||||
关键清洗:
|
||||
1. 负号归一化(最重要!防止 float() 崩溃)
|
||||
2. 比较符归一化
|
||||
3. 零宽字符清理
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# 1. 负号归一化(极高危!)
|
||||
# Word 会自动把连字符转成破折号或数学减号,导致 float() 报错
|
||||
text = text.replace('\u2212', '-') # 数学减号 (Minus Sign)
|
||||
text = text.replace('\u2013', '-') # En Dash
|
||||
text = text.replace('\u2014', '-') # Em Dash
|
||||
text = text.replace('\u2010', '-') # Hyphen
|
||||
text = text.replace('\u2011', '-') # Non-Breaking Hyphen
|
||||
text = text.replace('\u00ad', '-') # Soft Hyphen
|
||||
|
||||
# 2. 比较符归一化
|
||||
text = text.replace('\u2264', '<=') # ≤
|
||||
text = text.replace('\u2265', '>=') # ≥
|
||||
text = text.replace('\u2260', '!=') # ≠
|
||||
text = text.replace('\u2248', '~=') # ≈
|
||||
|
||||
# 3. 加减号归一化
|
||||
# 保留 ± 原样,因为它在统计学中有特定含义(如 mean±SD)
|
||||
# text = text.replace('\u00b1', '+/-') # ±
|
||||
|
||||
# 4. 乘号归一化
|
||||
text = text.replace('\u00d7', 'x') # ×
|
||||
text = text.replace('\u2217', '*') # ∗ (asterisk operator)
|
||||
|
||||
# 5. 零宽字符清理
|
||||
text = text.replace('\u200b', '') # Zero-Width Space
|
||||
text = text.replace('\u200c', '') # Zero-Width Non-Joiner
|
||||
text = text.replace('\u200d', '') # Zero-Width Joiner
|
||||
text = text.replace('\ufeff', '') # BOM / Zero-Width No-Break Space
|
||||
text = text.replace('\u00a0', ' ') # Non-Breaking Space -> 普通空格
|
||||
|
||||
return text
|
||||
|
||||
def _get_cell_text(self, cell: _Cell, use_newline: bool = False) -> str:
|
||||
"""
|
||||
获取单元格文本(合并多个段落)
|
||||
|
||||
Args:
|
||||
cell: Word 单元格对象
|
||||
use_newline: 是否使用换行符连接段落(用于 HTML 显示)
|
||||
|
||||
注意:会处理 Word 的 <w:sym> 符号字符(如 χ² 等)
|
||||
"""
|
||||
paragraphs = cell.paragraphs
|
||||
texts = [p.text.strip() for p in paragraphs]
|
||||
return " ".join(texts).strip()
|
||||
texts = []
|
||||
|
||||
for para in paragraphs:
|
||||
# 使用增强的文本提取(处理符号字符)
|
||||
para_text = self._extract_paragraph_text(para)
|
||||
if para_text.strip():
|
||||
texts.append(para_text.strip())
|
||||
|
||||
separator = "\n" if use_newline else " "
|
||||
raw_text = separator.join(texts).strip()
|
||||
|
||||
# 清洗统计学特殊字符(负号归一化等)
|
||||
return self._clean_statistical_text(raw_text)
|
||||
|
||||
def _extract_paragraph_text(self, para: Paragraph) -> str:
    """
    Extract the full text of a paragraph, including <w:sym> symbol characters.

    Word stores characters inserted from the Symbol font as
    <w:sym w:font="Symbol" w:char="F063"/> (χ in this example) instead of
    <w:t> text, so they are mapped back through SYMBOL_CHAR_MAP here.

    Because the XML is walked directly, run-level special elements have to be
    handled explicitly too, otherwise they silently vanish from the output:
      * <w:noBreakHyphen>                  -> "-"  (a negative sign typed as a
                                                    non-breaking hyphen)
      * <w:br> (text-wrapping) and <w:cr>  -> newline (soft line break in a cell)
      * <w:tab>                            -> tab

    Args:
        para: python-docx paragraph to read.

    Returns:
        The concatenated text in document order. Symbol-font characters that
        are missing from SYMBOL_CHAR_MAP appear as "[SYM:<code>]"; <w:sym>
        elements using any other font are skipped.
    """
    from docx.oxml.ns import qn

    run_tag = qn('w:r')
    text_tag = qn('w:t')
    sym_tag = qn('w:sym')
    br_tag = qn('w:br')
    br_type_attr = qn('w:type')
    # Run-level elements that always stand for one fixed character.
    fixed_chars = {
        qn('w:tab'): '\t',
        qn('w:cr'): '\n',
        qn('w:noBreakHyphen'): '-',
    }

    text_parts = []

    # Walk every descendant element of the paragraph in document order.
    for element in para._p.iter():
        tag = element.tag

        # Plain text
        if tag == text_tag:
            text_parts.append(element.text or '')
            continue

        # Symbol character <w:sym>
        if tag == sym_tag:
            font = element.get(qn('w:font'))
            char_code = element.get(qn('w:char'))

            if font == 'Symbol' and char_code:
                unicode_char = self.SYMBOL_CHAR_MAP.get(char_code.upper(), '')
                if unicode_char:
                    text_parts.append(unicode_char)
                else:
                    # Unknown symbol: log it and keep a visible placeholder.
                    logger.debug(f"Unknown Symbol char: {char_code}")
                    text_parts.append(f'[SYM:{char_code}]')
            continue

        # <w:tab> is also the tag of tab-stop definitions under w:pPr/w:tabs,
        # so only direct children of a run count as content.
        parent = element.getparent()
        if parent is None or parent.tag != run_tag:
            continue

        if tag == br_tag:
            # Page and column breaks are layout, not text.
            if element.get(br_type_attr) in (None, 'textWrapping'):
                text_parts.append('\n')
        elif tag in fixed_chars:
            text_parts.append(fixed_chars[tag])

    return ''.join(text_parts)
|
||||
|
||||
def _generate_html(
|
||||
self,
|
||||
@@ -296,8 +406,10 @@ class DocxTableExtractor:
|
||||
html_parts.append(" <tr>")
|
||||
for col_idx, cell in enumerate(row, start=1):
|
||||
coord = f"R{row_idx}C{col_idx}"
|
||||
# 为每个子行添加 span 标记,支持细粒度高亮
|
||||
cell_html = self._escape_html_with_subrows(cell, coord)
|
||||
html_parts.append(
|
||||
f' <td data-coord="{coord}">{self._escape_html(cell)}</td>'
|
||||
f' <td data-coord="{coord}">{cell_html}</td>'
|
||||
)
|
||||
html_parts.append(" </tr>")
|
||||
html_parts.append(" </tbody>")
|
||||
@@ -307,7 +419,43 @@ class DocxTableExtractor:
|
||||
return "\n".join(html_parts)
|
||||
|
||||
def _escape_html(self, text: str) -> str:
|
||||
"""转义 HTML 特殊字符"""
|
||||
"""转义 HTML 特殊字符,并将换行符转换为 <br>"""
|
||||
escaped = (
|
||||
text
|
||||
.replace("&", "&amp;")
|
||||
.replace("<", "&lt;")
|
||||
.replace(">", "&gt;")
|
||||
.replace('"', "&quot;")
|
||||
.replace("'", "&#39;")
|
||||
)
|
||||
# 将换行符转换为 <br> 标签,保留表格中的多行结构
|
||||
return escaped.replace("\n", "<br>")
|
||||
|
||||
def _escape_html_with_subrows(self, text: str, coord: str) -> str:
|
||||
"""
|
||||
转义 HTML 并为每个子行添加 span 标记,支持细粒度高亮
|
||||
|
||||
例如:单元格内容 "0.017\n0.01\n<0.001" 会生成:
|
||||
<span data-subcoord="R5C5S1">0.017</span><br>
|
||||
<span data-subcoord="R5C5S2">0.01</span><br>
|
||||
<span data-subcoord="R5C5S3"><0.001</span>
|
||||
"""
|
||||
lines = text.split("\n")
|
||||
if len(lines) == 1:
|
||||
# 单行内容,直接转义
|
||||
return self._escape_single(text)
|
||||
|
||||
# 多行内容,为每行添加 span
|
||||
result_parts = []
|
||||
for idx, line in enumerate(lines, start=1):
|
||||
escaped_line = self._escape_single(line)
|
||||
subcoord = f"{coord}S{idx}"
|
||||
result_parts.append(f'<span data-subcoord="{subcoord}">{escaped_line}</span>')
|
||||
|
||||
return "<br>".join(result_parts)
|
||||
|
||||
def _escape_single(self, text: str) -> str:
|
||||
"""转义单行文本的 HTML 特殊字符"""
|
||||
return (
|
||||
text
|
||||
.replace("&", "&amp;")
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
定义所有数据结构,确保类型安全和接口一致性。
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, model_serializer
|
||||
from typing import List, Dict, Any, Optional
|
||||
from enum import Enum
|
||||
|
||||
@@ -59,15 +59,30 @@ class ForensicsConfig(BaseModel):
|
||||
|
||||
|
||||
class CellLocation(BaseModel):
|
||||
"""单元格位置(R1C1 坐标)"""
|
||||
table_id: str = Field(..., description="表格 ID,如 tbl_0")
|
||||
"""单元格位置(R1C1 坐标),支持单元格内子行定位"""
|
||||
table_id: str = Field(..., alias="tableId", description="表格 ID,如 tbl_0")
|
||||
row: int = Field(..., description="行号,从 1 开始")
|
||||
col: int = Field(..., description="列号,从 1 开始")
|
||||
subrow: Optional[int] = Field(None, description="单元格内子行号,从 1 开始(用于多行单元格)")
|
||||
|
||||
@property
|
||||
def cell_ref(self) -> str:
|
||||
"""返回 R1C1 格式的坐标"""
|
||||
return f"R{self.row}C{self.col}"
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
@model_serializer
|
||||
def serialize(self) -> Dict[str, Any]:
|
||||
"""序列化时自动添加 cellRef 字段,支持子行坐标"""
|
||||
# 基础坐标:R{row}C{col}
|
||||
# 子行坐标:R{row}C{col}S{subrow}
|
||||
cell_ref = f"R{self.row}C{self.col}"
|
||||
if self.subrow is not None:
|
||||
cell_ref += f"S{self.subrow}"
|
||||
|
||||
return {
|
||||
"tableId": self.table_id,
|
||||
"row": self.row,
|
||||
"col": self.col,
|
||||
"subrow": self.subrow,
|
||||
"cellRef": cell_ref
|
||||
}
|
||||
|
||||
|
||||
class Issue(BaseModel):
|
||||
@@ -84,26 +99,30 @@ class TableData(BaseModel):
|
||||
id: str = Field(..., description="表格 ID,如 tbl_0")
|
||||
caption: Optional[str] = Field(None, description="表格标题")
|
||||
type: Optional[str] = Field(None, description="表格类型:BASELINE/OUTCOME/OTHER")
|
||||
row_count: int = Field(..., description="行数")
|
||||
col_count: int = Field(..., description="列数")
|
||||
row_count: int = Field(..., alias="rowCount", description="行数")
|
||||
col_count: int = Field(..., alias="colCount", description="列数")
|
||||
html: str = Field(..., description="预渲染的 HTML 片段")
|
||||
data: List[List[str]] = Field(..., description="二维数组数据")
|
||||
issues: List[Issue] = Field(default_factory=list, description="该表格的问题列表")
|
||||
skipped: bool = Field(default=False, description="是否被跳过(超限)")
|
||||
skip_reason: Optional[str] = Field(None, description="跳过原因")
|
||||
skip_reason: Optional[str] = Field(None, alias="skipReason", description="跳过原因")
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class ForensicsResult(BaseModel):
|
||||
"""数据侦探分析结果"""
|
||||
success: bool = Field(..., description="是否成功")
|
||||
methods_found: List[str] = Field(default_factory=list, description="检测到的统计方法")
|
||||
methods_found: List[str] = Field(default_factory=list, alias="methodsFound", description="检测到的统计方法")
|
||||
tables: List[TableData] = Field(default_factory=list, description="表格列表")
|
||||
total_issues: int = Field(default=0, description="总问题数")
|
||||
error_count: int = Field(default=0, description="ERROR 级别问题数")
|
||||
warning_count: int = Field(default=0, description="WARNING 级别问题数")
|
||||
execution_time_ms: int = Field(default=0, description="执行时间(毫秒)")
|
||||
total_issues: int = Field(default=0, alias="totalIssues", description="总问题数")
|
||||
error_count: int = Field(default=0, alias="errorCount", description="ERROR 级别问题数")
|
||||
warning_count: int = Field(default=0, alias="warningCount", description="WARNING 级别问题数")
|
||||
execution_time_ms: int = Field(default=0, alias="executionTimeMs", description="执行时间(毫秒)")
|
||||
error: Optional[str] = Field(None, description="错误信息(如果失败)")
|
||||
fallback_available: bool = Field(default=True, description="是否可降级执行")
|
||||
fallback_available: bool = Field(default=True, alias="fallbackAvailable", description="是否可降级执行")
|
||||
|
||||
model_config = {"populate_by_name": True}
|
||||
|
||||
|
||||
class ExtractionError(Exception):
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -52,9 +52,6 @@ app.add_middleware(
|
||||
TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/extraction_service"))
|
||||
TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 注册 RVW V2.0 数据侦探路由
|
||||
app.include_router(forensics_router)
|
||||
|
||||
# 导入服务模块
|
||||
from services.pdf_extractor import extract_pdf_pymupdf
|
||||
from services.pdf_processor import extract_pdf, get_pdf_processing_strategy
|
||||
@@ -71,6 +68,7 @@ from services.doc_export_service import check_pandoc_available, convert_markdown
|
||||
|
||||
# 新增:RVW V2.0 数据侦探模块
|
||||
from forensics.api import router as forensics_router
|
||||
app.include_router(forensics_router)
|
||||
|
||||
# 兼容:nougat 相关(已废弃,保留空实现避免报错)
|
||||
def check_nougat_available(): return False
|
||||
|
||||
@@ -66,7 +66,7 @@ export default function EditorialReport({ data }: EditorialReportProps) {
|
||||
<div className="flex flex-col items-center">
|
||||
<div className={`w-24 h-24 rounded-full border-4 ${grade.bg.replace('bg-', 'border-')} flex items-center justify-center bg-white shadow-lg`}>
|
||||
<div className="text-center">
|
||||
<span className={`text-3xl font-bold ${grade.color}`}>{data.overall_score}</span>
|
||||
<span className={`text-3xl font-bold ${grade.color}`}>{Number(data.overall_score).toFixed(1)}</span>
|
||||
<span className="text-xs text-slate-400 block">分</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
487
frontend-v2/src/modules/rvw/components/ForensicsReport.tsx
Normal file
487
frontend-v2/src/modules/rvw/components/ForensicsReport.tsx
Normal file
@@ -0,0 +1,487 @@
|
||||
/**
|
||||
* 数据验证报告组件
|
||||
* 展示 DataForensicsSkill 的表格验证结果
|
||||
*/
|
||||
import { useState } from 'react';
|
||||
import {
|
||||
AlertTriangle,
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
Info,
|
||||
Table2,
|
||||
FlaskConical,
|
||||
ChevronDown,
|
||||
ChevronUp,
|
||||
MousePointerClick
|
||||
} from 'lucide-react';
|
||||
import type { ForensicsResult, ForensicsIssue, ForensicsTable } from '../types';
|
||||
|
||||
/** Props accepted by the ForensicsReport component. */
type ForensicsReportProps = {
  /** Forensics analysis result to render. */
  data: ForensicsResult;
};
|
||||
|
||||
/**
 * Display names (Chinese) for statistical methods, keyed by the lower-case
 * English identifier. Callers are expected to lower-case the key before lookup.
 */
const METHOD_NAMES: Record<string, string> = {
  'chi-square': '卡方检验',
  'mann-whitney': 'Mann-Whitney U 检验',
  't-test': 'T 检验',
  anova: '方差分析',
  fisher: 'Fisher 精确检验',
  wilcoxon: 'Wilcoxon 检验',
  'kruskal-wallis': 'Kruskal-Wallis 检验',
  mcnemar: 'McNemar 检验',
  correlation: '相关性分析',
  regression: '回归分析',
  logistic: 'Logistic 回归',
  cox: 'Cox 回归',
  'kaplan-meier': 'Kaplan-Meier 生存分析',
};
|
||||
|
||||
/**
 * Display labels (Chinese) for issue type codes. Codes missing from this
 * map are expected to be shown verbatim by the caller.
 */
const ISSUE_TYPE_LABELS: Record<string, string> = {
  // L1: arithmetic validation
  ARITHMETIC_PERCENT: '百分比计算错误',
  ARITHMETIC_SUM: '合计计算错误',
  ARITHMETIC_TOTAL: '总计行错误',
  ARITHMETIC_MEAN: '均值计算错误',

  // L2: statistical validation
  STAT_CHI2_PVALUE: '卡方检验 P 值',
  STAT_TTEST_PVALUE: 'T 检验 P 值',
  STAT_CI_PVALUE_CONFLICT: 'CI 与 P 值矛盾',

  // L2.5: consistency forensics
  STAT_SE_TRIANGLE: 'SE 三角验证',
  STAT_SD_GREATER_MEAN: 'SD 大于均值',
  STAT_REGRESSION_CI_P: '回归 CI-P 不一致',

  // Consistency checks
  CONSISTENCY_DUPLICATE: '数据重复',
  CONSISTENCY_MISMATCH: '数据不一致',

  // Extraction problems
  EXTRACTION_WARNING: '提取警告',
  TABLE_SKIPPED: '表格跳过',
};
|
||||
|
||||
/**
 * Renders the data-forensics report: an overview card, the list of detected
 * issues (most severe first), and one collapsible card per extracted table.
 *
 * Clicking an issue that carries a cell reference selects that cell for
 * highlighting in its own table only, and expands that table so the
 * highlight is visible. Clicking the same issue again clears the selection.
 *
 * @param data - Forensics result; missing `tables`, `issues`, `methods` or
 *   `summary` fall back to empty values.
 */
export default function ForensicsReport({ data }: ForensicsReportProps) {
  const [expandedTables, setExpandedTables] = useState<Set<string>>(new Set());
  // The selection remembers the owning table: cell references such as "R5C4"
  // are only unique within a single table.
  const [highlighted, setHighlighted] = useState<{
    tableId: string | undefined;
    cellRef: string;
  } | null>(null);

  // Defensive defaults: tolerate partially populated results.
  const tables = data?.tables ?? [];
  const issues = data?.issues ?? [];
  const methods = data?.methods ?? [];
  const summary = data?.summary ?? { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 0 };

  // tableId -> friendly name (caption, or a 1-based positional fallback).
  const tableNames = new Map<string, string>(
    tables.map((t, idx): [string, string] => [t.id, t.caption || `表格 ${idx + 1}`])
  );

  /** Friendly name for a table id; falls back to the raw id. */
  const getTableName = (tableId: string | undefined): string => {
    if (!tableId) return '';
    return tableNames.get(tableId) ?? tableId;
  };

  /** Chinese display name for a statistical method, or the raw value. */
  const translateMethod = (method: string): string => {
    return METHOD_NAMES[method.toLowerCase()] || method;
  };

  /** Chinese display label for an issue type code, or the raw code. */
  const translateIssueType = (type: string): string => {
    return ISSUE_TYPE_LABELS[type] || type;
  };

  /** Expands a collapsed table card or collapses an expanded one. */
  const toggleTable = (tableId: string) => {
    // Functional update so rapid successive toggles never read stale state.
    setExpandedTables((prev) => {
      const next = new Set(prev);
      if (next.has(tableId)) {
        next.delete(tableId);
      } else {
        next.add(tableId);
      }
      return next;
    });
  };

  /** Sort rank for a severity; unknown severities sort after INFO. */
  const severityRank = (severity: ForensicsIssue['severity']): number => {
    switch (severity) {
      case 'ERROR':
        return 0;
      case 'WARNING':
        return 1;
      case 'INFO':
        return 2;
      default:
        return 3;
    }
  };

  /** Icon for a severity; unknown severities are drawn like INFO. */
  const getSeverityIcon = (severity: ForensicsIssue['severity']) => {
    switch (severity) {
      case 'ERROR':
        return <XCircle className="w-4 h-4 text-red-500 flex-shrink-0" />;
      case 'WARNING':
        return <AlertTriangle className="w-4 h-4 text-amber-500 flex-shrink-0" />;
      case 'INFO':
      default:
        return <Info className="w-4 h-4 text-blue-500 flex-shrink-0" />;
    }
  };

  /** Tailwind classes for a severity; unknown severities use the INFO palette. */
  const getSeverityColors = (severity: ForensicsIssue['severity']) => {
    switch (severity) {
      case 'ERROR':
        return { bg: 'bg-red-50', border: 'border-red-200', text: 'text-red-700' };
      case 'WARNING':
        return { bg: 'bg-amber-50', border: 'border-amber-200', text: 'text-amber-700' };
      case 'INFO':
      default:
        return { bg: 'bg-blue-50', border: 'border-blue-200', text: 'text-blue-700' };
    }
  };

  /** Overall status badge derived from the summary counters. */
  const getOverallStatus = () => {
    if (summary.errorCount > 0) {
      return { label: '发现问题', color: 'text-red-600', bg: 'bg-red-500', icon: XCircle };
    }
    if (summary.warningCount > 0) {
      return { label: '需关注', color: 'text-amber-600', bg: 'bg-amber-500', icon: AlertTriangle };
    }
    return { label: '数据正常', color: 'text-green-600', bg: 'bg-green-500', icon: CheckCircle };
  };

  const status = getOverallStatus();
  const StatusIcon = status.icon;

  /**
   * Selects (or, when already selected, deselects) the cell an issue points
   * at. Issues without a cell reference are ignored.
   */
  const handleIssueClick = (location: ForensicsIssue['location']) => {
    const cellRef = location?.cellRef;
    if (!cellRef) return;
    const tableId = location?.tableId;

    if (highlighted !== null && highlighted.cellRef === cellRef && highlighted.tableId === tableId) {
      setHighlighted(null);
      return;
    }

    setHighlighted({ tableId, cellRef });
    if (tableId) {
      // Make sure the highlighted cell is actually on screen.
      setExpandedTables((prev) => (prev.has(tableId) ? prev : new Set(prev).add(tableId)));
    }
  };

  // Copy before sorting so the incoming array is never mutated.
  const sortedIssues = [...issues].sort(
    (a, b) => severityRank(a.severity) - severityRank(b.severity)
  );

  return (
    <div className="space-y-6 fade-in">
      {/* Overview card */}
      <div className="bg-white rounded-2xl shadow-sm border border-gray-100 overflow-hidden">
        <div className="p-6 bg-gradient-to-r from-slate-50 to-white">
          <div className="flex items-start gap-8">
            {/* Status icon */}
            <div className="flex flex-col items-center">
              <div className={`w-24 h-24 rounded-full border-4 ${status.bg.replace('bg-', 'border-')} flex items-center justify-center bg-white shadow-lg`}>
                <StatusIcon className={`w-12 h-12 ${status.color}`} />
              </div>
              <span className={`mt-2 px-3 py-1 rounded-full text-xs font-bold ${status.bg} text-white`}>
                {status.label}
              </span>
            </div>

            {/* Summary text */}
            <div className="flex-1">
              <div className="flex items-center gap-2 mb-2">
                <FlaskConical className="w-5 h-5 text-indigo-500" />
                <h3 className="font-bold text-lg text-slate-800">数据验证报告</h3>
              </div>
              <p className="text-slate-600 text-sm leading-relaxed mb-4">
                已检测 {summary.totalTables} 张表格,发现 {summary.totalIssues} 个问题
                {methods.length > 0 && `,识别到统计方法:${methods.map(translateMethod).join('、')}`}
              </p>

              {/* Counters */}
              <div className="flex gap-4">
                <div className="flex items-center gap-2 px-3 py-1.5 bg-slate-100 rounded-lg border border-slate-200">
                  <Table2 className="w-4 h-4 text-slate-500" />
                  <span className="text-sm font-medium text-slate-700">{summary.totalTables} 张表格</span>
                </div>
                {summary.errorCount > 0 && (
                  <div className="flex items-center gap-2 px-3 py-1.5 bg-red-50 rounded-lg border border-red-100">
                    <XCircle className="w-4 h-4 text-red-500" />
                    <span className="text-sm font-medium text-red-700">{summary.errorCount} 个错误</span>
                  </div>
                )}
                {summary.warningCount > 0 && (
                  <div className="flex items-center gap-2 px-3 py-1.5 bg-amber-50 rounded-lg border border-amber-100">
                    <AlertTriangle className="w-4 h-4 text-amber-500" />
                    <span className="text-sm font-medium text-amber-700">{summary.warningCount} 个警告</span>
                  </div>
                )}
                {summary.errorCount === 0 && summary.warningCount === 0 && (
                  <div className="flex items-center gap-2 px-3 py-1.5 bg-green-50 rounded-lg border border-green-100">
                    <CheckCircle className="w-4 h-4 text-green-500" />
                    <span className="text-sm font-medium text-green-700">未发现问题</span>
                  </div>
                )}
              </div>
            </div>
          </div>
        </div>
      </div>

      {/* Issue list, most severe first */}
      {sortedIssues.length > 0 && (
        <div className="bg-white rounded-xl shadow-sm border border-gray-100 overflow-hidden">
          <div className="px-5 py-4 border-b border-gray-100 bg-slate-50">
            <h3 className="font-bold text-base text-slate-800 flex items-center gap-2">
              <AlertTriangle className="w-5 h-5 text-amber-500" />
              发现的问题
              <span className="text-xs text-slate-400 bg-slate-200 px-2 py-0.5 rounded">
                共 {sortedIssues.length} 项
              </span>
            </h3>
          </div>
          <div className="divide-y divide-gray-100">
            {sortedIssues.map((issue, index) => {
              const colors = getSeverityColors(issue.severity);
              return (
                <div
                  key={index}
                  className={`px-5 py-4 ${colors.bg} hover:brightness-95 transition-all cursor-pointer`}
                  onClick={() => handleIssueClick(issue.location)}
                >
                  <div className="flex items-start gap-3">
                    {getSeverityIcon(issue.severity)}
                    <div className="flex-1">
                      <p className={`text-sm font-medium ${colors.text}`}>{issue.message}</p>
                      {issue.location && (
                        <p className="text-xs text-slate-500 mt-1 flex items-center gap-1">
                          <MousePointerClick className="w-3 h-3" />
                          {issue.location.tableId && getTableName(issue.location.tableId)}
                          {issue.location.cellRef && ` · 单元格 ${issue.location.cellRef}`}
                        </p>
                      )}
                    </div>
                    <span className={`text-xs px-2 py-1 rounded ${colors.bg} ${colors.text} border ${colors.border}`}>
                      {translateIssueType(issue.type)}
                    </span>
                  </div>
                </div>
              );
            })}
          </div>
        </div>
      )}

      {/* Per-table details */}
      {tables.length > 0 && (
        <div className="space-y-4">
          <div className="flex items-center gap-3">
            <Table2 className="w-5 h-5 text-indigo-500" />
            <h3 className="font-bold text-base text-slate-800">表格详情</h3>
            <span className="text-xs text-slate-400 bg-slate-100 px-2 py-0.5 rounded">
              共 {tables.length} 张
            </span>
          </div>

          {tables.map((table) => (
            <TableCard
              key={table.id}
              table={table}
              expanded={expandedTables.has(table.id)}
              onToggle={() => toggleTable(table.id)}
              highlightedCell={
                // Only the owning table receives the highlight; a selection
                // without a table id keeps the old "every table" behaviour.
                highlighted !== null &&
                (highlighted.tableId === undefined || highlighted.tableId === table.id)
                  ? highlighted.cellRef
                  : null
              }
            />
          ))}
        </div>
      )}

      {/* Empty state */}
      {tables.length === 0 && (
        <div className="text-center py-12 text-slate-500 bg-white rounded-xl border border-gray-100">
          <Table2 className="w-12 h-12 mx-auto mb-4 text-slate-300" />
          <p>未检测到表格数据</p>
          <p className="text-xs text-slate-400 mt-1">该文档可能不包含数据表格</p>
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/** Props for the collapsible per-table card. */
interface TableCardProps {
  /** Table to render, including its pre-rendered HTML and its issues. */
  table: ForensicsTable;
  /** Whether the card body (table and issue list) is shown. */
  expanded: boolean;
  /** Invoked when the card header is clicked. */
  onToggle: () => void;
  /** Cell reference to highlight in this table, or null for none. */
  highlightedCell: string | null;
}

/** Styles applied to the injected table markup inside an expanded card. */
const FORENSICS_TABLE_CSS = `
  .forensics-table {
    width: 100%;
    border-collapse: collapse;
    font-size: 13px;
  }
  .forensics-table th,
  .forensics-table td {
    border: 1px solid #e2e8f0;
    padding: 8px 12px;
    text-align: left;
  }
  .forensics-table th {
    background: #f8fafc;
    font-weight: 600;
    color: #475569;
  }
  .forensics-table tr:hover {
    background: #f8fafc;
  }
  .forensics-table td.has-issue,
  .forensics-table span.has-issue {
    color: #dc2626 !important;
    font-weight: 600;
  }
  .forensics-table td.highlighted,
  .forensics-table span.highlighted {
    color: #dc2626 !important;
    font-weight: 700;
    background: #fef2f2 !important;
  }
`;

/**
 * Collapsible card for one extracted table: a header with issue counters,
 * and, when expanded, the rendered table plus the issues found in it.
 */
function TableCard({ table, expanded, onToggle, highlightedCell }: TableCardProps) {
  // Defensive default: `issues` is optional on the table type.
  const issues = table.issues ?? [];
  const hasIssues = issues.length > 0;
  const errorCount = issues.filter((i) => i.severity === 'ERROR').length;
  const warningCount = issues.filter((i) => i.severity === 'WARNING').length;
  // Everything that is neither ERROR nor WARNING is presented as a notice,
  // so a table with INFO-only issues still shows a badge.
  const infoCount = issues.length - errorCount - warningCount;

  /** Icon for one issue row, consistent with the main issue list. */
  const renderIssueIcon = (severity: ForensicsIssue['severity']) => {
    if (severity === 'ERROR') {
      return <XCircle className="w-4 h-4 text-red-500 flex-shrink-0 mt-0.5" />;
    }
    if (severity === 'WARNING') {
      return <AlertTriangle className="w-4 h-4 text-amber-500 flex-shrink-0 mt-0.5" />;
    }
    return <Info className="w-4 h-4 text-blue-500 flex-shrink-0 mt-0.5" />;
  };

  return (
    <div className="bg-white rounded-xl border border-gray-200 overflow-hidden shadow-sm">
      {/* Card header */}
      <div
        className={`px-5 py-4 flex items-center justify-between cursor-pointer hover:bg-slate-50 transition-colors ${
          hasIssues ? 'bg-amber-50/50' : 'bg-green-50/30'
        }`}
        onClick={onToggle}
      >
        <div className="flex items-center gap-3">
          {hasIssues ? (
            <AlertTriangle className="w-5 h-5 text-amber-500" />
          ) : (
            <CheckCircle className="w-5 h-5 text-green-500" />
          )}
          <div>
            <h4 className="font-semibold text-slate-800">{table.caption || `表格 ${table.id}`}</h4>
            <p className="text-xs text-slate-500">
              {table.rowCount} 行 × {table.colCount} 列
              {/* Fall back to a generic label instead of printing "undefined". */}
              {table.skipped && ` · ⚠️ ${table.skipReason ?? '表格跳过'}`}
            </p>
          </div>
        </div>
        <div className="flex items-center gap-3">
          {errorCount > 0 && (
            <span className="px-2 py-1 bg-red-100 text-red-700 text-xs rounded-md font-medium">
              {errorCount} 错误
            </span>
          )}
          {warningCount > 0 && (
            <span className="px-2 py-1 bg-amber-100 text-amber-700 text-xs rounded-md font-medium">
              {warningCount} 警告
            </span>
          )}
          {infoCount > 0 && (
            <span className="px-2 py-1 bg-blue-100 text-blue-700 text-xs rounded-md font-medium">
              {infoCount} 提示
            </span>
          )}
          {!hasIssues && (
            <span className="px-2 py-1 bg-green-100 text-green-700 text-xs rounded-md font-medium">
              通过
            </span>
          )}
          {expanded ? (
            <ChevronUp className="w-5 h-5 text-slate-400" />
          ) : (
            <ChevronDown className="w-5 h-5 text-slate-400" />
          )}
        </div>
      </div>

      {/* Expanded body */}
      {expanded && (
        <div className="border-t border-gray-200">
          {/* Rendered table */}
          <div className="p-4 overflow-x-auto">
            <style>{FORENSICS_TABLE_CSS}</style>
            {/*
              NOTE(review): `table.html` is injected as raw markup. This relies on
              the producer escaping all cell text; confirm that holds for every
              code path that builds this HTML.
            */}
            <div
              className="forensics-table-wrapper"
              dangerouslySetInnerHTML={{
                __html: addHighlightToHtml(table.html || '', highlightedCell, issues)
              }}
            />
          </div>

          {/* Issues found in this table */}
          {issues.length > 0 && (
            <div className="px-4 pb-4">
              <div className="bg-slate-50 rounded-lg p-3 space-y-2">
                <p className="text-xs font-semibold text-slate-500 uppercase tracking-wider">
                  该表格发现的问题
                </p>
                {issues.map((issue, idx) => (
                  <div key={idx} className="flex items-start gap-2 text-sm">
                    {renderIssueIcon(issue.severity)}
                    <span className="text-slate-700">{issue.message}</span>
                  </div>
                ))}
              </div>
            </div>
          )}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/** Escapes every RegExp metacharacter in `text` so that it matches literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Adds `className` to every opening tag in `html` that carries the attribute
 * `attrName="attrValue"`. An existing `class` attribute anywhere in the tag is
 * merged into (the new class goes first, duplicates are not added); otherwise
 * a `class` attribute is inserted right after the matched attribute.
 */
function addClassByAttribute(
  html: string,
  attrName: string,
  attrValue: string,
  className: string
): string {
  const literal = `${attrName}="${attrValue}"`;
  // The value is matched literally: it comes from report data, not from code.
  const openingTag = new RegExp(`<[^<>]*\\s${escapeRegExp(literal)}[^<>]*>`, 'g');

  return html.replace(openingTag, (tag) => {
    const classAttr = /\sclass="([^"]*)"/.exec(tag);
    if (classAttr === null) {
      const end = tag.indexOf(literal) + literal.length;
      return `${tag.slice(0, end)} class="${className}"${tag.slice(end)}`;
    }

    const existing = (classAttr[1] ?? '').split(/\s+/).filter(Boolean);
    if (existing.includes(className)) {
      return tag;
    }
    const merged = [className, ...existing].join(' ');
    // Function replacer: keeps "$" sequences in class names from being
    // interpreted as replacement patterns.
    return tag.replace(classAttr[0], () => ` class="${merged}"`);
  });
}

/**
 * Marks cells of a pre-rendered HTML table with CSS classes.
 *
 * Two kinds of coordinates are supported:
 * - `data-coord="R5C4"`: a whole cell
 * - `data-subcoord="R5C4S2"`: one sub-row inside a multi-line cell
 *
 * A reference containing "S" is treated as a sub-row reference.
 *
 * @param html - Table markup carrying `data-coord` / `data-subcoord` attributes.
 * @param highlightedCell - Reference that receives the `highlighted` class, or null.
 * @param issues - Issues whose `location.cellRef` receives the `has-issue` class;
 *   issues without a cell reference are ignored.
 * @returns The markup with classes merged in; unchanged where nothing matches.
 */
function addHighlightToHtml(
  html: string,
  highlightedCell: string | null,
  issues: ForensicsIssue[]
): string {
  const attributeFor = (ref: string): string =>
    ref.includes('S') ? 'data-subcoord' : 'data-coord';

  let result = html;

  // Several issues may point at the same cell; tag each cell once.
  const tagged = new Set<string>();
  for (const issue of issues) {
    const cellRef = issue.location?.cellRef;
    if (!cellRef || tagged.has(cellRef)) {
      continue;
    }
    tagged.add(cellRef);
    result = addClassByAttribute(result, attributeFor(cellRef), cellRef, 'has-issue');
  }

  if (highlightedCell) {
    result = addClassByAttribute(
      result,
      attributeFor(highlightedCell),
      highlightedCell,
      'highlighted'
    );
  }

  return result;
}
|
||||
@@ -1,8 +1,8 @@
|
||||
/**
|
||||
* Dashboard头部组件
|
||||
*/
|
||||
import { useRef } from 'react';
|
||||
import { BrainCircuit, UploadCloud } from 'lucide-react';
|
||||
import { useRef, useState } from 'react';
|
||||
import { BrainCircuit, UploadCloud, Info, X } from 'lucide-react';
|
||||
|
||||
interface HeaderProps {
|
||||
onUpload: (files: FileList) => void;
|
||||
@@ -10,6 +10,7 @@ interface HeaderProps {
|
||||
|
||||
export default function Header({ onUpload }: HeaderProps) {
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const [showTip, setShowTip] = useState(true);
|
||||
|
||||
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
|
||||
if (e.target.files && e.target.files.length > 0) {
|
||||
@@ -20,36 +21,56 @@ export default function Header({ onUpload }: HeaderProps) {
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-between items-center mb-6">
|
||||
{/* Logo区域 */}
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="bg-indigo-50 p-2 rounded-lg text-indigo-700">
|
||||
<BrainCircuit className="w-6 h-6" />
|
||||
<div className="mb-6">
|
||||
<div className="flex justify-between items-center">
|
||||
{/* Logo区域 */}
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="bg-indigo-50 p-2 rounded-lg text-indigo-700">
|
||||
<BrainCircuit className="w-6 h-6" />
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-xl font-bold text-slate-800">智能审稿系统</h1>
|
||||
<p className="text-xs text-slate-500">当前工作区:编辑部初审组</p>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-xl font-bold text-slate-800">智能审稿系统</h1>
|
||||
<p className="text-xs text-slate-500">当前工作区:编辑部初审组</p>
|
||||
|
||||
{/* 上传按钮 */}
|
||||
<div className="flex gap-3">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
multiple
|
||||
accept=".pdf,.doc,.docx"
|
||||
className="hidden"
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
<button
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
className="px-5 py-2.5 bg-indigo-600 hover:bg-indigo-700 text-white rounded-lg text-sm font-bold flex items-center gap-2 shadow-sm transition-all hover:-translate-y-0.5"
|
||||
>
|
||||
<UploadCloud className="w-4 h-4" />
|
||||
上传新稿件
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 上传按钮 */}
|
||||
<div className="flex gap-3">
|
||||
<input
|
||||
ref={fileInputRef}
|
||||
type="file"
|
||||
multiple
|
||||
accept=".pdf,.doc,.docx"
|
||||
className="hidden"
|
||||
onChange={handleFileChange}
|
||||
/>
|
||||
<button
|
||||
onClick={() => fileInputRef.current?.click()}
|
||||
className="px-5 py-2.5 bg-indigo-600 hover:bg-indigo-700 text-white rounded-lg text-sm font-bold flex items-center gap-2 shadow-sm transition-all hover:-translate-y-0.5"
|
||||
>
|
||||
<UploadCloud className="w-4 h-4" />
|
||||
上传新稿件
|
||||
</button>
|
||||
</div>
|
||||
{/* 文件格式提示 */}
|
||||
{showTip && (
|
||||
<div className="mt-3 flex items-start gap-2 p-3 bg-blue-50 border border-blue-200 rounded-lg text-sm">
|
||||
<Info className="w-4 h-4 text-blue-500 mt-0.5 flex-shrink-0" />
|
||||
<div className="flex-1 text-blue-700">
|
||||
<span className="font-medium">推荐上传 .docx 格式文件</span>
|
||||
<span className="text-blue-600">,可获得完整的数据验证功能(表格算术校验、P值验证等)。</span>
|
||||
<span className="text-blue-500">PDF 和 .doc 格式仅支持稿约和方法学评审。</span>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => setShowTip(false)}
|
||||
className="text-blue-400 hover:text-blue-600"
|
||||
>
|
||||
<X className="w-4 h-4" />
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ export default function MethodologyReport({ data }: MethodologyReportProps) {
|
||||
<div className="flex flex-col items-center">
|
||||
<div className={`w-24 h-24 rounded-full border-4 ${grade.bg.replace('bg-', 'border-')} flex items-center justify-center bg-white shadow-lg`}>
|
||||
<div className="text-center">
|
||||
<span className={`text-3xl font-bold ${grade.color}`}>{data.overall_score}</span>
|
||||
<span className={`text-3xl font-bold ${grade.color}`}>{Number(data.overall_score).toFixed(1)}</span>
|
||||
<span className="text-xs text-slate-400 block">分</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -2,24 +2,45 @@
|
||||
* 报告详情页组件
|
||||
*/
|
||||
import { useState } from 'react';
|
||||
import { ArrowLeft, FileCheck, Tag } from 'lucide-react';
|
||||
import { ArrowLeft, FileCheck, Tag, Info } from 'lucide-react';
|
||||
import type { ReviewReport } from '../types';
|
||||
import EditorialReport from './EditorialReport';
|
||||
import MethodologyReport from './MethodologyReport';
|
||||
import ForensicsReport from './ForensicsReport';
|
||||
|
||||
interface ReportDetailProps {
|
||||
report: ReviewReport;
|
||||
onBack: () => void;
|
||||
}
|
||||
|
||||
type TabType = 'editorial' | 'methodology' | 'forensics';
|
||||
|
||||
export default function ReportDetail({ report, onBack }: ReportDetailProps) {
|
||||
const [activeTab, setActiveTab] = useState<'editorial' | 'methodology'>('editorial');
|
||||
const [activeTab, setActiveTab] = useState<TabType>('editorial');
|
||||
|
||||
const hasEditorial = !!report.editorialReview;
|
||||
const hasMethodology = !!report.methodologyReview;
|
||||
const hasForensics = !!report.forensicsResult;
|
||||
|
||||
// 检查文件格式:非 .docx 文件无法进行数据验证
|
||||
const fileName = report.fileName || '';
|
||||
const isDocx = fileName.toLowerCase().endsWith('.docx');
|
||||
const isPdf = fileName.toLowerCase().endsWith('.pdf');
|
||||
const isDoc = fileName.toLowerCase().endsWith('.doc');
|
||||
const showNoForensicsTip = !hasForensics && (hasEditorial || hasMethodology) && (isPdf || isDoc);
|
||||
|
||||
// 如果只有方法学,默认显示方法学
|
||||
const effectiveTab = activeTab === 'editorial' && !hasEditorial && hasMethodology ? 'methodology' : activeTab;
|
||||
// 智能默认 Tab 选择
|
||||
const getEffectiveTab = (): TabType => {
|
||||
if (activeTab === 'editorial' && hasEditorial) return 'editorial';
|
||||
if (activeTab === 'methodology' && hasMethodology) return 'methodology';
|
||||
if (activeTab === 'forensics' && hasForensics) return 'forensics';
|
||||
// 默认优先级:editorial > methodology > forensics
|
||||
if (hasEditorial) return 'editorial';
|
||||
if (hasMethodology) return 'methodology';
|
||||
if (hasForensics) return 'forensics';
|
||||
return 'editorial';
|
||||
};
|
||||
const effectiveTab = getEffectiveTab();
|
||||
|
||||
return (
|
||||
<div className="flex-1 flex flex-col h-full bg-slate-50 relative fade-in">
|
||||
@@ -37,12 +58,12 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) {
|
||||
<div>
|
||||
<h1 className="text-base font-bold text-slate-800 flex items-center gap-2">
|
||||
{report.fileName}
|
||||
{report.overallScore && (
|
||||
{report.overallScore != null && (
|
||||
<span className={`tag ${
|
||||
report.overallScore >= 80 ? 'tag-green' :
|
||||
report.overallScore >= 60 ? 'tag-amber' : 'tag-red'
|
||||
}`}>
|
||||
{report.overallScore}分
|
||||
{Number(report.overallScore).toFixed(1)}分
|
||||
</span>
|
||||
)}
|
||||
</h1>
|
||||
@@ -59,7 +80,7 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) {
|
||||
{/* 内容区域 */}
|
||||
<div className="flex-1 overflow-auto p-8 max-w-5xl mx-auto w-full">
|
||||
{/* Tab切换 */}
|
||||
{(hasEditorial || hasMethodology) && (
|
||||
{(hasEditorial || hasMethodology || hasForensics) && (
|
||||
<div className="flex gap-1 bg-slate-200/50 p-1 rounded-lg mb-8 w-fit mx-auto">
|
||||
{hasEditorial && (
|
||||
<button
|
||||
@@ -85,6 +106,30 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) {
|
||||
方法学评估 ({report.methodologyReview?.overall_score}分)
|
||||
</button>
|
||||
)}
|
||||
{hasForensics && (
|
||||
<button
|
||||
onClick={() => setActiveTab('forensics')}
|
||||
className={`px-6 py-2 rounded-md text-sm transition-all ${
|
||||
effectiveTab === 'forensics'
|
||||
? 'font-bold bg-white text-indigo-600 shadow-sm'
|
||||
: 'font-medium text-slate-500 hover:text-slate-700'
|
||||
}`}
|
||||
>
|
||||
数据验证 ({report.forensicsResult?.summary.totalIssues || 0}个问题)
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 非 docx 文件无数据验证提示 */}
|
||||
{showNoForensicsTip && (
|
||||
<div className="mb-4 flex items-start gap-2 p-3 bg-amber-50 border border-amber-200 rounded-lg text-sm">
|
||||
<Info className="w-4 h-4 text-amber-500 mt-0.5 flex-shrink-0" />
|
||||
<div className="text-amber-700">
|
||||
<span className="font-medium">当前文件为 {isPdf ? 'PDF' : '.doc'} 格式,</span>
|
||||
<span>无法进行数据验证(表格算术校验、P值验证等)。</span>
|
||||
<span className="text-amber-600">如需数据验证功能,请上传 .docx 格式文件。</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -95,9 +140,12 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) {
|
||||
{effectiveTab === 'methodology' && report.methodologyReview && (
|
||||
<MethodologyReport data={report.methodologyReview} />
|
||||
)}
|
||||
{effectiveTab === 'forensics' && report.forensicsResult && (
|
||||
<ForensicsReport data={report.forensicsResult} />
|
||||
)}
|
||||
|
||||
{/* 无数据状态 */}
|
||||
{!hasEditorial && !hasMethodology && (
|
||||
{!hasEditorial && !hasMethodology && !hasForensics && (
|
||||
<div className="text-center py-12 text-slate-500">
|
||||
<Tag className="w-12 h-12 mx-auto mb-4 text-slate-300" />
|
||||
<p>暂无评估报告</p>
|
||||
|
||||
@@ -3,15 +3,18 @@
|
||||
* 支持显示审稿进度和结果
|
||||
*/
|
||||
import { useState, useEffect } from 'react';
|
||||
import { ArrowLeft, FileCheck, Clock, AlertCircle, CheckCircle, Loader2, FileText, Bot } from 'lucide-react';
|
||||
import { ArrowLeft, FileCheck, Clock, AlertCircle, CheckCircle, Loader2, FileText, Bot, Info } from 'lucide-react';
|
||||
import { Document, Packer, Paragraph, TextRun, HeadingLevel, AlignmentType, Table, TableRow, TableCell, WidthType } from 'docx';
|
||||
import { saveAs } from 'file-saver';
|
||||
import type { ReviewTask, ReviewReport, TaskStatus } from '../types';
|
||||
import EditorialReport from './EditorialReport';
|
||||
import MethodologyReport from './MethodologyReport';
|
||||
import ForensicsReport from './ForensicsReport';
|
||||
import * as api from '../api';
|
||||
import { message } from 'antd';
|
||||
|
||||
type TabType = 'editorial' | 'methodology' | 'forensics';
|
||||
|
||||
interface TaskDetailProps {
|
||||
task: ReviewTask;
|
||||
jobId?: string | null; // pg-boss 任务ID(可选,用于更精确的状态轮询)
|
||||
@@ -49,7 +52,7 @@ const getProgressSteps = (selectedAgents: string[]) => {
|
||||
export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDetailProps) {
|
||||
const [task, setTask] = useState<ReviewTask>(initialTask);
|
||||
const [report, setReport] = useState<ReviewReport | null>(null);
|
||||
const [activeTab, setActiveTab] = useState<'editorial' | 'methodology'>('editorial');
|
||||
const [activeTab, setActiveTab] = useState<TabType>('editorial');
|
||||
const [elapsedTime, setElapsedTime] = useState(0);
|
||||
|
||||
// Suppress unused variable warning - jobId is reserved for future use
|
||||
@@ -110,6 +113,8 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet
|
||||
setActiveTab('editorial');
|
||||
} else if (report.methodologyReview) {
|
||||
setActiveTab('methodology');
|
||||
} else if (report.forensicsResult) {
|
||||
setActiveTab('forensics');
|
||||
}
|
||||
}
|
||||
}, [report]);
|
||||
@@ -196,7 +201,7 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet
|
||||
width: { size: 2000, type: WidthType.DXA },
|
||||
}),
|
||||
new TableCell({
|
||||
children: [new Paragraph(`${report.overallScore || '-'} 分`)],
|
||||
children: [new Paragraph(`${report.overallScore != null ? Number(report.overallScore).toFixed(1) : '-'} 分`)],
|
||||
width: { size: 7000, type: WidthType.DXA },
|
||||
}),
|
||||
],
|
||||
@@ -532,7 +537,7 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet
|
||||
审查用时 {report.durationSeconds ? formatTime(report.durationSeconds) : '-'}
|
||||
</p>
|
||||
</div>
|
||||
<div className="text-5xl font-bold">{report.overallScore || '-'}</div>
|
||||
<div className="text-5xl font-bold">{report.overallScore != null ? Number(report.overallScore).toFixed(1) : '-'}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -562,7 +567,39 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet
|
||||
方法学评估 ({report.methodologyReview.overall_score}分)
|
||||
</button>
|
||||
)}
|
||||
{report.forensicsResult && (
|
||||
<button
|
||||
onClick={() => setActiveTab('forensics')}
|
||||
className={`px-6 py-2 rounded-md text-sm transition-all ${
|
||||
activeTab === 'forensics'
|
||||
? 'font-bold bg-white text-indigo-600 shadow-sm'
|
||||
: 'font-medium text-slate-500 hover:text-slate-700'
|
||||
}`}
|
||||
>
|
||||
数据验证 ({report.forensicsResult.summary.totalIssues || 0}个问题)
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 非 docx 文件无数据验证提示 */}
|
||||
{!report.forensicsResult && (report.editorialReview || report.methodologyReview) && (() => {
|
||||
const fileName = task.fileName || '';
|
||||
const isPdf = fileName.toLowerCase().endsWith('.pdf');
|
||||
const isDoc = fileName.toLowerCase().endsWith('.doc');
|
||||
if (isPdf || isDoc) {
|
||||
return (
|
||||
<div className="mb-4 flex items-start gap-2 p-3 bg-amber-50 border border-amber-200 rounded-lg text-sm">
|
||||
<Info className="w-4 h-4 text-amber-500 mt-0.5 flex-shrink-0" />
|
||||
<div className="text-amber-700">
|
||||
<span className="font-medium">当前文件为 {isPdf ? 'PDF' : '.doc'} 格式,</span>
|
||||
<span>无法进行数据验证(表格算术校验、P值验证等)。</span>
|
||||
<span className="text-amber-600">如需数据验证功能,请上传 .docx 格式文件。</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
return null;
|
||||
})()}
|
||||
|
||||
{/* 报告内容 */}
|
||||
{activeTab === 'editorial' && report.editorialReview && (
|
||||
@@ -571,6 +608,9 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet
|
||||
{activeTab === 'methodology' && report.methodologyReview && (
|
||||
<MethodologyReport data={report.methodologyReview} />
|
||||
)}
|
||||
{activeTab === 'forensics' && report.forensicsResult && (
|
||||
<ForensicsReport data={report.forensicsResult} />
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -72,10 +72,51 @@ export interface MethodologyReviewResult {
|
||||
parts: MethodologyPart[];
|
||||
}
|
||||
|
||||
// A single issue reported by data validation (forensics).
// Only `severity`, `type` and `message` are required; everything else is optional.
|
||||
export interface ForensicsIssue {
|
||||
severity: 'ERROR' | 'WARNING' | 'INFO'; // one of exactly these three literal levels
|
||||
type: string; // issue type identifier; NOTE(review): presumably the key looked up in ISSUE_TYPE_LABELS — confirm
|
||||
message: string; // text describing the issue
|
||||
location?: { // where the issue was found; every sub-field is optional
|
||||
tableId?: string; // presumably matches ForensicsTable.id — TODO confirm
|
||||
cellRef?: string; // cell reference; format is not defined in this file
|
||||
paragraph?: number; // paragraph number; NOTE(review): 0- or 1-based is not visible here
|
||||
};
|
||||
evidence?: Record<string, unknown>; // free-form supporting data; shape is not constrained by this type
|
||||
}
|
||||
|
||||
// Table data: one table together with the validation issues attached to it.
// NOTE(review): `headers` and `issues` are required here, while the backend
// ExtractionClient's ForensicsTable declares `headers?` and `issues?` as optional —
// confirm the API always populates them before relying on that.
|
||||
export interface ForensicsTable {
|
||||
id: string; // table identifier
|
||||
caption: string; // table caption
|
||||
html: string; // presumably an HTML rendering of the table — TODO confirm against the producer
|
||||
data: string[][]; // cell contents as strings; NOTE(review): presumably one inner array per row — confirm
|
||||
headers: string[]; // header strings
|
||||
rowCount: number;
|
||||
colCount: number;
|
||||
skipped?: boolean; // optional flag; presumably set when the table was not validated — confirm
|
||||
skipReason?: string; // optional; presumably explains why `skipped` is set — confirm
|
||||
issues: ForensicsIssue[]; // issues belonging to this table
|
||||
}
|
||||
|
||||
// Data validation result: the tables, the issue list and aggregate counts.
|
||||
export interface ForensicsResult {
|
||||
tables: ForensicsTable[]; // tables, each carrying its own `issues` list
|
||||
methods: string[]; // NOTE(review): presumably names of detected statistical methods — confirm
|
||||
issues: ForensicsIssue[]; // result-level issue list; relation to per-table issues is not visible here
|
||||
summary: { // aggregate counters
|
||||
totalTables: number;
|
||||
|
||||
totalIssues: number; // shown in the "forensics" tab label in TaskDetail
|
||||
errorCount: number;
|
||||
|
||||
warningCount: number; // note: there is no counter for 'INFO' severity
|
||||
};
|
||||
}
|
||||
|
||||
// Complete review report: a ReviewTask extended with the optional per-section results.
// Each section is optional; TaskDetail only renders a section's tab when its field is set.
|
||||
export interface ReviewReport extends ReviewTask {
|
||||
editorialReview?: EditorialReviewResult;
|
||||
methodologyReview?: MethodologyReviewResult;
|
||||
forensicsResult?: ForensicsResult; // when absent for a .pdf/.doc file, TaskDetail shows a format notice instead
|
||||
modelUsed?: string;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user