From f9ed0c25289bb41b8d323edda3eb4f716c72afd2 Mon Sep 17 00:00:00 2001 From: HaHafeng Date: Wed, 18 Feb 2026 18:26:16 +0800 Subject: [PATCH] feat(rvw): Complete V2.0 Week 3 - Statistical validation extension and UX improvements Week 3 Development Summary: - Implement negative sign normalization (6 Unicode variants) - Enhance T-test validation with smart sample size extraction - Enhance SE triangle and CI-P consistency validation with subrow support - Add precise sub-cell highlighting for P-values in multi-line cells - Add frontend issue type Chinese translations (6 new types) - Add file format tips for PDF/DOC uploads Technical improvements: - Add _clean_statistical_text() in extractor.py - Add _safe_float() wrapper in validator.py - Add ForensicsReport.tsx component - Update ISSUE_TYPE_LABELS translations Documentation: - Add 2026-02-18 development record - Update RVW module status (v5.1) - Update system status (v5.2) Status: Week 3 complete, ready for Week 4 testing Co-authored-by: Cursor --- .../src/common/document/ExtractionClient.ts | 3 +- .../rvw/controllers/reviewController.ts | 16 +- .../src/modules/rvw/services/reviewService.ts | 59 +- backend/src/modules/rvw/services/utils.ts | 13 +- .../src/modules/rvw/skills/core/context.ts | 10 +- .../src/modules/rvw/skills/core/executor.ts | 40 +- .../src/modules/rvw/skills/core/profile.ts | 20 +- .../src/modules/rvw/skills/core/registry.ts | 8 +- backend/src/modules/rvw/skills/core/types.ts | 2 +- .../modules/rvw/skills/library/BaseSkill.ts | 31 +- .../rvw/skills/library/DataForensicsSkill.ts | 226 +++-- .../rvw/skills/library/EditorialSkill.ts | 27 +- .../rvw/skills/library/MethodologySkill.ts | 31 +- backend/src/modules/rvw/skills/test-skills.ts | 103 ++ backend/src/modules/rvw/types/index.ts | 40 + .../src/modules/rvw/workers/reviewWorker.ts | 50 +- .../00-系统当前状态与开发指南.md | 18 +- .../RVW-稿件审查系统/00-模块当前状态与开发指南.md | 68 +- ...级医院卒中中心急性缺血性卒中静脉溶栓指标分析_定稿0314 - 副本.docx | Bin 0 -> 58731 bytes .../2026-02-18 
统计验证扩展与用户体验优化.md | 184 ++++ .../RVW V2.0 表格提取疑难杂症专项解决方案.md | 137 +++ .../06-开发记录/临床统计特殊符号提取白皮书.md | 201 ++++ .../RVW-稿件审查系统/06-开发记录/务实版.md | 149 +++ extraction_service/forensics/api.py | 2 +- extraction_service/forensics/config.py | 6 + extraction_service/forensics/extractor.py | 162 +++- extraction_service/forensics/types.py | 51 +- extraction_service/forensics/validator.py | 909 +++++++++++++----- extraction_service/main.py | 4 +- .../rvw/components/EditorialReport.tsx | 2 +- .../rvw/components/ForensicsReport.tsx | 487 ++++++++++ .../src/modules/rvw/components/Header.tsx | 77 +- .../rvw/components/MethodologyReport.tsx | 2 +- .../modules/rvw/components/ReportDetail.tsx | 64 +- .../src/modules/rvw/components/TaskDetail.tsx | 48 +- frontend-v2/src/modules/rvw/types/index.ts | 41 + 36 files changed, 2790 insertions(+), 501 deletions(-) create mode 100644 backend/src/modules/rvw/skills/test-skills.ts create mode 100644 docs/03-业务模块/RVW-稿件审查系统/05-测试文档/Test_刘锦_2019—2022年昆明市二、三级医院卒中中心急性缺血性卒中静脉溶栓指标分析_定稿0314 - 副本.docx create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/2026-02-18 统计验证扩展与用户体验优化.md create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md create mode 100644 docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md create mode 100644 frontend-v2/src/modules/rvw/components/ForensicsReport.tsx diff --git a/backend/src/common/document/ExtractionClient.ts b/backend/src/common/document/ExtractionClient.ts index ab2bfd07..67302dae 100644 --- a/backend/src/common/document/ExtractionClient.ts +++ b/backend/src/common/document/ExtractionClient.ts @@ -54,6 +54,7 @@ export interface ForensicsTable { headers?: string[]; rowCount: number; colCount: number; + issues?: ForensicsIssue[]; // 每个表格的问题列表 } export interface ForensicsIssue { @@ -354,7 +355,7 @@ class ExtractionClient implements IExtractionClient { formData.append('tolerance_percent', config.tolerancePercent.toString()); const response = await 
axios.post( - `${this.baseUrl}/api/v1/forensics/analyze`, + `${this.baseUrl}/api/v1/forensics/analyze_docx`, formData, { headers: { diff --git a/backend/src/modules/rvw/controllers/reviewController.ts b/backend/src/modules/rvw/controllers/reviewController.ts index c6258959..be2d47b8 100644 --- a/backend/src/modules/rvw/controllers/reviewController.ts +++ b/backend/src/modules/rvw/controllers/reviewController.ts @@ -27,6 +27,17 @@ function getUserId(request: FastifyRequest): string { return userId; } +/** + * 获取租户ID(从JWT Token中获取) + */ +function getTenantId(request: FastifyRequest): string { + const tenantId = (request as any).user?.tenantId; + if (!tenantId) { + throw new Error('Tenant not found'); + } + return tenantId; +} + // ==================== 任务创建 ==================== /** @@ -43,7 +54,8 @@ export async function createTask( ) { try { const userId = getUserId(request); - logger.info('[RVW:Controller] 上传稿件', { userId }); + const tenantId = getTenantId(request); + logger.info('[RVW:Controller] 上传稿件', { userId, tenantId }); // 获取上传的文件 const data = await request.file(); @@ -105,7 +117,7 @@ export async function createTask( } // 创建任务 - const task = await reviewService.createTask(file, filename, userId, modelType); + const task = await reviewService.createTask(file, filename, userId, tenantId, modelType); logger.info('[RVW:Controller] 任务已创建', { taskId: task.id }); diff --git a/backend/src/modules/rvw/services/reviewService.ts b/backend/src/modules/rvw/services/reviewService.ts index 5f04f5d1..da4f97b2 100644 --- a/backend/src/modules/rvw/services/reviewService.ts +++ b/backend/src/modules/rvw/services/reviewService.ts @@ -17,6 +17,24 @@ import { ModelType } from '../../../common/llm/adapters/types.js'; import { logger } from '../../../common/logging/index.js'; import { jobQueue } from '../../../common/jobs/index.js'; import { Prisma } from '@prisma/client'; +import { storage } from '../../../common/storage/index.js'; +import { randomUUID } from 'crypto'; +import 
path from 'path'; + +/** + * 生成 RVW 模块的 OSS 存储 Key + * 格式: tenants/{tenantId}/users/{userId}/rvw/{taskId}/{filename} + */ +function generateRvwStorageKey( + tenantId: string, + userId: string, + taskId: string, + filename: string +): string { + const uuid = randomUUID().replace(/-/g, '').substring(0, 16); + const ext = path.extname(filename).toLowerCase(); + return `tenants/${tenantId}/users/${userId}/rvw/${taskId}/${uuid}${ext}`; +} import { AgentType, TaskStatus, @@ -44,6 +62,7 @@ import { * @param file 文件Buffer * @param filename 文件名 * @param userId 用户ID + * @param tenantId 租户ID * @param modelType 模型类型 * @returns 创建的任务 */ @@ -51,11 +70,12 @@ export async function createTask( file: Buffer, filename: string, userId: string, + tenantId: string, modelType: ModelType = 'deepseek-v3' ) { - logger.info('[RVW] 创建审查任务', { filename, userId, modelType }); + logger.info('[RVW] 创建审查任务', { filename, userId, tenantId, modelType }); - // 创建任务记录(状态为pending,等待用户选择智能体后运行) + // 1. 先创建任务记录获取 taskId const task = await prisma.reviewTask.create({ data: { userId, @@ -70,12 +90,37 @@ export async function createTask( logger.info('[RVW] 任务已创建', { taskId: task.id, status: task.status }); - // 异步提取文档文本(预处理,不运行评估) + // 2. 生成 OSS 存储 Key 并上传文件 + const storageKey = generateRvwStorageKey(tenantId, userId, task.id, filename); + let updatedTask = task; + + try { + logger.info('[RVW] 开始上传文件到存储', { taskId: task.id, storageKey }); + await storage.upload(storageKey, file); + logger.info('[RVW] 文件已上传到存储', { taskId: task.id, storageKey }); + + // 3. 更新任务的 filePath 字段 + updatedTask = await prisma.reviewTask.update({ + where: { id: task.id }, + data: { filePath: storageKey }, + }); + logger.info('[RVW] 任务 filePath 已更新', { taskId: task.id, filePath: storageKey }); + } catch (uploadError) { + logger.error('[RVW] 文件上传失败', { + taskId: task.id, + storageKey, + error: uploadError instanceof Error ? uploadError.message : 'Unknown error', + stack: uploadError instanceof Error ? 
uploadError.stack : undefined, + }); + // 上传失败不阻塞任务创建,DataForensicsSkill 会优雅降级 + } + + // 4. 异步提取文档文本(预处理,不运行评估) extractDocumentAsync(task.id, file, filename).catch(error => { logger.error('[RVW] 文档提取失败', { taskId: task.id, error: error.message }); }); - return task; + return updatedTask; } /** @@ -191,6 +236,7 @@ export async function runReview(params: RunReviewParams): Promise<{ jobId: strin agents, extractedText: task.extractedText, modelType: (task.modelUsed || 'deepseek-v3') as ModelType, + __expireInSeconds: 10 * 60, // 10分钟超时(审稿任务通常2-3分钟完成) }); logger.info('[RVW] 审查任务已推送到队列', { @@ -364,6 +410,10 @@ export async function getTaskReport(userId: string, taskId: string): Promise { ...initialContext, profile, previousResults: [], - } as TContext; + } as unknown as TContext; - logger.info({ + logger.info('[SkillExecutor] Starting pipeline execution', { taskId: context.taskId, profileId: profile.id, pipelineLength: profile.pipeline.length, - }, '[SkillExecutor] Starting pipeline execution'); + }); // 遍历 Pipeline for (const item of profile.pipeline) { // 跳过禁用的 Skill if (!item.enabled) { - logger.debug({ skillId: item.skillId }, '[SkillExecutor] Skill disabled, skipping'); + logger.debug('[SkillExecutor] Skill disabled, skipping', { skillId: item.skillId }); results.push(this.createSkippedResult(item.skillId, 'Skill disabled in profile')); continue; } @@ -78,20 +78,20 @@ export class SkillExecutor { // 获取 Skill const skill = SkillRegistry.get(item.skillId); if (!skill) { - logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill not found in registry'); + logger.warn('[SkillExecutor] Skill not found in registry', { skillId: item.skillId }); results.push(this.createSkippedResult(item.skillId, 'Skill not found')); continue; } // 前置检查 - if (skill.canRun && !skill.canRun(context as SkillContext)) { - logger.info({ skillId: item.skillId }, '[SkillExecutor] Skill pre-check failed, skipping'); + if (skill.canRun && !skill.canRun(context as unknown as SkillContext)) { + 
logger.info('[SkillExecutor] Skill pre-check failed, skipping', { skillId: item.skillId }); results.push(this.createSkippedResult(item.skillId, 'Pre-check failed')); continue; } // 执行 Skill - const result = await this.executeSkill(skill, context as SkillContext, item, profile); + const result = await this.executeSkill(skill, context as unknown as SkillContext, item, profile); results.push(result); // 调用完成回调(V2.1 扩展点) @@ -100,7 +100,7 @@ export class SkillExecutor { await this.config.onSkillComplete(item.skillId, result, context); } catch (callbackError: unknown) { const errorMessage = callbackError instanceof Error ? callbackError.message : String(callbackError); - logger.error({ skillId: item.skillId, error: errorMessage }, '[SkillExecutor] onSkillComplete callback failed'); + logger.error('[SkillExecutor] onSkillComplete callback failed', { skillId: item.skillId, error: errorMessage }); } } @@ -112,7 +112,7 @@ export class SkillExecutor { // 检查是否需要中断 if (result.status === 'error' && !this.shouldContinue(item, profile)) { - logger.warn({ skillId: item.skillId }, '[SkillExecutor] Skill failed and continueOnError=false, stopping'); + logger.warn('[SkillExecutor] Skill failed and continueOnError=false, stopping', { skillId: item.skillId }); break; } } @@ -120,13 +120,13 @@ export class SkillExecutor { // 生成汇总 const summary = this.buildSummary(context.taskId, profile.id, results, startTime); - logger.info({ + logger.info('[SkillExecutor] Pipeline execution completed', { taskId: context.taskId, overallStatus: summary.overallStatus, totalTime: summary.totalExecutionTime, successCount: summary.successCount, errorCount: summary.errorCount, - }, '[SkillExecutor] Pipeline execution completed'); + }); return summary; } @@ -144,23 +144,23 @@ export class SkillExecutor { const timeoutMultiplier = profile.globalConfig?.timeoutMultiplier ?? 1; const timeout = Math.round((item.timeout ?? skill.metadata.defaultTimeout ?? 
this.config.defaultTimeout) * timeoutMultiplier); - logger.info({ + logger.info('[SkillExecutor] Executing skill', { skillId: skill.metadata.id, taskId: context.taskId, timeout, - }, '[SkillExecutor] Executing skill'); + }); try { // 带超时执行 const result = await this.executeWithTimeout(skill, context, item.config, timeout); - logger.info({ + logger.info('[SkillExecutor] Skill execution completed', { skillId: skill.metadata.id, taskId: context.taskId, status: result.status, executionTime: result.executionTime, issueCount: result.issues.length, - }, '[SkillExecutor] Skill execution completed'); + }); return result; } catch (error: unknown) { @@ -169,11 +169,11 @@ export class SkillExecutor { // 判断是否超时 if (errorMessage === 'SKILL_TIMEOUT') { - logger.warn({ + logger.warn('[SkillExecutor] Skill execution timed out', { skillId: skill.metadata.id, taskId: context.taskId, timeout, - }, '[SkillExecutor] Skill execution timed out'); + }); return { skillId: skill.metadata.id, @@ -192,11 +192,11 @@ export class SkillExecutor { } // 其他错误 - logger.error({ + logger.error('[SkillExecutor] Skill execution failed', { skillId: skill.metadata.id, taskId: context.taskId, error: errorMessage, - }, '[SkillExecutor] Skill execution failed'); + }); return { skillId: skill.metadata.id, diff --git a/backend/src/modules/rvw/skills/core/profile.ts b/backend/src/modules/rvw/skills/core/profile.ts index 61aedd80..31546a46 100644 --- a/backend/src/modules/rvw/skills/core/profile.ts +++ b/backend/src/modules/rvw/skills/core/profile.ts @@ -7,7 +7,7 @@ * @since 2026-02-18 */ -import { JournalProfile, PipelineItem } from './types.js'; +import { JournalProfile } from './types.js'; import { logger } from '../../../../common/logging/index.js'; /** @@ -34,13 +34,13 @@ export const DEFAULT_PROFILE: JournalProfile = { skillId: 'EditorialSkill', enabled: true, optional: false, - timeout: 45000, + timeout: 180000, // 180 秒 }, { skillId: 'MethodologySkill', enabled: true, optional: false, - timeout: 45000, + 
timeout: 180000, // 180 秒 }, ], @@ -78,13 +78,13 @@ export const CHINESE_CORE_PROFILE: JournalProfile = { config: { standard: 'chinese-core', }, - timeout: 45000, + timeout: 180000, // 180 秒 }, { skillId: 'MethodologySkill', enabled: true, optional: false, - timeout: 45000, + timeout: 180000, // 180 秒 }, ], @@ -154,11 +154,11 @@ export class ProfileResolver { const profile = PROFILES.get(id); if (!profile) { - logger.warn({ profileId: id }, '[ProfileResolver] Profile not found, using default'); + logger.warn('[ProfileResolver] Profile not found, using default', { profileId: id }); return DEFAULT_PROFILE; } - logger.debug({ profileId: id }, '[ProfileResolver] Profile resolved'); + logger.debug('[ProfileResolver] Profile resolved', { profileId: id }); return profile; } @@ -196,10 +196,10 @@ export class ProfileResolver { enabled: enabledSkills.has(item.skillId), })); - logger.debug({ + logger.debug('[ProfileResolver] Profile built from agents', { selectedAgents, enabledSkills: Array.from(enabledSkills), - }, '[ProfileResolver] Profile built from agents'); + }); return baseProfile; } @@ -223,7 +223,7 @@ export class ProfileResolver { */ static register(profile: JournalProfile): void { PROFILES.set(profile.id, profile); - logger.info({ profileId: profile.id }, '[ProfileResolver] Profile registered'); + logger.info('[ProfileResolver] Profile registered', { profileId: profile.id }); } /** diff --git a/backend/src/modules/rvw/skills/core/registry.ts b/backend/src/modules/rvw/skills/core/registry.ts index f0796166..b642ea12 100644 --- a/backend/src/modules/rvw/skills/core/registry.ts +++ b/backend/src/modules/rvw/skills/core/registry.ts @@ -24,11 +24,11 @@ class SkillRegistryClass { const { id, version } = skill.metadata; if (this.skills.has(id)) { - logger.warn({ skillId: id }, '[SkillRegistry] Skill already registered, overwriting'); + logger.warn('[SkillRegistry] Skill already registered, overwriting', { skillId: id }); } this.skills.set(id, skill); - logger.info({ 
skillId: id, version }, '[SkillRegistry] Skill registered'); + logger.info('[SkillRegistry] Skill registered', { skillId: id, version }); } /** @@ -92,7 +92,7 @@ class SkillRegistryClass { unregister(id: string): boolean { const result = this.skills.delete(id); if (result) { - logger.info({ skillId: id }, '[SkillRegistry] Skill unregistered'); + logger.info('[SkillRegistry] Skill unregistered', { skillId: id }); } return result; } @@ -118,7 +118,7 @@ class SkillRegistryClass { */ markInitialized(): void { this.initialized = true; - logger.info({ skillCount: this.size }, '[SkillRegistry] Registry initialized'); + logger.info('[SkillRegistry] Registry initialized', { skillCount: this.size }); } /** diff --git a/backend/src/modules/rvw/skills/core/types.ts b/backend/src/modules/rvw/skills/core/types.ts index ff27ccab..67dff451 100644 --- a/backend/src/modules/rvw/skills/core/types.ts +++ b/backend/src/modules/rvw/skills/core/types.ts @@ -111,7 +111,7 @@ export interface ForensicsResult { * RVW 模块扩展字段 */ export interface RvwContextExtras { - documentPath: string; + documentPath?: string; // 可选:DataForensicsSkill 需要,Editorial/Methodology 不需要 documentContent: string; documentMeta?: DocumentMeta; tables?: TableData[]; diff --git a/backend/src/modules/rvw/skills/library/BaseSkill.ts b/backend/src/modules/rvw/skills/library/BaseSkill.ts index 8da0cbaa..980e4db0 100644 --- a/backend/src/modules/rvw/skills/library/BaseSkill.ts +++ b/backend/src/modules/rvw/skills/library/BaseSkill.ts @@ -20,6 +20,20 @@ import { } from '../core/types.js'; import { logger } from '../../../../common/logging/index.js'; +/** + * execute 方法的返回类型 + * 不需要包含 skillId, skillName, startedAt, completedAt, executionTime + * 这些字段由 BaseSkill.run() 自动填充 + */ +export type ExecuteResult = { + status: 'success' | 'warning' | 'error'; + score?: number; + scoreLabel?: string; + issues: SkillResult['issues']; + data?: unknown; + error?: string; +}; + /** * Skill 基类 * 使用泛型支持不同上下文和配置类型 @@ -39,11 +53,12 @@ export 
abstract class BaseSkill< /** * 子类实现具体逻辑 + * 返回值不需要包含 skillId, skillName, startedAt, completedAt, executionTime */ abstract execute( context: TContext, config?: TConfig - ): Promise>; + ): Promise; /** * 执行入口(统一处理日志、计时、配置验证等) @@ -52,10 +67,10 @@ export abstract class BaseSkill< const startedAt = new Date(); const startTime = Date.now(); - logger.info({ + logger.info(`[${this.metadata.id}] Starting execution`, { skillId: this.metadata.id, taskId: context.taskId, - }, `[${this.metadata.id}] Starting execution`); + }); try { // 配置验证(使用 Zod) @@ -64,13 +79,13 @@ export abstract class BaseSkill< const result = await this.execute(context, validatedConfig); const executionTime = Date.now() - startTime; - logger.info({ + logger.info(`[${this.metadata.id}] Execution completed`, { skillId: this.metadata.id, taskId: context.taskId, status: result.status, executionTime, issueCount: result.issues.length, - }, `[${this.metadata.id}] Execution completed`); + }); return { ...result, @@ -90,15 +105,15 @@ export abstract class BaseSkill< : SkillErrorCodes.SKILL_EXECUTION_ERROR; const errorMessage = isValidationError - ? `配置验证失败: ${(error as z.ZodError).errors.map(e => e.message).join(', ')}` + ? `配置验证失败: ${(error as z.ZodError).issues.map((e: z.ZodIssue) => e.message).join(', ')}` : `执行失败: ${error instanceof Error ? error.message : String(error)}`; - logger.error({ + logger.error(`[${this.metadata.id}] Execution failed`, { skillId: this.metadata.id, taskId: context.taskId, error: error instanceof Error ? 
error.message : String(error), errorType, - }, `[${this.metadata.id}] Execution failed`); + }); return { skillId: this.metadata.id, diff --git a/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts b/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts index b5f606fc..de282fcd 100644 --- a/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts +++ b/backend/src/modules/rvw/skills/library/DataForensicsSkill.ts @@ -8,11 +8,10 @@ * @since 2026-02-18 */ -import { BaseSkill } from './BaseSkill.js'; +import { BaseSkill, ExecuteResult } from './BaseSkill.js'; import { SkillMetadata, SkillContext, - SkillResult, DataForensicsConfigSchema, DataForensicsConfig, ForensicsResult, @@ -23,19 +22,12 @@ import { IExtractionClient, ForensicsResult as ClientForensicsResult, } from '../../../../common/document/ExtractionClient.js'; +import { storage } from '../../../../common/storage/index.js'; import { logger } from '../../../../common/logging/index.js'; - -/** - * 安全:允许的文件存储路径前缀 - */ -const ALLOWED_PATH_PREFIXES = [ - '/app/uploads/', // Docker 容器内路径 - 'D:\\MyCursor\\', // 开发环境 Windows - 'D:/MyCursor/', // 开发环境 Windows (forward slash) - '/tmp/rvw-uploads/', // 临时目录 - 'C:\\Users\\', // Windows 用户目录 - '/home/', // Linux 用户目录 -]; +import * as fs from 'fs/promises'; +import * as path from 'path'; +import * as os from 'os'; +import { randomUUID } from 'crypto'; /** * 数据侦探 Skill @@ -76,39 +68,35 @@ export class DataForensicsSkill extends BaseSkill { - const normalizedPrefix = prefix.replace(/\\/g, '/'); - return normalizedPath.startsWith(normalizedPrefix); - }); - - if (!isPathAllowed) { - logger.error({ + // 安全检查:OSS key 格式验证(tenants/xxx/users/xxx/rvw/xxx/xxx.docx) + const isOssKey = context.documentPath.startsWith('tenants/') || + context.documentPath.startsWith('temp/'); + + if (!isOssKey) { + logger.warn('[DataForensicsSkill] Invalid storage key format', { taskId: context.taskId, - documentPath: '[REDACTED]', // 不记录完整路径 - }, '[DataForensicsSkill] Document path 
not in allowed prefixes (security check)'); + }); return false; } // 检查是否包含路径遍历 if (context.documentPath.includes('..')) { - logger.error({ + logger.error('[DataForensicsSkill] Path traversal detected (security check)', { taskId: context.taskId, - }, '[DataForensicsSkill] Path traversal detected (security check)'); + }); return false; } @@ -117,23 +105,46 @@ export class DataForensicsSkill extends BaseSkill> { + ): Promise { const checkLevel = config?.checkLevel || 'L1_L2_L25'; const tolerancePercent = config?.tolerancePercent || 0.1; + const storageKey = context.documentPath!; - logger.info({ + logger.info('[DataForensicsSkill] Starting analysis', { taskId: context.taskId, + storageKey, checkLevel, tolerancePercent, - }, '[DataForensicsSkill] Starting analysis'); + }); + + // 创建临时文件路径 + const tempDir = os.tmpdir(); + const tempFilename = `rvw-${randomUUID()}.docx`; + const tempFilePath = path.join(tempDir, tempFilename); try { - // 使用依赖注入的 client - const result = await this.extractionClient.analyzeDocx(context.documentPath, { + // 1. 从 OSS 下载文件到临时目录 + logger.info('[DataForensicsSkill] Downloading file from storage', { + taskId: context.taskId, + storageKey, + tempFilePath, + }); + + const fileBuffer = await storage.download(storageKey); + await fs.writeFile(tempFilePath, fileBuffer); + + logger.info('[DataForensicsSkill] File downloaded successfully', { + taskId: context.taskId, + fileSize: fileBuffer.length, + }); + + // 2. 
调用 Python 服务分析临时文件 + const result = await this.extractionClient.analyzeDocx(tempFilePath, { checkLevel, tolerancePercent, }); @@ -159,13 +170,13 @@ export class DataForensicsSkill extends BaseSkill ({ - severity: issue.severity, - type: issue.type, - message: issue.message, - location: issue.location, - evidence: issue.evidence, - })); + // 防御性检查 + const rawTables = result.tables || []; + + // Python 返回的是 methodsFound(驼峰),也可能是 methods + const rawMethods = (result as any).methodsFound || result.methods || []; + + // 从 tables[].issues 中收集所有 issues + const allIssues: Issue[] = []; + for (const table of rawTables) { + const tableIssues = (table as any).issues || []; + for (const issue of tableIssues) { + allIssues.push({ + severity: issue.severity, + type: issue.type, + message: issue.message, + location: issue.location, + evidence: issue.evidence, + }); + } + } + + // 也检查顶层的 issues(兼容旧格式) + const topLevelIssues = result.issues || []; + for (const issue of topLevelIssues) { + allIssues.push({ + severity: issue.severity, + type: issue.type, + message: issue.message, + location: issue.location, + evidence: issue.evidence, + }); + } + + // 构建 summary(从 Python 返回的顶层字段或 summary 对象) + const pyResult = result as any; + const summary = result.summary || { + totalTables: pyResult.totalTables ?? rawTables.length, + totalIssues: pyResult.totalIssues ?? allIssues.length, + errorCount: pyResult.errorCount ?? allIssues.filter(i => i.severity === 'ERROR').length, + warningCount: pyResult.warningCount ?? 
allIssues.filter(i => i.severity === 'WARNING').length, + }; return { - tables: result.tables.map(t => ({ - id: t.id, - caption: t.caption, - data: t.data, - html: t.html, - headers: t.headers, - rowCount: t.rowCount, - colCount: t.colCount, - })), - methods: result.methods, - issues, + tables: rawTables.map(t => { + const tableIssues = ((t as any).issues || []).map((issue: any) => ({ + severity: issue.severity, + type: issue.type, + message: issue.message, + location: issue.location, + evidence: issue.evidence, + })); + return { + id: t.id || '', + caption: t.caption || '', + data: t.data || [], + html: t.html || '', + headers: t.headers || [], + rowCount: t.rowCount || 0, + colCount: t.colCount || 0, + issues: tableIssues, // 保留每个表格的 issues + }; + }), + methods: rawMethods, + issues: allIssues, summary: { - totalTables: result.summary.totalTables, - totalIssues: result.summary.totalIssues, - errorCount: result.summary.errorCount, - warningCount: result.summary.warningCount, + totalTables: summary.totalTables ?? rawTables.length, + totalIssues: summary.totalIssues ?? allIssues.length, + errorCount: summary.errorCount ?? 0, + warningCount: summary.warningCount ?? 
0, }, }; } diff --git a/backend/src/modules/rvw/skills/library/EditorialSkill.ts b/backend/src/modules/rvw/skills/library/EditorialSkill.ts index 4085b785..66a07419 100644 --- a/backend/src/modules/rvw/skills/library/EditorialSkill.ts +++ b/backend/src/modules/rvw/skills/library/EditorialSkill.ts @@ -8,17 +8,16 @@ * @since 2026-02-18 */ -import { BaseSkill } from './BaseSkill.js'; +import { BaseSkill, ExecuteResult } from './BaseSkill.js'; import { SkillMetadata, SkillContext, - SkillResult, EditorialConfigSchema, EditorialConfig, Issue, } from '../core/types.js'; import { reviewEditorialStandards } from '../../services/editorialService.js'; -import { EditorialReview, EditorialItem } from '../../types/index.js'; +import { EditorialReview } from '../../types/index.js'; import { logger } from '../../../../common/logging/index.js'; /** @@ -45,7 +44,7 @@ export class EditorialSkill extends BaseSkill { inputs: ['documentContent'], outputs: ['editorialResult'], - defaultTimeout: 45000, // 45 秒 + defaultTimeout: 180000, // 180 秒(LLM 调用可能较慢) retryable: true, icon: '📋', @@ -57,18 +56,18 @@ export class EditorialSkill extends BaseSkill { */ canRun(context: SkillContext): boolean { if (!context.documentContent || context.documentContent.trim().length === 0) { - logger.warn({ taskId: context.taskId }, '[EditorialSkill] No document content'); + logger.warn('[EditorialSkill] No document content', { taskId: context.taskId }); return false; } // 资源限制检查 const maxLength = DEFAULT_MAX_CONTENT_LENGTH; if (context.documentContent.length > maxLength) { - logger.warn({ + logger.warn('[EditorialSkill] Content too long', { taskId: context.taskId, contentLength: context.documentContent.length, limit: maxLength, - }, '[EditorialSkill] Content too long'); + }); return false; } @@ -81,23 +80,23 @@ export class EditorialSkill extends BaseSkill { async execute( context: SkillContext, config?: EditorialConfig - ): Promise> { + ): Promise { const maxContentLength = config?.maxContentLength || 
DEFAULT_MAX_CONTENT_LENGTH; - logger.info({ + logger.info('[EditorialSkill] Starting evaluation', { taskId: context.taskId, contentLength: context.documentContent.length, - }, '[EditorialSkill] Starting evaluation'); + }); // 截断过长内容 let content = context.documentContent; if (content.length > maxContentLength) { content = content.substring(0, maxContentLength); - logger.warn({ + logger.warn('[EditorialSkill] Content truncated', { taskId: context.taskId, originalLength: context.documentContent.length, truncatedLength: maxContentLength, - }, '[EditorialSkill] Content truncated'); + }); } // 调用现有 editorialService @@ -119,13 +118,13 @@ export class EditorialSkill extends BaseSkill { status = 'success'; } - logger.info({ + logger.info('[EditorialSkill] Evaluation completed', { taskId: context.taskId, score: result.overall_score, itemCount: result.items.length, errorCount, warningCount, - }, '[EditorialSkill] Evaluation completed'); + }); return { status, diff --git a/backend/src/modules/rvw/skills/library/MethodologySkill.ts b/backend/src/modules/rvw/skills/library/MethodologySkill.ts index b05d97bb..75c9ac3a 100644 --- a/backend/src/modules/rvw/skills/library/MethodologySkill.ts +++ b/backend/src/modules/rvw/skills/library/MethodologySkill.ts @@ -8,17 +8,16 @@ * @since 2026-02-18 */ -import { BaseSkill } from './BaseSkill.js'; +import { BaseSkill, ExecuteResult } from './BaseSkill.js'; import { SkillMetadata, SkillContext, - SkillResult, MethodologyConfigSchema, MethodologyConfig, Issue, } from '../core/types.js'; import { reviewMethodology } from '../../services/methodologyService.js'; -import { MethodologyReview, MethodologyIssue } from '../../types/index.js'; +import { MethodologyReview } from '../../types/index.js'; import { logger } from '../../../../common/logging/index.js'; /** @@ -45,7 +44,7 @@ export class MethodologySkill extends BaseSkill inputs: ['documentContent', 'methods'], outputs: ['methodologyResult'], - defaultTimeout: 45000, // 45 秒 + defaultTimeout: 
180000, // 180 秒(方法学分析需要更长时间) retryable: true, icon: '🔬', @@ -57,18 +56,18 @@ export class MethodologySkill extends BaseSkill */ canRun(context: SkillContext): boolean { if (!context.documentContent || context.documentContent.trim().length === 0) { - logger.warn({ taskId: context.taskId }, '[MethodologySkill] No document content'); + logger.warn('[MethodologySkill] No document content', { taskId: context.taskId }); return false; } // 资源限制检查 const maxLength = DEFAULT_MAX_CONTENT_LENGTH; if (context.documentContent.length > maxLength) { - logger.warn({ + logger.warn('[MethodologySkill] Content too long', { taskId: context.taskId, contentLength: context.documentContent.length, limit: maxLength, - }, '[MethodologySkill] Content too long'); + }); return false; } @@ -81,34 +80,34 @@ export class MethodologySkill extends BaseSkill async execute( context: SkillContext, config?: MethodologyConfig - ): Promise> { + ): Promise { const maxContentLength = config?.maxContentLength || DEFAULT_MAX_CONTENT_LENGTH; - logger.info({ + logger.info('[MethodologySkill] Starting evaluation', { taskId: context.taskId, contentLength: context.documentContent.length, detectedMethods: context.methods?.length || 0, - }, '[MethodologySkill] Starting evaluation'); + }); // 截断过长内容 let content = context.documentContent; if (content.length > maxContentLength) { content = content.substring(0, maxContentLength); - logger.warn({ + logger.warn('[MethodologySkill] Content truncated', { taskId: context.taskId, originalLength: context.documentContent.length, truncatedLength: maxContentLength, - }, '[MethodologySkill] Content truncated'); + }); } // 如果 DataForensicsSkill 提取了统计方法,可以添加到 prompt 中 // 目前 reviewMethodology 不支持此参数,留作未来扩展 const methodsHint = context.methods?.join(', ') || ''; if (methodsHint) { - logger.debug({ + logger.debug('[MethodologySkill] Using detected methods as hint', { taskId: context.taskId, methodsHint, - }, '[MethodologySkill] Using detected methods as hint'); + }); } // 调用现有 
methodologyService @@ -130,13 +129,13 @@ export class MethodologySkill extends BaseSkill status = 'success'; } - logger.info({ + logger.info('[MethodologySkill] Evaluation completed', { taskId: context.taskId, score: result.overall_score, partCount: result.parts.length, errorCount, warningCount, - }, '[MethodologySkill] Evaluation completed'); + }); return { status, diff --git a/backend/src/modules/rvw/skills/test-skills.ts b/backend/src/modules/rvw/skills/test-skills.ts new file mode 100644 index 00000000..d71b75fe --- /dev/null +++ b/backend/src/modules/rvw/skills/test-skills.ts @@ -0,0 +1,103 @@ +/** + * RVW Skills 架构 - 快速验证脚本 + * + * 运行方式: npx tsx src/modules/rvw/skills/test-skills.ts + */ + +import { SkillRegistry } from './core/registry.js'; +import { ProfileResolver, DEFAULT_PROFILE } from './core/profile.js'; +import { ContextBuilder } from './core/context.js'; +import { SkillExecutor } from './core/executor.js'; +import { registerBuiltinSkills } from './library/index.js'; + +// 注册内置 Skills +registerBuiltinSkills(); + +async function main() { + console.log('='.repeat(60)); + console.log('RVW Skills V2.0 架构验证'); + console.log('='.repeat(60)); + + // 1. 测试 SkillRegistry + console.log('\n📋 1. SkillRegistry 验证'); + console.log('-'.repeat(40)); + + const summary = SkillRegistry.getSummary(); + console.log(` 已初始化: ${summary.initialized}`); + console.log(` 注册 Skills 数量: ${summary.skillCount}`); + console.log(` 分类统计:`, summary.categories); + + const allSkills = SkillRegistry.getAllMetadata(); + console.log('\n 已注册的 Skills:'); + for (const skill of allSkills) { + console.log(` - ${skill.id} (${skill.name}) v${skill.version}`); + } + + // 2. 测试 ProfileResolver + console.log('\n📋 2. 
ProfileResolver 验证'); + console.log('-'.repeat(40)); + + const defaultProfile = ProfileResolver.resolve('default'); + console.log(` 默认 Profile: ${defaultProfile.name}`); + console.log(` Pipeline 长度: ${defaultProfile.pipeline.length}`); + console.log(` Pipeline Skills:`); + for (const item of defaultProfile.pipeline) { + console.log(` - ${item.skillId} (enabled: ${item.enabled}, optional: ${item.optional})`); + } + + // 测试动态 Profile + const dynamicProfile = ProfileResolver.resolveFromAgents(['editorial', 'methodology']); + console.log(`\n 动态 Profile (editorial + methodology):`); + const enabledSkills = dynamicProfile.pipeline.filter(p => p.enabled); + console.log(` 启用的 Skills: ${enabledSkills.map(p => p.skillId).join(', ')}`); + + // 3. 测试 ContextBuilder + console.log('\n📋 3. ContextBuilder 验证'); + console.log('-'.repeat(40)); + + const context = new ContextBuilder() + .taskId('test-task-123') + .userId('test-user-456') + .documentPath('D:/MyCursor/test/document.docx') // 使用允许的路径前缀 + .documentContent('这是一篇测试论文的内容...') + .profile(defaultProfile) + .build(); + + console.log(` taskId: ${context.taskId}`); + console.log(` userId: ${context.userId}`); + console.log(` documentPath: ${context.documentPath}`); + console.log(` documentContent 长度: ${context.documentContent.length}`); + + // 4. 测试 canRun 检查 + console.log('\n📋 4. Skill canRun 检查'); + console.log('-'.repeat(40)); + + for (const skill of SkillRegistry.getAll()) { + const canRun = skill.canRun ? skill.canRun(context) : true; + console.log(` ${skill.metadata.id}: canRun = ${canRun}`); + } + + // 5. 
验证总结 + console.log('\n' + '='.repeat(60)); + console.log('✅ Skills 架构核心组件验证完成!'); + console.log('='.repeat(60)); + + // 检查是否有问题 + if (summary.skillCount < 3) { + console.log('\n⚠️ 警告: 注册的 Skills 数量少于预期 (预期 3 个)'); + } + + if (!SkillRegistry.has('DataForensicsSkill')) { + console.log('⚠️ 警告: DataForensicsSkill 未注册'); + } + if (!SkillRegistry.has('EditorialSkill')) { + console.log('⚠️ 警告: EditorialSkill 未注册'); + } + if (!SkillRegistry.has('MethodologySkill')) { + console.log('⚠️ 警告: MethodologySkill 未注册'); + } + + console.log('\n下一步: 启动后端服务,通过 API 测试完整流程'); +} + +main().catch(console.error); diff --git a/backend/src/modules/rvw/types/index.ts b/backend/src/modules/rvw/types/index.ts index a75168c6..f244bec4 100644 --- a/backend/src/modules/rvw/types/index.ts +++ b/backend/src/modules/rvw/types/index.ts @@ -65,6 +65,45 @@ export interface MethodologyReview { parts: MethodologyPart[]; } +// ==================== 数据验证(DataForensics) ==================== + +export interface ForensicsIssue { + severity: 'ERROR' | 'WARNING' | 'INFO'; + type: string; + message: string; + location?: { + tableId?: string; + cellRef?: string; + paragraph?: number; + }; + evidence?: Record; +} + +export interface ForensicsTable { + id: string; + caption: string; + html: string; + data: string[][]; + headers: string[]; + rowCount: number; + colCount: number; + skipped?: boolean; + skipReason?: string; + issues: ForensicsIssue[]; +} + +export interface ForensicsResult { + tables: ForensicsTable[]; + methods: string[]; + issues: ForensicsIssue[]; + summary: { + totalTables: number; + totalIssues: number; + errorCount: number; + warningCount: number; + }; +} + // ==================== 请求参数 ==================== /** @@ -142,6 +181,7 @@ export interface ReviewReport { overallScore?: number; editorialReview?: EditorialReview; methodologyReview?: MethodologyReview; + forensicsResult?: ForensicsResult; completedAt?: Date; durationSeconds?: number; } diff --git 
a/backend/src/modules/rvw/workers/reviewWorker.ts b/backend/src/modules/rvw/workers/reviewWorker.ts index e79d7c0f..8caaf0c0 100644 --- a/backend/src/modules/rvw/workers/reviewWorker.ts +++ b/backend/src/modules/rvw/workers/reviewWorker.ts @@ -65,16 +65,50 @@ function ensureSkillsInitialized() { } } +/** + * 清理卡住的任务(启动时调用) + * 当服务重启时,之前正在执行的任务会卡在 'reviewing' 状态 + */ +async function cleanupStuckTasks(): Promise { + try { + const stuckTasks = await prisma.reviewTask.updateMany({ + where: { + status: { + in: ['reviewing', 'reviewing_editorial', 'reviewing_methodology'], + }, + }, + data: { + status: 'failed', + errorMessage: '服务重启导致任务中断,请重新提交', + }, + }); + + if (stuckTasks.count > 0) { + logger.warn('[reviewWorker] Cleaned up stuck tasks on startup', { + count: stuckTasks.count, + }); + console.log(`⚠️ 启动时清理了 ${stuckTasks.count} 个卡住的任务`); + } + } catch (error) { + logger.error('[reviewWorker] Failed to cleanup stuck tasks', { + error: error instanceof Error ? error.message : String(error), + }); + } +} + /** * 注册审查 Worker 到队列 * * 此函数应在应用启动时调用(index.ts) */ -export function registerReviewWorker() { +export async function registerReviewWorker() { logger.info('[reviewWorker] Registering reviewWorker', { useSkillsArchitecture: USE_SKILLS_ARCHITECTURE, }); + // 清理卡住的任务 + await cleanupStuckTasks(); + // 初始化 Skills ensureSkillsInitialized(); @@ -113,6 +147,15 @@ export function registerReviewWorker() { }, }); + // 调试日志:检查 filePath + logger.info('[reviewWorker] Task info from DB', { + taskId, + filePath: existingTask?.filePath || '(empty)', + fileName: existingTask?.fileName, + fileSize: existingTask?.fileSize, + }); + console.log(` 📁 filePath: ${existingTask?.filePath || '(空)'}`); + if (existingTask?.status === 'completed' && existingTask.completedAt) { logger.warn('[reviewWorker] ⚠️ Task already completed, skipping', { jobId: job.id, @@ -223,8 +266,7 @@ export function registerReviewWorker() { // ======================================== logger.info('[reviewWorker] Updating 
task result', { taskId }); - // 构建 Skills 执行摘要(V2.0 新增,存储到 picoExtract 字段) - // 注意:picoExtract 字段暂时复用,未来迁移后移到专用字段 + // 构建 Skills 执行摘要(V2.0 新增,存储到专用 contextData 字段) const skillsContext = USE_SKILLS_ARCHITECTURE && skillsSummary ? { version: '2.0', @@ -246,7 +288,7 @@ export function registerReviewWorker() { status: 'completed', editorialReview: editorialResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, methodologyReview: methodologyResult as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, - picoExtract: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, + contextData: skillsContext as unknown as Prisma.InputJsonValue ?? Prisma.JsonNull, overallScore, editorialScore: editorialScore, methodologyScore: methodologyScore, diff --git a/docs/00-系统总体设计/00-系统当前状态与开发指南.md b/docs/00-系统总体设计/00-系统当前状态与开发指南.md index 9f79ca73..3476a37f 100644 --- a/docs/00-系统总体设计/00-系统当前状态与开发指南.md +++ b/docs/00-系统总体设计/00-系统当前状态与开发指南.md @@ -1,10 +1,11 @@ # AIclinicalresearch 系统当前状态与开发指南 -> **文档版本:** v5.1 +> **文档版本:** v5.2 > **创建日期:** 2025-11-28 > **维护者:** 开发团队 > **最后更新:** 2026-02-18 > **🎉 重大里程碑:** +> - **2026-02-18:RVW V2.0 Week 3 完成!** 统计验证扩展 + 负号归一化 + 文件格式提示 + 用户体验优化 > - **2026-02-18:RVW V2.0 Skills 架构完成!** Skills 核心框架 + 3个 Skill 实现 + ReviewWorker 改造 > - **2026-02-17:RVW V2.0 "数据侦探" Day 6 完成!** L2统计验证器 + L2.5一致性取证(SE三角验证、SD>Mean) > - **2026-02-08:IIT 事件级质控 V3.1 开发完成!** record+event 独立质控 + 规则动态过滤 + 报告去重 + AI对话增强 @@ -18,13 +19,14 @@ > - **2026-01-24:Protocol Agent 框架完成!** 可复用Agent框架+5阶段对话流程 > - **2026-01-22:OSS 存储集成完成!** 阿里云 OSS 正式接入平台基础层 > -> **最新进展(RVW V2.0 Skills 架构 2026-02-18):** +> **最新进展(RVW V2.0 Week 3 完成 2026-02-18):** +> - ✅ **负号归一化**:6 种 Unicode 负号变体支持,防止 float() 崩溃 +> - ✅ **T 检验验证增强**:智能样本量提取 + subrow 精确高亮 +> - ✅ **SE 三角/CI-P 验证增强**:多行单元格 subrow 支持 +> - ✅ **前端翻译映射更新**:6 种新 IssueType 中文翻译 +> - ✅ **文件格式提示**:PDF/.doc 上传时提示无法数据验证 > - ✅ **Skills 核心框架**:types、registry、executor、profile、context -> - ✅ **Zod 配置验证**:运行时类型安全 -> - ✅ **DataForensicsSkill**:依赖注入 + 
路径安全 + 优雅降级 -> - ✅ **EditorialSkill + MethodologySkill**:封装现有服务 -> - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 架构切换 -> - ✅ **12 个新文件**:约 1735 行代码 +> - ✅ **3 个 Skill 实现**:DataForensics、Editorial、Methodology > > **部署状态:** ✅ 生产环境运行中 | 公网地址:http://8.140.53.236/ > **REDCap 状态:** ✅ 生产环境运行中 | 地址:https://redcap.xunzhengyixue.com/ @@ -67,7 +69,7 @@ | **IIT** | IIT Manager Agent | AI驱动IIT研究助手 - 双脑架构+REDCap集成 | ⭐⭐⭐⭐⭐ | 🎉 **事件级质控V3.1完成(设计100%,代码60%)** | **P0** | | **SSA** | 智能统计分析 | 队列/预测模型/RCT分析 | ⭐⭐⭐⭐⭐ | 📋 规划中 | P2 | | **ST** | 统计分析工具 | 100+轻量化统计工具 | ⭐⭐⭐⭐ | 📋 规划中 | P2 | -| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Skills架构 + Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0开发中(Week2 Day10完成)** - Skills核心框架+Skill实现+Worker改造 | P1 | +| **RVW** | 稿件审查系统 | 方法学评估 + 🆕数据侦探(L1/L2/L2.5验证)+ Skills架构 + Word导出 | ⭐⭐⭐⭐ | 🚀 **V2.0 Week3完成(85%)** - 统计验证扩展+负号归一化+文件格式提示+用户体验优化 | P1 | | **ADMIN** | 运营管理端 | Prompt管理、租户管理、用户管理、运营监控、系统知识库 | ⭐⭐⭐⭐⭐ | 🎉 **Phase 4.6完成(88%)** - Prompt知识库集成+动态注入 | **P0** | --- diff --git a/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md b/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md index 85f99920..6f1967bc 100644 --- a/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md +++ b/docs/03-业务模块/RVW-稿件审查系统/00-模块当前状态与开发指南.md @@ -1,21 +1,30 @@ # RVW稿件审查模块 - 当前状态与开发指南 -> **文档版本:** v5.0 +> **文档版本:** v5.1 > **创建日期:** 2026-01-07 > **最后更新:** 2026-02-18 > **维护者:** 开发团队 -> **当前状态:** 🚀 **V2.0 "数据侦探" 开发中(Week 2 Day 10 完成)** +> **当前状态:** 🚀 **V2.0 "数据侦探" Week 3 完成(统计验证扩展+用户体验优化)** > **文档目的:** 快速了解RVW模块状态,为新AI助手提供上下文 > -> **🎉 V2.0 进展(2026-02-18):** +> **🎉 V2.0 进展(2026-02-18 Week 3):** +> - ✅ **负号归一化**:防止 float() 崩溃,覆盖 6 种负号变体 +> - ✅ **T 检验验证增强**:智能样本量提取 + subrow 精确高亮 +> - ✅ **SE 三角验证增强**:多行单元格 subrow 支持 +> - ✅ **CI vs P 值验证增强**:subrow 支持 + 灵活 P 值解析 +> - ✅ **前端翻译映射**:新增 6 种 IssueType 中文翻译 +> - ✅ **文件格式提示**:PDF/.doc 上传时提示无法数据验证 +> +> **🎉 V2.0 进展(Week 1-2):** > - ✅ **L1 算术验证器**:行列加总、百分比验证(Day 3) -> - ✅ **L2 统计验证器**:CI↔P 值一致性、T检验逆向验证(Day 6) +> - ✅ **L2 统计验证器**:CI↔P 值一致性、卡方检验逆向验证(Day 6) > - ✅ **L2.5 
一致性取证**:SE三角验证、SD>Mean检查(Day 6 终审提权) -> - ✅ **Word 文档解析**:python-docx 表格提取(Day 2) +> - ✅ **Word 文档解析**:python-docx 表格提取 + 特殊符号提取(Day 2) > - ✅ **Skills 核心框架**:types、registry、executor、profile、context(Day 7) -> - ✅ **DataForensicsSkill**:依赖注入、路径安全、优雅降级(Day 8) +> - ✅ **DataForensicsSkill**:OSS 集成、依赖注入、优雅降级(Day 8) > - ✅ **EditorialSkill + MethodologySkill**:封装现有服务(Day 9) > - ✅ **ReviewWorker 改造**:集成 SkillExecutor,支持 V1/V2 切换(Day 10) +> - ✅ **前端数据验证 Tab**:ForensicsReport 组件、精确单元格高亮(Week 3) --- @@ -377,37 +386,50 @@ Content-Type: multipart/form-data | 阶段 | 任务 | 状态 | 完成日期 | |------|------|------|---------| | Week 1 Day 1 | Python 服务搭建 | ✅ 已完成 | 2026-02-12 | -| Week 1 Day 2 | Word 表格提取 | ✅ 已完成 | 2026-02-13 | +| Week 1 Day 2 | Word 表格提取 + 特殊符号 | ✅ 已完成 | 2026-02-13 | | Week 1 Day 3 | L1 算术验证器 | ✅ 已完成 | 2026-02-14 | | Week 1 Day 4 | 数据结构设计 | ✅ 已完成 | 2026-02-15 | | Week 1 Day 5 | API 集成 | ✅ 已完成 | 2026-02-16 | -| **Week 2 Day 6** | **L2 统计验证器 + L2.5 一致性取证** | **✅ 已完成** | **2026-02-17** | -| Week 2 Day 7 | Skills 核心框架 | 📋 待开发 | - | -| Week 2 Day 8 | DataForensicsSkill | 📋 待开发 | - | -| Week 2 Day 9 | EditorialSkill 封装 | 📋 待开发 | - | -| Week 2 Day 10 | ReviewService 改造 | 📋 待开发 | - | +| Week 2 Day 6 | L2 统计验证器 + L2.5 一致性取证 | ✅ 已完成 | 2026-02-17 | +| Week 2 Day 7 | Skills 核心框架 | ✅ 已完成 | 2026-02-18 | +| Week 2 Day 8 | DataForensicsSkill | ✅ 已完成 | 2026-02-18 | +| Week 2 Day 9 | EditorialSkill 封装 | ✅ 已完成 | 2026-02-18 | +| Week 2 Day 10 | ReviewWorker 改造 | ✅ 已完成 | 2026-02-18 | +| **Week 3** | **统计验证扩展 + 用户体验优化** | **✅ 已完成** | **2026-02-18** | +| Week 4 | 功能测试 + Bug 修复 | 📋 待开始 | - | -**V2.0 核心功能**: +**V2.0 核心功能(已完成)**: - **L1 算术验证**:行列加总、百分比验证 -- **L2 统计验证**:CI↔P 一致性、T检验逆向、卡方检验 +- **L2 统计验证**:CI↔P 一致性、T检验逆向、卡方检验(含 subrow 精确高亮) - **L2.5 一致性取证**(终审提权):SE三角验证、SD>Mean检查 - **Skills 架构**:Skill Registry、Skill Executor、Journal Profiles +- **负号归一化**:6 种 Unicode 负号变体支持 +- **文件格式提示**:PDF/.doc 无法数据验证的用户提示 + +**Week 3 完成内容(2026-02-18)**: +- ✅ 负号归一化(防止 float() 崩溃) +- ✅ T 检验验证增强(智能样本量提取) +- ✅ SE 
三角验证增强(subrow 支持) +- ✅ CI vs P 值验证增强(subrow 支持) +- ✅ 前端翻译映射更新(6 种新 IssueType) +- ✅ 文件格式提示(Header、ReportDetail、TaskDetail) ### 后续版本(V2.1+) +- [ ] Week 4 功能测试和 Bug 修复 +- [ ] ANOVA 验证(多组比较) +- [ ] 配对 T 检验验证 +- [ ] 非参数检验(Mann-Whitney、Wilcoxon) +- [ ] .doc 格式支持(Pandoc 方案评估) +- [ ] Profile 管理 UI(期刊配置界面) - [ ] PDF报告导出优化 - [ ] PICO卡片UI实现 - [ ] 历史归档UI实现 -- [ ] L3 高级逻辑推理验证 -- [ ] 登录页面(独立产品时) -- [ ] 审稿人管理系统 -- [ ] 多轮审稿流程 -- [ ] 期刊库管理 - [ ] 独立产品打包 --- -**文档版本:** v3.2 -**最后更新:** 2026-01-10 -**当前状态:** ✅ Phase 1-6 完成,模块95%可用,Schema已隔离 -**下一步:** 生产环境部署测试 +**文档版本:** v5.1 +**最后更新:** 2026-02-18 +**当前状态:** 🚀 V2.0 "数据侦探" Week 3 完成,Skills 架构 + 统计验证 + 用户体验优化 +**下一步:** Week 4 功能测试和 Bug 修复 diff --git a/docs/03-业务模块/RVW-稿件审查系统/05-测试文档/Test_刘锦_2019—2022年昆明市二、三级医院卒中中心急性缺血性卒中静脉溶栓指标分析_定稿0314 - 副本.docx b/docs/03-业务模块/RVW-稿件审查系统/05-测试文档/Test_刘锦_2019—2022年昆明市二、三级医院卒中中心急性缺血性卒中静脉溶栓指标分析_定稿0314 - 副本.docx new file mode 100644 index 0000000000000000000000000000000000000000..2d7c7fa2907a63f328612d4631bb38141889eadd GIT binary patch literal 58731 zcmZts19T-%)IJKwoM>V@nb@``I^o3T#5u7!v29E;v2EM7?Gsx!zx%)6y6=0}SF5_K zYOh^YyX#q9-TT?4C<_UN0QO%B0qhF@SN{JJ{8z=q&REgG&fbwp@k=m;irmZ$abrR`MBu;c5 z`M@-ynIhM>Da`V$n&SY`9C5^p6btNt4$A&9WY`^ZrA4I`e6CQ`zkH~5TXbQMlATD! 
zkcNqq@IFxU42@|5_=VI%YmZmjbBn_)IiE`eHs8ko_VhfEk@ynr-3d=Zl@EXmOyS1P zLGkTrO<^z9Lph}X1^q#1?Pt4gr#~hb{^L(w&_#eiQpDoH3$*Wb$;O^Bh4{-Nau%?&}V%|`%@ag3Q|4hR2ybdO`6&>OQ}akQd@kG z2}dnEYP+X$e=a@ZFt2+;F)KGJ_wXJkMsRs=np}P>oE*)wtox-`6RGC>YZkhoTn{{r zSLEC^@MfaKejj5PV^Aei`!MH!&sl#+4O&n+?Z=e-Xx-P_Wg2U%X;K zaYZmKRpk?#V7*&YZYlp;9a%k9Jl1(XF+>;dBm^-D)14JHm~!>)faHIKm{ekXmq-K4 zVT6h65*1=iyM?usk2V%f%l6kJO@8R2X3bDjl#c|i-P4ic>?8#^ab|WAL?qC`5Hi!7 zJKW*dO-q3c8#%BZ2fdoKTd6KZq##P|@C5GG7(mouYh}L2?9lJqp(pPSj}vvIBoIgj z%%}w8otu{49)L91#yI)8eYCIF*XJxN4M`8+0E!zvH%TX z`Ftl^R0l`|0rb}D-4+GmzEy3+gg5i=sfo3a4)*c$+Kg1X*DX6iTZW3gvnA9OrrNeM zDDYJ9dxUw@`*O{Uy3i-$E5!c>K}rPWJn;(z(AUd>{=YzQc673{`45JMcwM^;W{luV z>I)%~E%eB+weKRO*Ksr(%7dbz+h(GJ?1cv8=l@LSK3)l&SLP!&tXB_aJnx=(hKAa_ zL414SQX^20{sMvBxtXEq*RaIrwMp)f-%4op)U&qdVbPPeU7jDJ9xX5+2{H3$r{NgA z?G-IGG?FP`SJfQ~jVlu*BfSfpcmy@_EO7qDkql)!n3BR((K{r{+#Oya!s@w2B)n*w zFbuB%4p-QJZxlSD3*yRGX*DL zhCG)YL7rENRuJp&7!dU=2QB-<4GyrWhKKn6AXLP?t85%i|SeS8_Y zUaKILYu|5kh}~=xXO+^8eWc*E`kJ z*HFN~-W|ZezKZ{&;^^dVZR+?R#uvK!cI$&^zD(1X!s{xpB(g=sH;5(z1J!mD}1DK!WC+D&MuB_yIV|vyHY#VOI-~MjvB`j=tAu{582tocIzM>>C<3z zu250VbKRPMj}IG{zLymXBJq0A^uSzyW}Z~fzD;_0Y}ky9FfL<9t`R{lljNg zw>as38~dPr!+nM$dO0*8n;G~WDMCV8*U#F;!}&#n(7N(C**Ox%Baw&oQlw?taOw!| z8;y8B`9uHXxAptJ_OuT`!CxI*W|k@s#Q5d zx!Z1xF&CB@46Ncy$4|*Iim1a7+tb5~3biurG6{zO`OF3X47X09^Q>1#`@ z#viWE%x&0I_g#jz@td+<$M``@^QeVY$Na*8-486S5zM+?;{8*X@$Z}6V3gS|!Y8QnU`DA_eFpWQifI5}H4 zn{y#WHN*qVufZLBuKN=S!=8gw<}_fpYAx{rCJuM&#)yInU=QZFRE>4?;6_r(J~OPF zKQY|e(OMmUf3jS9PY3>WmFNLkDVmKR$Q|CTfzb#IaY{(lQ8l)4u^eyq10Jbd+PG?# zI>O#T^6Q|@{;X9hRbs3pd3N z*nGw^{haljKWoR_u$AFVt$3Zl*!PJ=dRg|hTKXC5kuL$^mq6`5LG_nF#91x;=Pv)B z5!^zxTX#GO3!H*+>6KfL5#??b@;@}^&7|xRBL`7j6MbkARw2^o^h*JS61U|k4f(>jCv~+Y3dZu~(rrGSOW+}cIh!(_} zb#1-W@HwH}r;&lW4)-O}|9LNeCV+RkHBq=@Idq(72u0$4>> zYg_;nw}|$_#Q4iM8vBpJ&_p?&S(ru|r>L64H1n$t_^+N=PHY}Vn30_0J@<0Et`O*f zyg1RUgnCVgLwCe~%Or{&AR(Y^!7tG*VyyB3l)%KBwPcI>8b>LcEWp4i3dl3DOvx`JpRAtvY8$)y}0nQbA;l(t4A!OCd?7#`@@wb0F{Cs4h^`k9iJH+PUSKmsvL{>L`rW=);2&r`jM&| 
zMUJhvvs|8{bc2y>^HfP{3U$Kf+F3nbk9tq=+W^{%bBvO9v~zDzaEq##(8|JrR#QC+ zKikz>QnQqL?Amj@ZfqTus1Q@#Fv3R08O$orW#+g1JR=3%jM8@R+9)S}*#ePyZ6c$2 zt?r9;%JayzqzHyW3iyb4zFaE%?$@>dLu0b<&#F}-r3;>E+g;jks{RJq>7X!Qj92vd zJjK{NN{oNSaL_a_A|ka8>06Yl)O=)|UCPsNoL!p52)j46AI{Z`EojJo0T_IFBm>bu ze%6p9D4&P^zsT0~X_e(b9l-7zKe7jn_ z$GM8%LlUax--jMb-np`V#J==AaIDY&`}Ce;oFgR|cRrr8+G2OTg$7W2EOSk46Xf-w z$|Z2ybwNs}&UZ=XREHh(C}FYwO0`zH`mK(Uz=I%q5pKjPlcw2bw4!LOUcRVf1yegg zCnqLuJGI67o!rnWnk%M^2iHRWLDyfa&PP3W@oo#PSeM`!!HDc{ro5@}akyz-9s%M; z8-5)qWBCVcFU|&1v3F@!Np!kn>bPnYrYc5)e`!rXxKV91qncukoVZr+D#()jOhbe) zk;7fZzkDPE!9;#jmBi|X3SuRW3$wo;foCddI3wJ2H<&<{(vCJZjL3l`sXa{iVP9Tn zx?!DpnvDN%Ajn@M8UGm>-Cm;cHJXuT0vG1+|7XOn zKEoLZqNH}T$zk{ofUh11CV3$QKPm0VVg=l-kfqm|-Bpz2U`pe;Fb7Ns3BP9ju`fUH zKkfP7&-zDnQd}&Jh3vH;v}9oiv+4|hBt+Isozw;79f0tm-overT^CC z|EyFY#Q!u8s+Qc27B1+^a4Oqjo?rj(lA*!LB$q4#z zDMm!9Yb6W}T=j%#_(YCf+okTL$zmf6mQZ%>_dvg4m2f+z3$#!Ih=qgLHwQ;^#OZuY zIHLn*(J$RsqJU8MkGVX@Z5V;2b~?OzWw*gc#33Cm#FE#LdYe%(U}x+xg&#LR z7k&2P_2O?JlWKcjKl-r+dknE}ST(g{N?kZ6f5$Yx(F@ucr@vqW? 
zSSz=vos?4^X<3DVD4e(2H^NOT!?mL^^^0tSSTTP!@8EoZ5F9e!+hwpSha#GLil0M% z=U((66BRlXT{+|EAYFxHbKpL>JjQ)K4J1%j!NWBOnZU>VPP`NaP{sSdyxU`0d_NMH{~3AZ@a)K}D6g z=3#UN&pkf43}o#uWxAPZ%0*S`{!!qyCpqCpMJM=RGZMKP9yrOCL_8)|T@=qjb>eUb zA*A`qw`}#=y=os*6>LF<3G>KwctsXvHX!-!&QFHQZc2AZE~T>PBnO5QC0)WruyWrg zpnLJ>gsnNaXz_^_9^fHJUSRi+Igd6V8qcFv6-H7u=1($?ybSCIOjSwrA)%M(h2>!s z^E%GUs86(>jRnc6Z}j}@q5XG-&X2f~O?-(l=YuA)FrD>L>81OZKd^H!4V<_|zz?rQ z6j#$z`j~N&{M%-&W=S_zBaPE~icSR#IS}QW%hUsvhc&pwZohl&i$ zPr$_GFLwR+)K0xmE{V}&=7Y>KEiCMIEir^Ee0f6MVe9a3v@v}W^BaSnszn=B#Q?)~ z@pp%|f5JS#g~%VC_r*8%i#zQ&jJ5je*h^-ENG9clb!3QxpvZwPj<6W25SvBLSWK^A z6txNCwXE>Msb@B1YSu~n*n^6r%!+vFCnE&Yh4O7djPl^#!*=O7Op?bNJV2f-(Wi?R zRcLz%lV4a(S*)Gt?vEAK!*3KT{5d4j<|{DL%OMRDe+2x1&Zxd2RLHjAF2f5mGF|#WtcFvW5vANs>=!?t0Zi$jNSkeIlG*dYbkMI9g@PNlbfWm9j(zpJ^>I$^ zHqSsbklwZyM#{aSymv?VB&(-ZtB|^??~&h8{E!c7Xc+~C(US2A|gx z{VlH>|J#>{t@h_z;-C;K$x5>iR(Fo6Ao{Ts+h5-rWYwBE)#b!0sKS}&?eu)C`BZVW zKM}<$^xf{t+;Kn$S%+(saqcw<~|v@U!mv=AzYnpX_TGv0czPSx_f#e|}xs@_KQv@N7ibrc~G2YN#70 ziFjvyUc$wQ9-2Hs&8OUyO6G<}NX8PK6AV+s!Kqu| zq2Rp~wfN~&X4LfU80^WEM6diBv4(UoyRR^TS+z!)yLR0lOuNKt;hh{^~+UrqGL(dNd z?PG++im-YkCWvn`FEJJ0qj;-)B{A; zg_XO28o|Uz5*zq8#PSh@j7v>D^xiGb-QT(Ve*$yxf~_mcPkE1C5`;QF`o_43-`^&N zn5u-{j_*UdV&Nrtz7J~x`@+VxW}BNDJnQ+sBLK|!!*g@o!5x8oc*Km0k}AV!AszW2 zJJ&mgd;%3gl49)segl26t@4kjXG~qsH*6*NPZ^wwOZv=fFS{*0?)w3X9#gzwzm~dk z{XRpQgl#&oe>YhtouPcVyr)c6`MwKMdt7RYVR%+73Bh(64wZjiwtA7{HKJjPbqVRh@<;uY#Oj;k$sA$m81xFB! zB^BEVtSmeBx_ZLy?%aa{aZ?pmGRi|!x4)bc^TRXKD@tI&79k`uC}}d3W9*ODi8TFW zcE7}-mf2X-nJA>%f~TK_Y0LxX|PLXPp`ko=KdZOGJZZN9(kKbhH{a)_NZL8@$T=3(b;(#jl77)1uicnapw? 
z)A_$zB}^@J_smTP8;8MEj6rES>m_K|DPudHWe{C+86uAAWeu}x;Xb89-^=GqC5rT< zU>Q>04;j~NP3iDOHj`(Pi>XOYrMu3hlQH8F+aF6*Zav(!J@T5~^)i7|OXulZ6N-L6 z8PrbA>NblRwO#r)N9~-QosFCe1eMC6l0xs&N#u8xsAte@Til#HYLqKhj>Z(Heu)U) zSREhUE}XJ9c{mM}V9snPTKfyJju2%^dFEcI-HSJ{ER`>mPRLk{)?DN3b^Xr&#n-M; ztcJg)XFY$u8#Fg5k%IKsBQ;_hTO9!dq>4lnA-8fBpwbiigI5l%DV&Z6(WP!ZwP|XK zqm<5&u@+~(P_<5dCMjlNeKrA5v4eGSvmUP7udq`VkOw2tdY@F=!TGDAWlEh6smO7itCL_hvB?#TYMSML(tqrt;%DWqAS8Pu4l!n zwNr>X7@=o*-12h%fplv z8uE0?t3+MJyxL2KVgzUZL7r@|Lp&=}Cz%44Bf(3Yq*>t4kpZW3i(XWY$f%vDhGmqknXK!>4S$)7rx_Ln>31H`#0wT+@FtYig#{lQQ3 z#G<1qxVl~R-SeAsN9t^W$MthB_mh*lqOrOu?62lqecM%kjd7>j`)=c?+uM5o`3d}3 zDwj{0Mrrw(8C69XG$n(%iJ(n!p}#^Y)$hE-xcRp(X=WaX`!|V}IBzeT2ln8`zNE>v z0^_bH`MTBNhm^QaMs>$jU)#Q|CtW%j5ZB#An0xXv)fcjF4rQ`^M`|#h6?AhTHXG;ALg~=Z9G1{fkwFLZ_L{ zdH!H33*9|{C#cR#mKaUWoPjW^DQ^bLRax0d>r;OJkelbH?>&? zQ%jVh*WW*`<=%!ZESH?4fFjl~Q;hV3TC|hG&e62&489o}<1%1ako?lw#LojaQkH@7 zyZQwhc-qW)|GI~~cYdsDK;8t3Z`bpodWF@aR*p~o^~%QLh3m^?|FCoI3ExcasN<-f zQ)_S%MFAycO|#0xEsk8A%R|DhD(<30k$go;r@Q^7+7>*lt>*0;&k*vUkP3a_+k2v# z%NfpnmWKmM=X?GA>HTj7!KV+K%Fl9`is!exZkpC1LW96JA))pVp|nL;eW7O0y8Bc7 zv&+uumz#nIY%VSzU&p;b*|Qw)>W@L$3x!L8<}pG3r|YD9x~&EuFVdP{dJAA%FzI-y zXftTXcJ7+h`vHXo67o|;rfKT4{8JGYnxh7!-yK>tf4pfM$s2I9jddm1IfPrb+%^_d zb?WopW@dP(Q`x>!8vpMp2m#@NsqPA_AFn*k29e+O^39~h48jj4=f zwr(xYoQ9dlk~+)%p;`wOAc3-33Y-c1p7d|m46lalPnug|Zv+OWjNv(u&dAyr926cE zi5E|r-!U;$nO7ELDm!$xxV2^}mEfpjvh437*Y?e7593vKCe3p6N;z9f$`|Tans8Gx z#sC~;>nbZn=}&?|IoQ`Xb8hZvRW<*HI(&<2%?VTZhV=Nd1;#dcrBbyzM6czVL~Jf> z?6qU(znmSJ^ojTJ=%jj+TsOsSyzjC=HW0!wKw+N-+2J+nY(!u{{uf<@mAt}>n}Pb` zvK5Z2q>GJGw)KbUppaLSh<@~hJ- z*t`Ob#8N~SMse+Tz}|$65kp*Y2+gqZCwTlB-$D091fE&8%y~TR@C4D3qW2()@EXsQ z^<~kb-~sdRhLA&ex%t@=zHcVX2kfW!mT2|dYxLUIMz4Rzi5!>N_QIcrA3B>&+aLx* zFLHqk(3Nmh?_L$H1v8XQAiG<_gUq8$qq-eEr)CdF+>9x!Y!D>siH>6d6lhph!TijK zc)+DPOcZl_*|{kDhpIwyh=1Wvy_|e{XcawoQ2<6vSD}ZyN) zq3}U%`~x@&P{3l7f9nH+vlRWc zL`(g)bjEHrvIAvn#Sy$t{9)fCw1#4+;FF;PJR`qI#IiL*V`vW%X?F;Ho{nXWZc)WN&PR?=_AZRuypeMh0&fje!=y6ZQ0P(d!T2MRrF`4hUGJ 
zq}dq}>uydo`JK$prBhJNQOErX+(|GAD9o_yi6vUU>~*q=G%?g@yqs5ZG37de9k|rJ zWlxBN(UO>pe^+jtp@lx;r+!GP4AJCK0pbJT=>)*sS&Na=(){sTv-HvG`)%Pv6N~9! zQe}_bYF2;Sn{PO*G^CsnRroPs>pX!?vCE*pAMyQW3qI5aED&(}_D*a`1LrWghn|UX zX}Gm3dW>-dpChs0nZeAAV8+k$*DZ>W6(kf{INa3N*5MVZ)Z?=Ge(A!`^JhV_1XNlt zGVW@K^z_+fhYpV(g`B%a5^ zLbACG$qfI5QKK`46r7BQiOFUXT!lsl;_b=!h^ucjH%6t0FRL#?j_K*NRj)^7xx!`q z-dkn!X_q?Ai$1_s^O!%PHR?e|Sx3RYMANDOhEJOwA~SWe918pq*cT98DlGO5-A4hy zFr;2{kzD>|8(NoRZC@k2{A+R-xje-au|nmgY`KoU-CC_m&R*_ML(J~8rtK#98ePa$cWae zlcc66NF|V)Zb%dLACpqM15rlTh|vWl6oml8ZmN=zt9YSGVTQ6%0FfvO?uiRNd^swy z3S}||RipzrBo(3RMC+lH+OMRg+0t10Bpdc_`e;51B}&rZOF1OiA#e&dk&qb3RPex* zY*))MB4yVeSo=W;zIFbnzE*B=?Gnzj^|$YSAJ3NV6`+?7?77VmWp44OR<}ep?o8{! z>v7)vZ-?Ov=?U2l_yd$*sA3Ln;)u*@1e8H96+1xWm=#j07blM`EZn6fi8Ju9tNLZ~ z z7M+RSasbd;&o_2L&TwwQcZqJm|H_IaqQGrGfS$mlW1V@(Df{9#uED)uMXIxGpV+|~ z)mqk77yEP4&iSK5pkRf}=zEV>@sFUhm#EGlGR5qDG^c?NV(9+s-=od~jUZF^w{g1p zQB&`>YYo)rxc~K-gy!ROhq9n3K)7-I+bJY&rDSe4;u#^kID~*A=$ z&E7qP;if7VAj7w%(VG544dCu7NAxm5otcg!k>PD>h7t0bp7(G1!7nDl`B~A}(E_E$ z{U^Odjr;ab^i`9B=%7cx(fa_DsfwZ-Y$^6t8RkHZH#HsxnX^27`D%FSzoZQv(pU|N z@(BSwP=%EpfAee8ViVItmdo~`MAoCmt$jDkGX|%nj2`FHB+7a3upLvAUWb1M%sX=Q zN3V^Klhv*3dBJ^*UE{}|@jAoG%Q=(Ho9D&mp*At-t)3W{VA>n%>jw{y{b5097xLBX z)AgdvRg0@iG<#PeMbE9v1QP+Ty_K@H7PNLCyHQT$bsI(I5b3{IG=I(6o_Y*>&HTO5_D~ktQ#9p3O z?*p!N7p^|mTQ5epecCIF6AGL@zNUM#Ip`I|QaIE2sdDg8Zo>b79L}}!#A#O=y*(@j z4rm(@baa%}>Ar7M_h&gxL*VZgWB}erU^Tuh-=N+;=t{hRa(b(Ci;anbFAMbTUGO~= zyiGv}BK4K$OLokAxy4ph>vLTmU$MCzTl~%~r`1WAC8j710-* zAZEn^KquPSCkeNpo_%6+f->#0LUN3Fdo6~LHAxSqu^+H}WZPzcbD4m`RGj85ce>+(4 zg@2PqK^}}Wx!;Q(o3yo<<8wTd<$y2+A^z&WXrC1@X;*!0eN8A(;7d(R1PCJ@2B_Pq_>yt`}OLJwoo++Q`cIjw7ekZzZ2{=5E?wvgA#%UoHQjF;QCb8XW=<@Ws|tv|Qz z4lDRS!b)@$zIIS($k~7S2vyQ~?Un`K{3x2|F1oS;DF1CqmH0ji`))8DbE&-M&*pVo zt1(>1p8KEB7M}J_Le67Ct+bkO!YE??_NPI&2(1mVW5PkD2vNH~R}Z`g=0P3My1$y* zMO<1Z+V9_AE4n}nSoLADz}BF;{3^#w0sTA}*Zh%L@fwGoERFW+b~O|KxdQlX>_375 zelcO%j%2K%;}L(OFMbRXp=NuLHF#EBmPIm;^az8*XOZ3i!edU^R|6#ZXLtXx 
zX`x!bTeZ7MYAL@FU4_={__WFlWt=5hQeLgf%p2Ghu55vcuM(}E?jUXHHohpvrK1<^ z9&>|)ZeokW0|d*T0qvA_fP0)x(Hl%IfkIaY>!j&uQ0~8O(S97JqoaL-K(k;H@oHiR;Q zS2`5XhRnPK1_$k$*AHstE$@2od*#=SDdP3d-1pw2v&*i|k0rZRb;f7Hjy^=4J6z7b zr7G{mm%fdAx|F5Qy&*!Pd-4p(UN97pUZ7a1FL&)dh}KrZ zG2Zn7WujA)Sn#5a^}b0B=_xOGOp;cEi^v6RIu20XJpZa5-HbbIs`=GNv~4}D>FH4tX-59?VF=AII3shP^9YP|2!kspNPEKT3oR;Aq^ zRYDHKb6tVAvxs3f*sOwvrlqcJodiM~aCIXC^vUKdi$ zyB-M&SnvmgXu}2)&Lx^CDc{M}x*9qOoD=IkdjG9B(f=LsvttM?54Oj9;=El~Byuxh zg5#l~JK%|ED%)x|rwHu;(-*zv(|(P&+f$%PBg-gpHEAJ$qOIaJ!q&u}>wV14yii4hO0#(* zM4%ZN9j-0R)hwMadpGqbvW-p_B>n)9^RxLony##+&R%cBGHhg@*pxR?vEy*2y(=6J zzYl@9TL5Y!Y5BPnxJ%paDKmL^p=DAxSrmSiF4g%}ABT^MZ*>5FnZ57h{k;GMiKuN| zgdYxVZzV->>9Fj`!yE_>&aKAmZrahU=>1(~;a8EGIk*#G&9Cb|81ih9xmNNT!Y9Or zKZipG;5@Mdm^5>IxN(0C&=xfqMK|L-s&;K2s^e+W84TH5(Wz+F+A*t3uCpe0297Z5 zZ#&ejl$?G~86#@S7@%adGb47DEd#Vf3S+I{7U{WYK8+r!>)QHodfx21W9uE5!d{Zs zn&$UpvDSv_LK?qbqiar`x>%aF1sHVKt+X88u$Hyoo49P64!E~yv5n-eW1r^Q*Nk!W zkW0O#cH1LIzriL`prI3D2<<*{?!M&?>3HJmE}l4xe=zV(iPksJ=7ER2=j@_Z%_BLH z3}op5OAn}8oAVK9K!3D#N|P^mhw1dGmDl&IIQXU!uc#JYK3~hK2xx4vnAYH!2^P?> zqTrNd1tfZo(h=1L=aMl}m?~B|`ZGVIEf_Kd!5=Twt=u8Xk%rXPG0gd zz@lW0I`wH@g?vTXBeMDq*(#0DedP&F2~K~Ho_3c>b*qQ(*uU7`EWaF8IH9EFm5pzx zQ7u@nMq|0e&U6I$qc1quJN zXakJP;Lc5`TIb-zDkeSJ*tP>sgChbxvY-*XF-r_mO!W#DOPY;>gSnbaGax#Sou26} zap#4eoJu`EVY6&EF;MYLRsyRB-Y^X-(_;gXRqY5N^!Oap(x3z4Qg!EwmWn@L4_4xI zMlnG<=FJepJ9$|;Bv?T?zNJo2&_W$8Lm`VlLr5TncMMCIoXgwDR#3>B%(U8h%8Nh& zko|K49sRY>$vK^g44F(6?1%xE^nn|KCq#7j5XIuSNbo2q#1Iq)nY}j}0IepYQzzPI zM*mzTb{H7AKvJ1881w0|AGp^-xjF^48Mfh@8YYHrKu9M9z;5;H>67!PdD~Pn&^E4# zfI!UL$H6le;Lb%sK4yYa1enRXNcoq9la=gzt3`))N#+UggXPZ3syV|hEwkNopGnK{IOs+RjI(p6d&Q7h zQ}88L)EbJSfy@?4gV)>s#p2%fkh@%-yJwKYN!JDB>lkWI3qMQj(XCbAE`@Z>HAh=a zZ+YOq5QNRiz4F(75D3~nhHO4>`*f2wTL*mRflX^iI^MP6Mkd!)D>^RbLW#H*H4j-% zk~wbO@MZxeC&RtLncTclh!C4&)|A1pfNLU(E5+YQ)Z2F1dDFr8+WL!`x&4C>(DwJN zBv2>$&TE>*yJCDtWR zCSB}8$8<{tEz{vdM&_LseZclBI`eg;{c6Tv(BZUk{Y_K7;DQABnd^Ou)O3t)ufTr6 zYD;5S<-XI2+tCnJ%ew8+q&5UV+%zKZ`6Fx%`Xhza^T}us2H=YrZf-n8^W;Op0kMc% 
zJp8P6H(3&CK5>d&62q3^cfYMju~w#vhcC66lr`VUx`jHXT*e+;-=m%aObKPxF0b|z zr2SCX3<)zs_|um)7>^?p66jPdHISLUz3cSDID&Y#gd909icYhBLNCY66fs!xQ33_Iek80g>6)U)8HS z(%;4QMHG`*U4d;MsJet-+`a6_Jwh?n-0GRvnO|3(2&LYBW+H!|Fj9m?Qv$pZnnW4(iu9zfx|lV z(7i|SGuE#aX@)zLd|e$UG`{vAG2QZ%ZOr}V-#^aEFu0&Ohxr%}Zzb6nWrsJXYT7lA zdKjH)W#v(Sp0l-f);5o4Kf@m;mtFzIHb2y3Qf}zJxN6`M&RRcjs`qo$&`(lFH*$BT z8jltWHJQ|{Z<`9*0Eezed;ZeW!e5;+fpT#Y6a=x5>$k-ej!hfEXmo*pr*PYp|LCHO zj^D;^C-YB*!4G30hc+N&@`j)L0mvRu9Ag~#MK*;8BTxLrf>Y?mYA?@W3Ptmbu4v66}>FJ6im#l_VeWc}mzp(%c`xiNJrt%u|ET zb8tMd`87e2IC`*$syYdhI;&6F{>uFVP`5j9ClI$iXylEGY<_YG!J%$M#O8skmI(crBT>j=F91N=f}e$P5!9xz8&LxE<(2P)?vR8+8{_CE?e zt@-Uik5K8d%91%c#oQBk5wau-aPg(4PbLH|2Hopzv;#gw-P~5-+r3prvV$}rnx+#T z({c7RPZ*PlSe`ewMzE`vLMErrUaX~*c9eVYJ5AVX48O}`C<5>RV<|G9)8J9=VCo_X zC56sg9AYxklwvWe2CbAdt#+?JmI0og(~4`N$P_v%m>X4qMlCZxWS&B%Eg?uyLtzA7tFUjOBArV9gVWQMS0Y+{cwJzl|B7}^X-lb zkEac?%^P;h)BaNLbryDIqU*!v_JLU2_miycd8h4y&yKuU9qVtH#{BD={+;HKNX60S zD;tIU0d>UKLw_Ay6E$2ONe^uwLq7A2DvqfKanoE3#Nz50ou)lDVz-tK8eEynd5$IE zB*&-6CuCr`)2r@&5@n_t{A`Up$ood*%a=sy?AUAt*4-cEbo?83h(EwRO!zS`TBj53 z;!~{8r`5Fa8;jaxf+d%2Lq*b(4Hdf{4S z-Fd${xa8ZDI4AmcgunGb9)fo9y338+s2P!aOUn(X8o8^El3m-e6z@1gqpmJ0hj+dJ zlkz86yD`i;8mC!K!i7z@x}?__v5$01jF5uX{avVmL2DaHXUAE`gbUZk}&R*PKGMQ_qY^}dNFonTX z+QEE-en20?fsR(F&6D!0+6g5qbQh5o^hHhEN$tJc;oSSDE*p{}KV+4SLN^FHc&#^jTd37n{$N9hApC7yS*({;CTl+RioQ99m|ZC z_X?tav$S-*prTG@VFXiI=&#Rv1OgaYCzwee(VX&N4XD~kT$r+TVLk?7Qx=6@4c?WGbxPdZmYOnUTBr~Ud`-1$TyZvcAd*LmO z(`J`%#7om4N)xw0Tm5VGaC{YCXo)%5ppbiI@l z_Ag=!$G5~Yvf1WIbJAz}IDJZi#oiEGQ7sqK0|h_kDs>*Jbk<>5kM)$W65?d54B zO1)F3%LiIW$Ikmj>uhdKGq&b}>#|<(`B9wibVe|ri&lWW$WmEr2Bxc9`sV}S!ry=7 z6@A0X4Ho7lgWnh&40`$2;;k-15g2BPsJ*@I?HbbK`)#R*`1Q2lp)E(K`BUUIXliN8 z*ZaL{tG-I-=Awg8Z#!>lwdteU=b~^6hi$KO$!ya<#92LhjU8-y`ce+3;}| zTljT0>!BcKsjCaI4x89(V5|LoaqzOL{?cd8=To8Q;V`N!!*0{A%dzWy(L966yDu8j z3)toT`CU*-AI8$Q<8Ah(L#Uy%eg5O^K8o0PsrAj_wPWdfki%~SGvdeo1ZK7oL^mr7 z8Rx}9)oN~m^Nc*g6?^S7m+)I`&^+lKgkU$2z+5MW>-16!w3ohYL-!4ZglNaRb6;v4 zkJ}!LYUPXM5o;&b8*R&qaR2j|W0(v{C{SViG^P;RM3 
zEUDsjltkCu;1Q^VxKndrz}|7@6I$ zGVyB?qU}5lK3-QmDnIskHibTIa_dYndLxtJQcMxbTv1|uh+*$;xt>Vy(hb3JNY0`Q=R`>&paYK{Hr2!j;Q+N=l5}U zRaf1{)Z4B)!Y*p?-P`Raps|_)m7=vRCFy!Yw{t^d3$#hg+$nP;#_`qOLg|%}_*+ z6k*jUN%P0Nh=)=haGc0;XH{9j@W3nO*bR{p#A=_!=$U8mBG?i&#%zE5xC-$5Dcq|v za&mfa)-Ry^e6Sv_;&gE%L|u3e9yH=vz9Eakb`Ds|7;H!ZW(i#u^_W$ zl)Lqqfwpuy4O@VBjM)|2kCDv*)DX0^w~) zUBAV?E~a<|@YuoFmy+x3=#(*17@)5L&TUTeF)WmFSj8NHZbXk5&MhjZ5g^U z__%uf$wjI&{t9MRC+ZJkeP1ER@lg&x}yBS2=G2UvSCwH`LG zGg*zqa2}?2zI5Q9HTm@WZ}d|IKZ{>GZiT@} zCUp2|#tbN>s`#wGNY_wf4*}jC>eIC7zU0m8W zRCf5cRkZqc_1=@mK(4w6*>QY+9pxIDT<#t4D1#QzTg-B7L6bVWKlAOTRQ@;-<5t>h z2fb8qEytjw!8RnGub~BvXYUNMlZ^y=SXp%ID(|dEn?ZQHhO+x*6vaAIp>TNB&1t((2S+SeQ*~x1a8(`*XmT9+YvG#~q^{AI&^f(Go-?^pIKi9CM?GMX{G<_9JSH)Q8!~ zjRO&C9U|b>BE&6nZt#uHfK)0!wv4RVFrx%$WH$peOB@U=dmfnN1$#EVPh&*a-+zTa zg=3#vbJEaQsqH{+72sovi1a1bL|*CBYdx+xbjSYF)N3WEZ*2IPi8~84bcFzxLpkUC zL+I^+LGAEJioo-|ajn!HgOiF?=Fs(F|NbPW2CThwu%y9YtLq(vC*2)`vgFWg@8976 zn7uUdz1Z%ACOiDbv)!Wq=5wC=yxik2P`{5jwN%f$vgEYQ`4YsD3kv3Q|2lx+18U{i zcZP039!E9q$9Q2GlQQx*c(pn+d2?sSK=8WO%L)EcpFLs(<2~A9-yymG&U3on#wTG; zB9fJfX!vmsN@lfmb5FwaDf1X9$A712JK5rM=(LYWym4mosCxdjpXKP`!`J_5A(rX) zHb_;E8Yzo|l`m0sxib;nL`4!iX;I=5cynTkOM6oNWB?EI>$yQSKAl>O!Wx_Xqvepk zdpw@khi*o_+Z->8o!edNA#N>9a*jHjzT^b2do6QkniP$%eqVi=to)dylox-d03V0n z-Jtp(;zYARUI+C}p;N}dMTBH?c+=e*@n zio)cH5nd(OcG*8ko)dK^6u~9g2Pk6f#(8DNLPa*!QULPgHY;cOe6xJ2EWu@Xg+bih z?Bto71C-nu3rnmM<)BiV>WOeM8C4Bg6`~0yFt$I(&c5QsE+Js1+^so38_XqlXjFwn zTw;k#i|1&9O8)l6YvSQjDihWx?wC zHr3J5;?D^cL}*kTa;&P#`(d%Mvx0xdC@rZ!Md>i9Uqz3@aOsqCNBmA)vDHTBng6NL zD99`z(a90Iu$a;*w2q*{Ywv@rOzJd=%B9VW@Xk6`gjWR(9S>zt6k5$P*O^i&l#W$~ zrxHz2Q9}Ka7f=$5s$!EblLA*>BCb7BQ?@6rMMLH05L$9m77>-2I|W2z*P4xH3NM6Z zeEo8(P9%E7=A(`(#*c*17G1_xaCU`}pQu-u0QY{i( z=1$;Yl7#(NGrU%5?&^wv}W$R(Q-IC00GAL~P|7j~|yD&f|T8%~+pQ z1U5qB6#@wQGm52*QW9Gs+7D-tKaUxVmXb|0!OcY?6|Re9d5T$4nDi8s zDuS&o;!J%3FYQC7^6QjMhM7&QRL7c%T4A|8pORTGB$zzg6y8*76uNlBRJTRsyo^?= zLHwzO7B<763;;4P%3Z$PP>D2(yZfMm6=Q_)J(vFo} zn9NO$BP~vNjKoE8+EhY)yufQ01B@}&)J&vf9E4BSK$h9`NU9SpxQ@!K2yIu*k}9_D 
z>#U!`Vk(2SGS1C~j;cRBi{9N|$rZDpBo?TjVuGO1#H$A?6jd)0ND(AJ$)}isCAch@ z%A}WMWoR(6Acua}G*xQIILGq4us5DgI!hKjp(9!i!8Y=Bq- zU`&}T84o@rsEe)5U3X!uorOV*-9_UCpO~ELCNzC8wkj4B_a~)U-AjKasWNsvrd|FF z_y`)4R~5cDIkKs_MtB>{`6`U9Vz@X7HEbBRqWBQbxklMcNJA1{$&5)sMna7=ttC`d ztpTp#cp)}YDk)V+Jk_irAW^(jLrx8Yg_T~(?N5v;S2&^eLkccpVX$mUiZd&Mx=S=^ z5=&UtyyzCO3yWA#3zaI$PU01~%EWyWxpo8L99yquK2x=}q6-tZw&E1rDl9f4cPK6y zc5McFxf-@)1PB$V94g(~*@EbXw!|@cFf5@Pq_{&+i#01W04-M>7+f-5JkbETD=WxJ zk>$V=tQBNF`^*lAWwv;}H@JA&P!ONrxD zf3#1K=uvLMirg;7ioBOdFP`e2oa=~Hh0Scv8-xseKPa`is45)?`gKe%VSjamzU3IhOe63M$DENHnL zl3W~ym0_5=a*qpOOgHb8=_Qr~kG)Qy(!@uU03ck~h8k27MO1k74y-e!G0Eb!vhAUp z811;SQLOxnW|)Tuw0~$o6z);mgJESPH)q+@rV<;%_b?y667rImAQC7AdP~`G;tRn+Yio3CF4N_9Gi7L3KMt7pb_QiRr{^hg+#}*`a8IBf86(I(S z=ojfkTd5WtLco_;n2CV9RU|rxq}$1Es@it{YgQMFIg)^n<2cot{I?)03(uZp5Q$?D2xJ~A8_K6g)dS$ z=DMJs^OclOkA~jMmGckPk#?_y zq)>~^^0wBS>lkO%v&aIyiI>GObj&siSZfINf}`_*R&edumcshX z;#rqY@KTbz)H3k)w4++OSa9zdv`v(O^1Uc2opZn+N>&2IA{2i|)0xxW6W8i`i20@V z640&`_ee7+?5=MUCQy2E_$x)yOQi`^)vTfVy7Af0n6P?59bEv~P{|l9(0;M&9d>6% z8O2KK2xr0XKzIs`OkQ0dQD^jtn2e7xE~W&jD#0fOePlUomOj_9Sy~-^5Qd+`eoq{y zxpP@I@6YBXHa+_;>zQ0?(amd5Tk?!>JDC4Gbweug&UK)s^hZOGmkR1YH%+4%r25ti z-Us|3Hg$PNQ~LbpQncKF>X$Elez}KWnC%SM>ZMaWXpf8Ti%DB(1^GZc0if?3h_^yo z#y{y%-6XP=JAl8i&+-d6QUQ0#FIETU5{2QCt-M9dN|Y+OrHFciO94*1aC!T5<&Pl# zgqm-{#v&Bhz<(R9?L$PT6i7frePFV8=0|18%BZ5SAafBA!V7OFUXI=HUZ$&$_I3d5 z5ouR4wx%gFGbq9%@gy1`R`yG|B0vHe?F{y%!k;>W=LyS@8*o*atR^}FH%YVX=nS8K z;|qY_L^1in=!E2@!FuVJ@}Ddl z{i`0$C$3VLp@`kzlB^olAVT^VGt#kTWeb+s#f(NVuc5)X&=$+-Bn_Kbu zrtwuOjQ0FN|E~Y?r^~ndXl;Xb%aDwYfX|>KaX_!T@BV&TxcNx*_axg)1me8aw~*f| zeL5^#2PXeb_|x5NvW`K04miX5U6r2B#Q{#I5=j6Z7wkyu9rd2LQR$Or^_uRjj863` z&3qbiM-^vTG2w)ef8R~Avl_pP(R&%IdoYo#3IH))mJ<8}YLFvW_zGWI_-L(Yu{ zf6YT$9-E_4yvorgDSWlx1=-OXD94J~W zARx5=dGNoFGyOO5NlV57hXbh_<0L@yqAShx{1;Xds7}J)xLmNLWMT>H^fj|VJ$lLU zczYpdY8X_?M074z!C4tZu~{JafBAe5JquwWte?N|oJ-KClpaC(!Ng1nnl>EF`KKLw zh%_{6`DDvHBHIXQ?OxLoCb}c$1H(w;w7)imu8U~8DM+P7M75%IzEEMm*PbE>bj5U- 
zhoPT(`O$RB4o8&DbxM+<3^Ah;w8B;Xav1XpA=7)Q!f}+r6jhrY$rAik-fQ-LWgS5c0vEyo>3d@RRJ6|Ks0uXnF z-C27PORkkjNjEHC%|35s)-_}-ZOlavv2_M&N(ERtGKDy|{xNy6(gv$nHe36Z-qZrC z*6g*%*>~wMd{_a&fMi%r4?V4}SZY{kf~E$zy z)-H$BYJ0+u-!`N9+O@W=*MHh5#@E*F;2h@4Hygb!=Vv6lb_UZgpJiXZ+QN=T&hk-o zYE?dAv#Q?}o-28MZ%O7Iyt2iVC0!62V}v z$@%ZD>cGg$8ciU3Mf~|=gkmGD>Qlr)N+iXcUarSigtv2uqjCT0*&g5GL((|`*DBy^ zhQ2fXmfXAXB$21x0XA_>cd|gYFVhsyf=pp`hCD_=@qjE4(YRojuBP^&z>1`FYO@_# zDfG%NR4s%m)$~5Y$1%Cc*z^;w>cmeBWa$&1>rm*_6V0K*N-Sv3LKpWT4QiLw11s24 zw@0}0>WFPO-|UNEN6v#6#XEq^(6;J~Cr2iuPX|p^WZ8X}9HvkHq(YR{)sQT64W+to zCqnNxbBP)7jVs(1Bs7?AAoyoxOwvYbl0M@%m=SeR=foo}b=3bPg`K^kFo+4Dyt8LH z4JKfGhgMovgiNiWd*%zScK%%h;UOd$?yQ7E4U8r!&r9l z0E~)JT2C8ZdnOeQgigOFx$c=WHWL~9&NOa#uYa4CA~~rm(La$9C{l#){-*B9Erp<& zLBLIe=GHqoBtqDR$*>&l#K|~{tLH$Apd98i#XbkQ#RY8UgO4oEm_7I>MAU@8EV(#g zIgu3-ny*pDXx6pvZ{fy_Omja-8Jb#vg8Q%}PeTLao|&eL zK643%asW1d6-HCFz5H)YZeEsN_rkQIRZM98hAlETOM65VW|(IS_u`3`WlrnTl$aEq zH#J(LE+#9s##hXcm<_!9#G0Ur3YFyG1D`ply>*eBs@fxHT>)bH4Y)vHEkRD=y-=XLbN>w zR)RQZqWNmvue8$-WK{lTm}5OVf2X5I9fQqu3Q*|bq{!Vz>!#iWSEkia1rY4!jf#bO@+P0~- zCAv$LXnJ61hP&gFb_+`=h-NXym@MN?>H+CSiz4^y422t1YYQvn@U!J(`}8~Y2Pg|p z%udB)$c{rbSdpUFZewhPUE-M!tjV$uFPw|cKK*P}Eb`nxWgH`r$` zFJwboF<`ix|HuNYmFMx;N)}_FAdaVRXH%h%Ld7ku>Yt*K2Is*V#K5j5&#Jm0L z?RBTmFr6VKj$EQJN|kwO-i;Y>`o_@p{rvs-n*t%OIDACHWULA?LDC#SE*AdT6>r9T z1i|k{$o$3AmNn}XKMt`}C39ic_Sw<;A!K2;=yrDn5~ciM6f&6*W>~@Gbit7}rEfb% zG=-utlm6bRXi-5XZYcRhM?&Rv){=cbSDN76kTvVe+V_Gmj~{(%IASl#^NBQ%uB>FF z*t>mreoW*j7y*DG+U*=kKl)Mn5a+r`Z~dY7Hsya07Tfb8#0}`hRvw$YeOZ}i*Y?A1 z@$76WMu}s0GG$oeX3PRk7!e0^3vn&MGT^kaaeO0`|V>L7xTeOY90PDK+(+!AC|kl#4>NEUa12rud};{J-6xgx!OcAk%`vbf}_Xm)b#`R zP1h&W65~Gk+XfpLmJJd}tppnVAs}8H-eC%qV~ccn#<`5o!@_@>#i*Bhpp zptIgy`RTb@0(+RMvbU!Vh@VeoagpnJ^fpk8A8df7cUke95ASbiec7LBYjy*$cmxi% zO*++2Qf<8|Wp}Cm?IwsSZ2_;YC@?f26`id*DyWdYUI7lL*1|CjAQwLpw{y2B;`A%` zbl?kcIh=JPD(dkq4Sel6j7J3J4Y@3Cg#c%+$O-PdtwF((yDPLL#e zom#4rt66MQ6*|Fq`$%4-!{$lm%ZyBeGgT_Fly~+`&C}v_B3ANNQBJ+|2VMBv%m82Yg{68Q#8WU8b+2eoqK= 
z=5FgQOv?!sdq;jf@wv~wo1c?U_;FftBk5Xx%g{+BYhCrBIk%ZE?3#DR_@#xQd5LHq zva>%}q;FvvnL+ZMVOB^JGGpRHX~Hl1S*df=ZbYZZh8L}C_HeZNC5V}F`L-{JMmXt* zBW~GF!4pnVI-Ph@dtF?QgnX-|swT6Wt;^a0kUl>@m3tU1&7w|5x06PkGe|>xQ=529 zC26I^NB9*!x$tVeL-_Vd?Rg(9d2`47ZB0u@lSMCf?r4(sXIYWO5`RhXiClBK+Lgu; z>Ey|p@H{1q#k`f+gb~PpGI>D@3Vsc1;fO8V9^)ELkh7~v2>1|lnhj=7Sg&QN?9BOC z$?7k(7;1U1w;890X*;u34162H+TCWcdw?4^-8|UZpKHBFS4rWo9f=zqHuJkE_3(x$ z94)as>m2AT&8*;motuA+tMPW;>^V7kD;anePgR}8V7SyeSjUvYrdWJs@LonY+62FP zKhm7Px%_#l097s~`sJx*ztO4J`JDB$PY4oP9RCx5qJCcD3$^^TvI3C0G7V{>&`E`_&W!@8GMe@fKwSbg_tv+PSM5k= zC(4lU0Q#(;O*iXWpW|){_$*LyW}65EjdJcdUPiGXYi_1`IeF6zeC|U@Bk(qC_U3LJ zsQC3eVgv`}x#8_Rs(I{HFafnMUc<(P<4pmLo^>cjC>H8gT17Frw+|vI%Ro;Efg!uETvoT9u$^KLL_6)YNVU$6ZghSWBvy4u^ zxm!M#VgNWWS2T{alAJ<+WCG3ZoYfbazV5jNP^Mky6r{&v*`tzS25`FUtV2mpv;E-)qDiRMrd+!0zJdGf?bFtDt>UP4Q+K=HtH%fuZA=DPKraY4 zdf^>9H^)`@IJ{hf7i*IWP*;?jM7w+-Ni>_*Cw!b%3p(?E^v@<5U#ED&(tIR~mS*UW zmMk`nby-6ge2>Hm#heLH8k_aRS;-X(bdW?t#Rb`zaY0um>5%^Lw?*Y)r5$)#i3j)_ z@O)>8hFtyJ{af@^`v0aKaJb&ZHKB@WJbdo#)dVIQ@Vd2$p^@xSB70g}e%&2O(cp7h z_VHRJ<+;}Jj5b)dT4}YsIe7eIg(eF5BUYas^-IrVLAKKUt&C4|fjN-zPnD%g#uei@EcX9yU{ z$`d*FYkE{8iSo}nV3T(^z{n%2qpMiJf!+|_zR=War3KH9fvbt-~%BA za2Vs;g{lAf$VV;(*i4hF7E5*s-OSMvd2<>LkKVj~ZezQ08(X%@fij4dZM$~sR0D9t zArzg`a-QxyfM9+iC`!%KtI{{GVFe9dh zaMS!J2lFw|SEJ@lH&$&t9?!6f^cu}7$?J*6_-Q2Y+R@h0U}H5TCJaC(+_zX`eP|bC z-%9(tbTGcta64cq_*twg+uhPWBbN2`37Hv0S*YI%#fLr&l>4y;<0>Bnb$_8J{MnZ- zU)4+Rd|tx~qK;_1)}xGZ%SCw%XADi59UPP8-Iu|8G|KYf_9h0%###8T?w_IxQO7e?GwytN=)8lrWpz+OqaFq_mu`v;qXk&tJn$ zq!O(lm`9(}Z%G@x;$MkxFQm4WB8doY8~s*MztS%J1Z-$<0=@%@!-A{NcH?Pq)JfZl zjdPLZs?I|jPCq&MuilS6=g>sk*W0rP+Qk$u#;6$)>Z(?N`+C;9G@leDavE)`E}841 z>>j>&fOUH@uj<&`b+|0xjLQdTuN;Lg?4KJDu|iH_NHUrZpyTc187|0UFj6@*FmLm; z{g9q1znh?+u0Bc6&J&zjteYCF3PCmc+WOgaZ;e=?NvN|(8(*Iah7FyWIxW*(Sqj4A zbuOk(pTk?-NPU=pg`io%QDBtNW1HZy>R596M+}sM0qMDT&EDEZ)7NpYusB(EKGd61 zW04T7FTz|V#EH$XDiQL4>3y-X=M5Y1xZeeGMD{jxZGm=^Kop+A67XRw3>K}fL|%)a zM3DH<2&w-bgi_4A`DDWyQM`czsscTH-r&jg=I|*d18UJg#byq-(ag_Uy5t}LP7(h; 
zfTlCi`kM}%ax~~5@*Ewr{YmKGX!iQ9Mau?={1nL4o_pRzYfdI@9Hkhkh9_6DS%)TF z!yJnwxR;B(Bg#OTO%V5PR~3SO8wk16F-KuVN5E?le(pymDK#@En!+2+UL{J53){WbLb`|I74`n`14k~c{Xk65s8I|1^$ zlCperpj_!KPe|ktQaR#Ck8^;#DV697TNlS6QN=`0*=d_SS2?KnT7Abb51X zsY;oW6%(DTMvI0qo;$0V>5|jl;WA84D4FXFgsZxcnBBihF3Op3MPMgA6y1(j``nQ= zZ`xEKo}z%tOo~`7{ZJ#4ed@tgDp40V_2D4fu@YEx%f*2UsqW`4_XChAKX9M!@t^O}O`#eu}$ zj(lxUek)jK-Jj@@%x^R2JG{G5&)=B|MQSibT-Jg#D-L@@=I=bQ2+~&p_%F0Yuky(!h1Y> zM-%u3^VSJ);^Pp8=JRowL z;t(x-Md~j^X#q{wo~42XO`EdS%`3+eI!nlSI5LB$&V`w9N$r|!q<^kOS>tYWdDB=C zVs+E%2SNEektgsc{~(Z?)c*=KC9*Z?epd4MR8S>PC{{kLXjMx5RESMp2KFh^tJ-$Y zTns(ApO8Hdp1rJsx{+uuwLAaOO`Y226EPnXWsb~H-q_=I7c<6Ey?j2Y+n&8(; zQrsIOm?F(LH0|sYWoG)i2Y9Ks6qG0InAj7zdgSxad75eLQMq!6x)u@TtkfZZ;ZTW= z!g5D}vW-fNTze*3e|H1h6%Rh%t=UgSeOq+npRC0R4?XyAM?02y1pc(OF2~QnI3woT zFOuf*dF+~T^6&~_1_KSv*PQ( z1$V5&f97A;oYju7d7;Se&5`5WR}SHN@SG#hCAc?T*A&JSgwB486OU=Xm^RareH)CE zpv|S-nXYQFF18sl=_D?PR10n zoj*KYF?O9YCtCZ0-n?8n5V^^tWFYZC_X31Fqmoq*XYvHY+Z44H)N@3Lsdv96t5C*b zN04+l?(C$gWF!&K#G{VDkO}4?eMy{5A&U1$L&jf*CAD~&<1`zC5nhrntgrfCHwBuE}=P6ZB+GC;3y3^b4%;{?n3sBOU z+vu)U#w1pCvls7gByEsKD?gJWG|pjQMN=7D1YREQI=_mebJxGhpg(Sq+h5hXcOYi> z37-2h++e?q=XDs%>=B*X|3qf@mCEji5mCME4fRH!GeonEtaN{;0XH>KZD*;_3Nz2} zkSVhRte_wEG?DssfjlvX2jG26nc5iUyIki2G`Gj^J*L6F($s1Ju5Oq_avH1V+#qvw z)@eT+r8~ShzW3^Lj&IJ7om?s z-`@q^7M{}s-VbhegZjP;9x9zFQK9kSBtyo9xF}&E2dqhBe=h`LCVmL@aNh`g7J%9k zq=NUyAEG@ZZ_l~0q;U3u(o~4tbUeYigdagdMy=#iYBbwy=mxAZ428^X>k@T$^)9)c z3@&+B4cg}71GMCzc|iDL>ZYyE!88Fl9=UAkKFx5?0JBp&4C^?=?0D`P#{6yDsfCN; zz6OG2J#dn`qZ___v9auGt)Cu7*cjDMzk=7`^Qwv6FW&4Cfxz9~F3K#ehQ!P>g7)0~ z#r1K51#FR@w>!4(ERdVgIPK`p8n zs%(mPs~Pl7waU)HBH0DtZIjJT5n|#$=I}1Z*g}Ue zFG-hkU5(z&jgqOVc5kF?=3~M%vR@%`N28%+%IBL7?RZE@}*% z@FX_Lpv+0G>tok1@ti4fiYmqt-La8uo8GyfpXh;{{5^Ud={Djyc4|LX)yi0FsSiG; zn)}aL+Ty^IR~~j?_StcluvsC&DdA$e9dph9=u0)0mCsAqzpQ|9c%R8}l0Ux*+q>XT zR^)0=c4^;;Yoi-lA-=1|!kw|2`Ct#5B?Tf7IAZA*bK*@zZ&Eb=?)ta%yODNtcOV=@ zqDOUTG?Pqg?~qmgC1l8^$C`oQh``_THM8dW24z*}QhLk1LQSuq#jmlNj?j&@PX9xx 
z-BqklX6Zh3XPxDyHX2$UC)GAnAEVd-+d1=Nt);Vel^KT5{s#MqnV09W!;i=1Ha1fh zy286iZqBymYsC^RcNhQPonbG^yF<^5v3(69&zo>K1U$YhZyT)A5PV*}`NX7Q4L_fb z#eDEwPoi(F9b=9zQiI*%06^z((mJDE0tsvP_iEujJFiz34uhqYT}F4ZZEElFeJLcO zD04ld@)g~d?VLhtE8XbB^RCA5xUOp5=vBq@_AgXd$BksR)+QAy$Jy}XL^=>8>ho3m)67W_Lj+ zrUx}Sq3=1py!;S=owkVi9GZ0!x9_XwJ)Pux#@Bo%c2!EH^w$GIB z0&`Joy9ICmFbQYJFY`+d+cYMoo^PizPkhmp|en0Xk0t1)pKzEjI_ULtX!b77$xkiwZi|w4Sdv>nOaf&{nH0<9R zdyK@S`Qb6Rt72nrG8jTKt9y6ITagH)cSQsZP;@aQOa7)Og53mdZ;w6bcS_vlya0Kx z7bDS=_!=Up0RnmKbxUH)ww!vlp3w(J zY}zd6@!`$9_;$rJGd2@MdFsr(u^JqQtvsIP!2A}=zUVJfW8& z0@{ikm^H-l*0nr#=t*wq-~KJkzn z_4CNIK0NS=fqn<<$g%ClDQeh*?(5VXr7N8FOrB|AG`mc^Rm->nNw0>nn51Tq0E6>? z7T)Ypgj#4m{61IZK+<%OHNJW85TRDdnQ*g!YiF=!z)&yMq#QH?O=~wLpH0_}EeJH| zV5`PS`-tp_XeTojo^1E$*obY&Qi9z~u%KfLU${X4UnCI!Yi*7yV(x(l&Js``o#zBKF&NP*@nj zU}%~QvsnwAq(!h0USTVNTexei<8NfzMrFvx~kXCE;mv8`Wq3jB2bRMeuk| zzu3YCEfVzoMY8`Z7I|#{Hofe3$V0!@3VDO&8JQ!Ztkr^g=c@D?4tCa=UKwK%`yK)7 zrJnFpH09~UR@I&1f*iv6e#2{d`lPbm8HVasVW+qlM!0ua=T@q^EsGak_b(>F=`Q&+ zRuZK)d6e@Bf#YlX!vNi(9Ub&Frx%2iI-UAHF~hPp|1TmXta}<(Al=S(<<5Y(r{H;v zZaVz{L&KvIKLI@k7frrDbLcb69UhWSHlI-%1zQ5-U5O1Ae5d^%^B)%p2QL&BkFWy9 zpk*R`0dg;!8LkG6qEH4@8qg)lnVL_83@B?jCzVe5X9ls%MCe%i+(en{qPeSHfD#@$ z{ACR`;n#UW=evP9JSb9Tyd(TIAI8Ovj;jl$Eil6%)%BU2c?W~gyhSD7o{tRnTXL3g zT0f@Ijb`u%>6>82qR*u&vUBxY@eb~N?CLJTMDGkAKNPl`urU_eg_HJ)!ONiZheMux zslWDbD5%cUMGn4g=E1g}Z{XbVyV!ls|IvQ?(wCWBC9IbPs%Mq6mT#3ml}47-u!MK} zq*RB=Fz;o(CHLDeeO0k?1!T0t>bq8yAEIF%y%xG8;QrR~PLtOw_UkK%sdkl*#B}U5 zaWPeX%pX%(5P>py#ey%ko@6y83iHWKmJmaYyM~jCUz!6=AxAQGE7dV}IQ;fX;$R`J z7IP%NN8?NS$Dxh|?SGH*{23}Zlz8fVo!Gqv(~pd|PD%~3i^ZzLhX zAMOq9=ZLDgq6m67#E{i2aTmAz_WpJxLwWs3^>G$d$o zgn}#Mg}45AWfh!WMRj~~9qB|iQ?i^r++q6a@4esdHD$cd`M!WxjWiNi9-EHPW{&wl%$;zzlVlq=D- z4YbQ{uC|X$pUDb}XpW>#)*PLTeytl@i}w}`()?;@agD!+Zs*H@CTeiecfSk?Pt>pw zexY2bgBH(VtqwyrT!e_<3D+>>=!1e(4=-y<4!a3MVJOyLC3AI#1V_=gc$Xq|`Zv_7 z3Jtjdd4+(V;&UcqCN!zxPYCNtT&uU3tp*M-ZnjwWEjMdk6DiXUDx zLaW5Me`mzd7Vq68xxB(X*e6IkVt0(|^0#8e5Z-a2!qqY6o~Pz>9I(`BuAeT&qedpv 
zih(#MM)XPFlUoHo=R)0sBoU7}RuQQX{iyyq!xd>7Inf)dz(!mVVf9dqB5Afy_T?x_ zY}!I1Oypc4iH1Wh8xT4B%di%wyy4ehunp}BnD`cx$wFAm(-vE{n}TrC*{{%GAulSo zk6IsrwtOGye^mE+PCx@0<~FF$L9Niq^9x*`b}Wqij-sDzb+&(sw}OI?s#{(bB6tF- zt>400FmISdd!em&jmt5$7UMn0a1Qq{&i+j?EEy&M9iPD4m6xES#$79i7JKb)=1rI- z{Vi{M6VL)`-ZpWa7lW}*rX2Ohi0YZDB+G+oMa7{)DSBNunvc0WyNbeTe_e?~{b)?0 zB3v!Iwq`k2emY*hgo28{Nd9LIOebC%uzAvaP>68B`MuzW{t9{A*<7U%1AfuS$xrfP zGNdMjRZVj+r7~f^6e$GP~dbIjAtcpIg_-J``o`_LRr6m1kYjhrJeJE z^;7-_p6GO0!{ahWUUFVnGeD8I{a_i1`1Kls6IH3q}RXh{&@@SWGg5Bp;vcIiO3ry!;_4)}Z$jS-B>FuP(>|h^5VU#GZNE<4cf&FZd zBJY<(k2E(07L@I`C-dw3f!qClEWi3XRc$@cUE#zn7OX^vqCS1&k0|y=Y6l1BXKOY} z6g7cB%KOOwtv6Xs;&;eeaYv-ePn5Ze7zNt0;^vj@GrdNfeLd- z`LAo*f;&#iFBqEI`&#vzk*^*DO@wd*O9i#$ilBBjb4TX_0#YY}wGS77i9OFJn@PWu zyn}&908%)TZSPrRj%U1na*d{wN%)157W%2Mm4=qlsq98A*6l91bAFPPLUQOL6uDNb zK*~S%3Hh}e6Kt3Kq;MO=z=dgFAqY=SW~4f^!223Uln70hxY6<~vYzDz25@V{%LutEl7#aFjNLGtzsDwqp=n7P7hs}9e zE42Jvelsg;=*PWl29`?;{{m@7HTP$?5&gHhK?ylFQ)5%BP;Wcn_AL16vOae%$?D}O z2s7StRH|F}E9!{RN*gCw^r%ObU7wJLqlj`ofdVEaTZk6OsfR6u%UjS0Mj?x3$*bt zo;~jerk5hf=XH^R|9f4tZd#5wC^MaAZOMweqgGg)IT{CYv=U41LJhb{_6G<;DA+{& z5*D_Hi!K2*qpj7xQ5T5dK|Ekt{aMSQ5tDwpB3bmFe@N%bk#AtM+6cO^toiqo|2^=> z1l~?NlbTW*F~E%TAzBg+{`pDfzuA7wiO!-oLI9B!XrKLQuFKxqG}sg+%yPS}oNXaO zh^G)?L7LO3>y9SioV(2dTeUu2(y-;EDpL>YpH_;Q zJa&$H>^<6b%MNmTTULyjPLLYACR&=lwZL0i_GBjICC}ex95pHkX6uMWXqA398imIYJGTrizCN z$JiN|p>(U&S|o`g{sRT)kqV1YyS(QGExm*L!*K^YGd`t1an=noxL8YU(^Xi1~0Cv&y#kT&;E3F@|IvPRWV4 zy8081w|oK>pw^0qZ3#ER_~u#@(rI{(N**_Rk*&e&IH~?nZ;Ss^!@EH$*bwmJhD!IdxH%A%Kt~v#L0MiJ@6N#2>#8HHZ!h7 z3i81j^x+}@KE7ifj{>`r&RLCL|NI&^R?fz20_~f21 zG1B)xagLW=$tP1{_2d-ez;VgWFL3?rlT+q!12cvAf*o$|Jh`;(FG?=u$DpLzqQmER z<RK5vbD?!#v99qTisM z@_(;oCVs2TYmmk?}d>ona8y)}U5#*1^q!z7H$^e{LkV&PYDf%3hRNQ7;0bun`o+(A34CcS6 zddKKG*l1llXl&bTY_qX#+je8yjcqlyZ8XM;Z8i3{`tE(cGtU3yM@B}no_Wvvy5@w? 
zS1wbsjPB#|xHA@}O{c@c>jaF5mW$OU*pAxe81k&`;9C;Mnqt{_%H(yi1bkQ}E>B$) zH+GNi)Pw6~=yd)>k^#y`OaFW|0A6=$Si-~sZR$5F>=Z`=$q^k6pYGJ9n#n+GXEGr( zI=rg5G|n)-(?NT>jr3H%T?0bYW@N(7RAZ@xwMXWAL1176X@vJK+ocES3AaX6%U@vi zKZ&E=Y#pKqnk1hHi{uiM#8?p;QY5GvkH5755eK>Jj!PLW4nu_|caoqgEGfuRcwd_) z;46UW3L8XQ1jc!%_nUAB^+EG?z*om(dgb%mTM#&C6~DNmSL@y~{rblyl#iP1il6OD$s^-Nr6j7?S+_l9u{e2ia$q-O?gJ6n+5HUit9zCIHtu zMP*;SW_Zq%C;4MFLR_j4C6=Zam77q`Leg~$eUyNc;!K5v8yPpyWn`?Pmt_Fp$^9dlpy>UK|F?Yv0oOVi@@rKjz|F0i9BRO~ zkl@Yo$4bYyt8)Ke6dV*^N&5|xjDkm(A%nquNUf-iS7VY`O7~=+B4sVA*5f=+CDPV) zjMS~v)qEn=Zc|L*)foXaI~}G4I`B(93?dV3=yL1>?jvc|hYbAw8op> zrkr(H3KsH7s?SB#UT3E~&d#pXDNJpIekKY)X8%X&gb9qbLhb1hzYWyk=|164Cm%JPVt1t^5r!9i9&K9R~$ zYc+ZlFkI0v=!V5&Sd=@^x{+3g1(~=L; zpSW5`qpNbqo}Tr4IO1~}xxzs_k$11+Uw?yv3+1W{f0bkzB01mCf?D33TKnQm%l)wT zE~xLqoLjN2uB$4Dij2*x*3-~xJgchte2hNJM}=Q=UgA=QKsSo-iyqPV z4aS?Nlz=RN1>FLz29fn+4%~>HCEu}pA`v+)ikHIy52S=2=PD-l4H z;Uo$TYKL~|nROW#S)d5#ob>Le z1&t)ib%0JFZt@M0n;V)NVN7~EK(=!}J>LV3k~TaWn}kZX(n3ia@wqiN)|zogE~ar! z&!Yw3p>l@!^j9PFaev8s8=g9NQDF?EOV{|t*x8S*E08Vpgh692a_S8#yLbhFCVLdk zcnqr8yN7(zFHGTxQsOe9I)5Q-?2V_$EVn`@#LjzCslY502;B$#Gj5%(nV$fqGRy%stn2rU_^|qZZ9uo@rW?7#-NNh zHckcEL(SwCnGuRi($IpM^-{z6CaMQ->V7m-8GM{s13Rm}hn$$b18$$l4#sG;INEC! 
zUi0b<2V0eKtsiT@PO?fRr3ImQ*F${S#es)=lxLn{7I9W5)%VCLO3g2$XKO>iqYMt} z;i-&qSQs+5TIQ{xqDmd@*tyC>m=r^|-M#HQangp++FBM7+zcDVsB84BNGp^0kS~y$ zGXD&zk6jqzM_mBM=S|%Q_CCNHiV>oMo50l8*NgI9!m%s+xx*;!RbS^-MJ1s{72%ls zAUeoKa3g=0vtP9-^BBM9(;!vy*<;NN6&2?M3~s!b+YpUjz8i_Dl9I-+7xc6Iai&5u zBe40M4_(QFto%;jf!Bp)(;1|M9ME8_qU)xNwE@Ze+CwP!3o`Tne{!#kHeiIi$(y~` zwh&QJ`zn_!*e)sW;4d)5C8m6p0AyykMpYjQLTc7T%CllkOkHh|##q=rGHZ+X{(X&C zqg$5Cl>RXv8zSN2QMw*VxS=Ace1_dn!!1RW=NfN}OeGsK$^akV0rVa+d=f)X13-^a zUlE2szq1$}5$?SMznwvg{E~&dLPxadR(qO-rgn0>jM~??U!9*auBcbdQ4gk|!*qnv zJ(=UPxyHsr2_Q>_=iiH*`U+*YznVo2cAWO1o<^W7d83Bt4(Eouruw(^0_&g;WE4XoJiZMro4?QQG@s_S+YatQGWEWzz&;sgMNEZf$5ecd4wS-pt z)0yEXX^4m~*zaGMMO1v3nf#X>H?Pfp9+?zTJ1qf{jDOnQ$&Wkc0pDgw?s&WR6u6kepeZITcdF=3RM)-#S{}<#KZqR!;{xy%j{#Z+TBB9r?$~(EvOI*F!&n{ zZW#n9^xDJu>fq3zv4kS8{J<}F`=)_K0gj*?yjlFl1&51%lKg7+eX28z*V%voa#<_} z*H=0r{2iafO^F$h0ui*SoI|W!5=fPl&;b~=vA}PyMFd|83=pIX zfn4Xko$FVXAWiZtQ02<7f}>G#F-&NMs-A#NA_E9osX>Pser0Sjw zII21^;h)COM&Kfm-V!Id2p0!jT%)~)s5`+DiW#&;gF6!!!d4$m z-Pru>v;oFH=j+MC6-&4pp>VWv7$tU?<-C~$pTPLij%I=c;zL*s`qF!&jUz**&_D_7 z)3^7HM;aiEAIgS;0&WWEmsPx^9AwZe#1BIM^-;N4*?gRlp3QqL1Dg=zJhx3S)EA|_ z(%fljUFsu5VqyultL~Oy!O+=aiFy#ISSL7yx56Ei8Eu^~`;%#ro<*FST?+yK_H$&Xr+~=SxjtMTJ0cL<`)25D}7a%V~obZyP}^K;U96CskM>pO<*$tf-B4 zon3951QL>*g7j|3IBT`WDHQh<(!!xqgdXcIZd|aLM*cwPO9@275Q&aODOosJ2RRLMX-Wc7nj z$Et~3R$9Eqwe#T>)~VLR z_z2K~Uwv~vEy%sqt)29C*6gibp@CIQ*i4HnSXLOSdr$YNPp(dorLw@oXt!HGW7h#o z?RGS%P(o5k?*-D&a2=QRI2Zm`zmNw|d>w z!h~D=0`+dT87+sUP-${$nG?jN)1J1@zwH{_IcK(Ek=ux)3)M zCq9a%6+Y7a4`LH^=L`H&pft>jp#0e(KY|*$D4I4Wz>OkDNZU;)L->A<(lC{CuA9vfwSDiZn*99o$T4NH1d$Y%V~lob{cv1uQk<5a@7e`5>=AlwEyqG(txVwprW$Du>N+Mauf)WuFt+)qsZ& zM>BpC{Ky8jNO*TP0FaX%1Oj^h(>1X>Gx~?O9cA>~DC%@+ce!;tx~FmgDV!S2+8)ET z$aDLh>1BN2H}nMh@80X3dS17=0Hon06P6;FPO}y-$>hm4zMI#t%97Pzre~3Puzb`U?eY)%n^FM0x3)J^J?FLf zh8ewmYOjxmcwgHY==dkD`ufb0(IC>pm`9zLi`+8+ux+AMbRL1J_RW}y+;1n%=Z9Nl4{s#)C*o9d1NH4eyUrmI zu=JCP*Y};K*!8x|2H2&klB``79~W)F{o@?^fGxD{B!)Qzxm+}eNA(_P`<0;4?kx`w 
z0ZtT=q@X4>XjT{Z=q$F*Q!UrMm}YU8lyinA2HIXggUiweUT#tLYI|*w%jbl2Xra2?cNHMas~K>i-~2FoV1*aDI6?(c zuJ0&Z^|1Vgo$i=ZN2&s` zcW8&_*0wOUVnOvV91VD9W7eKsbGU?QVK7Fcv+>x3NWBKz|J3A|btkKGGDe`L{PHBE zG`7F|ar??Q8KviOV6Su$s{lB-*Z_cy>412VAsV6bd_)>ru1ZH<+xRfF`t?@d|!7z^5Wkk9x$jVHRD)3M_fZ zrdln7T@s00D%&d43h+9WqZQT#e-(OvuH0egXlOl5FWVs{pc2!S_K|UlXtRjGJ`tLv zp3&juq9UVnFJfJ27LLl_N|L&>*%T_7w;O$b855S05Tf$H5Y!Hyc*sY?%=I8(RqGca z48co)wc}DV9hw_p6+hl|<3s_1Zg3FXW2tD=S0$0Bz$L&YsCRtB!?;{PeJ-D^JL85? z(DHvYNt&IR0wD*&z<<@JBS+BjGej!ZTk9;;>kA{$M2kAG{wW5d0{xW=^vfjPzxudP zu=mGL&3Ew+;}UE^8k*htIomBGBWDmud|p&rDtEbj<~xaO$??ECS{1>9dL6vI%T&=dC8l*WT7t1Gv(Dc&Oh!5Hm*QH%{F;P zAh3T{Jf0bNZC#*#$gHJ(g3?xr>gQoGkl9Q}`P$KtHIrP;gdk{r1N56y(Hy(D3C!wLHN92zzX}w(hseBqd(Gxx(yR#& zl-B98%-FW>q@7*AfM2UidACO-pLfS}OL1CYm(Bt{aG}#YSP#5%ss=)z8_DU7sdsk{@-V9G++Ksv!T{TZ*4lj4RGQgk^^}a2<`$ z2?;?F(^KuYbh0k}FYyvOo&^%qVhax*%Fp2@RGIM!nPN0mNnF`a^Phi1GQ6p2G3<6Ek<5CDq2zWo zs=}Su?M6nNOZDp$Fn)q_X`!vqun(+T;TN;5mY;+58}x*`UT>Vk5$$}D^?h;TS*5xb za_d8VDI!D=#hEHl8Yf7O!1K>t#K*4m_SvBE>xGF=<>H&nhCM4A(D0+mSU{VZs$hG{ z=4A#Z$qhj>Bb1?5G*xKa+8kx&GR&PU+QBk43bKDsru{#0DaZd4mjZYc>)LzqB{E+m zF9zrH><=fu0266W%@|OEzn-~%varj`J09=FjZ6=S_;3YDMdpddW+VNSdGscH;gN>f z?t=U;EA_p10%oP*|FY7Be_83@|FY7K|FY7P|C5!Dn5ZUyzhsobzOqOsad>DjR_^^E zDMYV)utf=xQNqbS$(tTFU+95R`XAPvRPICs+?gD;Se#1WQS0Z9O)w`{SzCz8#G(eO zrkms%jyhAta5k3GBov9PNsk<4#cr!na&VX&C_|xvBf{1+VylSyDB3TziKPBn8ie6G zX-o^OJ&JZCA8xIm=Ai+bUet4>?72S#d&yK}{)|%=lhl1zlRGT4SeQpfVOj2#LR|SN zOErMal||=kIf}w2AQ4Z%^Jm~I%AXiNCZc7;7fmtIpt#6n(dKV96pt#P=ey%RMz2NB z_io}$_rfLPqT1aq_{`znSlB-VO!be>9>lr&0MR`8n{uNBoDh-r7LIGw?y$Sxvz;7; zK6t#*jo;N4z&`_pDOfCr!??0gf0_ZOu}&ZF>0ixf$r03N>|BeBh1o=QmB=jRF4ih) zrec>*PfmVx?T2ABo4xOr_X`KFbJgayLYQwQJ)kv2m!+2iN28UG(=o3~#VBm{wW&GN z?rka7z+t1oI7?&va>>`W7B}ZWXDI`6Z zX6hLm%o`Y}rK3pDtOi084OGd*!z+h@BN<@dB}9>qlK*A=o+l$1Y+A_^5$$3)?> zf;Lu4`)2FxYGXzMpzc6;sG&kYr7)L3<4sUgHdQaiHgxjcY8pR2{_(8Cso4GAyKC|+ zK|xI{+=g&fKasL+@W?!c_&ds&u{)nh9C-nsa0uAA)-~>;=*hZ=LcJ4Yh2mdGgeWCq 
zdrQM-Apvxu*Y_*V+$|ZT^<@w(a%6pl_=$;Y=T5upvbKTPwFszce8l$EAL81~64omo z9CAE?vpa^8H@)DNn7oc1a8B)i$}cekEv6+|RCr3DJomxzL>8SU^gIwi*`YoA5vh@t zt+{Wu*;r{&+7%F2ohfYV+Yaxb;fa}@@Di<*6&T$vum}Jk!1L#RQEyQ6Q z@k|}UaoC~4LPQ0d!Z#szjib$n($=+?5bZeVTZ$8qPvGDopZo<#J85n+&(Tj(B_$Pa zud{~e96ID1#)dH$E%#$$GBMVsqpS1Lm7xZ&1DX%jbfkYB+Ox^mJF$=@2x&(T7?+om z*ky|UGHf$Q=ptOU9|_wObMor@d)RHP_LWZ#MLttm5`?@m?y7Y82-RQZthn1H{XWQ# zC$NssgRrx^Dc#6B zm<08J$g^oVwgW7rh;fsv-dh~o%el=&x$tPiyG@dlVL^&b8u8Y-Cp-NPNqm zuf1I+sFR3|e8SnB{5w>7m=w~oD(t^{$LhCcHRZCLl5*7*B^ zu>~RHC)DQ0pQGWCwW2J7TJYcSrWM>n@bK(X5u;m{xvlQDkCtATF?W~^_jDL1OFC-vuU8C75B7Z4 z1rXqy{lwC2>3z0{&rQNH&ZS4PcG2J$fySvpn>09xlkFYJO=fCPyt;PJdU*d0o&Q`j zA3q~AH?H^mG@y>o8$<~CP=-n(iKABS*p7DQH@!}0TsS6Y!90fCPm>Hz4Ye;r3H|GI z)Z50@d6(+#ip{!%2$Ex2*)6P>=dchmw9!rPNp`m-sP%^HbGre@9 zLHAI*PJ;cZKVh<3V!`K6#$ML8CQ?|7F>ffQg>bsl%}|*IbpQP&=dtI*XV$2Mn@Wo_ zTZzG!iA+pV7M_JB&m+ytrF!_gU70frv?vn!(oEzS*BIH}tcbMtOk=p`2E;-2V1klE zT`90>12~ndrN@^{wzM$7A}BG>gA140+LuN4S358bY_>&C<`?la&OiScOrLKUfRr#3hq-H;220;b)#6EccL4gW~MtO-?l_Qw31Y&j@0veCafH zKz8enEzG>v0cF~nZ%3L;*^ZV0ic?(UWe<*)f&R!BOqkU;N>CAs~_Is{U-j=t_i7}*{2ciHM++D1a*UIiB= zqtOTZA zIgCk5>J9&)7X@kbsEBeNh+sGReyHK4>#%Z-(6B4tv91H@TTE(lW=I~wgZx}r;haE(JX&~ZgfTIWk9pK@K(;i?>Vo&35qf=e>onFBTbjWR@k8hoJ` zBq{zbQiABm^`D>VTc_n>3m(xkSgqW>D@^f?ptm!;n(aTMz=V&_C0P+#$Okx?fyCtX zQ^`D8z3a%cO-23u9qa8uz39}5dX{wY{1NM|{lD2!iPRyT!UfD%8JK$LygIX_{C-k1 zNqKZYENNIhYCD9Q2sgU4 zh$uDy{F~A#+!z}SCea%m|4s9_=yKzcsD600&CL-ZnpcLS(9ux6E7H9j-gpj_KI!g(BBaO16uUnW{uh+bKfcW^^ zh^aYMSH~6ouR1f6L>d@?j(flpU6l%-$fa|Mpc7bD?C4KC z#-4V(2Frv6t`j)guuAh%J58lXR!EKb%_>X$YCdfP8f#bmZMuZ&m*wjx#+8+5(%WF7 zaFwie;Shj=u)>>v=9P9Z_C5(ZQ9bc%s5E_8o55^T2lF+wjwbZSdn&Cm0*`VUY1L!u zi7lt@LQ#ePU}B(v6sD~{^RbC$4MDpRvkl93?x-D|3hVu6dUO6}O?#cnh1)s$y@RsIYMlRwi@(ir`H zmZ{!$5P;#jLucIdSyb`iQCN-mxeeuyGsemltW-3Y()IvT3X8Kiyj z^by1k#`67p#~B`7+&PUVv9X!VbWU?GIIcRRjGM)-&+nAZ&Dzjq=a@!KxZMb(DltxW{$WZLZw;?|cwjKh$#Q<`BG9T4!PY zq6GT}6+3gy;6&&tUs62VHmNKioOYcksS5v%wie_BKNl)P#8x9FUYMMCj2e0k_80l_ 
ziF*38;fuci2tRwKF=mkw0Uq)7iJ+y`fsg8DB8I3a?`W|SALogQ`^_qN!22>zB}dPb zX@Yg5ON1IB0>#WQfH!1=gv~pEZKb);2Sd&soy+F zt42jiSsM-2Nz-H%S}aON`g5NIolhl}DUXMr5!9iiZ@G_a_94R(wokFpl>jW*Z+!<3!B|(|A>Gn5mJBUHN&!95IjQe114H(4F*KVEbjxJ~biL-dDf2{% zDSgBb4qmtz@oi1~dq1e_%^#{GK>sd{)c(5o24TXzr4`)=y3*q0`;k>`mgT!6@3EXNWuikxX2+q%gfG-xhl;$t z6l5fyMK8y=8Ojk!F1sKK1^}#W>52Xt0RN$A&kn*lVKMt(31~ML>Z)FsMM@XOnGcJcdvD1=wz)h%vhysw!BnA z4t{lH^g1t*i4gqvIQh!}#XnZfpPx*nWkZn65W7(W>OaK$t@vJ%yPj zePNzo?C)_(h|4?4|7kDvLk%+7+(YP@!v$Q{Fc;i=^$>FEw24zsT5x(JK*CGt3RB7q z^*HcPm4$~}aS9=4=xu$B0P z=vXF8es*Q3IzOd-|FN&LpHw0NWcVLF|k=&qB0LHMNBS@3AcrS zafa{DoTFT!@*F1Imv8zZMh_7jfL@_?EYIyQ6lgd;6Dpmh9w0x#*?!j5c}Y z>nP7T2Y`3UZ}uT#O?5osR_-<*f#`d0wraS3TRI}0$7-{s=nLO(|#c$(_vJa@i*x3SehMRJkw6@RpG5P3G(Qi?=! zn-nGuXZVua_5-eqPW50=D`M1^hjg6(F+1jlZOxAuNXEs_vjIH!L<{dri_;^GJfDgZ z=c~j~nLh1bXt%2N?H*mvEmKx{`7P(I&?A`&aKngL*Z3|2GtU`h)`eurfv~E zDPIzb1Wck`Bk7=^Al`{ej;uQilMJ#u=3Z&ZJT$xN(|g$~{^-uz_e?l2ob}QZM=Py= zR&aSq=yVl-IOAkex7H`FzvI*%r(K;easVBKfaSuLckErpj|nupmyDjK8hU-QJaXWT zub1fKo87&^l)Wd9zyIuYe~vg3Nquv>y#Hax4SPz@#ddk1>HN5=O1y1N%8z;3|% zME85j3OP82SGkPGU$}+m)EBJl?d6&6=J6w*@beB);%^9A%bV^T4zlQg#r^kT*868T z#;X&o(!c-tOV85sPVZ=6n^#_-@Y>i^EvT8k1JU1UQ6XS3IOWl0p}e>ZS)-ii6T-W6 z)H_|xPtxk;7PWCS&sd24EQIcCv(Vmr2i%Y4l1;$+4nKsuvN8X#Rwag=$`cUe4nsdK zSgrR=lKeLC5msXLKKdK$dx3A9z5`#zc&T8#Z_a}+e?VEjG)}RilEfUuE7_#7mf6NrV&JXaLw=d| z{bP;dW4zS;j-g({?~}WgInA5>PXp`9W(07%yl$w~QB+B_4mTOF3JrL5xm;#L6l`@u zY5|$}6oo+uR;_%$B>}1^1lq0qc1E)eYF=PDFwqL4$ctdZtg7 z9+IHzMZ}Aj?!y|+f;7zf0EpUN=6&{k{zerF9lj?ODi-t#fYn%hNR=zzc+L4=H0O_QbF6GyrXV9Cp z+ce|`w5+>ragDLei0alFa5kqQxzqwFXe+~n?Z61&(q6c~T~4{*nhU14d$ENw&^^^E zuTX8#VS$4ai13^?KI<_}pnI&F=;t3bytH@-<@`o{r5E(X+6kOQ58*tYz3l|Sjtb(1 zS7l_fAz*QWa6S#an|xFcACZp4Ty|$6G3L_&jDIT$prP?w@F^w1I%Xzu$ZqR-XXUiO zp3bnX#R2o}Q1$eM{EYKM_yP|2@+)=d7WSu;9<#S0A!1s<6j2NlYvnApgJOOM!H?ru z*myBBF`PMVTj=7H%FaIdzJj&zdNEJ9Em-AHCxBu%K2EdqqduF0v&2YRB#c3J@Y;Pi zP>sw^y+2tei(*4%mwwL2)K8{526E%!Z)fQIB50Rm>jcki0o$1wZ z{Y~oZQNcoSX604DEHzNQ=)-AO+10&=3GO`lwGv99mvl8dpE^-@zqPv=#Vl6&ZkV>a 
z>EO0-{SAb^12w>}%G(8kbVCzjrFGX`l$VaHC~6WXan1ANfsgIiZ(@Pp+r3e$z&mp! z6R02yB+@auNy`N_Kd5LdpE?xVD@BeZ5NbkKH>V~0=eH!747&O|39M#o)aSG>*LA(W= zS5XSiisl_^>B8lUOfc1yRq=pwr(sLRmY3SrT;vdt<0WfOBDn()t^c!x$fxrr@lNGn z){Nq*%Nfl>qFSpkHbX7E%8_^Yr|EQpu89|shn5{1p;~fYW7Td3{wQR$zDsMaU~NiJ z^L7;R{wwl{a^{QKd?$F6H-+g3G&1Y535`t;AeG%8dR@MEZ;XId;y3f+0T|1^|I+n* zU?q&RI9eveEIWS4palebeJr2)RLCcN(*Knxo1T!mt7Xh(WKh@}e_3<$J>*O2f4zY4 zJ#78kP%%*J(4{qaQm9EAN2vHAukD7RUnHYq_E6=a^dK)CN$2Te^p>fl+Y}r+@7+j= z+)yqH1ux~0%~M3OAtEkJK`a<)KVE<9K9rnzfxC=rEw++weHQI>$SS=0!Nh1Cb_$|YB1c{6`M8v;w zdm3H2UVF2Ia=_9aQ>;!p`>a0bBcoTM*H;X?nG^P?<&=_xz?>t_)*7stnR#>`ZI+Vl9!X~XJ!x5o z1aSoqDLngy1QzXg;b;<OZrIMCc2iIHI@l+&2s z!dfz|jPd0j=+MRkWaCE`GA>4MO=Dy+#_nB>T7phNHC!95I_YX_^Tq3-QyAB``c5s0_&Rc%YJnKjl!Svisl{S}sPIzu@De`a9VE zT^Y<1$ISvMpWJeQ)G_s+?$`&!OedfT#fD=3>lVUpAL@SKkDZ@E+Y@$*V8w+}#FU4xMz~1#( z`6bqyL#lj!im9!;qXRgy-aS>()%bh>v)YUM-PRaw+OU;OG^>}*oHpd2TMrz(+C^!u{kW$!T`oIVMRPHRC29V##r0nrU+ys0^aI{d7z=V)&7W*#cTHVxY<-<;ZObK>Y#6%2R zlEfCQ0A(hA3HEc_@t+90RRjKUal6D?UJ;~DGK|OA?7H)TI0?}u&nrrrc)pq|eEdCu z&(N7h>zO%n+s8B&jCQ-`Nb8DIJ>r=&;Zb7t%sn6KPEVQ>WpQhyX9L^Nu*$1d)|1c} zsG--|Wtr||=r$2)PQfj2F+lu|5S2n8W|J_Xg{z`vhoH-+<1&Q9B$v4)Yi=9)M)R}` zCk-c@?Qo2qkp)Y@F>A(8EfYT&kQI>S9(pY9e6~+GG9UE8SG2JsK@M zz}nSDh8dy4&90LbX!j(fQ~Hdebzcoxo2>H55~0H=+8SZsTGQXU=S4Aw?HIIMpY*aiE6ZU{#`R`YxbZ_aK=UDV=O_A_0iy*%ZUM0vN#Ed$1!y|G+|sR|;1eRdHr%~?Ea z$W;yv-t{65;I|Wwi36P!Q~`&Vr;Yd>!?SXcHfdborvB8~;>JGzG>LJ#V+s6tEDDLV zWfWTMxbr{|O3{o;9k*&{XJLv5vR^qQhj1mH5Y>9KS9FFx@F>yAE`WBkk7WuLxucmJ zP94_v2k+s@g|O%KS?Ou`%Wl(oq1|xWbX&=4 zUs4crs#vASG|7zHVF>sniX}3ata63Yeu_fI+?rs`9TXu0%^F`q1awJ5>m#ZK58!TU z2ZDT^tfGYLh5jC^Ai5EIK+;-IGVhN8CkpSlV0*>3S9!Nj1kB7KroiiznCol*v65-V zw$HC^pTeBpDmx>V{aSc5Ns;v`RqfNXu9dc*pdn|F&0ozg zd4FX*rp6n^cZtl2`okDyBx%+no;!SC&?AV(xB!>Yj%eitUP5W*t}sHCX|7N8Nm^gxxT_W{_1z` zuQO#o7cGOX2@?T5pN5ljFBSHcUvwy6S z(FywBC4H`k4s=V|?Sic>J7CRbL-ro!8Ibhm`iJJ-2Dh+F;aXx}?oP?$Dwsc9AJxM& z>_4c@*B`@loC$DZs9NcGVoAf7L5H9aFx^HM!< z`FN8m3|A+dZ9Z=lJdKV%SWLB;H2X;>ddBk!W@R`@C*zUc3vI{9&!Doo?+sWcm0Y@b 
zZG9k5g7mR#!oKlYu>%viY!Tz5V4PX$QS!@4Ir#iL9uHJw>a$ zu3n`ZaAfH_nXsF&QK;!(DCUQ4&iWI@slU(qa>xsGcU0^Jn+31cN|G0RW?QM^#1if_ z%~Q6e>s*g}GcYKiE}lHVv)ohX_cb6$EIFh$%aop{M^6XWlX7U? z+#V{({9apQ!cgnFkwD&8V`kU~si6s?y;G~+7GzBbdpNi>fKOGQXA7i#D-~8D9wX@t>bl*Ggh|?7mez{uX~m6pCa+S*&8%A{ zhan58^oRM>mgi&`>D(;Ru`VZY$%}prlr8YpZgwg2EFYk-7AoVRl;2aSOAzA)Ul;^d zGx<&DIW?3u+!kcL)m`B%c3`|I52MVipnBDSxe8y|6M0a^-|p@rsSESdY)< z?03cSa0v$M((XOa%hk)BWlqdc*T9(zAY-$V%m{zMS;To)oKwP5yhzy?Y=H8I4j57y_z7!Y)zSNpBZ1!wO>&a{pA%nzjAx4AG>MOioC>07o;xce`vT?9O}B3| zjqWrGjk({0TVr%Me5kFNL8-`I=q#mgb|9yMVAWKdeeG_9^C5k9u--?wCV2wN2RX3h z+UJg7^bNY)h`baWpU$tB_fDWTq9YZ(H}P59nPPw$M{sAh+&{7@TSPqAm`bO}>H5A9z0&(!e7~q1n6SC*XGN99~9gU#|mUr0b3;99k0(Np%lRk=29j!M5Gyxl~aq? zP@GI4aM+QF<$hbPI|*BUM^3kACU$;_M^Ne;T5|PxmBTHCCwu$E-_!3F%<|(7=y2EG z0?v^K3i!%5y9~PAq39xU>o))hAMfN18jEtUq1@>_EHT2jT-3^knA!Y(Nn}R{pzm7SvpmfzEGltS`PTsnu-<%e!)sUJ9F|hQ)@Y;-+0Gd`4E*zRA$1ME+`nn3RD7$Sv!_W)UJ1p4of9FYo;4*-KeR zO6;gOpgtv}$MjM*GX-^7U(M|EP-I1)DNOUEI68QbyBU1OcLqrWJSb|&P&X76QaZD`tddF8VUd7+;#Dg4}@p|HfneyZdt z;LPrTTd2Guw?$c2&9SqvAtXk{-d57+VNi_(I~dbR5BWx@6PhHvowt6g*QBd0wiurO z6{Xmf08Z0wRIZsXAcX56pk0Z{F2<2{KYQe!!zs<)(Xp70A^OVsNYgY7jXIB-gWBDszFa8lfNI(`muA?w!gv$ z;1c!v-0MqnrK$zZXDYv1J}HZ9?1cx>+=BE}M~c*2Tp|HWh{VQH^nA6%=K34xjDo7Y&*VRGB8v4?LqaKV+0)yNpQ@`YLC3@#GE$*M(f5b z9A&HMES9U9tL3O?d50=xo=v|a`OfH>LoeM4OiTljgz0J+${W1y-8WkFcI}aDHi7+b zR35%G>_-EbJE9T*N71xKAVawvdk!&9=DLW9Lh5R;^GpSl;$VeEBj zx}88Au7m{Q+O0~<*-FfezO6%v;QD=&^36Y1JGI~hEEEeV{FKX+%8~We-T&tHHZ0Ts z;EQ8CpHOV7H*TK@dTF)?f9B5{M-2)Ed7hMI&B=tao&riTy|84+L1>hg*_-f_2RhV* zWw7Skwy<&VD}?8Dj22D=)^}iM|cXszhJK?Fex~=8+y-f9!i-0ZcqS4f}db``cvDamU-_Nm1U>#s|CwfzKSf~$xc^$q4nHB zXaKw)+`6ec?1IX)!Fh&HM2d$Ov%26VpZ2BnU2)t_K=xWadM)@g8Y2;V&`dD8OkO7V zxsoN192>#Nq_~e@%5uemw;S*4=h{xSEb!HSOKlvxg-&h?;b5%iWG}j2X{qjJ7+XuI3_siq|9Tixr))y__@!|ATRtu;?ch zL=7gfs2pt-50i?VL~NzTT(h}jAYL8TK&p~HwifSkgn?|gMVL`!c#zymii3 zOm~#33!8JqmVoY=N4;poH)%{iuBB`Jj#`-^XoF9#f?rRiPP*dWcU)X>lxfQ3*7Qm_ z4wU<2OE@=e;AN>M5Vc(9)ie{zPOT2l5__4dH`Fw%RL~NS8b-go4642Qh@7vs8-g3G 
zlWsNPJu0_ex$FIn z6dhFa=*Q!-fli4xi}|okIU&e?S&z?Um-{7lEo$@SPe~XzU__>bUx)h^VN#ryYag#i z>jQODf4F`{_rK1QAA3jAIa;OK=DUlKAfoEZwEDN zPZ20OubLX{CnxR8Jct^tqnUFSF1vA)DAPf@yH;s9eCN#{{-IlB_YA|5ek9USlHEN5 zVNO2Hi?|9f-kMPnaDnUd5W-QsmRdcSXWL% zT)dc(;U?i~247bk4jdd>SaXZK-oc_YdE7rEp|f@Kgxhb9B}X7xlttHJM*HgM!~K*V zb&OZ%EB4CwLg{j(UtQ$dfC(4}UwQ(#4lR@u19X<`G%9l&*>1VU1T>)iP+6Wl-aqOD~b%wC)iB|wI`C+bMhy>Qu!D!64<5|`-c0gJV!$ng6wmWRnuZ?`?CkF`*Ofodw|UiA zBARDNDzRc|1dQ3C()1x*(iCIr{KUoFSJp_e=GwX(LGBK46{DoAoI&vPl8A?5*P*@M zVe;B~>-D|)de2w6YkUhV6gUlmARtKhPBC33DoQQ5K7R=Eyiln3ztB+qW+%lkf7!V?f*^)Y|EB9ud-_G-F z7TaW^`uG)Xnpa@NiSo^q6+uY3?4O!ZI?XNZMfcOtA|Kk(65=RcUSUInmcZ}~0|x#? z{+Si}i}u`PT-1n#hr2K)Cru!R0bBr^<66})H61c=>T89wbKH}&h$m;mWqy{FlwT?v53;{9%-Os1>ysruz?UwhN5N@Yl93IupegSnHoo1sK^a(VA8F~}@&GrK!DvF){f)r|`oV(9UFyN7(v#Z zRlX4sMIOQ?RyQhEp2J~oB?)MYEbmxeg74STc_np zqNNtlf7^-M7aCet=9D+0nrD!-^8@qlimj?z!^3m+9gOVUv#Z|c=2UA9DTk=|tRHW- zH&NO?vBY(z(&8-O47`aBJuI2+dNix9|Nb=eY(a?EUNA%1@anbN^EYZO7RS#G>50sX zBRC(Us2|bnZ$@AH?e`0b*HjUSSAzK%uah~|pfd~S3Tnc$4mNy0b93EK z$!mWOoAs`bnyb%x-iY?;Yz2)>e5cL~-J+rX3{qD{$_gWK%!cz;*KLIT+GhGC&G^Xg zjjOPidpe0C8E;DUC4zR7g=!g0S9a%T#I|jcJoWRV2=dQh2>8ePOY_0J%%$hBZT2i~ znG?L30Z1THEN8n5k&eNo@VNGa!RM&gCFLl~wDkx^+T++7Q^=#nw&d(FtX&rA^8}P!@1*>fai0&?d)$FMJ z`g%Vvqh1b{bEu8`T6bB;CWm1QwvUIJ@epk5nwu7iY!)Ux8|U}N%hLTSLN1d=+LL5W zpX%g{M_}VFpd{Ja5}0{CRr=1DaTU=l-%S^lEFh#ld+5HwcAd=EYSWw8L|0_&5J?*6 z_b9Nk>AJALxObb-@M<&WYzap!xJgAwMlMHNy^a_irB&1)X>&f5FSzl;ExB{cU0u?C zOEk~fXuVzegzU$8v32%_K64q}Zw)1DGN4xqpBF8)BSkp3HfLmWx*>SC+s4i5-;ePJ zEg=trROnBwW#Fp64d~-)uhq}RsidPiB$V(K$Dl8*tum&dm_q`MyFDnnvQeBbuc;Gy zuS7yINz%>Tekz&Y%FE6x2lyf-iQ>WQ80-j)Pv=X;d6EY?R_J`48{PNOQSqou0;dhL zQF5ij=aqUTf<+ZA53j=R9MOUWNt_t<;m7nFJ;A!3ulF`+s*r3wV5-2LTMmOhMRA_Y z2F#^XbE-4uCm1VG$eBWG#7Wc!6hbClA#n+bjy&FS{v^>$A;c(G+2LCZe{~HN;~3i( zl@MVp-y2VE#(TGMC(ND|hU2q2bnP&IK8)mXYf?O{G=3K;1rt8&>UjPLkI7{#Jg@37 zhY;Oj0KH@8>n}d%_t&1Y3tmXCE26t897GCE0C&1N!E z3@HpNSD&XYc<^VecoWehe%Eq~kTB<9L5TGj`4HMksd97c<0BfSjOohpv2E^j-$zWP 
z{-mkdL#^dG6tzH0x!OGR61E)c0yV#Eje>P8QQwsWbykKqULrSjdq=C9KgPD`mn!v- zI-V%o5Q;t&yj+*B-*h0x8ZBsUX2g>(xhXURd7*2eH}Rj=I)4pkU+=u-beRb%jAO5W z-n7pZ+~+R<-9)VNk5zAPdIa6qJbk9q-!@dlfHA45;w`PR3AeENaatE89XSRY2hfE^ z`O?ON#Z3et%86innIfi#rRZ>M65UlyvjS0mY-9lN#f-mjd5wrXOdXAAkw8=*Q#x>^ zSAhr7f;Q|hczL<(05d9nxZjf1i@aONosHRzw?P)VN}lP{@t8H4GTn z8c8I8TtR>57_?p)78y?I6kx!*rt=l-+iYy@l{2K4Be=-R`6hzIlDJETVZc&=xZi}G zfiH!IGI~Wb_&M-7Dua0v2%8%S9 zTYK;sn@ruEA6odLyLNS>e_FOcJ zxjz&5jTmOF;;IuF%0iMFr6uBIkO0A)p4#49jKinx6rUOc>KD7`C zdyVLO=jRheFnXnr@{Mg+=~8FvCwp}#oJwD`s04bj(7VA-R=EAOCL~thTF09D&nmB* z!^3LK*+SpUT}l&J*euob3w3|Y;pfSlire0gm1lRO9ZMM_e4-Q0D5Ew~tmA8@d)j&2 z`DX5F(B}3^&H+ZQRGTS0H&w@QSasT-TYEgO?s+V@mni#7c$p|eVdh=v)QaB7^;&!y zrs(>7*``~XHhMFgqkUt$ClvXGG+q8zytG8vb~5TQO1pq%qq)R7(?!r+qNcqkDFp*G z?Sn+g^2KPDg|D+R@nLj1UvKMic!yF4ZehjCDTBc<5n_MoMv7kOmT|?}fvg8c)(y+0 zW`Mebl+12=r|kM<%KKL({GzNK=P0arg^z`v4CCm^qaPw_lbtf&0{O9Vlcl(>X>RX= zSVHuKXogFO-r??BHX+H~FGdMQHh#HtSnNjfkd|3JHC}bwx__F#BKfj_Stb&TB%N4> zVqkMdJA3}~ask_RKed4e`y*koqBl}%G!{_HZ5Nur54jzt;}r#2$FfHHit;|G0J342 zfJeT{!J(x!Y)DUtL1Kbw@2XA~j5>XvE^OaIjEt&)?;itJfAP{=3%g-@E78q(S9s+9 z^PnQ+h}>QO?!xJCs&4k!{NnJg^6mhU{~MWYds$$+~*{rwv{pK84vD(TWOW%ufmVj4kL57yTSisA zK(g3>4Ifwzn+?->vPP=)%!>Ttk*;r+z1e4s2KM*cEf`#>Q#QBcx+BSk$25($LcUR8 zu3znQW>49mJZ^&N*q`K0-mKc(Zi_=Mhf+&Ou(glGRQVVSVMlD-Q+Ef0Ca&luF0nZk zvMItdJLe22m#_*;lz7(&P<ObkE=W1P#c-4h@+*-mO>@N%-#G~=v9iMUdr@!* z3`HYFQ;39`2S)mFNv@9OLi=&9C|9>;AeO5a4vfznD-+B4Gx(G^yWH=+ROCVMcmMzZ z84#$nBAP_3yuu0s0JhNq09@F+Z|xjS*v#x~oz)GEtW8%{! 
zQzB9wdcx_Tj~4Kmh`sGK<4SvyeA+WTysz)PtqLMuTp-xvH)IUOVOFTqe!!C8nePrW z8>ePg)aUY)ICj?VV5`V=H*}522D!0o>P>5clo_#yBFv(pi;*%2H24DX8Xt=z$KLF*Hv;*$Io0!Mx6K^nz4B;P%DHeuA&Xwo zu8U}5c-C?V4q&NNv}^={K{+P)0t|O#MW0P-lp@r2wwz3Qb*5_rF0nK?-=&FdB4z}3 zXG+}8)3Xpt%xRPtpMXy%CE)@ww612bdf!rolplmk+6ehF4t*?s(ifV8+0=#v z5D5__1kOvkg0a3x6-uhRgrC|L(pi%&2McHWam~h8zrYE$Ozol&C5dNMofWBji^8~% z#0tcChMe5Ppd&Naps6-B@7+jfmL7A+k!-F zv&%45x}=elToK!QojVK_X<>xwP8pl`dD-1-uP@g}k9Ym}PUBlyD3AwI2U8bU>r#a`zKI$X${k;u-wW zc@>Pf#u@jXq>7PMZ``5L$^F@>XJ!kij&spjbMDoV#c8)0V9MmDk4h}q))!+jwxm?B7^&ha?)Xl zM0uFOk2&JgaOYxnc~uPGu?%18aUfL-QCN7~s0%rp90d#r;db#!JD+}#xQgPhf0uzZ zNDxB`8Cb{ozdiZjrC)jKzqzvq4^d?UR-pjqPSW2in%Eh;*qGWnKV-GP{X1;c&W_%- z@7Lrv3UL?<=a5R*@5-Ex7WUp?^hz=&ktgY&Q-+_dQE8B84Nz`dJQp8xJ?DCN8lK!m zh^{V)g6bE#N9YdPAKY1Z#ArnFYR6a<#D%-NJ(s#i;@*E09KaEcn<$EixPDUn>@$!- zUQ&TDcf3ARBXk9-+r8;1T@uP1{!j zYG(ov?^%0J((F79310##6#|54#ZwVEVL1)J@{-NYCET!z9z<1DoOHKtU+`MIKvQz* zTR#UGfmouCf80Pqk1fJZO~>59X$J_BBPI?c{sfp{i zZDYrlJdeM^imuZz+RQFuh6@!*c#3ufEad`z^6ceOxwuw>GX&zvsq#kS&YhDkYLka0 z44I+HSopF@wrmEYH)6&gj=yYF)<1e5O~nLeA`yx+LSU3?-}%&Pf)C`<->bmm%D$ka z+a}_JN(L>x%f;+vTAAjKSKU&6hVX7~XF--s$R|H71)YObNUUT%p>l)HOFxyioUyX| zv?D+R6+h?ME1c9mXqhYHWq`^-pDX5&LI=qd_7PTIY_urUbaq|BeLwP{` zDi6jU3M|Z29PR8M^4#zC_}xA2tYAIQ0~VV9s{EHdhW~^a+S@;L!avb(P22P}|3>5d zGW4(3w!q%~w^3s|$6v#iKjG41Si5nsia%k-AG-ZR`OCNjZ20pJ+{MY+&gLKc|J}o| z(D}POd=3=Y%U}8E|MSyB()z1BKnlM9k-~oW)SpT00r*#AM}G(Yuax%Z$6kvYpbzo~ z06M-S0)F?Jtj6C>_|J3v3IB6=_)kLsvwy>X8!Y~bH#ss14C4m?#4>?@nCHg-8}DT5 z>}+Xk?(~n({|SrvW=*X*ECArv5deTa{3?I>f#qL5cXEDbZTcVJ|9-)K_x`@p-?p7C zOl?g6S3m8Uc>L-Pc77Hj007p?|FBA^{2TBWoa3*CBRqV+!@4y#U **日期:** 2026-02-18 +> **阶段:** Week 3 - 统计验证扩展与用户体验优化 +> **开发者:** AI Assistant +> **状态:** ✅ 完成 + +--- + +## 📋 今日完成内容 + +### 1. 
负号归一化功能 ✅ + +**问题背景:** +- Word 文档中的负号可能是多种 Unicode 字符(数学减号 `\u2212`、En Dash `\u2013`、Em Dash `\u2014` 等) +- Python 的 `float()` 无法解析这些特殊字符,导致验证失败 + +**实现内容:** + +| 文件 | 修改 | +|------|------| +| `extraction_service/forensics/extractor.py` | 新增 `_clean_statistical_text()` 方法,在提取单元格时自动清洗 | +| `extraction_service/forensics/validator.py` | 新增 `_clean_number_string()` 和 `_safe_float()` 辅助函数 | + +**覆盖的特殊字符:** + +| Unicode | 字符 | 名称 | 清洗为 | +|---------|------|------|--------| +| `\u2212` | − | 数学减号 | `-` | +| `\u2013` | – | En Dash | `-` | +| `\u2014` | — | Em Dash | `-` | +| `\u2264` | ≤ | 小于等于 | `<=` | +| `\u2265` | ≥ | 大于等于 | `>=` | +| `\u00d7` | × | 乘号 | `x` | +| `\u200b` | | Zero-Width Space | (删除) | + +--- + +### 2. 统计验证方法扩展 ✅ + +#### 2.1 T 检验验证增强 + +**改进点:** +- 智能样本量提取:支持 `(n=50)`、`n=50`、`(50例)` 等多种格式 +- 新增 `_extract_sample_sizes_from_header()` 和 `_extract_sample_sizes_from_row()` 方法 +- 支持括号格式的 SD:`45.2 (12.3)` +- 支持多行单元格 subrow 精确高亮 + +#### 2.2 SE 三角验证增强 + +**改进点:** +- 支持多行单元格的 subrow 精确定位 +- 遍历 P 值列每一行,分别验证 +- 显示友好的行描述(如变量名) + +#### 2.3 CI vs P 值一致性验证增强 + +**改进点:** +- 支持多行单元格 subrow 精确定位 +- 支持多个 CI/P 值对的验证 +- 使用 `_parse_pvalue_flexible` 灵活解析 + +--- + +### 3. 前端翻译映射更新 ✅ + +**文件:** `frontend-v2/src/modules/rvw/components/ForensicsReport.tsx` + +新增/完善的问题类型中文翻译: + +| 代码 | 中文描述 | +|------|----------| +| `ARITHMETIC_TOTAL` | 总计行错误 | +| `STAT_CI_PVALUE_CONFLICT` | CI 与 P 值矛盾 | +| `STAT_SD_GREATER_MEAN` | SD 大于均值 | +| `STAT_REGRESSION_CI_P` | 回归 CI-P 不一致 | +| `EXTRACTION_WARNING` | 提取警告 | +| `TABLE_SKIPPED` | 表格跳过 | + +--- + +### 4. 
文件格式提示功能 ✅ + +**用户反馈:** 上传 PDF 文件后没有数据验证 Tab,需要提示用户 + +**实现内容:** + +| 文件 | 修改 | +|------|------| +| `Header.tsx` | 上传按钮下方添加蓝色提示框,推荐 .docx 格式 | +| `ReportDetail.tsx` | 非 docx 文件时显示黄色警告,解释为什么没有数据验证 | +| `TaskDetail.tsx` | 同上 | + +**提示内容:** +- **上传时:** "推荐上传 .docx 格式文件,可获得完整的数据验证功能。PDF 和 .doc 格式仅支持稿约和方法学评审。" +- **查看报告时:** "当前文件为 PDF/.doc 格式,无法进行数据验证。如需数据验证功能,请上传 .docx 格式文件。" + +--- + +## 📊 当前统计验证能力总览 + +| 验证类型 | 方法 | 状态 | +|----------|------|------| +| **L1 算术** | 百分比 n(%) | ✅ | +| **L1 算术** | Sum/Total 校验 | ✅ | +| **L2 统计** | 卡方检验 P 值逆向验证 | ✅ + subrow | +| **L2 统计** | T 检验 P 值逆向验证 | ✅ + subrow | +| **L2 统计** | CI vs P 值逻辑一致性 | ✅ + subrow | +| **L2.5 取证** | SE 三角验证 | ✅ + subrow | +| **L2.5 取证** | SD > Mean 检查 | ✅ | + +--- + +## 📁 修改的文件清单 + +### Python 后端 +- `extraction_service/forensics/extractor.py` - 负号归一化 +- `extraction_service/forensics/validator.py` - 统计验证扩展 + +### Node.js 后端 +- (无修改) + +### 前端 +- `frontend-v2/src/modules/rvw/components/ForensicsReport.tsx` - 翻译映射 +- `frontend-v2/src/modules/rvw/components/Header.tsx` - 上传提示 +- `frontend-v2/src/modules/rvw/components/ReportDetail.tsx` - 格式提示 +- `frontend-v2/src/modules/rvw/components/TaskDetail.tsx` - 格式提示 + +--- + +## 📋 待完成工作 + +### V2.0 MVP 剩余任务 + +| 任务 | 优先级 | 状态 | +|------|--------|------| +| Week 4 功能测试 | P0 | 📋 待开始 | +| Week 4 性能测试 | P1 | 📋 待开始 | +| Week 4 Bug 修复 | P0 | 📋 待开始 | +| Week 4 文档更新 | P1 | 📋 待开始 | + +### V2.1 待开发功能 + +| 功能 | 说明 | +|------|------| +| ANOVA 验证 | 多组比较 P 值验证 | +| 配对 T 检验 | 配对样本验证 | +| 非参数检验 | Mann-Whitney, Wilcoxon | +| .doc 格式支持 | 评估 Pandoc 替代方案 | +| Profile 管理 UI | 期刊配置界面 | + +--- + +## 💡 技术要点 + +### 负号归一化的重要性 + +```python +# 未清洗时 float() 会崩溃 +float('−1.5') # ValueError: could not convert string to float + +# 清洗后正常工作 +float('-1.5') # -1.5 +``` + +### Subrow 高亮原理 + +Word 表格中一个单元格可能包含多行数据(用换行符分隔),例如: + +``` +| 变量 | P值 | +|------|-----| +| 年龄 | 0.82 + 性别 0.01 <- 问题在这里 + BMI 0.95 | +``` + +通过 `data-subcoord="R2C2S2"` 属性可以精确定位到第 2 行第 2 列的第 2 个子行。 + +--- + +**文档版本:** v1.0 +**创建日期:** 
2026-02-18 +**下次更新:** Week 4 测试完成后 diff --git a/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md new file mode 100644 index 00000000..26e35e71 --- /dev/null +++ b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/RVW V2.0 表格提取疑难杂症专项解决方案.md @@ -0,0 +1,137 @@ +# **RVW V2.0 表格提取疑难杂症专项解决方案** + +**问题焦点:** Word 表格“假行”现象(单元格内多段落)导致的提取错位 + +**核心策略:** 从“视觉模型”回归“DOM 深度解析” + +**技术栈:** Python (python-docx) + +## **1\. 核心判断:为什么不建议全量上视觉模型?** + +您提到用视觉模型(Vision Model,如 GPT-4V, Qwen-VL)来识别,这听起来很诱人(所见即所得),但在**数据侦探**场景下有致命缺陷: + +| 维度 | 视觉模型 (VLM/OCR) | 原生解析 (python-docx) | 结论 | +| :---- | :---- | :---- | :---- | +| **数值准确性** | **95%\~99%** (存在幻觉风险) | **100%** (直接读取 XML) | ❌ 审计场景不能有 1% 的误差 | +| **小数点敏感度** | 可能漏读小数点 (0.05 \-\> 005\) | 绝对精准 | ❌ P 值验证的核心 | +| **对齐能力** | 强 (能看懂视觉对齐) | 弱 (需算法辅助) | ✅ 视觉模型优势 | +| **成本/速度** | 高/慢 (需 GPU 推理) | 极低/极快 (CPU 解析) | ❌ 影响并发性能 | + +**决策:** + +**“数据”必须信赖 XML(代码),“结构”可以用算法还原。** 我们不需要视觉模型来看数字,我们只需要一段更聪明的 Python 代码来拆解段落。 + +## **2\. 现象诊断:什么是“隐性多行”?** + +在您的截图中,Word 表格的一行(Row)内部,用户使用了 **回车键 (Enter)** 或 **软回车 (Shift+Enter)** 进行了换行。 + +**python-docx 的默认行为:** + +cell.text 会把这些段落拼接成一个字符串,例如 "DNT时间段\\n\<45 min\\n45\~60 min"。前端 HTML 渲染时,如果没有处理 \\n,或者对应列的行数不匹配,就会导致错位。 + +## **3\. 解决方案:行分裂算法 (Row Explosion)** + +我们需要在提取阶段,检测这种情况,并将“逻辑上的一行”分裂成“视觉上的多行”。 + +### **3.1 算法逻辑** + +1. **扫描 (Scan)**:遍历表格的每一行。 +2. **检测 (Detect)**:检查该行每一列的 **段落数量 (Paragraph Count)**。 + * 例如:Col 1 有 4 个段落,Col 2 有 4 个段落,Col 3 只有 1 个段落(如 P 值)。 +3. **分裂 (Explode)**: + * 取最大段落数 max\_para (如 4)。 + * 如果 max\_para \> 1,则将此行**分裂**为 4 个新行。 +4. **填充 (Fill)**: + * 对于原本有多段落的列:按顺序填充到新行。 + * 对于只有 1 个段落的列(如 P 值 0.001): + * *策略 A(重复)*:每行都填 0.001。 + * *策略 B(首行/合并)*:只填第一行,后面留空(前端处理为合并单元格)。 + +### **3.2 代码实现 Demo** + +请让 Python 工程师在 DocxTableExtractor 中加入以下逻辑: + +from docx import Document +import pandas as pd + +def explode\_word\_table\_rows(table): + """ + 高级表格提取:处理单元格内的多段落(隐性多行) + """ + structured\_data \= \[\] + + for row in table.rows: + \# 1\. 
获取该行每一列的段落内容列表 + \# cells\_content 结构: \[ \['DNT时间段', '\<45min', ...\], \['1299', '881', ...\], \['X2=..'\] \] + cells\_content \= \[\] + for cell in row.cells: + \# 过滤掉空段落,获取真实文本行 + paras \= \[p.text.strip() for p in cell.paragraphs if p.text.strip()\] + if not paras: + paras \= \[""\] \# 保持占位 + cells\_content.append(paras) + + \# 2\. 计算该行“分裂”的最大高度 + max\_height \= max(len(c) for c in cells\_content) + + \# 3\. 如果是标准单行,直接添加 + if max\_height \<= 1: + flat\_row \= \[c\[0\] if c else "" for c in cells\_content\] + structured\_data.append(flat\_row) + continue + + \# 4\. 执行分裂 (Row Explosion) + \# 针对每一层(visual\_row\_index),构建一行数据 + for i in range(max\_height): + new\_row \= \[\] + for col\_idx, cell\_paras in enumerate(cells\_content): + \# 策略:如何填充? + if len(cell\_paras) \> 1: + \# 情况 A:该列有多行,按顺序取 + \# 如果当前层级超过了该列的行数,填空(或填最后一行) + val \= cell\_paras\[i\] if i \< len(cell\_paras) else "" + else: + \# 情况 B:该列只有一行(通常是统计值 P值) + \# 只有第一行填值,模拟“合并单元格”的视觉效果 + \# 或者:val \= cell\_paras\[0\] (全部重复填充) \-\> 方便后续计算 + val \= cell\_paras\[0\] if i \== 0 else "" + + new\_row.append(val) + structured\_data.append(new\_row) + + return pd.DataFrame(structured\_data) + +\# 使用示例 +\# doc \= Document("sample.docx") +\# df \= explode\_word\_table\_rows(doc.tables\[0\]) +\# print(df) + +## **4\. 前端渲染的配合** + +为了让“数据侦探”的高亮定位准确,后端返回的数据结构必须包含**分裂后的坐标映射**。 + +**推荐的数据结构升级:** + +{ + "row\_id": "r4\_exploded\_1", // 原始第4行,分裂后的第1子行(子行从 0 开始计数,与 paragraph\_index 一致) + "is\_virtual": true, // 标记这是分裂出来的行 + "cells": \[ + { "text": "\<45 min", "source\_cell": "R4C1", "paragraph\_index": 1 }, + { "text": "881 (46.59)", "source\_cell": "R4C2", "paragraph\_index": 1 }, + { "text": "", "source\_cell": "R4C3", "is\_merged\_placeholder": true } // P值列留空 + \] +} + +**前端展示逻辑:** + +* 当后端返回 is\_merged\_placeholder: true 时,前端渲染时不显示内容,或者通过 CSS 渲染为合并单元格的样式(即不画上边框)。 + +## **5\. 总结** + +1. **别用视觉模型**:准确率风险太大,得不偿失。 +2. **用代码“分裂”段落**:Word 的 cell.paragraphs 是您的救星。 +3. 
**对齐策略**:通常临床表格中,如果一列有多行,另一列只有一行(如 P 值),那一行 P 值通常是对齐第一行或者居中的。在做 **数据验证(L1/L2)** 时,我们需要编写逻辑:*“如果检测到分裂行,且 P 值列为空,自动向上寻找最近的一个 P 值作为本行的验证依据。”* + +**实施建议:** + +请 Python 工程师立即测试上述 explode\_word\_table\_rows 逻辑。这能解决您 90% 的“HTML 只有一行”的问题。 \ No newline at end of file diff --git a/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md new file mode 100644 index 00000000..390dd0e8 --- /dev/null +++ b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/临床统计特殊符号提取白皮书.md @@ -0,0 +1,201 @@ +# **临床统计特殊符号提取白皮书** + +**用途:** 指导 Python (python-docx) 在提取 Word 表格时进行字符清洗和标准化。 + +**核心痛点:** 同一个数学含义,可能由多种不同的编码方式表示。 + +## **1\. 希腊字母类 (Greek Letters)** + +这是最容易出现乱码或识别错误的重灾区。 + +| + +| **符号** | **含义** | **常见 Unicode** | **Word 中的潜在坑 (Legacy Fonts)** | **处理建议** | + +| ![][image1] | **卡方检验** | \\u03c7 (χ) \+ \\u00b2 (²) | 1\. 字体设为 "Symbol" 的 'c' 2\. 公式编辑器对象 | **正则匹配**:\[\\u03c7\\u03a7\]2? **关键词**:chi-square, chi | + +| ![][image2] | 显著性水平 | \\u03b1 | 字体设为 "Symbol" 的 'a' | 替换为 alpha | + +| ![][image3] | 回归系数/功效 | \\u03b2 | 字体设为 "Symbol" 的 'b' | 替换为 beta | + +| ![][image4] | 总体均值 | \\u03bc | 字体设为 "Symbol" 的 'm' | 替换为 u 或 mean | + +| ![][image5] | 总体标准差 | \\u03c3 | 字体设为 "Symbol" 的 's' | 替换为 std | + +| ![][image6] | 变化量/差值 | \\u0394 (大写) | 字体设为 "Symbol" 的 'D' | 替换为 delta | + +| ![][image7] | 相关系数 | \\u03c1 | 字体设为 "Symbol" 的 'r' | 替换为 rho | + +**⚠️ 提取陷阱:** 很多老旧的 Word 文档(特别是中文期刊投稿)喜欢用 **Symbol 字体**。在 python-docx 提取 text 时,你可能会读到一个普通的英文字母 c,但用户看到的是 ![][image8]。 + +* **解决方案**:检查 run.font.name。如果字体是 Symbol,需要建立映射表(c \-\> χ, a \-\> α)。 + +## **2\. 
数学运算符类 (Operators)** + +| **符号** | **含义** | **常见 Unicode** | **Word 变体** | **处理建议** | + +| ![][image9] | **加减/标准差** | \\u00b1 | \+/-, \+ / \- | 统一标准化为 \\u00b1 | + +| ![][image10] | 小于等于 | \\u2264 | \<=, \=\< | 统一为 \<= | + +| ![][image11] | 大于等于 | \\u2265 | \>= | 统一为 \>= | + +| ![][image12] | 不等于 | \\u2260 | \!=, \<\>, /= | 统一为 \!= | + +| ![][image13] | 约等于 | \\u2248 | \~, \= | 统一为 \~= | + +| ![][image14] | **负号/减号** | \\u2212 (Minus) | \\u002d (Hyphen), \\u2013 (En Dash) | **极高危!** 必须统一替换为标准连字符 \- (\\u002d),否则 float() 转换会报错 | + +| ![][image15] | 乘号/交互项 | \\u00d7 | x, X, \* | 统一为 x | + +**⚠️ 提取陷阱:** **“负号”是数据清洗中最大的坑**。Word 会自动把连字符(Hyphen)转成破折号(Dash)或数学减号(Minus)。 + +* python 代码:value.replace('\\u2212', '-').replace('\\u2013', '-') + +## **3\. 统计学专用标记 (Statistical Notations)** + +| **符号** | **含义** | **形式** | **提取难点** | + +| ![][image16] | **样本均值** | x 上加横线 | 通常是 **Word 公式对象 (OMML)** 或 **域代码 (EQ)**,python-docx 的 .text **读不出来横线**,只能读到 x。 | + +| ![][image17] | 样本率 | p 上加尖帽 | 同上。 | + +| ![][image18] | 决定系数 | R \+ 上标 2 | python-docx 默认读成 R2。**这通常可以接受**。 | + +| ![][image19] | 下标 (如 ![][image20]) | 文本 \+ 下标 | python-docx 默认读成 Xsub。需要识别 font.subscript 属性。 | + +**⚠️ 提取陷阱:** 对于 ![][image16] 这种带修饰符的字符,python-docx 可能只能提取到底座字符 x。 + +* **策略**:对于数据侦探来说,通常我们关注的是表头里的 Mean 或 Average 关键词,而不是符号。如果表头只有 ![][image16],可能需要结合上下文推断。 + +## **4\. 
拉丁字母的特殊含义 (Latin Context)** + +虽然是普通字母,但在统计学上下文中具有特殊含义,通常以**斜体 (Italic)** 出现。 + +| **符号** | **含义** | **易混淆点** | + +| ![][image21] | t 检验统计量 | 容易混淆为时间单位 t (time) 或 吨 (ton) | + +| ![][image22] | F 检验统计量 | 女性 (Female) | + +| ![][image23] | Z 检验统计量 | \- | + +| ![][image24] | P 值 (概率) | 磷 (Phosphorus) | + +| ![][image25] | 样本量 | 牛顿 (Newton) | + +| ![][image26] | 相关系数 | 半径 (radius) | + +| ![][image27] | 回归系数 | \- | + +| ![][image28] | 优势比 | 手术室 (Operating Room), 或者 (or) | + +| ![][image29] | 风险比 | 心率 (Heart Rate) | + +| ![][image30] | 置信区间 | 心脏指数 (Cardiac Index) | + +**⚠️ 提取策略:** 不能只看字符,要看**组合**。 + +* P 单独出现且数值在 0-1 之间 \-\> P 值。 +* t 单独出现且数值 \> 0 \-\> t 值。 +* CI 后面跟着括号 (1.2-3.4) \-\> 置信区间。 + +## **5\. Python 字符串清洗工具箱 (Cleaner Utils)** + +建议在 DocxTableExtractor 中集成以下清洗函数: + +import re + +def clean\_statistical\_text(text): + if not text: + return "" + + \# 1\. 归一化负号 (CRITICAL) + text \= text.replace('\\u2212', '-').replace('\\u2013', '-').replace('\\u2014', '-') + + \# 2\. 归一化卡方 (Chi-square) + \# 处理 Symbol 字体的 'c'2 (需配合 run.font 检查,此处仅处理 Unicode) + text \= text.replace('\\u03c72', 'chi-square') + text \= text.replace('\\u03c7\\u00b2', 'chi-square') + text \= re.sub(r'\[Xxχ\]\\^?2', 'chi-square', text) \# 正则匹配常见变体 + + \# 3\. 归一化加减号 + text \= text.replace('\\u00b1', '+/-') + + \# 4\. 归一化比较符 + text \= text.replace('≤', '\<=').replace('≥', '\>=') + + \# 5\. 去除不可见字符 (Zero-width space 等) + text \= re.sub(r'\[\\u200b\\u200c\\u200d\\ufeff\]', '', text) + + return text.strip() + +## **6\. 总结** + +在 Word 提取中,最大的“鬼怪”不是复杂的 ![][image1],而是: + +1. **假的负号**(导致 float() 崩溃)。 +2. **Symbol 字体**(导致 ![][image2] 变成 a)。 +3. 
**多段落换行**(上一节已解决)。 + +只要处理好这三点,99% 的统计表格都能被正确解析。 + +[image1]: + +[image2]: + +[image3]: + +[image4]: + +[image5]: + +[image6]: + +[image7]: + +[image8]: + +[image9]: + +[image10]: + +[image11]: + +[image12]: + +[image13]: + +[image14]: + +[image15]: + +[image16]: + +[image17]: + +[image18]: + +[image19]: + +[image20]: + +[image21]: + +[image22]: + +[image23]: + +[image24]: + +[image25]: + +[image26]: + +[image27]: + +[image28]: + +[image29]: + +[image30]: \ No newline at end of file diff --git a/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md new file mode 100644 index 00000000..ea7b7a1a --- /dev/null +++ b/docs/03-业务模块/RVW-稿件审查系统/06-开发记录/务实版.md @@ -0,0 +1,149 @@ +# **RVW V2.0 表格提取疑难杂症专项解决方案 (v1.1 务实版)** + +**问题焦点:** Word 表格“隐性多行”(单元格内多段落)导致的提取与验证错位 **核心策略:** **提取层保持原貌,验证层“懒分裂” (Lazy Split)** **技术栈:** Python (python-docx, pandas) + +## **1\. 核心判断:技术选型定调** + +| 维度 | 方案 A: 视觉模型 (VLM) | 方案 B: 结构重组 (预分裂) | 方案 C: 懒分裂 (推荐) | +| :---- | :---- | :---- | :---- | +| **原理** | 用 GPT-4V 截图识别 | 提取时把 Table 拆成 N 倍行 | **提取保持 \\n,验证时 split** | +| **准确性** | 低 (幻觉/小数点风险) | 中 (容易破坏合并单元格结构) | **高 (数据无损,逻辑灵活)** | +| **复杂度** | 高 (GPU/Prompt) | 高 (重构 DataFrame 结构) | **低 (仅在 Validator 中处理)** | +| **前端适配** | 难 (无法定位) | 难 (需定制虚拟行渲染) | **易 (原生 HTML \<br\>)** | + +**最终决策:** + +1. **坚决不用视觉模型**:数值准确性是底线。 +2. **放弃“预分裂”**:不在提取阶段破坏表格的物理结构(Row/Span),避免引入元数据丢失风险。 +3. **采用“懒分裂”**:在验证逻辑中,针对特定单元格内容进行 split('\\n'),实现细粒度验证。 + +## **2\. 
提取层规范 (Extractor Layer)** + +**目标**:忠实还原 Word 文档的物理结构,不自作聪明地拆行。 + +### **2.1 Python 实现逻辑** + +在 DocxTableExtractor 中,对于单元格内的多段落,直接使用换行符 \\n 连接。 + +def extract\_cell\_text(cell): + """ + 提取单元格文本,保留段落结构 + """ + \# 过滤掉完全空白的段落,保留有内容的段落 + paragraphs \= \[p.text.strip() for p in cell.paragraphs if p.text.strip()\] + return "\\n".join(paragraphs) + +**输出数据结构示例 (JSON)**: + +{ + "row\_index": 3, + "cells": \[ + { "text": "并发症\\n颅内出血\\n牙龈出血" }, // Col 0 + { "text": "277 (14.65)\\n85 (4.49)\\n94 (4.97)" }, // Col 1 + { "text": "χ²=5.687\\nχ²=0.003\\nχ²=13.745" }, // Col 3 (统计值) + { "text": "0.017\\n0.01\\n\<0.001" } // Col 4 (P值) + \] +} + +## **3\. 验证层规范 (Validator Layer)** + +**核心逻辑:** 验证器在读取数据时,动态检测是否存在多行内容。如果存在,则在内存中“临时分裂”并逐一验证。 + +### **3.1 懒分裂验证算法 (Lazy Verification Logic)** + +def verify\_row\_statistics(row\_data, col\_map): + """ + 验证单行数据的统计逻辑(支持隐性多行) + """ + issues \= \[\] + + \# 1\. 获取目标单元格的原始文本 + \# 假设我们要验证 Col 1 (Group A) vs Col 2 (Group B) \-\> P Value + cell\_a\_text \= row\_data\[col\_map\['group\_a'\]\] + cell\_b\_text \= row\_data\[col\_map\['group\_b'\]\] + cell\_p\_text \= row\_data\[col\_map\['p\_value'\]\] + + \# 2\. 懒分裂 (Lazy Split) + lines\_a \= cell\_a\_text.split('\\n') + lines\_b \= cell\_b\_text.split('\\n') + lines\_p \= cell\_p\_text.split('\\n') + + \# 3\. 确定对齐基准(取最大行数) + max\_lines \= max(len(lines\_a), len(lines\_b), len(lines\_p)) + + \# 4\. 
逐行验证 (Line-by-Line Validation) + for i in range(max\_lines): + \# 安全获取当前行的数据(处理长度不一致情况) + val\_a \= lines\_a\[i\] if i \< len(lines\_a) else "" + val\_b \= lines\_b\[i\] if i \< len(lines\_b) else "" + + \# P 值匹配策略: + \# 如果 P 值列只有 1 行,但数据有 N 行 \-\> 广播机制 (Broadcast) + \# 如果 P 值列有 N 行 \-\> 一一对应 (One-to-One) + if len(lines\_p) \== 1 and max\_lines \> 1: + val\_p \= lines\_p\[0\] \# 策略 A: 共享 P 值 + else: + val\_p \= lines\_p\[i\] if i \< len(lines\_p) else "" \# 策略 B: 独立 P 值 + + \# 跳过空行 + if not val\_a or not val\_b or not val\_p: + continue + + \# 执行具体的统计验证 + \# 传入 line\_index=i 以便报错时定位 + error \= validate\_single\_line(val\_a, val\_b, val\_p, line\_index=i) + if error: + issues.append(error) + + return issues + +### **3.2 优势分析** + +1. **兼容性强**:完美支持您截图中的 颅内出血 | 85 | 90 | P=0.01 这种每行独立 P 值的场景。 +2. **鲁棒性**:如果只有第一行有 P 值(合并单元格视觉效果),代码中的 Broadcast 逻辑也能兜底。 +3. **定位精准**:报错信息可以包含 line\_index,告诉前端是单元格里的第几行出错了。 + +## **4\. 前端渲染规范 (Frontend Layer)** + +**目标**:使用最简单的 Web 技术还原 Word 样式,避免过度设计。 + +### **4.1 HTML 渲染策略** + +后端返回的 html 字段中,直接将 \\n 替换为 \<br\>。 + +**Python 端处理:** + +def generate\_html\_cell(text): + \# 转义 HTML 特殊字符,并将换行转为 \<br\> + safe\_text \= html.escape(text) + return safe\_text.replace("\\n", "\<br\>") + +**前端展示效果:** + +\<td\> + 277 (14.65)\<br\> + 85 (4.49)\<br\> + 94 (4.97) +\</td\> + +### **4.2 错误高亮策略** + +由于我们不再拆分表格行(DOM 结构),高亮的最小单位是 **Cell(单元格)**。 + +* **交互设计**: + * 当发现第 2 行子数据错误时,**高亮整个单元格**。 + * **Tooltip 提示**:鼠标悬停时,显示具体错误信息:“第 2 行数据 P 值校验不通过”。 +* **进阶优化(V2.1 可选)**: + * 如果确实需要高亮某一行,Python 生成 HTML 时可以用 \<span\> 包裹每一行: \<span\>277 (14.65)\</span\>\<br\>\<span\>85 (4.49)\</span\> + * 但 MVP 阶段建议**只高亮单元格**,性价比最高。 + +## **5\. 
总结** + +| 模块 | 核心动作 | 复杂度 | +| :---- | :---- | :---- | +| **Python 提取** | 保持 \\n,不拆行,输出标准 JSON | ⭐ (低) | +| **Python 验证** | split('\\n'),循环对齐,独立计算 | ⭐⭐ (中) | +| **前端渲染** | 使用 \ 换行,CSS 控制对齐 | ⭐ (低) | +| **前端高亮** | 高亮整个单元格,Tooltip 说明行号 | ⭐ (低) | + +**这是目前最务实、风险最低的实施路径。** 请开发团队以此为准。 \ No newline at end of file diff --git a/extraction_service/forensics/api.py b/extraction_service/forensics/api.py index 75a82e25..d2303a56 100644 --- a/extraction_service/forensics/api.py +++ b/extraction_service/forensics/api.py @@ -173,7 +173,7 @@ async def analyze_docx( f"耗时: {execution_time_ms}ms" ) - return JSONResponse(content=result.model_dump()) + return JSONResponse(content=result.model_dump(by_alias=True)) except HTTPException: raise diff --git a/extraction_service/forensics/config.py b/extraction_service/forensics/config.py index 7dace13c..d8ceec20 100644 --- a/extraction_service/forensics/config.py +++ b/extraction_service/forensics/config.py @@ -44,6 +44,12 @@ EFFECT_SIZE_PATTERN = re.compile( re.IGNORECASE ) +# 卡方值匹配,如 "χ²=57.519" 或 "2=57.519" 或 "χ2=57.519" +CHI_SQUARE_PATTERN = re.compile( + r"(?:χ[²2]|[χx]2|2)\s*[=:]\s*(\d+\.?\d*)", + re.IGNORECASE +) + # ==================== 统计方法检测 ==================== diff --git a/extraction_service/forensics/extractor.py b/extraction_service/forensics/extractor.py index 6b2a3fa9..9a981900 100644 --- a/extraction_service/forensics/extractor.py +++ b/extraction_service/forensics/extractor.py @@ -225,8 +225,8 @@ class DocxTableExtractor: if col_idx >= num_cols: break - # 获取单元格文本 - cell_text = self._get_cell_text(cell) + # 获取单元格文本(保留换行符用于 HTML 显示) + cell_text = self._get_cell_text(cell, use_newline=True) # 检测合并范围 # python-docx 中合并单元格会重复出现同一个 cell 对象 @@ -253,13 +253,123 @@ class DocxTableExtractor: return data - def _get_cell_text(self, cell: _Cell) -> str: + # Symbol 字体字符映射表(Word 使用 Symbol 字体表示希腊字母等) + SYMBOL_CHAR_MAP = { + 'F063': 'χ', # chi + 'F032': '²', # superscript 2 + 'F061': 'α', # alpha + 'F062': 'β', # beta + 'F067': 'γ', # gamma 
+ 'F064': 'δ', # delta + 'F065': 'ε', # epsilon + 'F06D': 'μ', # mu + 'F073': 'σ', # sigma + 'F070': 'π', # pi + 'F0B2': '²', # another superscript 2 encoding + } + + def _clean_statistical_text(self, text: str) -> str: + """ + 清洗统计学文本中的特殊字符 + + 关键清洗: + 1. 负号归一化(最重要!防止 float() 崩溃) + 2. 比较符归一化 + 3. 零宽字符清理 + """ + if not text: + return "" + + # 1. 负号归一化(极高危!) + # Word 会自动把连字符转成破折号或数学减号,导致 float() 报错 + text = text.replace('\u2212', '-') # 数学减号 (Minus Sign) + text = text.replace('\u2013', '-') # En Dash + text = text.replace('\u2014', '-') # Em Dash + text = text.replace('\u2010', '-') # Hyphen + text = text.replace('\u2011', '-') # Non-Breaking Hyphen + text = text.replace('\u00ad', '-') # Soft Hyphen + + # 2. 比较符归一化 + text = text.replace('\u2264', '<=') # ≤ + text = text.replace('\u2265', '>=') # ≥ + text = text.replace('\u2260', '!=') # ≠ + text = text.replace('\u2248', '~=') # ≈ + + # 3. 加减号归一化 + # 保留 ± 原样,因为它在统计学中有特定含义(如 mean±SD) + # text = text.replace('\u00b1', '+/-') # ± + + # 4. 乘号归一化 + text = text.replace('\u00d7', 'x') # × + text = text.replace('\u2217', '*') # ∗ (asterisk operator) + + # 5. 
零宽字符清理 + text = text.replace('\u200b', '') # Zero-Width Space + text = text.replace('\u200c', '') # Zero-Width Non-Joiner + text = text.replace('\u200d', '') # Zero-Width Joiner + text = text.replace('\ufeff', '') # BOM / Zero-Width No-Break Space + text = text.replace('\u00a0', ' ') # Non-Breaking Space -> 普通空格 + + return text + + def _get_cell_text(self, cell: _Cell, use_newline: bool = False) -> str: """ 获取单元格文本(合并多个段落) + + Args: + cell: Word 单元格对象 + use_newline: 是否使用换行符连接段落(用于 HTML 显示) + + 注意:会处理 Word 的 符号字符(如 χ² 等) """ paragraphs = cell.paragraphs - texts = [p.text.strip() for p in paragraphs] - return " ".join(texts).strip() + texts = [] + + for para in paragraphs: + # 使用增强的文本提取(处理符号字符) + para_text = self._extract_paragraph_text(para) + if para_text.strip(): + texts.append(para_text.strip()) + + separator = "\n" if use_newline else " " + raw_text = separator.join(texts).strip() + + # 清洗统计学特殊字符(负号归一化等) + return self._clean_statistical_text(raw_text) + + def _extract_paragraph_text(self, para: Paragraph) -> str: + """ + 从段落中提取完整文本,包括 符号字符 + + Word 使用 表示 χ 等符号, + python-docx 的 paragraph.text 不会提取这些内容。 + """ + from docx.oxml.ns import qn + + text_parts = [] + + # 遍历段落中的所有 run 元素 + for run in para._p.iter(): + # 处理普通文本 + if run.tag == qn('w:t'): + text_parts.append(run.text or '') + + # 处理符号字符 + elif run.tag == qn('w:sym'): + font = run.get(qn('w:font')) + char_code = run.get(qn('w:char')) + + if font == 'Symbol' and char_code: + # 查找映射 + unicode_char = self.SYMBOL_CHAR_MAP.get(char_code.upper(), '') + if unicode_char: + text_parts.append(unicode_char) + else: + # 未知符号,记录警告 + logger.debug(f"Unknown Symbol char: {char_code}") + text_parts.append(f'[SYM:{char_code}]') + + return ''.join(text_parts) def _generate_html( self, @@ -296,8 +406,10 @@ class DocxTableExtractor: html_parts.append(" ") for col_idx, cell in enumerate(row, start=1): coord = f"R{row_idx}C{col_idx}" + # 为每个子行添加 span 标记,支持细粒度高亮 + cell_html = self._escape_html_with_subrows(cell, coord) 
html_parts.append( - f' {self._escape_html(cell)}' + f' {cell_html}' ) html_parts.append(" ") html_parts.append(" ") @@ -307,7 +419,43 @@ class DocxTableExtractor: return "\n".join(html_parts) def _escape_html(self, text: str) -> str: - """转义 HTML 特殊字符""" + """转义 HTML 特殊字符,并将换行符转换为
""" + escaped = ( + text + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + .replace("'", "'") + ) + # 将换行符转换为
标签,保留表格中的多行结构 + return escaped.replace("\n", "
") + + def _escape_html_with_subrows(self, text: str, coord: str) -> str: + """ + 转义 HTML 并为每个子行添加 span 标记,支持细粒度高亮 + + 例如:单元格内容 "0.017\n0.01\n<0.001" 会生成: + 0.017
+ 0.01
+ <0.001 + """ + lines = text.split("\n") + if len(lines) == 1: + # 单行内容,直接转义 + return self._escape_single(text) + + # 多行内容,为每行添加 span + result_parts = [] + for idx, line in enumerate(lines, start=1): + escaped_line = self._escape_single(line) + subcoord = f"{coord}S{idx}" + result_parts.append(f'{escaped_line}') + + return "
".join(result_parts) + + def _escape_single(self, text: str) -> str: + """转义单行文本的 HTML 特殊字符""" return ( text .replace("&", "&") diff --git a/extraction_service/forensics/types.py b/extraction_service/forensics/types.py index 1df79165..71fafde5 100644 --- a/extraction_service/forensics/types.py +++ b/extraction_service/forensics/types.py @@ -4,7 +4,7 @@ 定义所有数据结构,确保类型安全和接口一致性。 """ -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_serializer from typing import List, Dict, Any, Optional from enum import Enum @@ -59,15 +59,30 @@ class ForensicsConfig(BaseModel): class CellLocation(BaseModel): - """单元格位置(R1C1 坐标)""" - table_id: str = Field(..., description="表格 ID,如 tbl_0") + """单元格位置(R1C1 坐标),支持单元格内子行定位""" + table_id: str = Field(..., alias="tableId", description="表格 ID,如 tbl_0") row: int = Field(..., description="行号,从 1 开始") col: int = Field(..., description="列号,从 1 开始") + subrow: Optional[int] = Field(None, description="单元格内子行号,从 1 开始(用于多行单元格)") - @property - def cell_ref(self) -> str: - """返回 R1C1 格式的坐标""" - return f"R{self.row}C{self.col}" + model_config = {"populate_by_name": True} + + @model_serializer + def serialize(self) -> Dict[str, Any]: + """序列化时自动添加 cellRef 字段,支持子行坐标""" + # 基础坐标:R{row}C{col} + # 子行坐标:R{row}C{col}S{subrow} + cell_ref = f"R{self.row}C{self.col}" + if self.subrow is not None: + cell_ref += f"S{self.subrow}" + + return { + "tableId": self.table_id, + "row": self.row, + "col": self.col, + "subrow": self.subrow, + "cellRef": cell_ref + } class Issue(BaseModel): @@ -84,26 +99,30 @@ class TableData(BaseModel): id: str = Field(..., description="表格 ID,如 tbl_0") caption: Optional[str] = Field(None, description="表格标题") type: Optional[str] = Field(None, description="表格类型:BASELINE/OUTCOME/OTHER") - row_count: int = Field(..., description="行数") - col_count: int = Field(..., description="列数") + row_count: int = Field(..., alias="rowCount", description="行数") + col_count: int = Field(..., alias="colCount", description="列数") 
html: str = Field(..., description="预渲染的 HTML 片段") data: List[List[str]] = Field(..., description="二维数组数据") issues: List[Issue] = Field(default_factory=list, description="该表格的问题列表") skipped: bool = Field(default=False, description="是否被跳过(超限)") - skip_reason: Optional[str] = Field(None, description="跳过原因") + skip_reason: Optional[str] = Field(None, alias="skipReason", description="跳过原因") + + model_config = {"populate_by_name": True} class ForensicsResult(BaseModel): """数据侦探分析结果""" success: bool = Field(..., description="是否成功") - methods_found: List[str] = Field(default_factory=list, description="检测到的统计方法") + methods_found: List[str] = Field(default_factory=list, alias="methodsFound", description="检测到的统计方法") tables: List[TableData] = Field(default_factory=list, description="表格列表") - total_issues: int = Field(default=0, description="总问题数") - error_count: int = Field(default=0, description="ERROR 级别问题数") - warning_count: int = Field(default=0, description="WARNING 级别问题数") - execution_time_ms: int = Field(default=0, description="执行时间(毫秒)") + total_issues: int = Field(default=0, alias="totalIssues", description="总问题数") + error_count: int = Field(default=0, alias="errorCount", description="ERROR 级别问题数") + warning_count: int = Field(default=0, alias="warningCount", description="WARNING 级别问题数") + execution_time_ms: int = Field(default=0, alias="executionTimeMs", description="执行时间(毫秒)") error: Optional[str] = Field(None, description="错误信息(如果失败)") - fallback_available: bool = Field(default=True, description="是否可降级执行") + fallback_available: bool = Field(default=True, alias="fallbackAvailable", description="是否可降级执行") + + model_config = {"populate_by_name": True} class ExtractionError(Exception): diff --git a/extraction_service/forensics/validator.py b/extraction_service/forensics/validator.py index 4635a080..27bda667 100644 --- a/extraction_service/forensics/validator.py +++ b/extraction_service/forensics/validator.py @@ -47,6 +47,7 @@ from .config import ( 
MEAN_SD_PAREN_PATTERN, CI_PATTERNS, EFFECT_SIZE_PATTERN, + CHI_SQUARE_PATTERN, DEFAULT_TOLERANCE_PERCENT, PVALUE_ERROR_THRESHOLD, PVALUE_WARNING_THRESHOLD, @@ -54,6 +55,43 @@ from .config import ( ) +def _clean_number_string(text: str) -> str: + """ + 清洗数值字符串中的特殊字符,防止 float() 崩溃 + + 关键清洗:负号归一化(Word 会把 - 转成数学减号或破折号) + """ + if not text: + return "" + + # 负号归一化(防止 float() 崩溃) + text = text.replace('\u2212', '-') # 数学减号 (Minus Sign) + text = text.replace('\u2013', '-') # En Dash + text = text.replace('\u2014', '-') # Em Dash + text = text.replace('\u2010', '-') # Hyphen + text = text.replace('\u2011', '-') # Non-Breaking Hyphen + + # 零宽字符清理 + text = text.replace('\u200b', '') # Zero-Width Space + text = text.replace('\u00a0', ' ') # Non-Breaking Space -> 普通空格 + + return text.strip() + + +def _safe_float(text: str) -> Optional[float]: + """ + 安全的 float 转换,处理特殊字符 + + Returns: + 转换成功返回浮点数,失败返回 None + """ + try: + cleaned = _clean_number_string(text) + return float(cleaned) + except (ValueError, TypeError): + return None + + class ArithmeticValidator: """ L1 算术自洽性验证器 @@ -214,20 +252,21 @@ class ArithmeticValidator: - 纯数字 "45" - 带逗号 "1,234" - 带空格 "1 234" + - 负数(含特殊负号字符) """ if not text: return None - # 移除常见分隔符 - cleaned = text.strip().replace(",", "").replace(" ", "") + # 先清洗特殊字符(负号归一化等) + cleaned = _clean_number_string(text) - # 尝试提取第一个数字 - match = re.match(r"^(\d+(?:\.\d+)?)", cleaned) + # 移除常见分隔符 + cleaned = cleaned.replace(",", "").replace(" ", "") + + # 尝试提取数字(支持负数) + match = re.match(r"^(-?\d+(?:\.\d+)?)", cleaned) if match: - try: - return float(match.group(1)) - except ValueError: - return None + return _safe_float(match.group(1)) return None @@ -340,6 +379,11 @@ class StatValidator: ttest_issues = self._validate_ttest(table) issues.extend(ttest_issues) + # 2.5. 卡方检验逆向验证 + if SCIPY_AVAILABLE: + chi2_issues = self._validate_chi_square(table) + issues.extend(chi2_issues) + # 3. 
SE 三角验证(终审提权:回归系数 CI↔P 一致性) se_issues = self._validate_se_triangle(table) issues.extend(se_issues) @@ -364,68 +408,106 @@ class StatValidator: - 若 95% CI 不跨越 1.0(如 1.1-1.5)→ P 值必须 < 0.05 违反此规则 = 数据逻辑矛盾 + + 改进:支持多行单元格的 subrow 精确定位 """ issues: List[Issue] = [] data = table.data + if len(data) < 2: + return issues + + header = data[0] if data else [] + pvalue_col_idx = self._find_pvalue_column(header) + for row_idx, row in enumerate(data[1:], start=2): + # 获取 P 值列内容(可能有多行) + pvalue_cell = row[pvalue_col_idx] if pvalue_col_idx < len(row) else "" + pvalue_lines = pvalue_cell.split("\n") if pvalue_cell else [] + + # 获取第一列内容(用于描述) + first_cell_lines = row[0].split("\n") if row else [] + + # 整行文本用于查找 CI row_text = " ".join(row) - # 查找 CI(使用增强的 CI 解析) - ci_result = self._parse_ci(row_text) - if ci_result is None: + # 查找所有 CI + all_ci_results = [] + for pattern in CI_PATTERNS: + for match in pattern.finditer(row_text): + ci_lower = _safe_float(match.group(1)) + ci_upper = _safe_float(match.group(2)) + if ci_lower is not None and ci_upper is not None and ci_lower < ci_upper: + all_ci_results.append((ci_lower, ci_upper)) + + if not all_ci_results: + # 回退到单个 CI 解析 + ci_result = self._parse_ci(row_text) + if ci_result: + all_ci_results.append(ci_result) + + if not all_ci_results: continue - ci_lower, ci_upper = ci_result - - # 查找 P 值 - pvalue = self._parse_pvalue(row_text) - if pvalue is None: - continue - - # 检查逻辑一致性 - ci_crosses_one = ci_lower <= 1.0 <= ci_upper - p_significant = pvalue < 0.05 - - # 矛盾情况 - if ci_crosses_one and p_significant: - # CI 跨越 1 但 P < 0.05,矛盾 - issues.append(Issue( - severity=Severity.ERROR, - type=IssueType.STAT_CI_PVALUE_CONFLICT, - message=f"CI 与 P 值逻辑矛盾: 95% CI ({ci_lower}-{ci_upper}) 跨越 1.0,但 P={pvalue} < 0.05", - location=CellLocation( - table_id=table.id, - row=row_idx, - col=1 # 整行问题 - ), - evidence={ - "ci_lower": ci_lower, - "ci_upper": ci_upper, - "ci_crosses_one": ci_crosses_one, - "pvalue": pvalue, - "p_significant": p_significant - } - 
)) - elif not ci_crosses_one and not p_significant: - # CI 不跨越 1 但 P ≥ 0.05,矛盾 - issues.append(Issue( - severity=Severity.ERROR, - type=IssueType.STAT_CI_PVALUE_CONFLICT, - message=f"CI 与 P 值逻辑矛盾: 95% CI ({ci_lower}-{ci_upper}) 不跨越 1.0,但 P={pvalue} ≥ 0.05", - location=CellLocation( - table_id=table.id, - row=row_idx, - col=1 - ), - evidence={ - "ci_lower": ci_lower, - "ci_upper": ci_upper, - "ci_crosses_one": ci_crosses_one, - "pvalue": pvalue, - "p_significant": p_significant - } - )) + # 遍历 P 值行进行验证 + for line_idx, pvalue_line in enumerate(pvalue_lines): + pvalue = self._parse_pvalue_flexible(pvalue_line) + if pvalue is None: + continue + + # 获取行描述 + row_desc = first_cell_lines[line_idx] if line_idx < len(first_cell_lines) else f"第{line_idx+1}项" + + # 使用对应的 CI(如果有多个 CI,按顺序匹配) + ci_idx = min(line_idx, len(all_ci_results) - 1) + ci_lower, ci_upper = all_ci_results[ci_idx] + + # 检查逻辑一致性 + ci_crosses_one = ci_lower <= 1.0 <= ci_upper + p_significant = pvalue < 0.05 + + # 计算 subrow 索引 + subrow_idx = line_idx + 1 if len(pvalue_lines) > 1 else None + + # 矛盾情况 + if ci_crosses_one and p_significant: + issues.append(Issue( + severity=Severity.ERROR, + type=IssueType.STAT_CI_PVALUE_CONFLICT, + message=f"CI 与 P 值逻辑矛盾 [{row_desc.strip()}]: 95% CI ({ci_lower}-{ci_upper}) 跨越 1.0,但 P={pvalue} < 0.05", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1, + subrow=subrow_idx + ), + evidence={ + "ci_lower": ci_lower, + "ci_upper": ci_upper, + "ci_crosses_one": ci_crosses_one, + "pvalue": pvalue, + "p_significant": p_significant + } + )) + elif not ci_crosses_one and not p_significant: + issues.append(Issue( + severity=Severity.ERROR, + type=IssueType.STAT_CI_PVALUE_CONFLICT, + message=f"CI 与 P 值逻辑矛盾 [{row_desc.strip()}]: 95% CI ({ci_lower}-{ci_upper}) 不跨越 1.0,但 P={pvalue} ≥ 0.05", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1, + subrow=subrow_idx + ), + evidence={ + "ci_lower": ci_lower, + "ci_upper": 
ci_upper, + "ci_crosses_one": ci_crosses_one, + "pvalue": pvalue, + "p_significant": p_significant + } + )) return issues @@ -437,6 +519,11 @@ class StatValidator: 与报告的 P 值进行对比。 公式: t = (M1 - M2) / sqrt(SD1²/n1 + SD2²/n2) + + 改进: + 1. 智能样本量提取(表头、行首、上下文) + 2. 支持多种 Mean±SD 格式 + 3. 支持多行单元格的 subrow 精确定位 """ issues: List[Issue] = [] @@ -447,90 +534,433 @@ class StatValidator: if len(data) < 2: return issues + header = data[0] if data else [] + + # 预先从表头提取样本量 + n1, n2 = self._extract_sample_sizes_from_header(header) + + # 查找 P 值列的索引 + pvalue_col_idx = self._find_pvalue_column(header) + # 查找包含组比较数据的行 for row_idx, row in enumerate(data[1:], start=2): - # 尝试提取同一行中的两组数据 - mean_sd_matches = list(MEAN_SD_PATTERN.finditer(" ".join(row))) + row_text = " ".join(row) - if len(mean_sd_matches) >= 2: - # 找到至少两组 Mean±SD 数据 - try: - m1, sd1 = float(mean_sd_matches[0].group(1)), float(mean_sd_matches[0].group(2)) - m2, sd2 = float(mean_sd_matches[1].group(1)), float(mean_sd_matches[1].group(2)) - - # 提取 P 值 - row_text = " ".join(row) - pvalue = self._parse_pvalue(row_text) - + # 尝试提取同一行中的两组 Mean±SD 数据 + mean_sd_matches = list(MEAN_SD_PATTERN.finditer(row_text)) + + # 如果没找到,尝试括号格式 + if len(mean_sd_matches) < 2: + mean_sd_matches = list(MEAN_SD_PAREN_PATTERN.finditer(row_text)) + + if len(mean_sd_matches) < 2: + continue + + # 找到至少两组 Mean±SD 数据 + try: + m1 = _safe_float(mean_sd_matches[0].group(1)) + sd1 = _safe_float(mean_sd_matches[0].group(2)) + m2 = _safe_float(mean_sd_matches[1].group(1)) + sd2 = _safe_float(mean_sd_matches[1].group(2)) + + if None in (m1, sd1, m2, sd2): + continue + + # 如果表头没有样本量,尝试从行中提取 + local_n1, local_n2 = n1, n2 + if local_n1 is None or local_n2 is None: + local_n1, local_n2 = self._extract_sample_sizes_from_row(row, header) + + # 仍然没有样本量,跳过 + if local_n1 is None or local_n2 is None: + continue + + # 计算 SE 和 t 值 + se = math.sqrt(sd1**2/local_n1 + sd2**2/local_n2) + if se == 0: + continue + + t_calc = abs(m1 - m2) / se + df = local_n1 + local_n2 - 2 + + # 计算 P 
值 + p_calc = 2 * (1 - stats.t.cdf(t_calc, df)) + + # 从 P 值列提取报告的 P 值 + pvalue_cell = row[pvalue_col_idx] if pvalue_col_idx < len(row) else "" + pvalue_lines = pvalue_cell.split("\n") if pvalue_cell else [] + + # 尝试从整行提取 P 值(如果 P 值列没有) + if not pvalue_lines or not any(pvalue_lines): + pvalue = self._parse_pvalue_flexible(row_text) if pvalue is None: continue + pvalue_lines = [str(pvalue)] + subrow_idx = None + pvalue_col = pvalue_col_idx + 1 + else: + # 遍历 P 值单元格的每一行 + for line_idx, pvalue_line in enumerate(pvalue_lines): + pvalue = self._parse_pvalue_flexible(pvalue_line) + if pvalue is None: + continue + + # 计算子行索引 + subrow_idx = line_idx + 1 if len(pvalue_lines) > 1 else None + pvalue_col = pvalue_col_idx + 1 + + # 比较 P 值 + p_diff = abs(p_calc - pvalue) + + # 获取行描述 + first_cell_lines = row[0].split("\n") if row else [] + row_desc = first_cell_lines[line_idx] if line_idx < len(first_cell_lines) else row[0][:20] if row else "" + + if p_diff > PVALUE_ERROR_THRESHOLD: + issues.append(Issue( + severity=Severity.ERROR, + type=IssueType.STAT_TTEST_PVALUE, + message=f"T 检验 P 值矛盾 [{row_desc.strip()}]: 报告 P={pvalue},计算 P={p_calc:.4f}(差异 {p_diff:.3f})", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col, + subrow=subrow_idx + ), + evidence={ + "group1": {"mean": m1, "sd": sd1, "n": local_n1}, + "group2": {"mean": m2, "sd": sd2, "n": local_n2}, + "t_calculated": round(t_calc, 3), + "df": df, + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + elif p_diff > PVALUE_WARNING_THRESHOLD: + issues.append(Issue( + severity=Severity.WARNING, + type=IssueType.STAT_TTEST_PVALUE, + message=f"T 检验 P 值轻微偏差 [{row_desc.strip()}]: 报告 P={pvalue},计算 P={p_calc:.4f}", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col, + subrow=subrow_idx + ), + evidence={ + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + continue # 已处理完此行的所有 P 值 + + 
# 单个 P 值的情况 + pvalue = self._parse_pvalue_flexible(pvalue_lines[0]) if pvalue_lines else None + if pvalue is None: + continue - # 尝试从表头获取样本量(简化处理,假设 n=30) - # 实际实现需要更复杂的表格解析 - n1, n2 = self._estimate_sample_sizes(table, row_idx) - - if n1 is None or n2 is None: - continue - - # 计算 t 值 - se = math.sqrt(sd1**2/n1 + sd2**2/n2) - if se == 0: - continue - - t_calc = abs(m1 - m2) / se - df = n1 + n2 - 2 - - # 计算 P 值 - p_calc = 2 * (1 - stats.t.cdf(t_calc, df)) + p_diff = abs(p_calc - pvalue) + + if p_diff > PVALUE_ERROR_THRESHOLD: + issues.append(Issue( + severity=Severity.ERROR, + type=IssueType.STAT_TTEST_PVALUE, + message=f"T 检验 P 值不一致: 报告 P={pvalue},计算 P={p_calc:.4f}(差异 {p_diff:.3f})", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1 + ), + evidence={ + "group1": {"mean": m1, "sd": sd1, "n": local_n1}, + "group2": {"mean": m2, "sd": sd2, "n": local_n2}, + "t_calculated": round(t_calc, 3), + "df": df, + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + elif p_diff > PVALUE_WARNING_THRESHOLD: + issues.append(Issue( + severity=Severity.WARNING, + type=IssueType.STAT_TTEST_PVALUE, + message=f"T 检验 P 值轻微偏差: 报告 P={pvalue},计算 P={p_calc:.4f}", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1 + ), + evidence={ + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + + except (ValueError, TypeError, ZeroDivisionError) as e: + logger.debug(f"T 检验验证失败: {e}") + continue + + return issues + + def _extract_sample_sizes_from_header(self, header: List[str]) -> Tuple[Optional[int], Optional[int]]: + """ + 从表头提取样本量 + + 支持格式: + - (n=50) + - n=50 + - N=50 + - (50例) + - 对照组(n=48) + """ + n_pattern = re.compile(r"[(\[(]?\s*[nN]\s*[=::]\s*(\d+)\s*[)\])]?") + n_pattern_cn = re.compile(r"[(\[(]?\s*(\d+)\s*例\s*[)\])]?") + + n_values = [] + for cell in header: + # 优先匹配 n=XX 格式 + match = n_pattern.search(cell) + if match: 
+ try: + n_values.append(int(match.group(1))) + except ValueError: + pass + continue + + # 尝试中文格式 + match = n_pattern_cn.search(cell) + if match: + try: + n_values.append(int(match.group(1))) + except ValueError: + pass + + if len(n_values) >= 2: + return n_values[0], n_values[1] + + return None, None + + def _extract_sample_sizes_from_row( + self, + row: List[str], + header: List[str] + ) -> Tuple[Optional[int], Optional[int]]: + """ + 从数据行提取样本量 + + 策略: + 1. 查找行首的 n 值 + 2. 查找与 Mean±SD 列对应的 n 列 + """ + row_text = " ".join(row) + n_pattern = re.compile(r"\(\s*[nN]\s*[=::]\s*(\d+)\s*\)") + + matches = n_pattern.findall(row_text) + if len(matches) >= 2: + try: + return int(matches[0]), int(matches[1]) + except ValueError: + pass + + return None, None + + def _validate_chi_square(self, table: TableData) -> List[Issue]: + """ + 卡方检验逆向验证 + + 从报告的 χ² 值和推断的自由度,反算 P 值,与报告值对比。 + + 原理: + - 查找 χ²=X.XXX 和对应的 P 值 + - 估计自由度(默认 df=1,适用于大多数 2x2 比较) + - 使用卡方分布计算 P 值 + - 与报告的 P 值对比 + + 特殊处理: + - 支持多段落单元格(一个单元格内多行数据) + - 支持 P 值列没有 "P=" 前缀的情况(直接是数值) + + 适用场景: + - 医学基线特征表(分类变量比较) + - 任何报告 χ² 值和 P 值的表格 + """ + issues: List[Issue] = [] + + if not SCIPY_AVAILABLE: + return issues + + data = table.data + if len(data) < 2: + return issues + + # 首先识别表头,找到 P 值列 + header = data[0] + pvalue_col_idx = self._find_pvalue_column(header) + chi2_col_idx = self._find_stat_column(header) + + for row_idx, row in enumerate(data[1:], start=2): + # 获取统计值和 P 值单元格 + stat_cell = row[chi2_col_idx] if chi2_col_idx < len(row) else "" + pvalue_cell = row[pvalue_col_idx] if pvalue_col_idx < len(row) else "" + + # 处理多行单元格:按换行符分割 + stat_lines = stat_cell.split("\n") if stat_cell else [] + pvalue_lines = pvalue_cell.split("\n") if pvalue_cell else [] + + # 逐行匹配卡方值和 P 值 + for line_idx in range(max(len(stat_lines), len(pvalue_lines))): + stat_line = stat_lines[line_idx] if line_idx < len(stat_lines) else "" + pvalue_line = pvalue_lines[line_idx] if line_idx < len(pvalue_lines) else "" + + # 查找 χ² 值 + chi2_match = 
CHI_SQUARE_PATTERN.search(stat_line) + if not chi2_match: + continue + + chi2_value = _safe_float(chi2_match.group(1)) + if chi2_value is None or chi2_value <= 0: + continue + + # 解析 P 值(支持多种格式) + pvalue = self._parse_pvalue_flexible(pvalue_line) + if pvalue is None: + continue + + # 默认 df=1(最常见的 2x2 比较场景) + df = 1 + + try: + # 使用卡方分布计算 P 值 + p_calc = 1 - stats.chi2.cdf(chi2_value, df) # 比较 P 值 p_diff = abs(p_calc - pvalue) - if p_diff > PVALUE_ERROR_THRESHOLD: - # 严重矛盾 + # 检查显著性是否一致 + p_significant_reported = pvalue < 0.05 + p_significant_calc = p_calc < 0.05 + significance_mismatch = p_significant_reported != p_significant_calc + + # 获取子行描述(从第一列提取) + first_cell_lines = row[0].split("\n") if row else [] + sub_row_desc = first_cell_lines[line_idx] if line_idx < len(first_cell_lines) else f"子行 {line_idx + 1}" + + # 计算子行索引(从 1 开始),用于前端精确高亮 + subrow_idx = line_idx + 1 if len(pvalue_lines) > 1 else None + + if significance_mismatch: issues.append(Issue( severity=Severity.ERROR, - type=IssueType.STAT_TTEST_PVALUE, - message=f"T 检验 P 值不一致: 报告 P={pvalue},计算 P={p_calc:.4f}(差异 {p_diff:.3f})", + type=IssueType.STAT_CHI2_PVALUE, + message=f"卡方检验 P 值矛盾 [{sub_row_desc.strip()}]: χ²={chi2_value}, 报告 P={pvalue}, 计算 P={p_calc:.4f},显著性不一致", location=CellLocation( table_id=table.id, row=row_idx, - col=1 + col=pvalue_col_idx + 1, + subrow=subrow_idx ), evidence={ - "group1": {"mean": m1, "sd": sd1, "n": n1}, - "group2": {"mean": m2, "sd": sd2, "n": n2}, - "t_calculated": round(t_calc, 3), + "chi2_value": chi2_value, "df": df, "p_calculated": round(p_calc, 4), "p_reported": pvalue, - "p_difference": round(p_diff, 4) + "p_difference": round(p_diff, 4), + "sub_row": sub_row_desc.strip(), + "significance_reported": "显著" if p_significant_reported else "不显著", + "significance_calculated": "显著" if p_significant_calc else "不显著" } )) - elif p_diff > PVALUE_WARNING_THRESHOLD: - # 可能是舍入误差 + elif p_diff > PVALUE_ERROR_THRESHOLD: issues.append(Issue( severity=Severity.WARNING, - 
type=IssueType.STAT_TTEST_PVALUE, - message=f"T 检验 P 值轻微偏差: 报告 P={pvalue},计算 P={p_calc:.4f}(可能是舍入误差)", + type=IssueType.STAT_CHI2_PVALUE, + message=f"卡方检验 P 值偏差 [{sub_row_desc.strip()}]: χ²={chi2_value}, 报告 P={pvalue}, 计算 P={p_calc:.4f},差异 {p_diff:.3f}", location=CellLocation( table_id=table.id, row=row_idx, - col=1 + col=pvalue_col_idx + 1, + subrow=subrow_idx ), evidence={ + "chi2_value": chi2_value, + "df": df, "p_calculated": round(p_calc, 4), "p_reported": pvalue, - "p_difference": round(p_diff, 4) + "p_difference": round(p_diff, 4), + "sub_row": sub_row_desc.strip() } )) - except (ValueError, TypeError, ZeroDivisionError) as e: - logger.debug(f"T 检验验证失败: {e}") + except (ValueError, ZeroDivisionError, TypeError) as e: + logger.debug(f"卡方检验验证失败: {e}") continue return issues + def _find_pvalue_column(self, header: List[str]) -> int: + """查找 P 值列的索引""" + p_keywords = ["p值", "pvalue", "p-value", "p 值", "sig"] + for idx, cell in enumerate(header): + cell_lower = cell.lower().strip() + for kw in p_keywords: + if kw in cell_lower: + return idx + # 默认最后一列 + return len(header) - 1 + + def _find_stat_column(self, header: List[str]) -> int: + """查找统计值列的索引(包含 χ²/t/Z 等)""" + stat_keywords = ["统计", "stat", "χ", "chi", "t值", "z值"] + for idx, cell in enumerate(header): + cell_lower = cell.lower().strip() + for kw in stat_keywords: + if kw in cell_lower: + return idx + # 默认倒数第二列 + return len(header) - 2 + + def _parse_pvalue_flexible(self, text: str) -> Optional[float]: + """ + 灵活解析 P 值 + + 支持格式: + - P=0.05 + - P<0.001 + - 0.05(直接数值) + - <0.001(全角符号) + """ + if not text: + return None + + # 先清洗特殊字符(负号归一化等) + text = _clean_number_string(text) + + # 先尝试标准 P 值格式 + match = PVALUE_PATTERN.search(text) + if match: + val = _safe_float(match.group(1)) + if val is not None: + return val + + # 处理 <0.001 或 <0.001 格式 + less_than_match = re.search(r"[<<]\s*(\d+\.?\d*)", text) + if less_than_match: + val = _safe_float(less_than_match.group(1)) + if val is not None: + return val + + # 
直接尝试解析为数字 + val = _safe_float(text) + if val is not None and 0 <= val <= 1: # P 值范围检查 + return val + + return None + + def _validate_se_triangle(self, table: TableData) -> List[Issue]: """ SE 三角验证(终审提权) @@ -543,6 +973,8 @@ class StatValidator: - P_calculated = 2 * (1 - norm.cdf(|Z|)) 若报告的 P 值与计算的 P 值严重不一致,则存在问题。 + + 改进:支持多行单元格的 subrow 精确定位 """ issues: List[Issue] = [] data = table.data @@ -550,102 +982,130 @@ class StatValidator: if not SCIPY_AVAILABLE: return issues + header = data[0] if data else [] + pvalue_col_idx = self._find_pvalue_column(header) + for row_idx, row in enumerate(data[1:], start=2): + # 获取 P 值列的内容(可能有多行) + pvalue_cell = row[pvalue_col_idx] if pvalue_col_idx < len(row) else "" + pvalue_lines = pvalue_cell.split("\n") if pvalue_cell else [] + + # 获取第一列内容(用于描述) + first_cell_lines = row[0].split("\n") if row else [] + + # 将整行连接起来查找 OR/HR/RR 和 CI row_text = " ".join(row) - # 查找 OR/HR/RR - effect_match = EFFECT_SIZE_PATTERN.search(row_text) - if not effect_match: + # 查找所有 OR/HR/RR(可能有多个) + effect_matches = list(EFFECT_SIZE_PATTERN.finditer(row_text)) + if not effect_matches: continue - try: - effect_size = float(effect_match.group(1)) - if effect_size <= 0: - continue - except (ValueError, TypeError): + # 查找所有 CI + ci_matches = [] + for pattern in CI_PATTERNS: + ci_matches.extend(list(pattern.finditer(row_text))) + + if not ci_matches: continue - # 查找 CI - ci_result = self._parse_ci(row_text) - if ci_result is None: - continue - - ci_lower, ci_upper = ci_result - - # 确保 CI 有效(正数且 lower < upper) - if ci_lower <= 0 or ci_upper <= 0 or ci_lower >= ci_upper: - continue - - # 查找报告的 P 值 - pvalue = self._parse_pvalue(row_text) - if pvalue is None: - continue - - try: - # SE 三角计算 - ln_effect = math.log(effect_size) - ln_ci_lower = math.log(ci_lower) - ln_ci_upper = math.log(ci_upper) - - # SE = (ln(CI_upper) - ln(CI_lower)) / 3.92 (for 95% CI) - se = (ln_ci_upper - ln_ci_lower) / 3.92 - - if se <= 0: + # 遍历 P 值行,尝试匹配对应的 OR/CI + for line_idx, pvalue_line in 
enumerate(pvalue_lines): + pvalue = self._parse_pvalue_flexible(pvalue_line) + if pvalue is None: continue - # Z = ln(OR) / SE - z = abs(ln_effect) / se + # 获取当前行的描述 + row_desc = first_cell_lines[line_idx] if line_idx < len(first_cell_lines) else f"第{line_idx+1}项" - # P = 2 * (1 - norm.cdf(|Z|)) - p_calc = 2 * (1 - stats.norm.cdf(z)) - - # 比较 P 值 - p_diff = abs(p_calc - pvalue) - - if p_diff > PVALUE_ERROR_THRESHOLD: - # 严重矛盾 - issues.append(Issue( - severity=Severity.ERROR, - type=IssueType.STAT_SE_TRIANGLE, - message=f"SE 三角验证不一致: 报告 P={pvalue},由 CI 反推 P={p_calc:.4f}(差异 {p_diff:.3f})", - location=CellLocation( - table_id=table.id, - row=row_idx, - col=1 - ), - evidence={ - "effect_size": effect_size, - "ci_lower": ci_lower, - "ci_upper": ci_upper, - "se_calculated": round(se, 4), - "z_calculated": round(z, 3), - "p_calculated": round(p_calc, 4), - "p_reported": pvalue, - "p_difference": round(p_diff, 4) - } - )) - elif p_diff > PVALUE_WARNING_THRESHOLD: - # 轻微偏差,可能是舍入误差 - issues.append(Issue( - severity=Severity.WARNING, - type=IssueType.STAT_SE_TRIANGLE, - message=f"SE 三角验证轻微偏差: 报告 P={pvalue},由 CI 反推 P={p_calc:.4f}(可能是舍入误差)", - location=CellLocation( - table_id=table.id, - row=row_idx, - col=1 - ), - evidence={ - "effect_size": effect_size, - "p_calculated": round(p_calc, 4), - "p_reported": pvalue, - "p_difference": round(p_diff, 4) - } - )) + # 使用第一个有效的 OR/CI 组合进行验证 + for effect_match in effect_matches: + effect_size = _safe_float(effect_match.group(1)) + if effect_size is None or effect_size <= 0: + continue - except (ValueError, ZeroDivisionError, TypeError) as e: - logger.debug(f"SE 三角验证失败: {e}") - continue + # 查找对应的 CI + ci_result = self._parse_ci(row_text) + if ci_result is None: + continue + + ci_lower, ci_upper = ci_result + + # 确保 CI 有效 + if ci_lower <= 0 or ci_upper <= 0 or ci_lower >= ci_upper: + continue + + try: + # SE 三角计算 + ln_effect = math.log(effect_size) + ln_ci_lower = math.log(ci_lower) + ln_ci_upper = math.log(ci_upper) + + # SE = 
(ln(CI_upper) - ln(CI_lower)) / 3.92 (for 95% CI) + se = (ln_ci_upper - ln_ci_lower) / 3.92 + + if se <= 0: + continue + + # Z = ln(OR) / SE + z = abs(ln_effect) / se + + # P = 2 * (1 - norm.cdf(|Z|)) + p_calc = 2 * (1 - stats.norm.cdf(z)) + + # 比较 P 值 + p_diff = abs(p_calc - pvalue) + + # 计算 subrow 索引 + subrow_idx = line_idx + 1 if len(pvalue_lines) > 1 else None + + if p_diff > PVALUE_ERROR_THRESHOLD: + issues.append(Issue( + severity=Severity.ERROR, + type=IssueType.STAT_SE_TRIANGLE, + message=f"SE 三角验证不一致 [{row_desc.strip()}]: OR={effect_size}, 报告 P={pvalue},由 CI 反推 P={p_calc:.4f}", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1, + subrow=subrow_idx + ), + evidence={ + "effect_size": effect_size, + "ci_lower": ci_lower, + "ci_upper": ci_upper, + "se_calculated": round(se, 4), + "z_calculated": round(z, 3), + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + elif p_diff > PVALUE_WARNING_THRESHOLD: + issues.append(Issue( + severity=Severity.WARNING, + type=IssueType.STAT_SE_TRIANGLE, + message=f"SE 三角验证轻微偏差 [{row_desc.strip()}]: OR={effect_size}, 报告 P={pvalue},计算 P={p_calc:.4f}", + location=CellLocation( + table_id=table.id, + row=row_idx, + col=pvalue_col_idx + 1, + subrow=subrow_idx + ), + evidence={ + "effect_size": effect_size, + "p_calculated": round(p_calc, 4), + "p_reported": pvalue, + "p_difference": round(p_diff, 4) + } + )) + + # 找到有效匹配后跳出 effect_match 循环 + break + + except (ValueError, ZeroDivisionError, TypeError) as e: + logger.debug(f"SE 三角验证失败: {e}") + continue return issues @@ -690,10 +1150,9 @@ class StatValidator: if not match: continue - try: - mean_val = float(match.group(1)) - sd_val = float(match.group(2)) - except (ValueError, TypeError): + mean_val = _safe_float(match.group(1)) + sd_val = _safe_float(match.group(2)) + if mean_val is None or sd_val is None: continue # 检查 SD > Mean(仅对 mean > 0 的情况) @@ -766,23 +1225,20 @@ class StatValidator: match = 
pattern.search(text) if match: try: - lower = float(match.group(1)) - upper = float(match.group(2)) - if lower < upper: # 基本合理性检查 + lower = _safe_float(match.group(1)) + upper = _safe_float(match.group(2)) + if lower is not None and upper is not None and lower < upper: return lower, upper - except (ValueError, TypeError, IndexError): + except IndexError: continue # 回退到原始的 CI_PATTERN match = CI_PATTERN.search(text) if match: - try: - lower = float(match.group(1)) - upper = float(match.group(2)) - if lower < upper: - return lower, upper - except (ValueError, TypeError): - pass + lower = _safe_float(match.group(1)) + upper = _safe_float(match.group(2)) + if lower is not None and upper is not None and lower < upper: + return lower, upper return None @@ -798,42 +1254,5 @@ class StatValidator: """ match = PVALUE_PATTERN.search(text) if match: - try: - return float(match.group(1)) - except (ValueError, TypeError): - pass + return _safe_float(match.group(1)) return None - - def _estimate_sample_sizes( - self, - table: TableData, - row_idx: int - ) -> Tuple[Optional[int], Optional[int]]: - """ - 尝试从表格中估计样本量 - - 策略: - 1. 查找表头中的 n 值 - 2. 查找 "(n=XX)" 格式 - 3. 
默认返回 None - """ - data = table.data - header = data[0] if data else [] - - # 从表头查找 (n=XX) 格式 - n_pattern = re.compile(r"\(?\s*n\s*[=:]\s*(\d+)\s*\)?", re.IGNORECASE) - - n_values = [] - for cell in header: - match = n_pattern.search(cell) - if match: - try: - n_values.append(int(match.group(1))) - except ValueError: - pass - - if len(n_values) >= 2: - return n_values[0], n_values[1] - - # 如果找不到,返回 None(不进行验证) - return None, None diff --git a/extraction_service/main.py b/extraction_service/main.py index 53a6c063..38846299 100644 --- a/extraction_service/main.py +++ b/extraction_service/main.py @@ -52,9 +52,6 @@ app.add_middleware( TEMP_DIR = Path(os.getenv("TEMP_DIR", "/tmp/extraction_service")) TEMP_DIR.mkdir(parents=True, exist_ok=True) -# 注册 RVW V2.0 数据侦探路由 -app.include_router(forensics_router) - # 导入服务模块 from services.pdf_extractor import extract_pdf_pymupdf from services.pdf_processor import extract_pdf, get_pdf_processing_strategy @@ -71,6 +68,7 @@ from services.doc_export_service import check_pandoc_available, convert_markdown # 新增:RVW V2.0 数据侦探模块 from forensics.api import router as forensics_router +app.include_router(forensics_router) # 兼容:nougat 相关(已废弃,保留空实现避免报错) def check_nougat_available(): return False diff --git a/frontend-v2/src/modules/rvw/components/EditorialReport.tsx b/frontend-v2/src/modules/rvw/components/EditorialReport.tsx index 7edf3199..e49ff493 100644 --- a/frontend-v2/src/modules/rvw/components/EditorialReport.tsx +++ b/frontend-v2/src/modules/rvw/components/EditorialReport.tsx @@ -66,7 +66,7 @@ export default function EditorialReport({ data }: EditorialReportProps) {
- {data.overall_score} + {Number(data.overall_score).toFixed(1)}
diff --git a/frontend-v2/src/modules/rvw/components/ForensicsReport.tsx b/frontend-v2/src/modules/rvw/components/ForensicsReport.tsx new file mode 100644 index 00000000..c3af88ec --- /dev/null +++ b/frontend-v2/src/modules/rvw/components/ForensicsReport.tsx @@ -0,0 +1,487 @@ +/** + * 数据验证报告组件 + * 展示 DataForensicsSkill 的表格验证结果 + */ +import { useState } from 'react'; +import { + AlertTriangle, + CheckCircle, + XCircle, + Info, + Table2, + FlaskConical, + ChevronDown, + ChevronUp, + MousePointerClick +} from 'lucide-react'; +import type { ForensicsResult, ForensicsIssue, ForensicsTable } from '../types'; + +interface ForensicsReportProps { + data: ForensicsResult; +} + +// 统计方法英文 -> 中文映射 +const METHOD_NAMES: Record = { + 'chi-square': '卡方检验', + 'mann-whitney': 'Mann-Whitney U 检验', + 't-test': 'T 检验', + 'anova': '方差分析', + 'fisher': 'Fisher 精确检验', + 'wilcoxon': 'Wilcoxon 检验', + 'kruskal-wallis': 'Kruskal-Wallis 检验', + 'mcnemar': 'McNemar 检验', + 'correlation': '相关性分析', + 'regression': '回归分析', + 'logistic': 'Logistic 回归', + 'cox': 'Cox 回归', + 'kaplan-meier': 'Kaplan-Meier 生存分析', +}; + +// 问题类型代码 -> 中文描述映射 +const ISSUE_TYPE_LABELS: Record = { + // L1 算术验证 + 'ARITHMETIC_PERCENT': '百分比计算错误', + 'ARITHMETIC_SUM': '合计计算错误', + 'ARITHMETIC_TOTAL': '总计行错误', + 'ARITHMETIC_MEAN': '均值计算错误', + + // L2 统计验证 + 'STAT_CHI2_PVALUE': '卡方检验 P 值', + 'STAT_TTEST_PVALUE': 'T 检验 P 值', + 'STAT_CI_PVALUE_CONFLICT': 'CI 与 P 值矛盾', + + // L2.5 一致性取证 + 'STAT_SE_TRIANGLE': 'SE 三角验证', + 'STAT_SD_GREATER_MEAN': 'SD 大于均值', + 'STAT_REGRESSION_CI_P': '回归 CI-P 不一致', + + // 一致性检查 + 'CONSISTENCY_DUPLICATE': '数据重复', + 'CONSISTENCY_MISMATCH': '数据不一致', + + // 提取问题 + 'EXTRACTION_WARNING': '提取警告', + 'TABLE_SKIPPED': '表格跳过', +}; + +export default function ForensicsReport({ data }: ForensicsReportProps) { + const [expandedTables, setExpandedTables] = useState>(new Set()); + const [highlightedCell, setHighlightedCell] = useState(null); + + // 防御性检查:确保所有数组和对象存在 + const tables = data?.tables || []; + const issues = 
data?.issues || []; + const methods = data?.methods || []; + const summary = data?.summary || { totalTables: 0, totalIssues: 0, errorCount: 0, warningCount: 0 }; + + // 创建 tableId -> caption 映射,用于显示友好的表格名称 + const tableIdToCaption: Record = {}; + tables.forEach((t, idx) => { + tableIdToCaption[t.id] = t.caption || `表格 ${idx + 1}`; + }); + + // 获取表格的友好名称 + const getTableName = (tableId: string | undefined): string => { + if (!tableId) return ''; + return tableIdToCaption[tableId] || tableId; + }; + + // 翻译统计方法名称为中文 + const translateMethod = (method: string): string => { + return METHOD_NAMES[method.toLowerCase()] || method; + }; + + // 翻译问题类型代码为中文 + const translateIssueType = (type: string): string => { + return ISSUE_TYPE_LABELS[type] || type; + }; + + const toggleTable = (tableId: string) => { + const newExpanded = new Set(expandedTables); + if (newExpanded.has(tableId)) { + newExpanded.delete(tableId); + } else { + newExpanded.add(tableId); + } + setExpandedTables(newExpanded); + }; + + const getSeverityIcon = (severity: ForensicsIssue['severity']) => { + switch (severity) { + case 'ERROR': + return ; + case 'WARNING': + return ; + case 'INFO': + return ; + } + }; + + const getSeverityColors = (severity: ForensicsIssue['severity']) => { + switch (severity) { + case 'ERROR': + return { bg: 'bg-red-50', border: 'border-red-200', text: 'text-red-700' }; + case 'WARNING': + return { bg: 'bg-amber-50', border: 'border-amber-200', text: 'text-amber-700' }; + case 'INFO': + return { bg: 'bg-blue-50', border: 'border-blue-200', text: 'text-blue-700' }; + } + }; + + const getOverallStatus = () => { + if (summary.errorCount > 0) { + return { label: '发现问题', color: 'text-red-600', bg: 'bg-red-500', icon: XCircle }; + } + if (summary.warningCount > 0) { + return { label: '需关注', color: 'text-amber-600', bg: 'bg-amber-500', icon: AlertTriangle }; + } + return { label: '数据正常', color: 'text-green-600', bg: 'bg-green-500', icon: CheckCircle }; + }; + + const status = 
getOverallStatus(); + const StatusIcon = status.icon; + + const handleCellClick = (cellRef: string | undefined) => { + if (cellRef) { + setHighlightedCell(highlightedCell === cellRef ? null : cellRef); + } + }; + + return ( +
+ {/* 总览卡片 */} +
+
+
+ {/* 状态图标 */} +
+
+ +
+ + {status.label} + +
+ + {/* 统计信息 */} +
+
+ +

数据验证报告

+
+

+ 已检测 {summary.totalTables} 张表格,发现 {summary.totalIssues} 个问题 + {methods.length > 0 && `,识别到统计方法:${methods.map(translateMethod).join('、')}`} +

+ + {/* 统计指标 */} +
+
+ + {summary.totalTables} 张表格 +
+ {summary.errorCount > 0 && ( +
+ + {summary.errorCount} 个错误 +
+ )} + {summary.warningCount > 0 && ( +
+ + {summary.warningCount} 个警告 +
+ )} + {summary.errorCount === 0 && summary.warningCount === 0 && ( +
+ + 未发现问题 +
+ )} +
+
+
+
+
+ + {/* 问题列表(按严重程度排序) */} + {issues.length > 0 && ( +
+
+

+ + 发现的问题 + + 共 {issues.length} 项 + +

+
+
+ {[...issues] + .sort((a, b) => { + const order = { ERROR: 0, WARNING: 1, INFO: 2 }; + return order[a.severity] - order[b.severity]; + }) + .map((issue, index) => { + const colors = getSeverityColors(issue.severity); + return ( +
handleCellClick(issue.location?.cellRef)} + > +
+ {getSeverityIcon(issue.severity)} +
+

{issue.message}

+ {issue.location && ( +

+ + {issue.location.tableId && getTableName(issue.location.tableId)} + {issue.location.cellRef && ` · 单元格 ${issue.location.cellRef}`} +

+ )} +
+ + {translateIssueType(issue.type)} + +
+
+ ); + })} +
+
+ )} + + {/* 表格详情 */} + {tables.length > 0 && ( +
+
+ +

表格详情

+ + 共 {tables.length} 张 + +
+ + {tables.map((table) => ( + toggleTable(table.id)} + highlightedCell={highlightedCell} + /> + ))} +
+ )} + + {/* 无表格提示 */} + {tables.length === 0 && ( +
+ +

未检测到表格数据

+

该文档可能不包含数据表格

+
+ )} +
+ ); +} + +/** + * 表格卡片组件 + */ +interface TableCardProps { + table: ForensicsTable; + expanded: boolean; + onToggle: () => void; + highlightedCell: string | null; +} + +function TableCard({ table, expanded, onToggle, highlightedCell }: TableCardProps) { + // 防御性检查:确保 issues 数组存在 + const issues = table.issues || []; + const hasIssues = issues.length > 0; + const errorCount = issues.filter(i => i.severity === 'ERROR').length; + const warningCount = issues.filter(i => i.severity === 'WARNING').length; + + return ( +
+ {/* 表格头部 */} +
+
+ {hasIssues ? ( + + ) : ( + + )} +
+

{table.caption || `表格 ${table.id}`}

+

+ {table.rowCount} 行 × {table.colCount} 列 + {table.skipped && ` · ⚠️ ${table.skipReason}`} +

+
+
+
+ {errorCount > 0 && ( + + {errorCount} 错误 + + )} + {warningCount > 0 && ( + + {warningCount} 警告 + + )} + {!hasIssues && ( + + 通过 + + )} + {expanded ? ( + + ) : ( + + )} +
+
+ + {/* 展开内容 */} + {expanded && ( +
+ {/* 表格渲染 */} +
+ +
+
+ + {/* 表格问题 */} + {issues.length > 0 && ( +
+
+

+ 该表格发现的问题 +

+ {issues.map((issue, idx) => ( +
+ {issue.severity === 'ERROR' ? ( + + ) : ( + + )} + {issue.message} +
+ ))} +
+
+ )} +
+ )} +
+ ); +} + +/** + * 给 HTML 表格添加高亮样式 + * 支持两种坐标: + * - data-coord="R5C4" - 单元格级别 + * - data-subcoord="R5C4S2" - 子行级别(用于多行单元格) + */ +function addHighlightToHtml( + html: string, + highlightedCell: string | null, + issues: ForensicsIssue[] +): string { + let result = html; + + // 给有问题的元素添加 has-issue 类 + for (const issue of issues) { + if (issue.location?.cellRef) { + const cellRef = issue.location.cellRef; + + // 检查是否包含子行坐标 (如 R5C4S2) + if (cellRef.includes('S')) { + // 子行级别高亮:匹配 data-subcoord + result = result.replace( + new RegExp(`data-subcoord="${cellRef}"`, 'g'), + `data-subcoord="${cellRef}" class="has-issue"` + ); + } else { + // 单元格级别高亮:匹配 data-coord(向后兼容) + result = result.replace( + new RegExp(`data-coord="${cellRef}"(?![S\\d])`, 'g'), + `data-coord="${cellRef}" class="has-issue"` + ); + } + } + } + + // 给用户点击高亮的元素添加 highlighted 类 + if (highlightedCell) { + if (highlightedCell.includes('S')) { + result = result.replace( + new RegExp(`data-subcoord="${highlightedCell}"(\\s+class="[^"]*")?`, 'g'), + (match, existingClass) => { + if (existingClass) { + return match.replace('class="', 'class="highlighted '); + } + return `data-subcoord="${highlightedCell}" class="highlighted"`; + } + ); + } else { + result = result.replace( + new RegExp(`data-coord="${highlightedCell}"(\\s+class="[^"]*")?`, 'g'), + (match, existingClass) => { + if (existingClass) { + return match.replace('class="', 'class="highlighted '); + } + return `data-coord="${highlightedCell}" class="highlighted"`; + } + ); + } + } + + return result; +} diff --git a/frontend-v2/src/modules/rvw/components/Header.tsx b/frontend-v2/src/modules/rvw/components/Header.tsx index 055c5321..f95c0bbb 100644 --- a/frontend-v2/src/modules/rvw/components/Header.tsx +++ b/frontend-v2/src/modules/rvw/components/Header.tsx @@ -1,8 +1,8 @@ /** * Dashboard头部组件 */ -import { useRef } from 'react'; -import { BrainCircuit, UploadCloud } from 'lucide-react'; +import { useRef, useState } from 'react'; +import { BrainCircuit, 
UploadCloud, Info, X } from 'lucide-react'; interface HeaderProps { onUpload: (files: FileList) => void; @@ -10,6 +10,7 @@ interface HeaderProps { export default function Header({ onUpload }: HeaderProps) { const fileInputRef = useRef(null); + const [showTip, setShowTip] = useState(true); const handleFileChange = (e: React.ChangeEvent) => { if (e.target.files && e.target.files.length > 0) { @@ -20,36 +21,56 @@ export default function Header({ onUpload }: HeaderProps) { }; return ( -
- {/* Logo区域 */} -
-
- +
+
+ {/* Logo区域 */} +
+
+ +
+
+

智能审稿系统

+

当前工作区:编辑部初审组

+
-
-

智能审稿系统

-

当前工作区:编辑部初审组

+ + {/* 上传按钮 */} +
+ +
- {/* 上传按钮 */} -
- - -
+ {/* 文件格式提示 */} + {showTip && ( +
+ +
+ 推荐上传 .docx 格式文件 + ,可获得完整的数据验证功能(表格算术校验、P值验证等)。 + PDF 和 .doc 格式仅支持稿约和方法学评审。 +
+ +
+ )}
); } diff --git a/frontend-v2/src/modules/rvw/components/MethodologyReport.tsx b/frontend-v2/src/modules/rvw/components/MethodologyReport.tsx index 25a08c71..12ac76a0 100644 --- a/frontend-v2/src/modules/rvw/components/MethodologyReport.tsx +++ b/frontend-v2/src/modules/rvw/components/MethodologyReport.tsx @@ -47,7 +47,7 @@ export default function MethodologyReport({ data }: MethodologyReportProps) {
- {data.overall_score} + {Number(data.overall_score).toFixed(1)}
diff --git a/frontend-v2/src/modules/rvw/components/ReportDetail.tsx b/frontend-v2/src/modules/rvw/components/ReportDetail.tsx index 0c18313b..4363ed15 100644 --- a/frontend-v2/src/modules/rvw/components/ReportDetail.tsx +++ b/frontend-v2/src/modules/rvw/components/ReportDetail.tsx @@ -2,24 +2,45 @@ * 报告详情页组件 */ import { useState } from 'react'; -import { ArrowLeft, FileCheck, Tag } from 'lucide-react'; +import { ArrowLeft, FileCheck, Tag, Info } from 'lucide-react'; import type { ReviewReport } from '../types'; import EditorialReport from './EditorialReport'; import MethodologyReport from './MethodologyReport'; +import ForensicsReport from './ForensicsReport'; interface ReportDetailProps { report: ReviewReport; onBack: () => void; } +type TabType = 'editorial' | 'methodology' | 'forensics'; + export default function ReportDetail({ report, onBack }: ReportDetailProps) { - const [activeTab, setActiveTab] = useState<'editorial' | 'methodology'>('editorial'); + const [activeTab, setActiveTab] = useState('editorial'); const hasEditorial = !!report.editorialReview; const hasMethodology = !!report.methodologyReview; + const hasForensics = !!report.forensicsResult; + + // 检查文件格式:非 .docx 文件无法进行数据验证 + const fileName = report.fileName || ''; + const isDocx = fileName.toLowerCase().endsWith('.docx'); + const isPdf = fileName.toLowerCase().endsWith('.pdf'); + const isDoc = fileName.toLowerCase().endsWith('.doc'); + const showNoForensicsTip = !hasForensics && (hasEditorial || hasMethodology) && (isPdf || isDoc); - // 如果只有方法学,默认显示方法学 - const effectiveTab = activeTab === 'editorial' && !hasEditorial && hasMethodology ? 
'methodology' : activeTab; + // 智能默认 Tab 选择 + const getEffectiveTab = (): TabType => { + if (activeTab === 'editorial' && hasEditorial) return 'editorial'; + if (activeTab === 'methodology' && hasMethodology) return 'methodology'; + if (activeTab === 'forensics' && hasForensics) return 'forensics'; + // 默认优先级:editorial > methodology > forensics + if (hasEditorial) return 'editorial'; + if (hasMethodology) return 'methodology'; + if (hasForensics) return 'forensics'; + return 'editorial'; + }; + const effectiveTab = getEffectiveTab(); return (
@@ -37,12 +58,12 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) {

{report.fileName} - {report.overallScore && ( + {report.overallScore != null && ( = 80 ? 'tag-green' : report.overallScore >= 60 ? 'tag-amber' : 'tag-red' }`}> - {report.overallScore}分 + {Number(report.overallScore).toFixed(1)}分 )}

@@ -59,7 +80,7 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) { {/* 内容区域 */}
{/* Tab切换 */} - {(hasEditorial || hasMethodology) && ( + {(hasEditorial || hasMethodology || hasForensics) && (
{hasEditorial && ( + )} +
+ )} + + {/* 非 docx 文件无数据验证提示 */} + {showNoForensicsTip && ( +
+ +
+ 当前文件为 {isPdf ? 'PDF' : '.doc'} 格式, + 无法进行数据验证(表格算术校验、P值验证等)。 + 如需数据验证功能,请上传 .docx 格式文件。 +
)} @@ -95,9 +140,12 @@ export default function ReportDetail({ report, onBack }: ReportDetailProps) { {effectiveTab === 'methodology' && report.methodologyReview && ( )} + {effectiveTab === 'forensics' && report.forensicsResult && ( + + )} {/* 无数据状态 */} - {!hasEditorial && !hasMethodology && ( + {!hasEditorial && !hasMethodology && !hasForensics && (

暂无评估报告

diff --git a/frontend-v2/src/modules/rvw/components/TaskDetail.tsx b/frontend-v2/src/modules/rvw/components/TaskDetail.tsx index 2814cdab..0254be08 100644 --- a/frontend-v2/src/modules/rvw/components/TaskDetail.tsx +++ b/frontend-v2/src/modules/rvw/components/TaskDetail.tsx @@ -3,15 +3,18 @@ * 支持显示审稿进度和结果 */ import { useState, useEffect } from 'react'; -import { ArrowLeft, FileCheck, Clock, AlertCircle, CheckCircle, Loader2, FileText, Bot } from 'lucide-react'; +import { ArrowLeft, FileCheck, Clock, AlertCircle, CheckCircle, Loader2, FileText, Bot, Info } from 'lucide-react'; import { Document, Packer, Paragraph, TextRun, HeadingLevel, AlignmentType, Table, TableRow, TableCell, WidthType } from 'docx'; import { saveAs } from 'file-saver'; import type { ReviewTask, ReviewReport, TaskStatus } from '../types'; import EditorialReport from './EditorialReport'; import MethodologyReport from './MethodologyReport'; +import ForensicsReport from './ForensicsReport'; import * as api from '../api'; import { message } from 'antd'; +type TabType = 'editorial' | 'methodology' | 'forensics'; + interface TaskDetailProps { task: ReviewTask; jobId?: string | null; // pg-boss 任务ID(可选,用于更精确的状态轮询) @@ -49,7 +52,7 @@ const getProgressSteps = (selectedAgents: string[]) => { export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDetailProps) { const [task, setTask] = useState(initialTask); const [report, setReport] = useState(null); - const [activeTab, setActiveTab] = useState<'editorial' | 'methodology'>('editorial'); + const [activeTab, setActiveTab] = useState('editorial'); const [elapsedTime, setElapsedTime] = useState(0); // Suppress unused variable warning - jobId is reserved for future use @@ -110,6 +113,8 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet setActiveTab('editorial'); } else if (report.methodologyReview) { setActiveTab('methodology'); + } else if (report.forensicsResult) { + setActiveTab('forensics'); } } }, 
[report]); @@ -196,7 +201,7 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet width: { size: 2000, type: WidthType.DXA }, }), new TableCell({ - children: [new Paragraph(`${report.overallScore || '-'} 分`)], + children: [new Paragraph(`${report.overallScore != null ? Number(report.overallScore).toFixed(1) : '-'} 分`)], width: { size: 7000, type: WidthType.DXA }, }), ], @@ -532,7 +537,7 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet 审查用时 {report.durationSeconds ? formatTime(report.durationSeconds) : '-'}

-
{report.overallScore || '-'}
+
{report.overallScore != null ? Number(report.overallScore).toFixed(1) : '-'}
@@ -562,7 +567,39 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet 方法学评估 ({report.methodologyReview.overall_score}分) )} + {report.forensicsResult && ( + + )}
+ + {/* 非 docx 文件无数据验证提示 */} + {!report.forensicsResult && (report.editorialReview || report.methodologyReview) && (() => { + const fileName = task.fileName || ''; + const isPdf = fileName.toLowerCase().endsWith('.pdf'); + const isDoc = fileName.toLowerCase().endsWith('.doc'); + if (isPdf || isDoc) { + return ( +
+ +
+ 当前文件为 {isPdf ? 'PDF' : '.doc'} 格式, + 无法进行数据验证(表格算术校验、P值验证等)。 + 如需数据验证功能,请上传 .docx 格式文件。 +
+
+ ); + } + return null; + })()} {/* 报告内容 */} {activeTab === 'editorial' && report.editorialReview && ( @@ -571,6 +608,9 @@ export default function TaskDetail({ task: initialTask, jobId, onBack }: TaskDet {activeTab === 'methodology' && report.methodologyReview && ( )} + {activeTab === 'forensics' && report.forensicsResult && ( + + )} )}
diff --git a/frontend-v2/src/modules/rvw/types/index.ts b/frontend-v2/src/modules/rvw/types/index.ts index b755f83b..f5a7a7ce 100644 --- a/frontend-v2/src/modules/rvw/types/index.ts +++ b/frontend-v2/src/modules/rvw/types/index.ts @@ -72,10 +72,51 @@ export interface MethodologyReviewResult { parts: MethodologyPart[]; } +// 数据验证问题 +export interface ForensicsIssue { + severity: 'ERROR' | 'WARNING' | 'INFO'; + type: string; + message: string; + location?: { + tableId?: string; + cellRef?: string; + paragraph?: number; + }; + evidence?: Record; +} + +// 表格数据 +export interface ForensicsTable { + id: string; + caption: string; + html: string; + data: string[][]; + headers: string[]; + rowCount: number; + colCount: number; + skipped?: boolean; + skipReason?: string; + issues: ForensicsIssue[]; +} + +// 数据验证结果 +export interface ForensicsResult { + tables: ForensicsTable[]; + methods: string[]; + issues: ForensicsIssue[]; + summary: { + totalTables: number; + totalIssues: number; + errorCount: number; + warningCount: number; + }; +} + // 完整审查报告 export interface ReviewReport extends ReviewTask { editorialReview?: EditorialReviewResult; methodologyReview?: MethodologyReviewResult; + forensicsResult?: ForensicsResult; modelUsed?: string; }