feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -590,6 +590,102 @@ model AslFulltextScreeningResult {
|
||||
@@schema("asl_schema")
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
// ASL Tool 3: Full-Text Intelligent Extraction Workbench V2.0
|
||||
// Architecture: scatter dispatch + independent Workers + Aggregator polling to close out tasks
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
|
||||
/// Built-in system extraction template (RCT / Cohort / QC).
/// Maintained by administrators; read-only for users.
model AslExtractionTemplate {
  id          String   @id @default(uuid())
  // Template code: RCT / Cohort / QC
  code        String   @unique
  // Display name: randomized controlled trial / cohort study / quality improvement
  name        String
  description String?
  // Expected shape:
  // { metadata: [...], baseline: [...], rob: [...], outcomes_survival: [...], ... }
  // NOTE(review): unlike every other multi-word field in these models, this one
  // has no @map, so its column is named "baseFields" rather than "base_fields".
  // Confirm this is intentional before relying on raw SQL against this table.
  baseFields  Json
  isSystem    Boolean  @map("is_system") @default(true)
  createdAt   DateTime @map("created_at") @default(now())
  updatedAt   DateTime @map("updated_at") @updatedAt

  // Back-reference: project-level templates that use this template as their base.
  projectTemplates AslProjectTemplate[] @relation("BaseTemplateProjectTemplates")

  @@map("extraction_templates")
  @@schema("asl_schema")
}
|
||||
|
||||
/// Project-level template: cloned from a system template, plus a slot for
/// user-defined custom fields (custom fields are enabled in M3).
model AslProjectTemplate {
  id             String   @id @default(uuid())
  projectId      String   @map("project_id")
  userId         String   @map("user_id")
  baseTemplateId String   @map("base_template_id")
  // Allowed values: survival | dichotomous | continuous
  outcomeType    String   @map("outcome_type") @default("survival")
  // M3: [{name, type, prompt}]
  customFields   Json     @map("custom_fields") @default("[]")
  isLocked       Boolean  @map("is_locked") @default(false)
  createdAt      DateTime @map("created_at") @default(now())
  updatedAt      DateTime @map("updated_at") @updatedAt

  // The system template this project template is based on.
  baseTemplate AslExtractionTemplate @relation("BaseTemplateProjectTemplates", fields: [baseTemplateId], references: [id])
  // Extraction tasks that were started with this project template.
  tasks        AslExtractionTask[]   @relation("TemplateExtractionTasks")

  // At most one project template per (project, base template) pair.
  @@unique([projectId, baseTemplateId], map: "unique_extraction_project_base_template")
  // NOTE(review): projectId is the leading column of the unique constraint above,
  // so this single-column index is probably redundant — confirm before dropping.
  @@index([projectId], map: "idx_extraction_project_templates_project_id")
  @@index([userId], map: "idx_extraction_project_templates_user_id")
  @@map("extraction_project_templates")
  @@schema("asl_schema")
}
|
||||
|
||||
/// Extraction task (one task = batch extraction of N documents).
/// The task status is modified only by the Aggregator.
model AslExtractionTask {
  id                 String    @id @default(uuid())
  projectId          String    @map("project_id")
  userId             String    @map("user_id")
  projectTemplateId  String    @map("project_template_id")
  pkbKnowledgeBaseId String    @map("pkb_knowledge_base_id")
  // Optional; when present it must be unique across all tasks.
  idempotencyKey     String?   @map("idempotency_key") @unique
  totalCount         Int       @map("total_count")
  // Allowed values: processing | completed | failed
  status             String    @default("processing")
  createdAt          DateTime  @map("created_at") @default(now())
  updatedAt          DateTime  @map("updated_at") @updatedAt
  completedAt        DateTime? @map("completed_at")

  // The project template this task extracts against.
  projectTemplate AslProjectTemplate    @relation("TemplateExtractionTasks", fields: [projectTemplateId], references: [id])
  // Per-document result rows belonging to this task.
  results         AslExtractionResult[] @relation("TaskExtractionResults")

  @@index([projectId], map: "idx_extraction_tasks_project_id")
  @@index([userId], map: "idx_extraction_tasks_user_id")
  @@index([status], map: "idx_extraction_tasks_status")
  @@map("extraction_tasks")
  @@schema("asl_schema")
}
|
||||
|
||||
/// Extraction result for a single document.
/// A Worker writes only its own Result row and never touches the Task table.
model AslExtractionResult {
  id                 String    @id @default(uuid())
  taskId             String    @map("task_id")
  projectId          String    @map("project_id")
  pkbDocumentId      String    @map("pkb_document_id")
  // PKB OSS path, frozen at the API layer
  snapshotStorageKey String    @map("snapshot_storage_key")
  // Original filename, frozen at the API layer
  snapshotFilename   String    @map("snapshot_filename")
  // Allowed values: pending | extracting | completed | error
  status             String    @default("pending")
  // Structured extraction JSON produced by the LLM
  extractedData      Json?     @map("extracted_data")
  // fuzzyQuoteMatch three-tier confidence result
  quoteVerification  Json?     @map("quote_verification")
  // HITL manual edit record (M2)
  manualOverrides    Json?     @map("manual_overrides")
  // Allowed values: pending | approved
  reviewStatus       String    @map("review_status") @default("pending")
  reviewedAt         DateTime? @map("reviewed_at")
  errorMessage       String?   @map("error_message")
  processedAt        DateTime? @map("processed_at")
  createdAt          DateTime  @map("created_at") @default(now())
  updatedAt          DateTime  @map("updated_at") @updatedAt

  // Owning task; deleting the task cascades to its result rows.
  task AslExtractionTask @relation("TaskExtractionResults", fields: [taskId], references: [id], onDelete: Cascade)

  // Keeps the Aggregator's groupBy performant
  @@index([taskId, status], map: "idx_extraction_results_task_status")
  // NOTE(review): taskId is the leading column of the composite index above,
  // so this single-column index is probably redundant — confirm before dropping.
  @@index([taskId], map: "idx_extraction_results_task_id")
  @@index([projectId], map: "idx_extraction_results_project_id")
  @@map("extraction_results")
  @@schema("asl_schema")
}
|
||||
|
||||
model DCHealthCheck {
|
||||
id String @id @default(uuid())
|
||||
userId String @map("user_id")
|
||||
|
||||
Reference in New Issue
Block a user