Files
AIclinicalresearch/backend/prisma/seed-extraction-templates.ts
HaHafeng f0736dbca1 feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-25 18:29:20 +08:00

183 lines
11 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { PrismaClient } from '@prisma/client';
// Single Prisma client for this seed script; it is disconnected once main() settles.
const prisma = new PrismaClient();
/**
 * Built-in (system) extraction templates for ASL Tool 3.
 *
 * Three templates are defined: RCT, Cohort and QC. The field definitions
 * follow the "ASL Tool 3 Full-Text Extraction Data Dictionary and
 * Specification" (《ASL 工具 3 全文提取数据字典与规范》).
 *
 * Each template carries a `baseFields` object that maps a module name
 * (metadata, baseline, rob, outcomes_*) to an ordered list of field
 * descriptors shaped as `{ key, label, type, description }`, where `type`
 * is one of 'string', 'integer' or 'number'.
 */

/** Randomized controlled trial template; its description references RoB 2.0 for risk of bias. */
const RCT_TEMPLATE = {
  code: 'RCT',
  name: '随机对照试验 (RCT)',
  description: '适用于随机对照试验文献的结构化数据提取包含基线特征、RoB 2.0 偏倚风险评估和多种结局指标类型',
  baseFields: {
    // Study identification and provenance.
    metadata: [
      { key: 'study_id', label: '研究标识', type: 'string', description: '第一作者+年份,如 Gandhi 2018' },
      { key: 'nct_number', label: '临床试验注册号', type: 'string', description: 'ClinicalTrials.gov 注册号' },
      { key: 'study_design', label: '研究设计类型', type: 'string', description: '如 RCT, Phase III RCT' },
      { key: 'funding_source', label: '资金来源', type: 'string', description: '资助方与利益冲突声明' },
    ],
    // Arm-level baseline characteristics.
    baseline: [
      { key: 'treatment_name', label: '实验组干预', type: 'string', description: '含剂量/频次' },
      { key: 'control_name', label: '对照组干预', type: 'string', description: '如 Placebo' },
      { key: 'n_treatment', label: '实验组样本量', type: 'integer', description: 'Table 1 中 N=xxx' },
      { key: 'n_control', label: '对照组样本量', type: 'integer', description: 'Table 1 中 N=xxx' },
      { key: 'age_treatment', label: '实验组年龄', type: 'string', description: 'Mean±SD 或 Median(IQR)' },
      { key: 'age_control', label: '对照组年龄', type: 'string', description: 'Mean±SD 或 Median(IQR)' },
      { key: 'male_percent', label: '男性比例(%)', type: 'string', description: '整体或分组' },
    ],
    // Risk-of-bias judgements (Low/High/Unclear Risk).
    rob: [
      { key: 'rob_randomization', label: '随机序列产生', type: 'string', description: 'Low/High/Unclear Risk' },
      { key: 'rob_allocation', label: '分配隐藏', type: 'string', description: 'Low/High/Unclear Risk' },
      { key: 'rob_blinding', label: '盲法实施', type: 'string', description: 'Low/High/Unclear Risk' },
      { key: 'rob_attrition', label: '失访与数据完整性', type: 'string', description: 'Low/High/Unclear Risk' },
    ],
    // Time-to-event outcomes.
    outcomes_survival: [
      { key: 'endpoint_name', label: '终点名称', type: 'string', description: '如 OS, PFS, MACE' },
      { key: 'hr_value', label: '风险比 (HR)', type: 'number', description: 'Hazard Ratio' },
      { key: 'hr_ci_lower', label: '95%CI 下限', type: 'number', description: '' },
      { key: 'hr_ci_upper', label: '95%CI 上限', type: 'number', description: '' },
      { key: 'p_value', label: 'P 值', type: 'string', description: '如 <0.001 或 0.032' },
    ],
    // Event counts per arm.
    outcomes_dichotomous: [
      { key: 'event_treatment', label: '实验组事件数', type: 'integer', description: '发生事件的具体人数' },
      { key: 'total_treatment', label: '实验组分析总人数', type: 'integer', description: '可能与基线总人数不同' },
      { key: 'event_control', label: '对照组事件数', type: 'integer', description: '' },
      { key: 'total_control', label: '对照组分析总人数', type: 'integer', description: '' },
    ],
    // Mean / SD / N per arm.
    outcomes_continuous: [
      { key: 'mean_treatment', label: '实验组均值', type: 'number', description: '' },
      { key: 'sd_treatment', label: '实验组标准差', type: 'number', description: 'SD若原文为 SE/CI 需换算' },
      { key: 'n_treatment_outcome', label: '实验组分析人数', type: 'integer', description: '' },
      { key: 'mean_control', label: '对照组均值', type: 'number', description: '' },
      { key: 'sd_control', label: '对照组标准差', type: 'number', description: '' },
      { key: 'n_control_outcome', label: '对照组分析人数', type: 'integer', description: '' },
    ],
  },
};

/** Cohort study template; its risk-of-bias field descriptions reference NOS items. */
const COHORT_TEMPLATE = {
  code: 'Cohort',
  name: '队列研究 (Cohort)',
  description: '适用于前瞻性/回顾性队列研究,基线特征与 RCT 类似但无随机化相关偏倚评估',
  baseFields: {
    // Study identification and follow-up.
    metadata: [
      { key: 'study_id', label: '研究标识', type: 'string', description: '第一作者+年份' },
      { key: 'study_design', label: '研究设计类型', type: 'string', description: '如 Prospective Cohort, Retrospective Cohort' },
      { key: 'funding_source', label: '资金来源', type: 'string', description: '' },
      { key: 'follow_up_duration', label: '随访时长', type: 'string', description: '如 Median 5.2 years' },
    ],
    // Exposed vs. non-exposed group characteristics.
    baseline: [
      { key: 'exposure_group', label: '暴露组', type: 'string', description: '暴露因素描述' },
      { key: 'control_group', label: '非暴露组/对照组', type: 'string', description: '' },
      { key: 'n_exposure', label: '暴露组样本量', type: 'integer', description: '' },
      { key: 'n_control', label: '对照组样本量', type: 'integer', description: '' },
      { key: 'age_exposure', label: '暴露组年龄', type: 'string', description: '' },
      { key: 'age_control', label: '对照组年龄', type: 'string', description: '' },
      { key: 'male_percent', label: '男性比例(%)', type: 'string', description: '' },
    ],
    // Risk-of-bias domains.
    rob: [
      { key: 'rob_selection', label: '选择偏倚', type: 'string', description: 'NOS: 代表性、非暴露组选择、暴露确定' },
      { key: 'rob_comparability', label: '可比性', type: 'string', description: 'NOS: 混杂因素控制' },
      { key: 'rob_outcome', label: '结局评估', type: 'string', description: 'NOS: 结局评估、随访充分性' },
    ],
    // Time-to-event outcomes.
    outcomes_survival: [
      { key: 'endpoint_name', label: '终点名称', type: 'string', description: '' },
      { key: 'hr_value', label: '风险比 (HR)', type: 'number', description: '调整后 HR' },
      { key: 'hr_ci_lower', label: '95%CI 下限', type: 'number', description: '' },
      { key: 'hr_ci_upper', label: '95%CI 上限', type: 'number', description: '' },
      { key: 'p_value', label: 'P 值', type: 'string', description: '' },
    ],
    // Same keys as the RCT template; labels refer to the exposed group.
    outcomes_dichotomous: [
      { key: 'event_treatment', label: '暴露组事件数', type: 'integer', description: '' },
      { key: 'total_treatment', label: '暴露组总人数', type: 'integer', description: '' },
      { key: 'event_control', label: '对照组事件数', type: 'integer', description: '' },
      { key: 'total_control', label: '对照组总人数', type: 'integer', description: '' },
    ],
    outcomes_continuous: [
      { key: 'mean_treatment', label: '暴露组均值', type: 'number', description: '' },
      { key: 'sd_treatment', label: '暴露组标准差', type: 'number', description: '' },
      { key: 'n_treatment_outcome', label: '暴露组分析人数', type: 'integer', description: '' },
      { key: 'mean_control', label: '对照组均值', type: 'number', description: '' },
      { key: 'sd_control', label: '对照组标准差', type: 'number', description: '' },
      { key: 'n_control_outcome', label: '对照组分析人数', type: 'integer', description: '' },
    ],
  },
};

/** Quality-improvement (QI/QC) template; it defines no survival-outcome module. */
const QC_TEMPLATE = {
  code: 'QC',
  name: '质量改进研究 (QI/QC)',
  description: '适用于质量改进研究,关注干预前后的指标变化,偏倚评估采用 ROBINS-I 简化版',
  baseFields: {
    // Study identification and setting.
    metadata: [
      { key: 'study_id', label: '研究标识', type: 'string', description: '第一作者+年份' },
      { key: 'study_design', label: '研究设计类型', type: 'string', description: '如 Before-After, ITS' },
      { key: 'setting', label: '研究场景', type: 'string', description: '如 ICU, Emergency Department' },
      { key: 'funding_source', label: '资金来源', type: 'string', description: '' },
    ],
    // Intervention vs. comparator (pre-intervention) characteristics.
    baseline: [
      { key: 'intervention_name', label: 'QI 干预措施', type: 'string', description: '质量改进措施描述' },
      { key: 'comparator', label: '对照/基线', type: 'string', description: '如 Pre-intervention period' },
      { key: 'n_intervention', label: '干预组样本量', type: 'integer', description: '' },
      { key: 'n_comparator', label: '对照组样本量', type: 'integer', description: '' },
      { key: 'duration_pre', label: '干预前观察期', type: 'string', description: '' },
      { key: 'duration_post', label: '干预后观察期', type: 'string', description: '' },
    ],
    // Risk-of-bias domains.
    rob: [
      { key: 'rob_confounding', label: '混杂偏倚', type: 'string', description: 'ROBINS-I 简化' },
      { key: 'rob_measurement', label: '测量偏倚', type: 'string', description: '结局指标测量方法是否一致' },
      { key: 'rob_reporting', label: '报告偏倚', type: 'string', description: '是否选择性报告' },
    ],
    // Same keys as the other templates; labels refer to post- vs. pre-intervention.
    outcomes_dichotomous: [
      { key: 'event_treatment', label: '干预后事件数', type: 'integer', description: '' },
      { key: 'total_treatment', label: '干预后总人数', type: 'integer', description: '' },
      { key: 'event_control', label: '干预前事件数', type: 'integer', description: '' },
      { key: 'total_control', label: '干预前总人数', type: 'integer', description: '' },
    ],
    outcomes_continuous: [
      { key: 'mean_treatment', label: '干预后均值', type: 'number', description: '' },
      { key: 'sd_treatment', label: '干预后标准差', type: 'number', description: '' },
      { key: 'n_treatment_outcome', label: '干预后分析人数', type: 'integer', description: '' },
      { key: 'mean_control', label: '干预前均值', type: 'number', description: '' },
      { key: 'sd_control', label: '干预前标准差', type: 'number', description: '' },
      { key: 'n_control_outcome', label: '干预前分析人数', type: 'integer', description: '' },
    ],
  },
};

/** All built-in templates, in the order they are seeded. */
const SYSTEM_TEMPLATES = [RCT_TEMPLATE, COHORT_TEMPLATE, QC_TEMPLATE];
/**
 * Seeds the built-in extraction templates.
 *
 * Each entry of SYSTEM_TEMPLATES is upserted by its `code`: an existing row
 * has its name, description and baseFields overwritten, and a missing row is
 * created. Both branches set `isSystem: true`, so a row that already exists
 * under a built-in code is (re)flagged as a system template instead of
 * keeping whatever flag it had before.
 *
 * After seeding, the number of system templates is logged. The count is
 * filtered on `isSystem` so that other rows in the same table do not inflate
 * the reported total.
 *
 * @returns {Promise<void>} Resolves once every template is written and the
 *   summary is logged; rejects with the first Prisma error encountered.
 */
async function main() {
  console.log('🌱 ASL 工具 3注入系统内置提取模板...\n');

  // Upserts run one at a time so the per-template log lines appear in
  // declaration order.
  for (const template of SYSTEM_TEMPLATES) {
    const { code, name, description, baseFields } = template;
    const payload = { name, description, baseFields, isSystem: true };

    const result = await prisma.aslExtractionTemplate.upsert({
      where: { code },
      update: payload,
      create: { code, ...payload },
    });
    console.log(`${result.code}: ${result.name}`);
  }

  // Count only system templates; the message below reports system templates.
  const count = await prisma.aslExtractionTemplate.count({
    where: { isSystem: true },
  });
  console.log(`\n🎉 完成!共 ${count} 套系统模板。`);
}
// Script entry point: run the seed, then always release the database
// connection. A failure is logged and reported through exit code 1.
main()
  .then(async () => {
    await prisma.$disconnect();
  })
  .catch((seedError) => {
    console.error('❌ Seed 失败:', seedError);
    // Exit only after the disconnect has completed.
    return prisma.$disconnect().then(() => {
      process.exit(1);
    });
  });