feat(iit): Implement real-time quality control system

Summary:

- Add 4 new database tables: iit_field_metadata, iit_qc_logs, iit_record_summary, iit_qc_project_stats

- Implement pg-boss debounce mechanism in WebhookController

- Refactor QC Worker for dual output: QC logs + record summary

- Enhance HardRuleEngine to support form-based rule filtering

- Create QcService for QC data queries

- Optimize ChatService with new intents: query_enrollment, query_qc_status

- Add admin batch operations: one-click full QC + one-click full summary

- Create IIT Admin management module: project config, QC rules, user mapping

Status: Code complete, pending end-to-end testing
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-07 21:56:11 +08:00
parent 0c590854b5
commit 5db4a7064c
74 changed files with 13383 additions and 2129 deletions

View File

@@ -4,10 +4,12 @@
> **更新日期:** 2026-02-05
> **关联文档:** [IIT Manager Agent V2.6 综合开发计划](./IIT%20Manager%20Agent%20V2.6%20综合开发计划.md)
>
> **V2.9 更新**
> **V2.9.1 更新**
> - 新增 `ProfilerService` 用户画像服务
> - `ChatService` 增加反馈循环功能
> - `SchedulerService` 支持 Cron Skill 触发
> - **新增 `AnonymizerService`**:PII 脱敏中间件(P0 合规必需)
> - **新增 `AutoMapperService`**:REDCap Schema 自动对齐工具
---
@@ -16,6 +18,8 @@
| 服务 | 职责 | Phase |
|------|------|-------|
| `ToolsService` | 统一工具管理(字段映射 + 执行) | 1 |
| `AnonymizerService` | **PII 脱敏中间件(P0 合规必需)** | 1.5 |
| `AutoMapperService` | **REDCap Schema 自动对齐** | 1 |
| `ChatService` | 消息路由(双脑入口)+ 反馈收集 | 2 |
| `IntentService` | 意图识别(混合路由) | 5 |
| `MemoryService` | 记忆管理(V2.8 架构) | 2-3 |
@@ -315,6 +319,460 @@ export class ToolsService {
---
## 2.5 AnonymizerService - PII 脱敏中间件(P0 合规必需)
> **文件路径**: `backend/src/modules/iit-manager/services/AnonymizerService.ts`
>
> **⚠️ 重要**:临床数据包含大量患者隐私信息,在调用第三方 LLM 之前**必须脱敏**
### 2.5.1 核心职责
- 识别文本中的 PII(个人身份信息)
- 发送 LLM 前脱敏(Masking)
- 接收 LLM 回复后还原(Unmasking)
- 记录脱敏审计日志
### 2.5.2 PII 识别正则库
```typescript
/**
 * Regular expressions used to detect PII in free text.
 *
 * Every pattern carries the `g` flag so that `String.prototype.replace`
 * substitutes all occurrences. Because global regexes keep `lastIndex`
 * state, avoid calling `.test()` / `.exec()` on these shared instances.
 */
const PII_PATTERNS = {
  // Chinese name candidate: a run of 2-4 CJK characters (U+4E00-U+9FA5) that is
  // not adjacent to a Latin letter or another CJK character. This is a broad
  // heuristic; callers filter common non-name words via NAME_EXCLUSIONS.
  name: /(?<![a-zA-Z\u4e00-\u9fa5])[\u4e00-\u9fa5]{2,4}(?![a-zA-Z\u4e00-\u9fa5])/g,
  // 18-character resident ID number: 6-digit region, birth date (19xx/20xx,
  // month 01-12, day 01-31), 3-digit sequence, check character (digit or X/x).
  id_card: /\d{6}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]/g,
  // 11-digit mobile number starting with 1 and a second digit of 3-9.
  phone: /1[3-9]\d{9}/g,
  // Medical record number: a label (MRN / HN / 病案号), an optional separator,
  // then 6-12 alphanumerics captured as group 1. The separator also accepts the
  // full-width colon ":" that is common in Chinese text; previously such input
  // was not recognised and the record number was sent unmasked.
  mrn: /(?:MRN|HN|病案号)[::\s]?([A-Z0-9]{6,12})/gi
};
/**
 * Words that the Chinese-name pattern would match but that are not names.
 * Matches found in this list are left untouched to reduce false positives.
 */
const NAME_EXCLUSIONS: string[] = [
  // Roles and titles
  '患者',
  '医生',
  '护士',
  '主任',
  '教授',
  // Study and treatment vocabulary
  '方案',
  '访视',
  '入组',
  '排除',
  '标准',
  '剂量',
  '疗程',
  '周期',
  '疗效',
];
```
### 2.5.3 完整实现
```typescript
import { prisma } from '../../common/prisma';
import * as crypto from 'crypto';
/**
 * Result of masking one piece of text before it is sent to an LLM.
 */
interface MaskingResult {
// The input text with every detected PII value replaced by a placeholder.
maskedText: string;
// Placeholder -> original value, e.g. { "[PATIENT_1]": "张三" }
maskingMap: Record<string, string>; // { "[PATIENT_1]": "张三" }
// Number of entries in maskingMap.
piiCount: number;
// Kinds of PII that were found: 'id_card', 'phone', 'mrn' and/or 'name'.
piiTypes: string[];
}
/**
 * Information needed to restore the original values in an LLM reply.
 */
interface UnmaskingContext {
// The placeholder -> original value map produced by the masking step.
maskingMap: Record<string, string>;
}
/**
 * PII anonymisation middleware.
 *
 * `mask` replaces detected PII with placeholders before text is sent to a
 * third-party LLM and writes an audit record; `unmask` restores the original
 * values in the LLM reply.
 */
export class AnonymizerService {
  private readonly encryptionKey: string;
  /** AES-256 key derived from `encryptionKey`; computed on first use and cached. */
  private derivedKey?: Uint8Array;

  /**
   * Reads the encryption passphrase from `PII_ENCRYPTION_KEY`.
   *
   * @throws Error when the variable is missing and `NODE_ENV` is `production`,
   *   because encrypting audit data with a publicly known key offers no protection.
   */
  constructor() {
    const configuredKey = process.env.PII_ENCRYPTION_KEY;
    if (!configuredKey) {
      if (process.env.NODE_ENV === 'production') {
        throw new Error('PII_ENCRYPTION_KEY must be set in production');
      }
      console.warn('[Anonymizer] PII_ENCRYPTION_KEY is not set; using an insecure development key');
    }
    this.encryptionKey = configuredKey || 'default-key-change-me';
  }

  /**
   * Masks PII in `text`. Call this before sending the text to an LLM.
   *
   * When at least one PII value is found, an audit record is stored containing
   * a SHA-256 hash of the original text, the masked text and the encrypted
   * placeholder map.
   *
   * @param text - Raw user text.
   * @param context - Identifiers stored with the audit record.
   * @returns The masked text, the placeholder map and summary counters.
   */
  async mask(
    text: string,
    context: { projectId: string; userId: string; sessionId: string }
  ): Promise<MaskingResult> {
    const result = this.maskText(text);

    if (result.piiCount > 0) {
      await this.saveAuditLog({
        projectId: context.projectId,
        userId: context.userId,
        sessionId: context.sessionId,
        originalHash: this.hashText(text),
        maskedPayload: result.maskedText,
        maskingMap: this.encrypt(JSON.stringify(result.maskingMap)),
        piiCount: result.piiCount,
        piiTypes: result.piiTypes
      });
    }

    return result;
  }

  /**
   * Restores original values in `text`. Call this on the LLM reply.
   *
   * All placeholders are substituted in a single pass with a function
   * replacer, so `$`-sequences inside an original value are inserted literally
   * and a restored value is never re-scanned for further placeholders.
   *
   * @param text - Text that may contain placeholders.
   * @param context - The placeholder map returned by `mask`.
   * @returns The text with known placeholders replaced by their original values.
   */
  unmask(text: string, context: UnmaskingContext): string {
    const placeholders = Object.keys(context.maskingMap);
    if (placeholders.length === 0) return text;

    const pattern = new RegExp(
      placeholders.map((placeholder) => this.escapeRegex(placeholder)).join('|'),
      'g'
    );
    return text.replace(pattern, (placeholder: string) => context.maskingMap[placeholder] ?? placeholder);
  }

  // ===== Helpers =====

  /**
   * Pure masking step (no I/O).
   *
   * Patterns are applied in priority order - ID card, phone, medical record
   * number, then name - so that digits inside an ID number are not picked up by
   * a later pattern. A value that occurs several times always receives the same
   * placeholder; every occurrence is therefore masked and `piiCount` counts
   * distinct values.
   */
  private maskText(text: string): MaskingResult {
    const maskingMap: Record<string, string> = {};
    const piiTypes: string[] = [];
    const counters: Record<string, number> = {};
    const issued = new Map<string, string>();

    const placeholderFor = (type: string, label: string, value: string): string => {
      const key = `${type}:${value}`;
      const existing = issued.get(key);
      if (existing !== undefined) return existing;

      const index = (counters[type] ?? 0) + 1;
      counters[type] = index;
      const placeholder = `[${label}_${index}]`;
      issued.set(key, placeholder);
      maskingMap[placeholder] = value;
      if (!piiTypes.includes(type)) piiTypes.push(type);
      return placeholder;
    };

    // 1. ID card numbers
    let maskedText = text.replace(PII_PATTERNS.id_card, (match: string) =>
      placeholderFor('id_card', 'ID_CARD', match)
    );
    // 2. Phone numbers
    maskedText = maskedText.replace(PII_PATTERNS.phone, (match: string) =>
      placeholderFor('phone', 'PHONE', match)
    );
    // 3. Medical record numbers. The whole match (label included) is stored so
    //    that unmasking reproduces the original text exactly.
    maskedText = maskedText.replace(PII_PATTERNS.mrn, (match: string) =>
      placeholderFor('mrn', 'MRN', match)
    );
    // 4. Chinese names, skipping known non-name words.
    maskedText = maskedText.replace(PII_PATTERNS.name, (match: string) =>
      NAME_EXCLUSIONS.includes(match) ? match : placeholderFor('name', 'PATIENT', match)
    );

    return {
      maskedText,
      maskingMap,
      piiCount: Object.keys(maskingMap).length,
      piiTypes
    };
  }

  /** Returns the hex-encoded SHA-256 digest of `text`. */
  private hashText(text: string): string {
    return crypto.createHash('sha256').update(text).digest('hex');
  }

  /** Derives the AES-256 key once; scrypt is deliberately slow, so the result is cached. */
  private getKey(): Uint8Array {
    if (!this.derivedKey) {
      this.derivedKey = crypto.scryptSync(this.encryptionKey, 'salt', 32);
    }
    return this.derivedKey;
  }

  /**
   * Encrypts `text` with AES-256-GCM.
   *
   * @returns `ivHex:authTagHex:ciphertextHex`. The random IV and the
   *   authentication tag are both required for decryption, so they are stored
   *   alongside the ciphertext instead of being discarded.
   */
  private encrypt(text: string): string {
    const iv = crypto.randomBytes(12); // 96-bit IV, the recommended size for GCM
    const cipher = crypto.createCipheriv('aes-256-gcm', this.getKey(), iv);
    const ciphertext = cipher.update(text, 'utf8', 'hex') + cipher.final('hex');
    const authTag = cipher.getAuthTag().toString('hex');
    return [iv.toString('hex'), authTag, ciphertext].join(':');
  }

  /** Escapes regular-expression metacharacters in `str`. */
  private escapeRegex(str: string): string {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Persists one audit record, tagged with the configured LLM provider. */
  private async saveAuditLog(data: {
    projectId: string;
    userId: string;
    sessionId: string;
    originalHash: string;
    maskedPayload: string;
    maskingMap: string;
    piiCount: number;
    piiTypes: string[];
  }): Promise<void> {
    await prisma.iitPiiAuditLog.create({
      data: {
        ...data,
        llmProvider: process.env.LLM_PROVIDER || 'qwen'
      }
    });
  }
}
```
### 2.5.4 集成到 ChatService
```typescript
// Usage inside ChatService.ts (illustrative excerpt, not a complete class)
export class ChatService {
private anonymizer: AnonymizerService;
/**
 * Handles one user message: masks PII, calls the LLM with the masked text,
 * then restores the original values in the reply before returning it.
 */
async handleMessage(userId: string, message: string): Promise<string> {
const projectId = await this.getUserProject(userId);
const sessionId = this.sessionMemory.getSessionId(userId);
// ⚠️ Mask PII before calling the LLM
const { maskedText, maskingMap, piiCount } = await this.anonymizer.mask(
message,
{ projectId, userId, sessionId }
);
if (piiCount > 0) {
console.log(`[Anonymizer] 检测到 ${piiCount} 个 PII已脱敏`);
}
// Call the LLM with the masked text only
const llmResponse = await this.llm.chat(maskedText, ...);
// ⚠️ Restore the original values once the LLM reply has been received
const unmaskedResponse = this.anonymizer.unmask(llmResponse, { maskingMap });
return unmaskedResponse;
}
}
```
---
## 2.6 AutoMapperService - REDCap Schema 自动对齐
> **文件路径**: `backend/src/modules/iit-manager/services/AutoMapperService.ts`
>
> **目的**:大幅减少 `iit_field_mapping` 表的人工配置工作量
### 2.6.1 核心职责
- 解析 REDCap Data Dictionary(CSV/JSON)
- 使用 LLM 进行语义映射
- 提供管理后台确认界面
### 2.6.2 完整实现
```typescript
import { parse } from 'papaparse';
import { LLMFactory } from '../../common/llm/adapters/LLMFactory';
import { prisma } from '../../common/prisma';
/**
 * One field parsed from a REDCap Data Dictionary.
 */
interface FieldDefinition {
// REDCap variable name (the "Variable / Field Name" column).
variableName: string;
// Human-readable label (the "Field Label" column).
fieldLabel: string;
// REDCap field type (the "Field Type" column).
fieldType: string;
// Raw "Choices, Calculations, OR Slider Labels" column, when present.
choices?: string;
}
/**
 * An LLM-proposed mapping from a REDCap field to one or more aliases.
 */
interface MappingSuggestion {
// REDCap variable name the suggestion refers to.
redcapField: string;
// Label of that field looked up in the parsed dictionary ('' when not found).
redcapLabel: string;
// Alias names proposed for the field.
suggestedAlias: string[];
// Confidence value reported by the LLM (the prompt asks for >= 0.7).
confidence: number;
// Review state; newly generated suggestions are 'pending'.
status: 'pending' | 'confirmed' | 'rejected';
}
/**
 * Generates REDCap field-mapping suggestions with an LLM and persists the
 * mappings an administrator confirms.
 */
export class AutoMapperService {
  /** Maximum number of REDCap fields sent to the LLM in one prompt. */
  private static readonly BATCH_SIZE = 50;
  /** Suggestions below this confidence are discarded (the prompt asks for >= 0.7). */
  private static readonly MIN_CONFIDENCE = 0.7;

  private llm = LLMFactory.create('qwen');

  // Standard system fields offered to the LLM as mapping targets.
  private readonly STANDARD_FIELDS = [
    { name: 'age', aliases: ['年龄', 'age', '岁数'] },
    { name: 'gender', aliases: ['性别', 'sex', 'gender', '男女'] },
    { name: 'ecog', aliases: ['ECOG', 'PS评分', '体力状态'] },
    { name: 'visit_date', aliases: ['访视日期', '就诊日期', 'visit date'] },
    { name: 'height', aliases: ['身高', 'height', 'ht'] },
    { name: 'weight', aliases: ['体重', 'weight', 'wt'] },
    { name: 'bmi', aliases: ['BMI', '体质指数'] },
    { name: 'consent_date', aliases: ['知情同意日期', 'ICF日期', 'consent date'] },
    { name: 'enrollment_date', aliases: ['入组日期', 'enrollment date', '入选日期'] }
  ];

  /**
   * Parses a REDCap Data Dictionary.
   *
   * Both formats are normalised through the same mapper, so CSV column
   * headers, REDCap API keys (`field_name`, `field_label`, ...) and already
   * normalised objects are all accepted. Rows without a variable name (for
   * example blank trailing lines) are dropped.
   *
   * @param fileContent - Raw file text.
   * @param format - `'csv'` or `'json'`.
   * @returns The fields found in the dictionary.
   * @throws SyntaxError when `format` is `'json'` and the content is not valid JSON.
   * @throws Error when the parsed content is not an array of rows.
   */
  async parseDataDictionary(fileContent: string, format: 'csv' | 'json'): Promise<FieldDefinition[]> {
    const rows: unknown =
      format === 'csv'
        ? parse(fileContent, { header: true, skipEmptyLines: true }).data
        : JSON.parse(fileContent);

    if (!Array.isArray(rows)) {
      throw new Error('Data dictionary must be an array of field rows');
    }

    return rows
      .map((row: unknown) => AutoMapperService.toFieldDefinition(row))
      .filter((field): field is FieldDefinition => field !== undefined);
  }

  /**
   * Asks the LLM for mapping suggestions.
   *
   * Fields are sent in batches of `BATCH_SIZE` so that dictionaries with more
   * than 50 fields are fully covered instead of being silently truncated. A
   * batch whose reply cannot be parsed contributes no suggestions; the other
   * batches are unaffected.
   *
   * @param projectId - Project the suggestions are for (currently not used in the prompt).
   * @param fields - Parsed dictionary fields.
   * @returns Validated suggestions, all with status `'pending'`.
   */
  async generateMappingSuggestions(
    projectId: string,
    fields: FieldDefinition[]
  ): Promise<MappingSuggestion[]> {
    const suggestions: MappingSuggestion[] = [];

    for (let start = 0; start < fields.length; start += AutoMapperService.BATCH_SIZE) {
      const batch = fields.slice(start, start + AutoMapperService.BATCH_SIZE);
      const response = await this.llm.chat([
        { role: 'user', content: this.buildPrompt(batch) }
      ]);
      suggestions.push(...AutoMapperService.parseSuggestions(response.content, batch));
    }

    return suggestions;
  }

  /**
   * Persists confirmed mappings.
   *
   * @param projectId - Project that owns the mappings.
   * @param confirmations - Administrator decisions, one per REDCap field.
   * @returns `created`: aliases upserted; `skipped`: unconfirmed entries;
   *   `failed`: aliases whose upsert threw (the error is logged).
   */
  async confirmMappings(
    projectId: string,
    confirmations: Array<{
      redcapField: string;
      aliases: string[];
      confirmed: boolean;
    }>
  ): Promise<{ created: number; skipped: number; failed: number }> {
    let created = 0;
    let skipped = 0;
    let failed = 0;

    for (const conf of confirmations) {
      if (!conf.confirmed) {
        skipped++;
        continue;
      }

      // Ignore blank aliases and duplicates within one confirmation.
      const rawAliases = Array.isArray(conf.aliases) ? conf.aliases : [];
      const aliases = [
        ...new Set(
          rawAliases
            .filter((alias): alias is string => typeof alias === 'string')
            .map((alias) => alias.trim())
            .filter((alias) => alias !== '')
        )
      ];

      for (const alias of aliases) {
        try {
          await prisma.iitFieldMapping.upsert({
            where: {
              projectId_aliasName: { projectId, aliasName: alias }
            },
            create: {
              projectId,
              aliasName: alias,
              actualName: conf.redcapField
            },
            update: {
              actualName: conf.redcapField
            }
          });
          created++;
        } catch (e) {
          failed++;
          console.error(`[AutoMapper] 创建映射失败: ${alias} -> ${conf.redcapField}`, e);
        }
      }
    }

    return { created, skipped, failed };
  }

  /**
   * One-step import: parse the dictionary, then generate suggestions for
   * review in the admin console. Nothing is persisted here.
   */
  async autoImport(
    projectId: string,
    fileContent: string,
    format: 'csv' | 'json'
  ): Promise<{
    suggestions: MappingSuggestion[];
    message: string;
  }> {
    // 1. Parse the Data Dictionary
    const fields = await this.parseDataDictionary(fileContent, format);
    console.log(`[AutoMapper] 解析到 ${fields.length} 个字段`);

    // 2. Generate LLM suggestions
    const suggestions = await this.generateMappingSuggestions(projectId, fields);
    console.log(`[AutoMapper] 生成 ${suggestions.length} 个映射建议`);

    return {
      suggestions,
      message: `已解析 ${fields.length} 个 REDCap 字段,生成 ${suggestions.length} 个映射建议,请在管理后台确认。`
    };
  }

  // ===== Helpers =====

  /** Builds the LLM prompt for one batch of fields. */
  private buildPrompt(batch: FieldDefinition[]): string {
    const standardFieldLines = this.STANDARD_FIELDS.map(f => `- ${f.name}: ${f.aliases.join(', ')}`).join('\n');
    const redcapFieldLines = batch.map(f => `- ${f.variableName}: ${f.fieldLabel}`).join('\n');

    return `你是一个临床研究数据专家。请将以下 REDCap 字段与系统标准字段进行语义匹配。
## 系统标准字段
${standardFieldLines}
## REDCap 字段列表
${redcapFieldLines}
请返回 JSON 格式的映射建议:
{
"mappings": [
{
"redcapField": "nl_age",
"suggestedAlias": ["age", "年龄"],
"confidence": 0.95
}
]
}
注意:
1. 只返回有把握的映射confidence >= 0.7
2. 如果不确定,不要强行映射
3. 一个 REDCap 字段可以有多个别名`;
  }

  /**
   * Converts one raw dictionary row into a `FieldDefinition`.
   *
   * @returns `undefined` when the row is not an object or has no variable name.
   */
  private static toFieldDefinition(row: unknown): FieldDefinition | undefined {
    if (typeof row !== 'object' || row === null) return undefined;
    const record = row as Record<string, unknown>;

    // Returns the first non-blank string found under any of the given keys.
    const pick = (...keys: string[]): string => {
      for (const key of keys) {
        const value = record[key];
        if (typeof value === 'string' && value.trim() !== '') return value.trim();
      }
      return '';
    };

    const variableName = pick('Variable / Field Name', 'variable_name', 'field_name', 'variableName');
    if (variableName === '') return undefined;

    return {
      variableName,
      fieldLabel: pick('Field Label', 'field_label', 'fieldLabel'),
      fieldType: pick('Field Type', 'field_type', 'fieldType'),
      choices:
        pick('Choices, Calculations, OR Slider Labels', 'select_choices_or_calculations', 'choices') ||
        undefined
    };
  }

  /**
   * Extracts and validates suggestions from one LLM reply.
   *
   * An entry is kept only when it names a field that exists in `batch`, has at
   * least one string alias and reports a confidence of at least
   * `MIN_CONFIDENCE`. This keeps hallucinated field names and low-confidence
   * guesses out of the review queue.
   */
  private static parseSuggestions(content: string, batch: FieldDefinition[]): MappingSuggestion[] {
    try {
      const jsonMatch = content.match(/\{[\s\S]*\}/);
      const parsed: unknown = JSON.parse(jsonMatch?.[0] || '{"mappings":[]}');
      const mappings = (parsed as { mappings?: unknown } | null)?.mappings;
      if (!Array.isArray(mappings)) return [];

      const labelByName = new Map<string, string>(
        batch.map((f): [string, string] => [f.variableName, f.fieldLabel])
      );

      const suggestions: MappingSuggestion[] = [];
      for (const entry of mappings as unknown[]) {
        if (typeof entry !== 'object' || entry === null) continue;
        const { redcapField, suggestedAlias, confidence } = entry as Record<string, unknown>;

        if (typeof redcapField !== 'string' || !labelByName.has(redcapField)) continue;
        if (typeof confidence !== 'number' || !(confidence >= AutoMapperService.MIN_CONFIDENCE)) continue;
        if (!Array.isArray(suggestedAlias)) continue;

        const aliases = suggestedAlias.filter(
          (alias): alias is string => typeof alias === 'string' && alias.trim() !== ''
        );
        if (aliases.length === 0) continue;

        suggestions.push({
          redcapField,
          redcapLabel: labelByName.get(redcapField) ?? '',
          suggestedAlias: aliases,
          confidence,
          status: 'pending'
        });
      }
      return suggestions;
    } catch (e) {
      console.error('[AutoMapper] LLM 返回解析失败', e);
      return [];
    }
  }
}
```
### 2.6.3 管理后台 API
```typescript
// routes/autoMapperRoutes.ts

/**
 * POST /auto-mapper/import
 * Parses an uploaded REDCap Data Dictionary and returns LLM mapping suggestions.
 * Responds 400 on an invalid body and 500 when parsing or the LLM call fails;
 * without the try/catch a rejected promise would leave the request hanging.
 */
router.post('/auto-mapper/import', async (req, res) => {
  try {
    const { projectId, fileContent, format } = req.body ?? {};
    if (
      typeof projectId !== 'string' ||
      typeof fileContent !== 'string' ||
      (format !== 'csv' && format !== 'json')
    ) {
      res.status(400).json({
        success: false,
        message: '参数无效:需要 projectId、fileContent 以及 format(csv 或 json)'
      });
      return;
    }

    const result = await autoMapperService.autoImport(projectId, fileContent, format);
    res.json({
      success: true,
      suggestions: result.suggestions,
      message: result.message
    });
  } catch (error) {
    console.error('[AutoMapper] import failed', error);
    res.status(500).json({ success: false, message: '导入失败,请稍后重试' });
  }
});

/**
 * POST /auto-mapper/confirm
 * Persists the mappings an administrator confirmed.
 * Responds 400 on an invalid body and 500 on unexpected failure.
 */
router.post('/auto-mapper/confirm', async (req, res) => {
  try {
    const { projectId, confirmations } = req.body ?? {};
    if (typeof projectId !== 'string' || !Array.isArray(confirmations)) {
      res.status(400).json({
        success: false,
        message: '参数无效:需要 projectId 以及 confirmations 数组'
      });
      return;
    }

    const result = await autoMapperService.confirmMappings(projectId, confirmations);
    res.json({
      success: true,
      created: result.created,
      skipped: result.skipped,
      message: `已创建 ${result.created} 个映射,跳过 ${result.skipped}`
    });
  } catch (error) {
    console.error('[AutoMapper] confirm failed', error);
    res.status(500).json({ success: false, message: '确认映射失败,请稍后重试' });
  }
});
```
### 2.6.4 效率对比
| 配置方式 | 100 个字段耗时 | 准确率 |
|----------|---------------|--------|
| 手动逐条配置 | 2-4 小时 | 100%(人工保证) |
| LLM 猜测 + 人工确认 | 15-30 分钟 | 95%(LLM 猜测)→ 100%(人工确认) |
---
## 3. IntentService - 意图识别
> **文件路径**: `backend/src/modules/iit-manager/services/IntentService.ts`