feat(iit): Implement real-time quality control system
Summary:
- Add 4 new database tables: iit_field_metadata, iit_qc_logs, iit_record_summary, iit_qc_project_stats
- Implement pg-boss debounce mechanism in WebhookController
- Refactor QC Worker for dual output: QC logs + record summary
- Enhance HardRuleEngine to support form-based rule filtering
- Create QcService for QC data queries
- Optimize ChatService with new intents: query_enrollment, query_qc_status
- Add admin batch operations: one-click full QC + one-click full summary
- Create IIT Admin management module: project config, QC rules, user mapping

Status: Code complete, pending end-to-end testing

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -4,10 +4,12 @@
|
||||
> **更新日期:** 2026-02-05
|
||||
> **关联文档:** [IIT Manager Agent V2.6 综合开发计划](./IIT%20Manager%20Agent%20V2.6%20综合开发计划.md)
|
||||
>
|
||||
> **V2.9 更新**:
|
||||
> **V2.9.1 更新**:
|
||||
> - 新增 `ProfilerService` 用户画像服务
|
||||
> - `ChatService` 增加反馈循环功能
|
||||
> - `SchedulerService` 支持 Cron Skill 触发
|
||||
> - **新增 `AnonymizerService`**:PII 脱敏中间件(P0 合规必需)
|
||||
> - **新增 `AutoMapperService`**:REDCap Schema 自动对齐工具
|
||||
|
||||
---
|
||||
|
||||
@@ -16,6 +18,8 @@
|
||||
| 服务 | 职责 | Phase |
|
||||
|------|------|-------|
|
||||
| `ToolsService` | 统一工具管理(字段映射 + 执行) | 1 |
|
||||
| `AnonymizerService` | **PII 脱敏中间件(P0 合规必需)** | 1.5 |
|
||||
| `AutoMapperService` | **REDCap Schema 自动对齐** | 1 |
|
||||
| `ChatService` | 消息路由(双脑入口)+ 反馈收集 | 2 |
|
||||
| `IntentService` | 意图识别(混合路由) | 5 |
|
||||
| `MemoryService` | 记忆管理(V2.8 架构) | 2-3 |
|
||||
@@ -315,6 +319,460 @@ export class ToolsService {
|
||||
|
||||
---
|
||||
|
||||
## 2.5 AnonymizerService - PII 脱敏中间件(P0 合规必需)
|
||||
|
||||
> **文件路径**: `backend/src/modules/iit-manager/services/AnonymizerService.ts`
|
||||
>
|
||||
> **⚠️ 重要**:临床数据包含大量患者隐私信息,在调用第三方 LLM 之前**必须脱敏**!
|
||||
|
||||
### 2.5.1 核心职责
|
||||
|
||||
- 识别文本中的 PII(个人身份信息)
|
||||
- 发送 LLM 前脱敏(Masking)
|
||||
- 接收 LLM 回复后还原(Unmasking)
|
||||
- 记录脱敏审计日志
|
||||
|
||||
### 2.5.2 PII 识别正则库
|
||||
|
||||
```typescript
|
||||
/**
 * Regular expressions used to locate PII in free text.
 *
 * Every pattern carries the `g` flag because it is consumed through
 * `String.prototype.replace`, which has to rewrite all occurrences. Avoid
 * calling `.test()` / `.exec()` on these shared instances: with `g` they keep
 * state in `lastIndex` between calls.
 */
const PII_PATTERNS = {
  // Chinese personal name: a run of 2-4 CJK characters that is not embedded in
  // a longer run of Latin letters or CJK characters. Common non-name words are
  // filtered out afterwards via NAME_EXCLUSIONS.
  name: /(?<![a-zA-Z\u4e00-\u9fa5])[\u4e00-\u9fa5]{2,4}(?![a-zA-Z\u4e00-\u9fa5])/g,

  // 18-character ID card number: 6 digits, a birth date (year 19xx/20xx,
  // month 01-12, day 01-31), 3 digits, then a final digit or X/x.
  id_card: /\d{6}(19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]/g,

  // Mobile phone number: 11 digits, first digit 1, second digit 3-9.
  phone: /1[3-9]\d{9}/g,

  // Medical record number: a label (MRN / HN / 病案号), an optional separator,
  // then 6-12 alphanumerics captured as group 1. The separator accepts the
  // ASCII colon, the full-width colon (U+FF1A) used in Chinese text, or
  // whitespace; without U+FF1A a label such as "病案号:A1234567" is missed.
  mrn: /(?:MRN|HN|病案号)[:\uFF1A\s]?([A-Z0-9]{6,12})/gi
};

// Words that match the `name` pattern but are not personal names; matches
// found in this list are left untouched to reduce false positives.
const NAME_EXCLUSIONS = [
  '患者', '医生', '护士', '主任', '教授', '方案', '访视',
  '入组', '排除', '标准', '剂量', '疗程', '周期', '疗效'
];
|
||||
```
|
||||
|
||||
### 2.5.3 完整实现
|
||||
|
||||
```typescript
|
||||
import { prisma } from '../../common/prisma';
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
/** Outcome of one masking pass over a piece of text. */
interface MaskingResult {
  /** Input text with every detected PII value replaced by a placeholder. */
  maskedText: string;
  /** Placeholder -> original value, e.g. { "[PATIENT_1]": "张三" }. */
  maskingMap: Record<string, string>;
  /** Number of entries recorded in `maskingMap`. */
  piiCount: number;
  /** Distinct PII categories that were found. */
  piiTypes: string[];
}

/** Data needed to turn placeholders back into their original values. */
type UnmaskingContext = Pick<MaskingResult, 'maskingMap'>;
|
||||
|
||||
/**
 * PII anonymization middleware used around LLM calls.
 *
 * `mask` replaces detected PII with placeholders and writes an audit record;
 * `unmask` substitutes the placeholders in the LLM reply with the original
 * values again. The masking map stored in the audit record is encrypted with
 * AES-256-GCM and can be read back with `decrypt`.
 */
export class AnonymizerService {
  private encryptionKey: string;
  /** AES-256 key derived from `encryptionKey`; computed on first use, then cached. */
  private derivedKey?: Buffer;

  constructor() {
    const configuredKey = process.env.PII_ENCRYPTION_KEY;
    if (!configuredKey) {
      // The fallback keeps existing environments running, but a key that is
      // published in source code protects nothing, so make the gap visible.
      console.warn(
        '[Anonymizer] PII_ENCRYPTION_KEY is not set; using an insecure built-in key. ' +
          'Configure it before handling real patient data.'
      );
    }
    this.encryptionKey = configuredKey || 'default-key-change-me';
  }

  /**
   * Masks PII in `text`; call this before sending text to an LLM.
   *
   * Patterns are applied in a fixed order (ID card, phone, MRN, then name) so
   * the broad name pattern only sees text the stricter patterns have already
   * rewritten. Identical values always receive the same placeholder, so a
   * value that occurs several times is masked every time and still reads as a
   * single entity.
   *
   * @param text    Raw text that may contain PII.
   * @param context Identifiers copied into the audit record.
   * @returns Masked text, the placeholder map, the number of map entries and
   *          the PII categories found.
   */
  async mask(
    text: string,
    context: { projectId: string; userId: string; sessionId: string }
  ): Promise<MaskingResult> {
    const maskingMap: Record<string, string> = {};
    const placeholderByValue = new Map<string, string>();
    const piiTypes: string[] = [];
    const counter = { name: 0, id_card: 0, phone: 0, mrn: 0 };

    // Returns the placeholder for `original`, creating one on first sight.
    const allocate = (type: keyof typeof counter, label: string, original: string): string => {
      const known = placeholderByValue.get(original);
      if (known !== undefined) return known;

      counter[type]++;
      const placeholder = `[${label}_${counter[type]}]`;
      maskingMap[placeholder] = original;
      placeholderByValue.set(original, placeholder);
      if (!piiTypes.includes(type)) piiTypes.push(type);
      return placeholder;
    };

    let maskedText = text;

    // 1. ID card numbers
    maskedText = maskedText.replace(PII_PATTERNS.id_card, (match) =>
      allocate('id_card', 'ID_CARD', match)
    );

    // 2. Phone numbers
    maskedText = maskedText.replace(PII_PATTERNS.phone, (match) =>
      allocate('phone', 'PHONE', match)
    );

    // 3. Medical record numbers. The whole match (label + number) is stored so
    //    that unmask() restores the text exactly, label included.
    maskedText = maskedText.replace(PII_PATTERNS.mrn, (match) => allocate('mrn', 'MRN', match));

    // 4. Chinese names, skipping words known not to be names
    maskedText = maskedText.replace(PII_PATTERNS.name, (match) =>
      NAME_EXCLUSIONS.includes(match) ? match : allocate('name', 'PATIENT', match)
    );

    const piiCount = Object.keys(maskingMap).length;

    // Write the audit record only when something was actually masked.
    if (piiCount > 0) {
      await this.saveAuditLog({
        projectId: context.projectId,
        userId: context.userId,
        sessionId: context.sessionId,
        originalHash: this.hashText(text),
        maskedPayload: maskedText,
        maskingMap: this.encrypt(JSON.stringify(maskingMap)),
        piiCount,
        piiTypes
      });
    }

    return { maskedText, maskingMap, piiCount, piiTypes };
  }

  /**
   * Restores original values in `text`; call this on the LLM reply.
   *
   * All placeholders are substituted in a single pass using a replacer
   * function, so characters such as `$&` inside an original value are inserted
   * literally and restored text is never scanned for placeholders again.
   *
   * @param text    Text containing placeholders produced by `mask`.
   * @param context Carries the placeholder map returned by `mask`.
   * @returns The text with every known placeholder replaced.
   */
  unmask(text: string, context: UnmaskingContext): string {
    const placeholders = Object.keys(context.maskingMap)
      .filter((key) => key.length > 0)
      // Longest first, so a key that is a prefix of another cannot win the alternation.
      .sort((a, b) => b.length - a.length);
    if (placeholders.length === 0) return text;

    const pattern = new RegExp(placeholders.map((key) => this.escapeRegex(key)).join('|'), 'g');
    return text.replace(pattern, (hit) => context.maskingMap[hit] ?? hit);
  }

  /**
   * Decrypts a payload produced by `encrypt`.
   *
   * @param payload Hex-encoded `iv:authTag:ciphertext`.
   * @returns The original plaintext.
   * @throws Error when the payload is malformed or fails GCM authentication.
   */
  decrypt(payload: string): string {
    const parts = payload.split(':');
    const [ivHex, tagHex, dataHex] = parts;
    if (parts.length !== 3 || ivHex === undefined || tagHex === undefined || dataHex === undefined) {
      throw new Error('Invalid encrypted payload: expected "iv:authTag:ciphertext"');
    }

    const decipher = crypto.createDecipheriv('aes-256-gcm', this.getKey(), Buffer.from(ivHex, 'hex'));
    decipher.setAuthTag(Buffer.from(tagHex, 'hex'));
    const plain = Buffer.concat([decipher.update(Buffer.from(dataHex, 'hex')), decipher.final()]);
    return plain.toString('utf8');
  }

  // ===== Helpers =====

  /** SHA-256 hex digest of `text`. */
  private hashText(text: string): string {
    return crypto.createHash('sha256').update(text).digest('hex');
  }

  /** Derives the AES key once; scrypt is deliberately slow, so the result is cached. */
  private getKey(): Buffer {
    if (this.derivedKey === undefined) {
      this.derivedKey = crypto.scryptSync(this.encryptionKey, 'salt', 32);
    }
    return this.derivedKey;
  }

  /**
   * Encrypts `text` with AES-256-GCM.
   *
   * The random IV and the authentication tag are returned together with the
   * ciphertext as hex `iv:authTag:ciphertext`; without both of them the data
   * could be neither decrypted nor verified later.
   */
  private encrypt(text: string): string {
    const iv = crypto.randomBytes(12);
    const cipher = crypto.createCipheriv('aes-256-gcm', this.getKey(), iv);
    const ciphertext = Buffer.concat([cipher.update(text, 'utf8'), cipher.final()]);
    const authTag = cipher.getAuthTag();
    return [iv, authTag, ciphertext].map((part) => part.toString('hex')).join(':');
  }

  /** Escapes regex metacharacters so `str` can be matched literally. */
  private escapeRegex(str: string): string {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Persists one audit record, tagging it with the configured LLM provider. */
  private async saveAuditLog(data: {
    projectId: string;
    userId: string;
    sessionId: string;
    originalHash: string;
    maskedPayload: string;
    maskingMap: string;
    piiCount: number;
    piiTypes: string[];
  }): Promise<void> {
    await prisma.iitPiiAuditLog.create({
      data: {
        ...data,
        llmProvider: process.env.LLM_PROVIDER || 'qwen'
      }
    });
  }
}
|
||||
```
|
||||
|
||||
### 2.5.4 集成到 ChatService
|
||||
|
||||
```typescript
|
||||
// ChatService.ts 中的使用
|
||||
/**
 * Excerpt of ChatService showing where PII anonymization wraps the LLM call.
 * Members used below but not declared in this excerpt (`getUserProject`,
 * `sessionMemory`, `llm`) are assumed to exist in the full ChatService source.
 */
export class ChatService {
  // Initialised inline so handleMessage never runs against an undefined anonymizer.
  private readonly anonymizer = new AnonymizerService();

  /**
   * Handles one user message: mask PII, call the LLM with the masked text,
   * then restore the original values in the reply.
   *
   * @param userId  Sender of the message.
   * @param message Raw message text, possibly containing PII.
   * @returns The LLM reply with placeholders replaced by the original values.
   */
  async handleMessage(userId: string, message: string): Promise<string> {
    const projectId = await this.getUserProject(userId);
    const sessionId = this.sessionMemory.getSessionId(userId);

    // Mask before anything leaves for the LLM.
    const { maskedText, maskingMap, piiCount } = await this.anonymizer.mask(
      message,
      { projectId, userId, sessionId }
    );

    if (piiCount > 0) {
      // Only the count is logged, never the detected values themselves.
      console.log(`[Anonymizer] 检测到 ${piiCount} 个 PII,已脱敏`);
    }

    // Call the LLM with the masked text only.
    const llmResponse = await this.llm.chat(maskedText /* remaining chat arguments elided in this excerpt */);

    // Restore the original values in the reply.
    const unmaskedResponse = this.anonymizer.unmask(llmResponse, { maskingMap });

    return unmaskedResponse;
  }
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2.6 AutoMapperService - REDCap Schema 自动对齐
|
||||
|
||||
> **文件路径**: `backend/src/modules/iit-manager/services/AutoMapperService.ts`
|
||||
>
|
||||
> **目的**:大幅减少 `iit_field_mapping` 表的人工配置工作量
|
||||
|
||||
### 2.6.1 核心职责
|
||||
|
||||
- 解析 REDCap Data Dictionary(CSV/JSON)
|
||||
- 使用 LLM 进行语义映射
|
||||
- 提供管理后台确认界面
|
||||
|
||||
### 2.6.2 完整实现
|
||||
|
||||
```typescript
|
||||
import { parse } from 'papaparse';
|
||||
import { LLMFactory } from '../../common/llm/adapters/LLMFactory';
|
||||
import { prisma } from '../../common/prisma';
|
||||
|
||||
/** One field of a REDCap Data Dictionary, reduced to what the mapper needs. */
interface FieldDefinition {
  /** REDCap variable (field) name. */
  variableName: string;
  /** Human-readable field label. */
  fieldLabel: string;
  /** REDCap field type as given in the dictionary. */
  fieldType: string;
  /** Raw choices / calculation / slider-label text, when the field has any. */
  choices?: string;
}

/** A proposed alias mapping for one REDCap field, awaiting review. */
interface MappingSuggestion {
  /** REDCap variable name the aliases point to. */
  redcapField: string;
  /** Label of that REDCap field. */
  redcapLabel: string;
  /** Alias names suggested for the field. */
  suggestedAlias: string[];
  /** Confidence score reported for the suggestion. */
  confidence: number;
  /** Review state of the suggestion. */
  status: 'pending' | 'confirmed' | 'rejected';
}
|
||||
|
||||
/**
 * Builds field-alias mappings for a project from a REDCap Data Dictionary:
 * parse the dictionary, ask the LLM for alias suggestions, and persist the
 * suggestions an administrator has confirmed.
 */
export class AutoMapperService {
  /** Number of REDCap fields placed in a single LLM prompt. */
  private static readonly PROMPT_BATCH_SIZE = 50;
  /** Suggestions below this confidence are discarded (the prompt asks for >= 0.7). */
  private static readonly MIN_CONFIDENCE = 0.7;

  private llm = LLMFactory.create('qwen');

  // Standard fields known to the system, each with its accepted aliases.
  private readonly STANDARD_FIELDS = [
    { name: 'age', aliases: ['年龄', 'age', '岁数'] },
    { name: 'gender', aliases: ['性别', 'sex', 'gender', '男女'] },
    { name: 'ecog', aliases: ['ECOG', 'PS评分', '体力状态'] },
    { name: 'visit_date', aliases: ['访视日期', '就诊日期', 'visit date'] },
    { name: 'height', aliases: ['身高', 'height', 'ht'] },
    { name: 'weight', aliases: ['体重', 'weight', 'wt'] },
    { name: 'bmi', aliases: ['BMI', '体质指数'] },
    { name: 'consent_date', aliases: ['知情同意日期', 'ICF日期', 'consent date'] },
    { name: 'enrollment_date', aliases: ['入组日期', 'enrollment date', '入选日期'] }
  ];

  /**
   * Parses a REDCap Data Dictionary.
   *
   * CSV and JSON rows go through the same normalisation, so the REDCap column
   * headers, snake_case keys and already-normalised `FieldDefinition` keys are
   * all accepted. Rows without a variable name (for example blank CSV lines)
   * are dropped.
   *
   * @param fileContent Raw file text.
   * @param format      `'csv'` or `'json'`.
   * @returns The field definitions found in the file.
   * @throws Error when the parsed content is not a list of rows, or when the
   *         JSON text is invalid.
   */
  async parseDataDictionary(fileContent: string, format: 'csv' | 'json'): Promise<FieldDefinition[]> {
    // A leading byte-order mark would otherwise become part of the first header name.
    const text = fileContent.replace(/^\uFEFF/, '');

    const rows: unknown =
      format === 'csv'
        ? parse(text, { header: true, skipEmptyLines: true }).data
        : JSON.parse(text);

    if (!Array.isArray(rows)) {
      throw new Error('Data dictionary must be a list of field definitions');
    }

    return rows
      .map((row) => this.toFieldDefinition(row))
      .filter((field): field is FieldDefinition => field !== undefined);
  }

  /** Normalises one raw dictionary row; returns undefined when it has no variable name. */
  private toFieldDefinition(row: unknown): FieldDefinition | undefined {
    if (typeof row !== 'object' || row === null) return undefined;
    const record = row as Record<string, unknown>;

    // First non-blank string found under any of the candidate keys.
    const pick = (...keys: string[]): string | undefined => {
      for (const key of keys) {
        const value = record[key];
        if (typeof value === 'string' && value.trim() !== '') return value;
      }
      return undefined;
    };

    const variableName = pick('variableName', 'Variable / Field Name', 'variable_name', 'field_name');
    if (variableName === undefined) return undefined;

    return {
      variableName,
      fieldLabel: pick('fieldLabel', 'Field Label', 'field_label') ?? '',
      fieldType: pick('fieldType', 'Field Type', 'field_type') ?? '',
      choices: pick('choices', 'Choices, Calculations, OR Slider Labels', 'select_choices_or_calculations')
    };
  }

  /**
   * Asks the LLM for alias suggestions for every field.
   *
   * Fields are sent in batches of `PROMPT_BATCH_SIZE` so that dictionaries
   * larger than one prompt are fully covered instead of being cut off.
   *
   * @param projectId Project the suggestions are for (not used in the prompt).
   * @param fields    Parsed dictionary fields.
   * @returns Validated suggestions, all with status `'pending'`.
   */
  async generateMappingSuggestions(
    projectId: string,
    fields: FieldDefinition[]
  ): Promise<MappingSuggestion[]> {
    const suggestions: MappingSuggestion[] = [];
    const batchSize = AutoMapperService.PROMPT_BATCH_SIZE;

    // Batches run one after another rather than in parallel, to avoid
    // flooding the LLM provider with concurrent requests.
    for (let start = 0; start < fields.length; start += batchSize) {
      const batch = fields.slice(start, start + batchSize);
      const response = await this.llm.chat([
        { role: 'user', content: this.buildMappingPrompt(batch) }
      ]);
      suggestions.push(...this.parseMappingResponse(response.content, batch));
    }

    return suggestions;
  }

  /** Builds the matching prompt for one batch of fields. */
  private buildMappingPrompt(batch: FieldDefinition[]): string {
    return `你是一个临床研究数据专家。请将以下 REDCap 字段与系统标准字段进行语义匹配。

## 系统标准字段
${this.STANDARD_FIELDS.map(f => `- ${f.name}: ${f.aliases.join(', ')}`).join('\n')}

## REDCap 字段列表
${batch.map(f => `- ${f.variableName}: ${f.fieldLabel}`).join('\n')}

请返回 JSON 格式的映射建议:
{
  "mappings": [
    {
      "redcapField": "nl_age",
      "suggestedAlias": ["age", "年龄"],
      "confidence": 0.95
    }
  ]
}

注意:
1. 只返回有把握的映射(confidence >= 0.7)
2. 如果不确定,不要强行映射
3. 一个 REDCap 字段可以有多个别名`;
  }

  /**
   * Extracts suggestions from one LLM reply.
   *
   * Entries are kept only when they name a field from `batch`, carry at least
   * one non-blank string alias, and report a confidence of at least
   * `MIN_CONFIDENCE`. A reply that cannot be parsed yields an empty list.
   */
  private parseMappingResponse(content: string, batch: FieldDefinition[]): MappingSuggestion[] {
    // First definition wins when a variable name appears more than once.
    const labelByField = new Map<string, string>();
    for (const field of batch) {
      if (!labelByField.has(field.variableName)) {
        labelByField.set(field.variableName, field.fieldLabel);
      }
    }

    try {
      const jsonMatch = content.match(/\{[\s\S]*\}/);
      const parsed: unknown = JSON.parse(jsonMatch?.[0] || '{"mappings":[]}');
      if (typeof parsed !== 'object' || parsed === null) return [];

      const mappings = (parsed as { mappings?: unknown }).mappings;
      if (!Array.isArray(mappings)) return [];

      const suggestions: MappingSuggestion[] = [];
      for (const item of mappings) {
        if (typeof item !== 'object' || item === null) continue;
        const { redcapField, suggestedAlias, confidence } = item as Record<string, unknown>;

        if (typeof redcapField !== 'string') continue;
        const redcapLabel = labelByField.get(redcapField);
        // The model occasionally invents field names; ignore anything not in the batch.
        if (redcapLabel === undefined) continue;

        if (!Array.isArray(suggestedAlias)) continue;
        const aliases = suggestedAlias.filter(
          (alias): alias is string => typeof alias === 'string' && alias.trim() !== ''
        );
        if (aliases.length === 0) continue;

        if (typeof confidence !== 'number' || !Number.isFinite(confidence)) continue;
        if (confidence < AutoMapperService.MIN_CONFIDENCE) continue;

        suggestions.push({
          redcapField,
          redcapLabel,
          suggestedAlias: aliases,
          confidence,
          status: 'pending'
        });
      }
      return suggestions;
    } catch (e) {
      console.error('[AutoMapper] LLM 返回解析失败', e);
      return [];
    }
  }

  /**
   * Persists the confirmed mappings, one upsert per alias.
   *
   * @param projectId     Project the mappings belong to.
   * @param confirmations Review decisions; entries with `confirmed: false` are skipped.
   * @returns `created`: aliases written (inserted or updated by the upsert);
   *          `skipped`: confirmations that were not confirmed;
   *          `failed`: aliases whose write threw (each failure is also logged).
   */
  async confirmMappings(
    projectId: string,
    confirmations: Array<{
      redcapField: string;
      aliases: string[];
      confirmed: boolean;
    }>
  ): Promise<{ created: number; skipped: number; failed: number }> {
    let created = 0;
    let skipped = 0;
    let failed = 0;

    for (const conf of confirmations) {
      if (!conf.confirmed) {
        skipped++;
        continue;
      }

      for (const alias of conf.aliases) {
        try {
          await prisma.iitFieldMapping.upsert({
            where: {
              projectId_aliasName: { projectId, aliasName: alias }
            },
            create: {
              projectId,
              aliasName: alias,
              actualName: conf.redcapField
            },
            update: {
              actualName: conf.redcapField
            }
          });
          created++;
        } catch (e) {
          // One bad alias must not abort the rest of the batch, but the caller
          // still needs to know that something was not saved.
          failed++;
          console.error(`[AutoMapper] 创建映射失败: ${alias} -> ${conf.redcapField}`, e);
        }
      }
    }

    return { created, skipped, failed };
  }

  /**
   * One-step import: parse the dictionary, then generate suggestions for
   * review. Nothing is persisted here; confirmation happens via
   * `confirmMappings`.
   *
   * @param projectId   Project the dictionary belongs to.
   * @param fileContent Raw dictionary text.
   * @param format      `'csv'` or `'json'`.
   * @returns The suggestions and a summary message.
   */
  async autoImport(
    projectId: string,
    fileContent: string,
    format: 'csv' | 'json'
  ): Promise<{
    suggestions: MappingSuggestion[];
    message: string;
  }> {
    // 1. Parse the Data Dictionary
    const fields = await this.parseDataDictionary(fileContent, format);
    console.log(`[AutoMapper] 解析到 ${fields.length} 个字段`);

    // 2. Generate LLM suggestions
    const suggestions = await this.generateMappingSuggestions(projectId, fields);
    console.log(`[AutoMapper] 生成 ${suggestions.length} 个映射建议`);

    return {
      suggestions,
      message: `已解析 ${fields.length} 个 REDCap 字段,生成 ${suggestions.length} 个映射建议,请在管理后台确认。`
    };
  }
}
|
||||
```
|
||||
|
||||
### 2.6.3 管理后台 API
|
||||
|
||||
```typescript
|
||||
// routes/autoMapperRoutes.ts
|
||||
|
||||
// POST /auto-mapper/import
// Parses an uploaded Data Dictionary and returns LLM mapping suggestions.
// Responds 400 on malformed input and 500 when the import itself fails, so a
// rejected promise never escapes the handler and leaves the request hanging.
router.post('/auto-mapper/import', async (req, res) => {
  const { projectId, fileContent, format } = req.body ?? {};

  if (
    typeof projectId !== 'string' ||
    typeof fileContent !== 'string' ||
    (format !== 'csv' && format !== 'json')
  ) {
    res.status(400).json({
      success: false,
      message: '参数无效:projectId 与 fileContent 必须为字符串,format 必须为 csv 或 json'
    });
    return;
  }

  try {
    const result = await autoMapperService.autoImport(projectId, fileContent, format);

    res.json({
      success: true,
      suggestions: result.suggestions,
      message: result.message
    });
  } catch (e) {
    console.error('[AutoMapper] 导入失败', e);
    res.status(500).json({ success: false, message: 'Data Dictionary 导入失败' });
  }
});
|
||||
|
||||
// POST /auto-mapper/confirm
// Persists the mappings an administrator confirmed.
// Responds 400 on malformed input and 500 when saving fails, so a rejected
// promise never escapes the handler and leaves the request hanging.
router.post('/auto-mapper/confirm', async (req, res) => {
  const { projectId, confirmations } = req.body ?? {};

  if (typeof projectId !== 'string' || !Array.isArray(confirmations)) {
    res.status(400).json({
      success: false,
      message: '参数无效:projectId 必须为字符串,confirmations 必须为数组'
    });
    return;
  }

  try {
    const result = await autoMapperService.confirmMappings(projectId, confirmations);

    res.json({
      success: true,
      created: result.created,
      skipped: result.skipped,
      message: `已创建 ${result.created} 个映射,跳过 ${result.skipped} 个`
    });
  } catch (e) {
    console.error('[AutoMapper] 确认映射失败', e);
    res.status(500).json({ success: false, message: '确认映射失败' });
  }
});
|
||||
```
|
||||
|
||||
### 2.6.4 效率对比
|
||||
|
||||
| 配置方式 | 100 个字段耗时 | 准确率 |
|
||||
|----------|---------------|--------|
|
||||
| 手动逐条配置 | 2-4 小时 | 100%(人工保证) |
|
||||
| LLM 猜测 + 人工确认 | 15-30 分钟 | 95%(LLM猜测)→ 100%(人工确认) |
|
||||
|
||||
---
|
||||
|
||||
## 3. IntentService - 意图识别
|
||||
|
||||
> **文件路径**: `backend/src/modules/iit-manager/services/IntentService.ts`
|
||||
|
||||
Reference in New Issue
Block a user