feat(iit-manager): Integrate Dify knowledge base for hybrid retrieval

Completed features:
- Created Dify dataset (Dify_test0102) with 2 processed documents
- Linked test0102 project with Dify dataset ID
- Extended intent detection to recognize query_protocol intent
- Implemented queryDifyKnowledge method (semantic search Top 5)
- Integrated hybrid retrieval (REDCap data + Dify documents)
- Fixed AI hallucination bugs (intent detection + API field path)
- Developed debugging scripts
- Completed end-to-end testing (5 scenarios passed)
- Generated comprehensive documentation (600+ lines)
- Updated development plans and module status

Technical highlights:
- One knowledge base per project (single-project, single-knowledge-base architecture)
- Smart routing based on user intent
- Prevent AI hallucination by injecting real data/documents
- Session memory for multi-turn conversations
- Reused LLMFactory for DeepSeek-V3 integration

Bug fixes:
- Fixed intent detection missing keywords
- Fixed Dify API response field path error

Testing: All scenarios verified in WeChat production environment

Status: Fully tested and deployed
This commit is contained in:
2026-01-04 15:44:11 +08:00
parent b47079b387
commit dfc472810b
162 changed files with 3093 additions and 62 deletions

View File

@@ -168,3 +168,4 @@ logger.info('[SessionMemory] 会话记忆管理器已启动', {
});

View File

@@ -0,0 +1,105 @@
/**
* 直接查询数据库中的iit_schema.projects表结构
*/
import { PrismaClient } from '@prisma/client';
const prisma = new PrismaClient();
/**
 * Diagnostic script: prints the column layout of `iit_schema.projects` and
 * the stored configuration of the test0102 project (REDCap project ID '16').
 *
 * Steps:
 *  1. Read column metadata from `information_schema.columns` and print it as
 *     a fixed-width table.
 *  2. Report every column whose name contains "dify".
 *  3. Print the test0102 row. `dify_dataset_id` is only selected when step 1
 *     found that column, so the script still prints the project on a schema
 *     that has not been migrated yet instead of failing on the raw query.
 *
 * Errors are logged and turn the process exit code non-zero; the Prisma
 * connection is closed in every case.
 */
async function checkTableStructure(): Promise<void> {
  // Shape of the rows selected from information_schema.columns below.
  type ColumnRow = {
    column_name: string;
    data_type: string;
    character_maximum_length: number | null;
    is_nullable: string;
    column_default: string | null;
  };
  // Project values are only interpolated into log lines, so they stay `unknown`.
  type ProjectRow = {
    id: unknown;
    name: unknown;
    redcap_project_id: unknown;
    redcap_url: unknown;
    dify_dataset_id?: unknown;
    status: unknown;
    created_at: unknown;
  };

  const ruler = '='.repeat(100);

  try {
    console.log('🔍 查询 iit_schema.projects 表结构...\n');

    // 1. Column metadata for the table, in declaration order.
    const columns = await prisma.$queryRaw<ColumnRow[]>`
      SELECT
        column_name,
        data_type,
        character_maximum_length,
        is_nullable,
        column_default
      FROM information_schema.columns
      WHERE table_schema = 'iit_schema'
        AND table_name = 'projects'
      ORDER BY ordinal_position
    `;

    console.log('📋 表结构:');
    console.log(ruler);
    console.log(
      'Column Name'.padEnd(30) +
        'Data Type'.padEnd(20) +
        'Nullable'.padEnd(12) +
        'Default'
    );
    console.log(ruler);

    for (const col of columns) {
      const colName = col.column_name.padEnd(30);
      // Append the length for character types, e.g. "character varying(255)".
      const dataType = (
        col.data_type +
        (col.character_maximum_length ? `(${col.character_maximum_length})` : '')
      ).padEnd(20);
      const nullable = (col.is_nullable === 'YES' ? 'YES' : 'NO').padEnd(12);
      const defaultVal = col.column_default ?? '';
      console.log(`${colName}${dataType}${nullable}${defaultVal}`);
    }

    console.log(ruler);
    console.log(`\n总计: ${columns.length} 个字段\n`);

    // 2. Look for Dify-related columns.
    const difyColumns = columns.filter(col =>
      col.column_name.toLowerCase().includes('dify')
    );
    if (difyColumns.length > 0) {
      console.log('✅ 找到Dify相关字段');
      for (const col of difyColumns) {
        console.log(` - ${col.column_name} (${col.data_type}, nullable: ${col.is_nullable})`);
      }
    } else {
      console.log('❌ 未找到Dify相关字段');
    }
    console.log('');

    // 3. Current data of the test0102 project.
    console.log('📊 查询test0102项目的当前配置...\n');

    // Selecting a missing column makes the whole raw query fail, so the
    // column is only requested when step 1 confirmed it exists.
    const hasDifyDatasetId = columns.some(col => col.column_name === 'dify_dataset_id');
    const projects = hasDifyDatasetId
      ? await prisma.$queryRaw<ProjectRow[]>`
          SELECT
            id,
            name,
            redcap_project_id,
            redcap_url,
            dify_dataset_id,
            status,
            created_at
          FROM iit_schema.projects
          WHERE redcap_project_id = '16'
        `
      : await prisma.$queryRaw<ProjectRow[]>`
          SELECT
            id,
            name,
            redcap_project_id,
            redcap_url,
            status,
            created_at
          FROM iit_schema.projects
          WHERE redcap_project_id = '16'
        `;

    const [project] = projects;
    if (project) {
      console.log('✅ test0102项目信息');
      console.log(` ID: ${project.id}`);
      console.log(` 名称: ${project.name}`);
      console.log(` REDCap项目ID: ${project.redcap_project_id}`);
      console.log(` REDCap URL: ${project.redcap_url}`);
      console.log(` Dify Dataset ID: ${project.dify_dataset_id || '(未设置)'}`);
      console.log(` 状态: ${project.status}`);
      console.log(` 创建时间: ${project.created_at}`);
    } else {
      console.log('❌ 未找到test0102项目');
    }
    console.log('');
  } catch (error: unknown) {
    console.error('❌ 查询失败:', error);
    // Let shells / CI see that the check did not complete.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}

// Run the check; the catch covers a rejection from $disconnect() in `finally`.
checkTableStructure().catch((error: unknown) => {
  console.error('❌ 查询失败:', error);
  process.exitCode = 1;
});

View File

@@ -89,3 +89,4 @@ async function checkProjectConfig() {
checkProjectConfig().catch(console.error);

View File

@@ -71,3 +71,4 @@ async function main() {
main();

View File

@@ -0,0 +1,75 @@
/**
* 将Dify知识库关联到test0102项目
* Dify Dataset ID: b49595b2-bf71-4e47-9988-4aa2816d3c6f
*/
import { PrismaClient } from '@prisma/client';
const prisma = new PrismaClient();
/**
 * One-off script: stores the Dify dataset ID on the test0102 project row.
 *
 * Looks the project up by its REDCap project ID ('16'), writes the dataset ID
 * into `difyDatasetId`, then prints the updated record and the follow-up
 * steps. A missing project is reported and the function returns without
 * writing. Any error is logged, and the Prisma connection is always closed.
 */
async function linkDifyToProject() {
  const targetDatasetId = 'b49595b2-bf71-4e47-9988-4aa2816d3c6f';
  // Emits each entry with its own console.log call, in order.
  const printLines = (lines: readonly string[]): void => {
    for (const line of lines) {
      console.log(line);
    }
  };

  try {
    console.log('🔗 开始关联Dify知识库到test0102项目...\n');

    // 1. Locate the test0102 project via its REDCap project ID.
    const existing = await prisma.iitProject.findFirst({
      where: { redcapProjectId: '16' },
    });
    if (!existing) {
      console.error('❌ 未找到test0102项目');
      return;
    }

    printLines([
      '✅ 找到test0102项目',
      ` 项目ID: ${existing.id}`,
      ` 项目名称: ${existing.name}`,
      ` REDCap项目ID: ${existing.redcapProjectId}`,
      ` 当前Dify Dataset ID: ${existing.difyDatasetId || '(未设置)'}`,
      '',
    ]);

    // 2. Persist the dataset ID on the project row.
    const linked = await prisma.iitProject.update({
      where: { id: existing.id },
      data: { difyDatasetId: targetDatasetId },
    });

    printLines([
      '✅ 成功关联Dify知识库',
      ` Dify Dataset ID: ${targetDatasetId}`,
      '',
    ]);

    // 3. Show what the update call returned so the change can be verified.
    const summary = {
      id: linked.id,
      name: linked.name,
      redcapProjectId: linked.redcapProjectId,
      difyDatasetId: linked.difyDatasetId,
      status: linked.status,
    };
    console.log('📋 验证更新后的项目配置:');
    console.log(JSON.stringify(summary, null, 2));

    printLines([
      '',
      '🎉 关联完成!',
      '',
      '📝 下一步:',
      ' 1. 在ChatService中集成Dify检索',
      ' 2. 测试AI对话能否查询研究方案文档',
      ' 3. 企业微信端验证混合检索REDCap数据 + Dify文档',
    ]);
  } catch (error) {
    console.error('❌ 操作失败:', error);
  } finally {
    await prisma.$disconnect();
  }
}

// Run the linking step.
linkDifyToProject();

View File

@@ -19,6 +19,7 @@ import { logger } from '../../../common/logging/index.js';
import { sessionMemory } from '../agents/SessionMemory.js';
import { PrismaClient } from '@prisma/client';
import { RedcapAdapter } from '../adapters/RedcapAdapter.js';
import { difyClient } from '../../../common/rag/DifyClient.js';
const prisma = new PrismaClient();
@@ -52,7 +53,7 @@ export class ChatService {
const { intent, params } = this.detectIntent(userMessage);
logger.info('[ChatService] 意图识别', { userId, intent, params });
// 3. 如果需要查询数据,先执行查询
// 3. 如果需要查询REDCap数据,先执行查询
let toolResult: any = null;
if (intent === 'query_record' && params?.recordId) {
toolResult = await this.queryRedcapRecord(params.recordId);
@@ -62,7 +63,13 @@ export class ChatService {
toolResult = await this.getProjectInfo();
}
// 4. 获取上下文最近2轮对话
// 4. 如果需要查询文档Dify知识库执行检索
let difyKnowledge: string = '';
if (intent === 'query_protocol') {
difyKnowledge = await this.queryDifyKnowledge(userMessage);
}
// 5. 获取上下文最近2轮对话
const context = sessionMemory.getContext(userId);
logger.info('[ChatService] 处理消息', {
@@ -70,18 +77,20 @@ export class ChatService {
messageLength: userMessage.length,
hasContext: !!context,
hasToolResult: !!toolResult,
hasDifyKnowledge: !!difyKnowledge,
intent,
});
// 5. 构建LLM消息包含查询结果
// 6. 构建LLM消息包含查询结果 + Dify知识库
const messages = this.buildMessagesWithData(
userMessage,
context,
toolResult,
difyKnowledge,
userId
);
// 6. 调用LLM复用通用能力层
// 7. 调用LLM复用通用能力层
const response = await this.llm.chat(messages, {
temperature: 0.7,
maxTokens: 500, // 企业微信建议控制输出长度
@@ -91,13 +100,14 @@ export class ChatService {
const aiResponse = response.content;
const duration = Date.now() - startTime;
// 7. 记录AI回复
// 8. 记录AI回复
sessionMemory.addMessage(userId, 'assistant', aiResponse);
logger.info('[ChatService] 对话完成', {
userId,
intent,
hasToolResult: !!toolResult,
hasDifyKnowledge: !!difyKnowledge,
duration: `${duration}ms`,
inputTokens: response.usage?.promptTokens,
outputTokens: response.usage?.completionTokens,
@@ -122,11 +132,17 @@ export class ChatService {
* 简单意图识别(基于关键词)
*/
private detectIntent(message: string): {
intent: 'query_record' | 'count_records' | 'project_info' | 'general_chat';
intent: 'query_record' | 'count_records' | 'project_info' | 'query_protocol' | 'general_chat';
params?: any;
} {
const lowerMessage = message.toLowerCase();
// 识别文档查询研究方案、伦理、CRF等
// 注意:包含"入选"(等同于"纳入"
if (/(研究方案|伦理|知情同意|CRF|病例报告表|纳入|入选|排除|标准|入组标准|治疗方案|试验设计|研究目的|研究流程|观察指标|诊断标准|疾病标准)/.test(message)) {
return { intent: 'query_protocol' };
}
// 识别记录查询包含ID号码
const recordIdMatch = message.match(/(?:ID|记录|患者|受试者).*?(\d+)|(\d+).*?(?:入组|数据|信息|情况)/i);
if (recordIdMatch) {
@@ -159,6 +175,7 @@ export class ChatService {
userMessage: string,
context: string,
toolResult: any,
difyKnowledge: string,
userId: string
): Message[] {
const messages: Message[] = [];
@@ -169,7 +186,7 @@ export class ChatService {
content: this.getSystemPromptWithData(userId)
});
// 2. 如果有工具查询结果注入到System消息
// 2. 如果有REDCap查询结果注入到System消息
if (toolResult) {
messages.push({
role: 'system',
@@ -177,7 +194,15 @@ export class ChatService {
});
}
// 3. 上下文
// 3. 如果有Dify知识库检索结果注入到System消息
if (difyKnowledge) {
messages.push({
role: 'system',
content: `【研究方案文档检索结果】\n${difyKnowledge}\n\n请基于以上文档内容回答用户问题。`
});
}
// 4. 上下文
if (context) {
messages.push({
role: 'system',
@@ -185,7 +210,7 @@ export class ChatService {
});
}
// 4. 用户消息
// 5. 用户消息
messages.push({
role: 'user',
content: userMessage
@@ -201,20 +226,21 @@ export class ChatService {
return `你是IIT Manager智能助手负责帮助PI管理临床研究项目。
【重要原则】
⚠️ 你**必须基于系统提供的REDCap查询结果**回答问题,**绝对不能编造数据**。
⚠️ 如果系统提供了查询结果,请使用这些真实数据;如果没有提供,明确告知用户需要查询REDCap
⚠️ 你**必须基于系统提供的数据和文档**回答问题,**绝对不能编造信息**。
⚠️ 如果系统提供了查询结果或文档内容,请使用这些真实信息;如果没有提供,明确告知用户。
【你的能力】
✅ 回答研究进展问题基于REDCap实时数据
✅ 查询患者记录详情
✅ 统计入组人数
✅ 提供项目信息
✅ 解答研究方案相关问题(基于知识库文档)
【回复原则】
1. **基于事实**:只使用系统提供的数据,不编造
1. **基于事实**:只使用系统提供的数据和文档,不编造
2. **简洁专业**控制在150字以内
3. **友好礼貌**:使用"您"称呼PI
4. **引导行动**如需更多信息建议登录REDCap系统
4. **引导行动**:如需更多详细信息,建议查看完整文档或登录REDCap系统
【当前用户】
- 企业微信UserID: ${userId}
@@ -367,6 +393,75 @@ export class ChatService {
}
}
/**
 * Retrieves study-protocol snippets from the Dify knowledge base for a user
 * question and formats them for injection into the LLM prompt.
 *
 * The dataset is taken from the first project whose status is 'active'.
 * NOTE(review): `findFirst` has no ordering and is not tied to the calling
 * user, so with more than one active project the choice is arbitrary; confirm
 * this matches the intended one-knowledge-base-per-project setup.
 *
 * @param query - The user's message, passed to Dify as the search query.
 * @returns One formatted section per retrieved segment (document name,
 *   relevance, content), or an empty string when no active project or dataset
 *   is configured, nothing usable was retrieved, or the lookup failed. This
 *   method never throws, so a knowledge-base outage does not break the chat
 *   flow.
 */
private async queryDifyKnowledge(query: string): Promise<string> {
  try {
    // 1. Load the project configuration (includes difyDatasetId).
    const project = await prisma.iitProject.findFirst({
      where: { status: 'active' },
      select: {
        name: true,
        difyDatasetId: true,
      }
    });

    if (!project) {
      logger.warn('[ChatService] 未找到活跃项目');
      return '';
    }
    if (!project.difyDatasetId) {
      logger.warn('[ChatService] 项目未配置Dify知识库');
      return '';
    }

    // 2. Call the Dify retrieval API.
    const retrievalResult = await difyClient.retrieveKnowledge(
      project.difyDatasetId,
      query,
      {
        retrieval_model: {
          search_method: 'semantic_search',
          top_k: 5, // Retrieve the 5 most relevant segments.
        }
      }
    );

    // 3. Keep only segments that carry text. An empty segment would still add
    //    a "[文档N]" header to the prompt and mark the turn as having
    //    knowledge-base context although there is nothing to ground on.
    const usableRecords = (retrievalResult.records ?? []).filter(
      record => (record.segment?.content ?? '').trim() !== ''
    );
    if (usableRecords.length === 0) {
      logger.info('[ChatService] Dify未检索到相关文档');
      return '';
    }

    const formattedKnowledge = usableRecords
      .map((record, index) => {
        // Guard against a missing score so the prompt never shows "NaN%".
        const score =
          typeof record.score === 'number' && Number.isFinite(record.score)
            ? `${(record.score * 100).toFixed(1)}%`
            : 'N/A';
        const documentName = record.segment?.document?.name || '未知文档';
        const content = record.segment?.content || '';
        return `\n[文档${index + 1}] ${documentName} (相关度: ${score})\n${content}\n---\n`;
      })
      .join('');

    logger.info('[ChatService] Dify检索成功', {
      query,
      recordCount: usableRecords.length,
      projectName: project.name,
    });

    return formattedKnowledge;
  } catch (error: unknown) {
    logger.error('[ChatService] Dify查询失败', {
      query,
      error: error instanceof Error ? error.message : String(error)
    });
    return ''; // Best effort: a failed lookup must not break the main flow.
  }
}
/**
* 清除用户会话(用于重置对话)
*/

View File

@@ -0,0 +1,128 @@
/**
* 测试ChatService的Dify知识库集成
*
* 测试场景:
* 1. 询问研究方案相关问题触发Dify检索
* 2. 询问患者数据触发REDCap查询
* 3. 混合查询(同时涉及文档和数据)
*/
import { ChatService } from './services/ChatService.js';
const chatService = new ChatService();
/**
 * Manual end-to-end check of ChatService's knowledge-base integration.
 *
 * Sends a fixed list of questions through `chatService.handleMessage` under
 * one test user ID and prints each answer. Scenarios run sequentially with a
 * 2-second pause between consecutive ones. A failing scenario is logged and
 * the run continues; if any scenario failed, the process exit code is set to
 * 1 so the failure is visible to the caller.
 */
async function testDifyIntegration(): Promise<void> {
  const PAUSE_BETWEEN_SCENARIOS_MS = 2000;
  const testUserId = 'FengZhiBo';
  const banner = '='.repeat(80);
  const separator = '-'.repeat(80);

  // Each scenario is a console heading plus the question sent to the service.
  const scenarios: ReadonlyArray<{ title: string; question: string }> = [
    // Protocol question: expected to trigger Dify retrieval.
    {
      title: '📝 测试1询问研究的纳入排除标准应触发Dify检索',
      question: '这个研究的排除标准是什么?',
    },
    // CRF question: expected to trigger Dify retrieval.
    {
      title: '📝 测试2询问CRF表格内容应触发Dify检索',
      question: 'CRF表格中有哪些观察指标',
    },
    // Patient record lookup: expected to trigger a REDCap query.
    {
      title: '📊 测试3询问患者记录应触发REDCap查询',
      question: '查询一下ID 7的患者情况',
    },
    // Labelled "mixed", but the question only contains protocol wording, so
    // it presumably exercises the Dify path again (verify against the intent
    // routing in ChatService).
    {
      title: '🔀 测试4混合查询询问研究目的',
      question: '这个研究的主要研究目的是什么?',
    },
    // Enrollment count: expected to trigger a REDCap query.
    {
      title: '📈 测试5统计查询应触发REDCap查询',
      question: '目前有多少位患者入组?',
    },
  ];

  console.log(banner);
  console.log('🧪 测试ChatService的Dify知识库集成');
  console.log(banner);
  console.log('');

  let failedCount = 0;

  for (const [index, scenario] of scenarios.entries()) {
    // Pause between scenarios (not before the first one).
    if (index > 0) {
      await new Promise(resolve => setTimeout(resolve, PAUSE_BETWEEN_SCENARIOS_MS));
    }

    console.log(scenario.title);
    console.log(separator);
    try {
      const answer = await chatService.handleMessage(testUserId, scenario.question);
      console.log('✅ AI回答:');
      console.log(answer);
      console.log('');
    } catch (error: unknown) {
      failedCount += 1;
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 测试${index + 1}失败:`, message);
      console.log('');
    }
  }

  console.log(banner);
  console.log('✅ 测试完成!');
  console.log(banner);
  console.log('');
  console.log('📝 测试总结:');
  console.log(' - Dify知识库检索研究方案、CRF');
  console.log(' - REDCap数据查询患者记录、统计');
  console.log(' - 上下文记忆SessionMemory');
  console.log('');
  console.log('🚀 下一步:企业微信端到端测试');

  if (failedCount > 0) {
    // Without this the script would exit 0 even though scenarios failed.
    console.error(`❌ ${failedCount} 个测试失败`);
    process.exitCode = 1;
  }
}

// Run the scenarios; anything thrown outside a scenario aborts the script.
testDifyIntegration().catch((error: unknown) => {
  console.error('❌ 测试脚本执行失败:', error);
  process.exit(1);
});

View File

@@ -0,0 +1,104 @@
/**
* 测试脚本查询Dify中的知识库
* 目标:找到手动创建的 Dify_test0102 知识库
*/
import { difyClient } from '../../common/rag/DifyClient.js';
/**
 * Debug script: lists the Dify knowledge bases, locates the one named
 * "Dify_test0102", prints its documents and runs a sample retrieval.
 *
 * Only the first page of datasets (up to 100) and the first 20 documents are
 * requested, so larger installations may need paging.
 *
 * Retrieved records are read via `record.segment.content` and
 * `record.segment.document.name`, the same field path ChatService uses for
 * this API; reading `record.content` / `record.document_name` directly
 * yields `undefined`.
 *
 * Errors are logged (including the HTTP response body when the error carries
 * one) and are not rethrown.
 */
async function queryDifyKnowledgeBases(): Promise<void> {
  try {
    console.log('🔍 开始查询Dify知识库列表...\n');

    // 1. Fetch the knowledge-base list.
    const datasets = await difyClient.getDatasets(1, 100);
    console.log(`✅ 成功获取知识库列表,共 ${datasets.total}\n`);
    console.log('📚 知识库列表:\n');

    // 2. Print every knowledge base.
    datasets.data.forEach((dataset, index) => {
      console.log(`${index + 1}. ${dataset.name}`);
      console.log(` ID: ${dataset.id}`);
      console.log(` 描述: ${dataset.description || '(无)'}`);
      console.log(` 文档数: ${dataset.document_count}`);
      console.log(` 字数: ${dataset.word_count}`);
      console.log(` 索引技术: ${dataset.indexing_technique}`);
      // created_at is multiplied by 1000, i.e. treated as seconds since epoch.
      console.log(` 创建时间: ${new Date(dataset.created_at * 1000).toLocaleString('zh-CN')}`);
      console.log('');
    });

    // 3. Find Dify_test0102.
    const targetDataset = datasets.data.find(d => d.name === 'Dify_test0102');
    if (!targetDataset) {
      console.log('❌ 未找到名为 "Dify_test0102" 的知识库');
      console.log('');
      console.log('可用的知识库名称:');
      datasets.data.forEach(d => console.log(` - ${d.name}`));
      return;
    }

    console.log('🎯 找到目标知识库Dify_test0102');
    console.log(` Dataset ID: ${targetDataset.id}`);
    console.log(` 文档数量: ${targetDataset.document_count}`);
    console.log('');

    // 4. List the documents of the target knowledge base.
    console.log('📄 正在查询文档列表...\n');
    const documents = await difyClient.getDocuments(targetDataset.id, 1, 20);
    console.log(`✅ 该知识库包含 ${documents.total} 个文档:\n`);
    documents.data.forEach((doc, index) => {
      console.log(`${index + 1}. ${doc.name}`);
      console.log(` 文档ID: ${doc.id}`);
      console.log(` 状态: ${doc.indexing_status}`);
      console.log(` Token数: ${doc.tokens}`);
      console.log(` 字数: ${doc.word_count}`);
      console.log(` 创建时间: ${new Date(doc.created_at * 1000).toLocaleString('zh-CN')}`);
      console.log('');
    });

    // 5. Exercise retrieval with a sample question.
    console.log('🔍 测试知识库检索功能...\n');
    const testQuery = '研究的主要目的是什么';
    console.log(`查询问题: "${testQuery}"\n`);
    const retrievalResults = await difyClient.retrieveKnowledge(
      targetDataset.id,
      testQuery,
      {
        retrieval_model: {
          search_method: 'semantic_search',
          top_k: 5,
        },
      }
    );

    const records = retrievalResults.records ?? [];
    console.log(`✅ 检索到 ${records.length} 个相关片段:\n`);
    records.forEach((record, index) => {
      // Guard against a missing score so the preview never shows "NaN%".
      const score =
        typeof record.score === 'number' && Number.isFinite(record.score)
          ? `${(record.score * 100).toFixed(1)}%`
          : 'N/A';
      // Text and document name live under `segment`, not on the record itself.
      const content = record.segment?.content ?? '';
      const documentName = record.segment?.document?.name ?? '未知文档';
      const preview = content.substring(0, 100).replace(/\n/g, ' ');
      console.log(`${index + 1}. [相关度: ${score}] ${documentName}`);
      console.log(` 内容预览: ${preview}${content.length > 100 ? '...' : ''}`);
      console.log('');
    });

    // 6. Print what has to be stored on the project record.
    console.log('📝 下一步操作:');
    console.log(`将以下信息更新到 test0102 项目的数据库记录中:`);
    console.log(` dify_dataset_id: "${targetDataset.id}"`);
    console.log(` dify_enabled: true`);
    console.log('');
  } catch (error: unknown) {
    console.error('❌ 查询失败:', error);
    // HTTP client errors may carry the response body; print it when present.
    const response =
      typeof error === 'object' && error !== null && 'response' in error
        ? (error as { response?: { data?: unknown } }).response
        : undefined;
    if (response?.data) {
      console.error('错误详情:', response.data);
    }
  }
}

// Run the query; the function handles its own errors.
void queryDifyKnowledgeBases();

View File

@@ -154,3 +154,4 @@ testIitDatabase()

View File

@@ -247,3 +247,4 @@ main().catch((error) => {
process.exit(1);
});

View File

@@ -224,3 +224,4 @@ export interface CachedProtocolRules {