Summary:
- Migrate PostgreSQL to pgvector/pgvector:pg15 Docker image
- Successfully install and verify pgvector 0.8.1 extension
- Create comprehensive Dify-to-pgvector migration plan
- Update PKB module documentation with pgvector status
- Update system documentation with pgvector integration

Key changes:
- docker-compose.yml: Switch to pgvector/pgvector:pg15 image
- Add EkbDocument and EkbChunk data model design
- Design R-C-R-G hybrid retrieval architecture
- Add clinical data JSONB fields (pico, studyDesign, regimen, safety, criteria, endpoints)
- Create detailed 10-day implementation roadmap

Documentation updates:
- PKB module status: pgvector RAG infrastructure ready
- System status: pgvector 0.8.1 integrated
- New: Dify replacement development plan (01-Dify替换为pgvector开发计划.md)
- New: Enterprise medical knowledge base solution V2

Tested: PostgreSQL with pgvector verified, frontend and backend functionality confirmed
377 lines
11 KiB
JavaScript
377 lines
11 KiB
JavaScript
/**
 * Tool C - advanced scenario tests (including multiple imputation)
 *
 * Exercises complex data-cleaning scenarios:
 * - Multi-condition filtering + grouped statistics
 * - Time-series calculations
 * - Multiple imputation
 * - Complex classification logic
 * - Data exploration (no code generation)
 *
 * Run with: node test-tool-c-advanced-scenarios.mjs
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

import FormData from 'form-data';
import fetch from 'node-fetch';

// ES modules do not provide the CommonJS __filename/__dirname bindings, so both
// are derived from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ==================== Configuration ====================

// Base URL that every request in this script is sent to.
const API_BASE = 'http://localhost:3000/api/v1/dc/tool-c';
// Spreadsheet that uploadFile() sends to the server; it has to be prepared beforehand.
const TEST_FILE = path.join(__dirname, 'uploads', 'test_data_advanced.xlsx');

// ==================== Colored output ====================

// ANSI escape sequences, keyed by the color names that log() accepts.
const colors = {
  reset: '\x1b[0m',
  bright: '\x1b[1m',
  red: '\x1b[31m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  magenta: '\x1b[35m',
  cyan: '\x1b[36m',
};

/**
 * Print a message with console.log, wrapped in an ANSI color sequence and
 * followed by the reset sequence.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of `colors`. A key that is not an own
 *   property of `colors` falls back to `reset`, so a typo in the color name no
 *   longer prints the literal text "undefined" in front of the message.
 * @returns {void}
 */
function log(message, color = 'reset') {
  // Own-property check so inherited names such as "toString" are not treated as colors.
  const prefix = Object.prototype.hasOwnProperty.call(colors, color)
    ? colors[color]
    : colors.reset;
  console.log(`${prefix}${message}${colors.reset}`);
}

// ==================== Advanced test scenarios ====================

/**
 * Scenarios that runAdvancedTests() executes in array order.
 *
 * Fields of each entry:
 * - id, name, description: printed in the scenario banner by testScenario().
 * - message: the natural-language request sent to the API as the user message.
 * - expectedFeatures: constructs the generated code is expected to contain.
 *   Informational only; nothing in this file reads it.
 * - multipleOutputs: marks scenarios that ask for several imputed datasets.
 *   Informational only; nothing in this file reads it.
 */
const advancedScenarios = [
  {
    id: 1,
    name: '多条件筛选+分组统计',
    message: '筛选出年龄≥18岁、性别为女、BMI≥28的患者,按年龄段(18-30, 30-50, 50+)分组统计人数',
    description: '测试复杂的多条件筛选和分组统计功能',
    expectedFeatures: ['条件筛选', 'BMI过滤', '年龄分组', 'value_counts'],
  },
  {
    id: 2,
    name: '时间序列计算',
    message: '计算每位患者的首次就诊日期和最近就诊日期,以及总就诊次数',
    description: '测试日期时间处理和分组聚合',
    expectedFeatures: ['pd.to_datetime', 'groupby', 'agg', '多列聚合'],
  },
  {
    id: 3,
    name: '多重插补(Multiple Imputation)- 基础版',
    message: '对年龄列的缺失值进行多重插补:生成5个插补数据集,每个数据集用不同的随机种子填补缺失值(基于正态分布)',
    description: '测试统计学中的多重插补方法(基础实现)',
    expectedFeatures: ['np.random.seed', 'np.random.normal', '循环生成多个数据集'],
    multipleOutputs: true,
  },
  {
    id: 4,
    name: '多重插补(MICE算法模拟)',
    message: '对年龄、BMI、血压三列进行链式方程多重插补(MICE):先用中位数初始填补,再迭代3轮,每轮用其他列预测填补当前列',
    description: '测试高级多重插补算法(MICE迭代填补)',
    expectedFeatures: ['多轮迭代', 'fillna', '分列填补', '链式方程'],
  },
  {
    id: 5,
    name: '复杂分类逻辑',
    message: '根据BMI和血压综合判断健康等级:BMI<24且收缩压<140为健康,BMI 24-28或收缩压140-160为预警,其他为危险',
    description: '测试多条件嵌套逻辑判断',
    expectedFeatures: ['np.where', '嵌套条件', '多变量判断'],
  },
  {
    id: 6,
    name: '数据探索(不生成代码)',
    message: '性别列有多少缺失值?年龄列的平均值是多少?BMI列的中位数是多少?',
    description: '测试数据探索问答能力(应直接回答,不生成代码)',
    expectedFeatures: ['直接回答', '不生成代码'],
  },
  {
    id: 7,
    name: '分层多重插补',
    message: '对年龄列的缺失值进行分层多重插补:按性别分组,男性用男性年龄均值填补,女性用女性年龄均值填补,生成3个插补数据集',
    description: '测试分层多重插补(考虑分组)',
    expectedFeatures: ['groupby', 'transform', '分组填补', '多次生成'],
    multipleOutputs: true,
  },
  {
    id: 8,
    name: '缺失模式分析',
    message: '分析数据集的缺失模式:统计每一列的缺失率,找出缺失率超过20%的列,并标记哪些行缺失值超过3个',
    description: '测试缺失值分析功能',
    expectedFeatures: ['isna()', 'sum()', 'mean()', '条件判断'],
  },
];

// ==================== Helpers ====================

/**
 * Upload the test spreadsheet to open a session on the server.
 *
 * Sends TEST_FILE as the multipart field `file` in a POST to
 * `${API_BASE}/sessions/upload` and prints the outcome with log().
 *
 * @returns {Promise<*>} `result.data.sessionId` from the JSON response, or
 *   `null` when the test file does not exist, the server answers with a
 *   non-2xx status, the body is not valid JSON, or the response does not
 *   report `success`.
 */
async function uploadFile() {
  log('\n📤 步骤1: 上传测试文件...', 'cyan');

  if (!fs.existsSync(TEST_FILE)) {
    log(`❌ 测试文件不存在: ${TEST_FILE}`, 'red');
    log('💡 提示:请创建测试数据文件,包含以下列:', 'yellow');
    log(' - patient_id(患者ID)', 'yellow');
    log(' - gender(性别:男/女)', 'yellow');
    log(' - age(年龄,带缺失值)', 'yellow');
    log(' - BMI(带缺失值)', 'yellow');
    log(' - systolic_bp(收缩压,带缺失值)', 'yellow');
    log(' - visit_date(就诊日期)', 'yellow');
    return null;
  }

  const formData = new FormData();
  formData.append('file', fs.createReadStream(TEST_FILE));

  const response = await fetch(`${API_BASE}/sessions/upload`, {
    method: 'POST',
    body: formData,
  });

  // Read the body as text and parse it here: an error page (HTML or empty
  // body) must be reported as a failed upload, not surface as a JSON
  // SyntaxError thrown out of this function.
  const rawBody = await response.text();
  let result;
  try {
    result = JSON.parse(rawBody);
  } catch {
    log(`❌ 上传失败: HTTP ${response.status}(响应不是有效的 JSON)`, 'red');
    return null;
  }

  if (!response.ok || !result?.success) {
    log(`❌ 上传失败: ${result?.error ?? `HTTP ${response.status}`}`, 'red');
    return null;
  }

  log(`✅ 上传成功: Session ID = ${result.data.sessionId}`, 'green');
  log(` 文件: ${result.data.fileName}`, 'bright');
  log(` 数据: ${result.data.totalRows} 行 × ${result.data.totalCols} 列`, 'bright');
  return result.data.sessionId;
}

/**
 * Run one scenario against the streaming endpoint and print its progress.
 *
 * POSTs the scenario message to `${API_BASE}/ai/stream-process` and reads the
 * response body as an SSE stream: each line starting with `data: ` carries
 * either a JSON step object or the literal terminator `[DONE]`.
 *
 * @param {*} sessionId - Session identifier forwarded in the request body.
 * @param {{id: *, name: string, description: string, message: string}} scenario
 *   Scenario to run; only these four fields are read here.
 * @returns {Promise<{success: boolean, steps?: number, error?: string}>}
 *   `success` is false when the request throws, the HTTP status is not 2xx,
 *   or the last step received has `status === 'failed'`. `steps` is the number
 *   of step events that were parsed.
 */
async function testScenario(sessionId, scenario) {
  log(`\n${'='.repeat(80)}`, 'magenta');
  log(`📋 场景${scenario.id}: ${scenario.name}`, 'bright');
  log(`📝 描述: ${scenario.description}`, 'cyan');
  log(`💬 用户输入: "${scenario.message}"`, 'yellow');
  log(`${'='.repeat(80)}`, 'magenta');

  const statusIcons = new Map([
    ['success', '✅'],
    ['failed', '❌'],
    ['retrying', '🔄'],
  ]);

  try {
    const startTime = Date.now();

    // Call the streaming API.
    const response = await fetch(`${API_BASE}/ai/stream-process`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        sessionId,
        message: scenario.message,
        maxRetries: 3,
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    log('\n📡 流式响应:', 'cyan');
    let stepCount = 0;
    let lastStep = null;

    // Handle one complete line of the stream.
    const handleLine = (rawLine) => {
      // Tolerate CRLF line endings.
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
      if (!line.startsWith('data: ')) {
        return;
      }
      const data = line.slice(6);

      if (data === '[DONE]') {
        const duration = ((Date.now() - startTime) / 1000).toFixed(2);
        log(`\n✅ 场景${scenario.id}完成 (耗时: ${duration}秒)`, 'green');

        // Summarize the outcome of the last step.
        if (lastStep?.status === 'success') {
          log(` ✓ 执行成功`, 'green');
          if (lastStep.retryCount > 0) {
            log(` ⚠️ 重试次数: ${lastStep.retryCount + 1}`, 'yellow');
          }
        } else if (lastStep?.status === 'failed') {
          log(` ✗ 执行失败: ${lastStep.error}`, 'red');
        }
        return;
      }

      // Only the parse is best-effort: payloads that are not JSON are skipped.
      // Errors in the display code below are no longer swallowed with them.
      let step;
      try {
        step = JSON.parse(data);
      } catch {
        return;
      }
      if (step === null || typeof step !== 'object') {
        return;
      }

      stepCount += 1;
      lastStep = step;

      // Show the step (condensed).
      const icon = statusIcons.get(step.status) ?? '⏳';
      log(` ${icon} Step ${step.step}: ${step.message}`, 'bright');

      // Show the generated code.
      if (step.stepName === 'show_code' && step.data?.code) {
        log(`\n📝 生成的代码:`, 'cyan');
        console.log(step.data.code);
        log(`\n💡 解释: ${step.data.explanation}\n`, 'yellow');
      }

      // Show the error, truncated to 100 characters. Non-string errors are
      // serialized first; the ellipsis is added only when text was cut off.
      if (step.error) {
        const errorText = typeof step.error === 'string' ? step.error : JSON.stringify(step.error);
        const shown = errorText.length > 100 ? `${errorText.substring(0, 100)}...` : errorText;
        log(` ⚠️ 错误: ${shown}`, 'red');
      }
    };

    // Decode incrementally: a multi-byte UTF-8 character may be split across
    // two chunks, and decoding each chunk on its own would corrupt it.
    const decoder = new TextDecoder('utf-8');
    let buffer = '';

    for await (const chunk of response.body) {
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or ''); keep it for the next chunk.
      buffer = lines.pop() ?? '';
      lines.forEach(handleLine);
    }

    // Flush the decoder and process a final line that had no trailing newline.
    buffer += decoder.decode();
    if (buffer !== '') {
      handleLine(buffer);
    }

    // A scenario whose last step failed must not be reported as passed.
    if (lastStep?.status === 'failed') {
      const reason = typeof lastStep.error === 'string'
        ? lastStep.error
        : JSON.stringify(lastStep.error) ?? 'unknown error';
      return { success: false, steps: stepCount, error: reason };
    }

    return { success: true, steps: stepCount };
  } catch (error) {
    log(`\n❌ 场景${scenario.id}失败: ${error.message}`, 'red');
    return { success: false, error: error.message };
  }
}

// ==================== Main test routine ====================

/**
 * Run the whole suite: upload the test file, execute every entry of
 * `advancedScenarios` one after another, print a report, then try to download
 * the session export into `<this directory>/test-output/`.
 *
 * Sets `process.exitCode` to 1 when the upload fails or at least one scenario
 * fails, so shells and CI jobs can detect a failed run. An export failure is
 * only logged.
 *
 * @returns {Promise<void>}
 */
async function runAdvancedTests() {
  const rule = '='.repeat(80);

  log(`\n${rule}`, 'bright');
  log('🧪 工具C高级场景测试(含多重插补)', 'bright');
  log(`${rule}\n`, 'bright');

  // Step 1: upload the file.
  const sessionId = await uploadFile();
  if (!sessionId) {
    log('\n❌ 上传失败,测试终止', 'red');
    process.exitCode = 1;
    return;
  }

  // Step 2: run all scenarios sequentially.
  const results = [];
  for (const scenario of advancedScenarios) {
    const result = await testScenario(sessionId, scenario);
    results.push({ scenario: scenario.name, ...result });

    // Wait one second so requests are not sent too quickly.
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  // Step 3: print the report.
  log(`\n${rule}`, 'bright');
  log('📊 测试报告', 'bright');
  log(rule, 'bright');

  const successCount = results.filter((r) => r.success).length;
  const failCount = results.length - successCount;

  log(`\n✅ 成功: ${successCount}/${results.length}`, 'green');
  log(`❌ 失败: ${failCount}/${results.length}`, failCount > 0 ? 'red' : 'green');

  log('\n📋 详细结果:', 'cyan');
  results.forEach((r, i) => {
    const icon = r.success ? '✅' : '❌';
    log(` ${icon} 场景${i + 1}: ${r.scenario}`, r.success ? 'green' : 'red');
  });

  if (failCount > 0) {
    process.exitCode = 1;
  }

  // Step 4: exercise the export endpoint.
  log('\n📥 测试导出功能...', 'cyan');
  try {
    const exportResponse = await fetch(`${API_BASE}/sessions/${sessionId}/export`);
    if (exportResponse.ok) {
      const buffer = await exportResponse.arrayBuffer();
      const exportPath = path.join(__dirname, 'test-output', `export_${Date.now()}.xlsx`);

      // With `recursive: true`, mkdirSync does not fail when the directory
      // already exists, so no separate existence check is needed.
      fs.mkdirSync(path.dirname(exportPath), { recursive: true });

      fs.writeFileSync(exportPath, Buffer.from(buffer));
      log(`✅ 导出成功: ${exportPath} (${(buffer.byteLength / 1024).toFixed(2)}KB)`, 'green');
    } else {
      log(`❌ 导出失败: HTTP ${exportResponse.status}`, 'red');
    }
  } catch (error) {
    log(`❌ 导出异常: ${error.message}`, 'red');
  }

  log(`\n${rule}`, 'bright');
  log('🎉 测试完成!', 'bright');
  log(`${rule}\n`, 'bright');
}

// ==================== Run the tests ====================

// Entry point: a rejection that escapes runAdvancedTests() is reported and
// ends the process with exit code 1.
runAdvancedTests().catch((error) => {
  log(`\n❌ 测试异常: ${error.message}`, 'red');
  console.error(error);
  process.exit(1);
});