Phase I - Session Blackboard + READ Layer: - SessionBlackboardService with Postgres-Only cache - DataProfileService for data overview generation - PicoInferenceService for LLM-driven PICO extraction - Frontend DataContextCard and VariableDictionaryPanel - E2E tests: 31/31 passed Phase II - Conversation Layer LLM + Intent Router: - ConversationService with SSE streaming - IntentRouterService (rule-first + LLM fallback, 6 intents) - SystemPromptService with 6-segment dynamic assembly - TokenTruncationService for context management - ChatHandlerService as unified chat entry - Frontend SSAChatPane and useSSAChat hook - E2E tests: 38/38 passed Phase III - Method Consultation + AskUser Standardization: - ToolRegistryService with Repository Pattern - MethodConsultService with DecisionTable + LLM enhancement - AskUserService with global interrupt handling - Frontend AskUserCard component - E2E tests: 13/13 passed Phase IV - Dialogue-Driven Analysis + QPER Integration: - ToolOrchestratorService (plan/execute/report) - analysis_plan SSE event for WorkflowPlan transmission - Dual-channel confirmation (ask_user card + workspace button) - PICO as optional hint for LLM parsing - E2E tests: 25/25 passed R Statistics Service: - 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon - Enhanced guardrails and block helpers - Comprehensive test suite (run_all_tools_test.js) Documentation: - Updated system status document (v5.9) - Updated SSA module status and development plan (v1.8) Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25) Co-authored-by: Cursor <cursoragent@cursor.com>
300 lines
12 KiB
JavaScript
300 lines
12 KiB
JavaScript
/**
 * SSA R 统计引擎 — 全工具端到端测试
 *
 * 覆盖范围:12 个统计工具 + JIT 护栏 + report_blocks 协议验证
 *
 * 运行方式:
 *   node r-statistics-service/tests/run_all_tools_test.js
 *
 * 前置条件:R 服务容器已启动(docker-compose up -d)
 */
|
||
|
||
const http = require('http');
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// Base URL of the R statistics service; the R_SERVICE_URL environment variable overrides the default.
const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';
|
||
// Timeout value handed to the HTTP client as its `timeout` option (milliseconds).
const TIMEOUT = 60000;
|
||
|
||
// ==================== HTTP ====================
|
||
|
||
/**
 * Send a JSON POST request to the R statistics service.
 *
 * @param {string} endpoint - Path (optionally with a query string), resolved against R_URL.
 * @param {*} body - Value serialized with JSON.stringify and sent as the request payload.
 * @returns {Promise<{status: number, body: *}>} Resolves with the HTTP status code and the
 *   response body: the parsed JSON when the body is valid JSON, otherwise the raw text.
 *   Rejects on request/response stream errors and when the request times out.
 */
function post(endpoint, body) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const payload = JSON.stringify(body);

    const options = {
      hostname: url.hostname,
      port: url.port,
      // Include the query string; url.pathname alone would silently drop it.
      path: `${url.pathname}${url.search}`,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
      },
      timeout: TIMEOUT,
    };

    const req = http.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is split
      // across two chunks is not turned into replacement characters.
      res.setEncoding('utf8');

      let text = '';
      res.on('data', (chunk) => {
        text += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        let parsed;
        try {
          parsed = JSON.parse(text);
        } catch {
          // Not JSON (e.g. an HTML error page): hand back the raw text.
          parsed = text;
        }
        resolve({ status: res.statusCode, body: parsed });
      });
    });

    req.on('error', reject);
    req.on('timeout', () => {
      // The 'timeout' event only signals inactivity; the request must be torn down manually.
      req.destroy();
      reject(new Error('timeout'));
    });

    req.write(payload);
    req.end();
  });
}
|
||
|
||
function get(endpoint) {
|
||
return new Promise((resolve, reject) => {
|
||
const url = new URL(endpoint, R_URL);
|
||
http.get(url, { timeout: TIMEOUT }, (res) => {
|
||
let data = '';
|
||
res.on('data', c => (data += c));
|
||
res.on('end', () => {
|
||
try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
|
||
catch { resolve({ status: res.statusCode, body: data }); }
|
||
});
|
||
}).on('error', reject);
|
||
});
|
||
}
|
||
|
||
// ==================== 测试数据 ====================
|
||
|
||
/**
 * Load a simple comma-separated file into an array of row objects.
 *
 * The first line supplies the column names. Cells are split on plain commas
 * (quoted fields are not supported). Each cell is converted as follows:
 *   - missing, empty or whitespace-only -> null
 *   - anything Number() can parse       -> number
 *   - everything else                   -> the original string
 *
 * @param {string} [csvPath] - File to read; defaults to the shared SSA test.csv fixture
 *   located relative to this script.
 * @returns {Array<Object<string, (number|string|null)>>} One object per data line.
 */
function loadCSV(
  csvPath = path.join(__dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv'),
) {
  // Split on \r?\n so files saved with Windows line endings do not leave a
  // trailing '\r' on the last header and the last cell of every row.
  const lines = fs.readFileSync(csvPath, 'utf-8').trim().split(/\r?\n/);
  const headers = lines[0].split(',');

  return lines.slice(1).map((line) => {
    const cells = line.split(',');
    const row = {};
    headers.forEach((header, idx) => {
      const cell = cells[idx];
      if (cell === undefined || cell.trim() === '') {
        // Number('   ') is 0, so blank cells must be caught before the numeric test.
        row[header] = null;
      } else if (!Number.isNaN(Number(cell))) {
        row[header] = Number(cell);
      } else {
        row[header] = cell;
      }
    });
    return row;
  });
}
|
||
|
||
/**
 * Read and parse a JSON fixture that lives next to this script.
 *
 * @param {string} name - File name, joined onto this script's directory.
 * @returns {*} The parsed JSON value.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 * @throws {SyntaxError} When the file is not valid JSON; the message names the file
 *   and the original error is attached as `cause`.
 */
function loadJSON(name) {
  const filePath = path.join(__dirname, name);
  // JSON.parse rejects a leading byte-order mark, which some editors add on save.
  const text = fs.readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '');
  try {
    return JSON.parse(text);
  } catch (err) {
    // Keep the SyntaxError type, but say which fixture is broken.
    throw new SyntaxError(`Invalid JSON in ${filePath}: ${err.message}`, { cause: err });
  }
}
|
||
|
||
// ==================== 校验 ====================
|
||
|
||
/**
 * Check a `report_blocks` payload against the block shapes this script expects.
 *
 * Rules applied:
 *   - `blocks` must be a non-empty array;
 *   - every entry must be an object whose `type` is one of
 *     markdown / table / image / key_value;
 *   - table needs array `headers` and `rows`; markdown needs a truthy `content`;
 *     image needs a truthy `data`; key_value needs an array `items`.
 *
 * @param {*} blocks - The `report_blocks` value returned by a tool.
 * @param {string} [toolName] - Currently unused; kept so existing call sites stay valid.
 * @returns {string[]} Human-readable problems; empty when the payload is valid.
 */
function validateBlocks(blocks, toolName) {
  if (!Array.isArray(blocks)) return ['report_blocks 不是数组'];
  if (blocks.length === 0) return ['report_blocks 为空'];

  const validTypes = ['markdown', 'table', 'image', 'key_value'];
  const issues = [];

  blocks.forEach((block, idx) => {
    // A null / primitive entry would otherwise throw on `.type` and abort the whole run.
    if (block === null || typeof block !== 'object') {
      issues.push(`blocks[${idx}] 不是对象`);
      return;
    }

    if (!validTypes.includes(block.type)) {
      issues.push(`blocks[${idx}].type 非法: ${block.type}`);
    }

    switch (block.type) {
      case 'table':
        if (!Array.isArray(block.headers)) issues.push(`blocks[${idx}] table 缺少 headers`);
        if (!Array.isArray(block.rows)) issues.push(`blocks[${idx}] table 缺少 rows`);
        break;
      case 'markdown':
        if (!block.content) issues.push(`blocks[${idx}] markdown 缺少 content`);
        break;
      case 'image':
        if (!block.data) issues.push(`blocks[${idx}] image 缺少 data`);
        break;
      case 'key_value':
        if (!Array.isArray(block.items)) issues.push(`blocks[${idx}] key_value 缺少 items`);
        break;
      default:
        break;
    }
  });

  return issues;
}
|
||
|
||
// ==================== 主测试 ====================
|
||
|
||
/**
 * Run the whole end-to-end suite against the R statistics service.
 *
 * Steps, in order:
 *   0. GET /health (exits with code 1 if the service is unreachable or unhealthy),
 *      then a best-effort GET /api/v1/tools listing;
 *   1. the seven original tools, fed with the shared CSV fixture;
 *   2. the five newer tools, each fed with its own JSON fixture;
 *   3. four JIT guardrail requests via POST /api/v1/guardrails/jit;
 *   4. a printed summary, then process.exit(1) if anything failed or errored,
 *      otherwise process.exit(0). "blocked" results are reported but do not fail the run.
 *
 * @returns {Promise<void>} In practice never resolves normally: the process exits first.
 */
async function main() {
  console.log('\n╔══════════════════════════════════════════════════════════╗');
  console.log('║ SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT) ║');
  console.log(`║ ${new Date().toISOString().slice(0, 19)} ║`);
  console.log('╚══════════════════════════════════════════════════════════╝\n');

  // 0. Health check
  let toolsLoaded = 0;
  try {
    const h = await get('/health');
    // A reachable socket is not enough: a non-2xx status or a non-JSON body
    // means the service is not actually healthy.
    if (h.status < 200 || h.status >= 300) {
      throw new Error(`HTTP ${h.status}`);
    }
    if (h.body === null || typeof h.body !== 'object') {
      throw new Error('health endpoint did not return JSON');
    }
    toolsLoaded = h.body.tools_loaded || 0;
    console.log(`✅ 健康检查通过 version=${h.body.version} tools_loaded=${toolsLoaded} dev_mode=${h.body.dev_mode}\n`);
  } catch (e) {
    console.log(`❌ R 服务不可用: ${e.message}\n`);
    process.exit(1);
  }

  // 0.1 Tool listing (informational only; a failure here must not stop the suite)
  try {
    const tl = await get('/api/v1/tools');
    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
  } catch (e) {
    console.log(`⚠️ 工具列表获取失败(已跳过): ${e.message}\n`);
  }

  const csvData = loadCSV();
  const ds = { type: 'inline', data: csvData };

  const results = [];

  /**
   * Call one skill endpoint, print a one-line verdict and record the outcome in `results`.
   *
   * @param {string} name - Label shown in the log and in the summary.
   * @param {string} toolCode - Skill code appended to /api/v1/skills/.
   * @param {object} body - Request payload.
   * @param {function(object): object} [checks] - Optional extractor whose return value
   *   is printed (as JSON) next to the verdict.
   */
  async function run(name, toolCode, body, checks) {
    const t0 = Date.now();
    try {
      const res = await post(`/api/v1/skills/${toolCode}`, body);
      const ms = Date.now() - t0;
      const d = res.body;

      if (d.status === 'success') {
        const blockIssues = validateBlocks(d.report_blocks, toolCode);
        const extra = checks ? checks(d) : {};
        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
        const hasCode = !!d.reproducible_code;
        const blocksOk = blockIssues.length === 0;
        const icon = blocksOk ? '✅' : '⚠️';
        console.log(`${icon} ${name} (${ms}ms) blocks=${(d.report_blocks||[]).length} plots=${hasPlots?'✓':'✗'} code=${hasCode?'✓':'✗'} ${JSON.stringify(extra)}`);
        if (!blocksOk) blockIssues.forEach(iss => console.log(` ⚠ ${iss}`));
        results.push({ name, status: 'pass', ms, blocksOk, extra });
      } else if (d.status === 'blocked') {
        console.log(`🔒 ${name} (${ms}ms) status=blocked message=${d.message}`);
        results.push({ name, status: 'blocked', ms });
      } else {
        console.log(`❌ ${name} (${ms}ms) error=${d.error_code||''} ${d.message||''}`);
        results.push({ name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`❌ ${name} EXCEPTION: ${e.message}`);
      results.push({ name, status: 'error', error: e.message });
    }
  }

  // ========== Phase 2A tools (the original 7) ==========
  console.log('─'.repeat(60));
  console.log(' Phase 2A 工具(原有 7 个)');
  console.log('─'.repeat(60));

  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
    data_source: ds,
    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));

  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'age' },
    guardrails: { check_normality: true }
  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'bmi' }
  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));

  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
    data_source: ds,
    params: { var1: 'sex', var2: 'smoke' }
  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
    data_source: ds,
    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
    data_source: ds,
    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));

  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
    data_source: ds,
    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
    guardrails: { check_normality: true }
  }, d => ({ p: d.results?.p_value_fmt }));

  // ========== Phase Deploy tools (5 new ones) ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' Phase Deploy 新工具(5 个)');
  console.log('─'.repeat(60));

  const fisherData = loadJSON('test_fisher.json');
  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
    fisherData,
    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));

  const anovaData = loadJSON('test_anova_one.json');
  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
    anovaData,
    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));

  const wilcoxData = loadJSON('test_wilcoxon.json');
  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
    wilcoxData,
    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));

  const linearData = loadJSON('test_linear_reg.json');
  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
    linearData,
    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));

  const baselineData = loadJSON('test_baseline_table.json');
  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
    baselineData,
    d => ({
      sig_vars: d.results?.significant_vars?.length || 0,
      methods: d.results?.method_info?.length || 0,
      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
    }));

  // ========== JIT guardrails ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' JIT 护栏检查');
  console.log('─'.repeat(60));

  const jitTests = [
    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
  ];

  for (const jt of jitTests) {
    const t0 = Date.now();
    try {
      const res = await post('/api/v1/guardrails/jit', jt.body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        console.log(`✅ ${jt.name} (${ms}ms) checks=${d.checks?.length} all_passed=${d.all_checks_passed} suggested=${d.suggested_tool || 'none'}`);
        results.push({ name: jt.name, status: 'pass', ms });
      } else {
        console.log(`❌ ${jt.name} (${ms}ms) ${d.message || ''}`);
        // Record the message so the summary does not print "unknown".
        results.push({ name: jt.name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`❌ ${jt.name} EXCEPTION: ${e.message}`);
      results.push({ name: jt.name, status: 'error', error: e.message });
    }
  }

  // ========== Summary ==========
  console.log('\n' + '═'.repeat(60));
  console.log(' 测试汇总');
  console.log('═'.repeat(60));

  const failures = results.filter(r => r.status === 'fail' || r.status === 'error');
  const pass = results.filter(r => r.status === 'pass').length;
  const blocked = results.filter(r => r.status === 'blocked').length;
  const fail = failures.length;
  const total = results.length;

  console.log(` 通过: ${pass}/${total} 阻塞: ${blocked} 失败: ${fail}`);
  if (fail > 0) {
    console.log('\n 失败项:');
    failures.forEach(r => {
      console.log(` ❌ ${r.name}: ${r.error || 'unknown'}`);
    });
  }

  // Only entries that completed an HTTP round trip carry `ms`; guard the division so
  // an all-exception run reports 0 instead of NaN, and keep genuine 0 ms samples.
  const timed = results.filter(r => typeof r.ms === 'number');
  const avgMs = timed.length > 0
    ? Math.round(timed.reduce((sum, r) => sum + r.ms, 0) / timed.length)
    : 0;
  console.log(`\n 平均响应时间: ${avgMs}ms`);
  console.log('═'.repeat(60));

  if (fail === 0) {
    console.log('🎉 全部测试通过!R 统计引擎 12 工具 + JIT 护栏就绪。\n');
  } else {
    console.log('⚠️ 存在失败项,请检查 R 服务日志。\n');
  }

  process.exit(fail > 0 ? 1 : 0);
}
|
||
|
||
// Script entry point: run the suite; a rejection from main() is logged and the process exits with code 1.
main().catch(e => { console.error('测试脚本异常:', e); process.exit(1); });
|