Files
AIclinicalresearch/r-statistics-service/tests/run_all_tools_test.js
HaHafeng 3446909ff7 feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development
Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-22 18:53:39 +08:00

300 lines
12 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
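/**
 * SSA R statistics engine — end-to-end test of all tools
 *
 * Coverage: 12 statistical tools + JIT guardrails + report_blocks protocol validation
 *
 * How to run:
 *   node r-statistics-service/tests/run_all_tools_test.js
 *
 * Prerequisite: the R service container is already running (docker-compose up -d)
 */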
const http = require('http');
const fs = require('fs');
const path = require('path');
// Base URL of the R statistics service; the R_SERVICE_URL environment variable overrides the default.
const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';
// Value passed as the `timeout` option to the http.request / http.get calls below (milliseconds).
const TIMEOUT = 60000;
// ==================== HTTP ====================
/**
 * Send a JSON POST request to the R service and resolve with the reply.
 *
 * @param {string} endpoint - Path (optionally with a query string), resolved against R_URL.
 * @param {*} body - Value serialised with JSON.stringify and sent as the request body.
 * @returns {Promise<{status: number, body: *}>} HTTP status plus the response body:
 *   the parsed JSON value, or the raw text when the body is not valid JSON.
 *   Rejects on request/response stream errors and with Error('timeout') when
 *   the socket stays idle longer than TIMEOUT.
 */
function post(endpoint, body) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const payload = JSON.stringify(body);
    const options = {
      hostname: url.hostname,
      port: url.port,
      // Keep the query string: url.pathname alone would silently drop it.
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
      },
      timeout: TIMEOUT,
    };
    const req = http.request(options, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ status: res.statusCode, body: data }); }
      });
    });
    req.on('error', reject);
    // The 'timeout' event only notifies; the request has to be destroyed by hand.
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
    req.write(payload);
    req.end();
  });
}
/**
 * Send a GET request to the R service and resolve with the reply.
 *
 * @param {string} endpoint - Path resolved against R_URL.
 * @returns {Promise<{status: number, body: *}>} HTTP status plus the response body:
 *   the parsed JSON value, or the raw text when the body is not valid JSON.
 *   Rejects on request/response stream errors and with Error('timeout') when
 *   the socket stays idle longer than TIMEOUT.
 */
function get(endpoint) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const req = http.get(url, { timeout: TIMEOUT }, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ status: res.statusCode, body: data }); }
      });
    });
    req.on('error', reject);
    // The `timeout` option only makes the request emit 'timeout'; without this
    // handler a stalled service would leave the promise pending forever.
    // Same handling as post().
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
  });
}
// ==================== 测试数据 ====================
/**
 * Read a comma-separated file and return one plain object per data row.
 *
 * The first non-blank line supplies the column names. Each cell becomes:
 *   - null   when it is missing, empty, or whitespace-only,
 *   - Number when Number(cell) is not NaN,
 *   - the original string otherwise.
 *
 * Cells are split on every comma; quoted fields containing commas are not supported.
 *
 * @param {string} [csvPath] - File to read. Defaults to the shared test.csv under docs/.
 * @returns {Array<Object<string, (number|string|null)>>} Parsed rows (empty array for an empty file).
 */
function loadCSV(csvPath = path.join(__dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv')) {
  const text = fs.readFileSync(csvPath, 'utf-8').trim();
  // Accept both LF and CRLF line endings; a bare '\n' split would leave '\r'
  // glued to the last header name and to the last cell of every row.
  // Blank lines are skipped instead of producing all-null rows.
  const lines = text.split(/\r?\n/).filter((line) => line.trim() !== '');
  if (lines.length === 0) return [];
  const headers = lines[0].split(',');
  return lines.slice(1).map((line) => {
    const vals = line.split(',');
    const row = {};
    headers.forEach((h, i) => {
      const v = vals[i];
      // Number(' ') is 0, so whitespace-only cells must be caught before the numeric test.
      if (v === undefined || v.trim() === '') row[h] = null;
      else if (!Number.isNaN(Number(v))) row[h] = Number(v);
      else row[h] = v;
    });
    return row;
  });
}
/**
 * Read a JSON fixture located in this script's directory and return the parsed value.
 *
 * @param {string} name - File name, joined onto __dirname.
 * @returns {*} Whatever JSON.parse produces for the file's UTF-8 text.
 */
function loadJSON(name) {
  const fixturePath = path.join(__dirname, name);
  const rawText = fs.readFileSync(fixturePath, 'utf-8');
  return JSON.parse(rawText);
}
// ==================== 校验 ====================
/**
 * Check a `report_blocks` payload against the block shapes this script expects.
 *
 * Accepted block types and their required fields:
 *   - markdown:  truthy `content`
 *   - table:     array `headers` and array `rows`
 *   - image:     truthy `data`
 *   - key_value: array `items`
 *
 * @param {*} blocks - The `report_blocks` value returned by a tool.
 * @param {string} toolName - Not used by the checks; kept so existing callers stay valid.
 * @returns {string[]} Human-readable issues; an empty array means the payload is valid.
 */
function validateBlocks(blocks, toolName) {
  if (!Array.isArray(blocks)) return ['report_blocks 不是数组'];
  if (blocks.length === 0) return ['report_blocks 为空'];
  const validTypes = ['markdown', 'table', 'image', 'key_value'];
  const issues = [];
  blocks.forEach((b, i) => {
    // A null/primitive entry used to throw on `b.type`; report it as an issue instead.
    if (b === null || typeof b !== 'object') {
      issues.push(`blocks[${i}] 不是对象`);
      return;
    }
    if (!validTypes.includes(b.type)) issues.push(`blocks[${i}].type 非法: ${b.type}`);
    if (b.type === 'table') {
      if (!Array.isArray(b.headers)) issues.push(`blocks[${i}] table 缺少 headers`);
      if (!Array.isArray(b.rows)) issues.push(`blocks[${i}] table 缺少 rows`);
    }
    if (b.type === 'markdown' && !b.content) issues.push(`blocks[${i}] markdown 缺少 content`);
    if (b.type === 'image' && !b.data) issues.push(`blocks[${i}] image 缺少 data`);
    if (b.type === 'key_value' && !Array.isArray(b.items)) issues.push(`blocks[${i}] key_value 缺少 items`);
  });
  return issues;
}
// ==================== 主测试 ====================
/**
 * Run the whole suite against the R service and exit with a status code.
 *
 * Steps: health check (exits 1 when the service is unreachable or answers with
 * a non-2xx status), best-effort tool listing, the twelve tool calls, four JIT
 * guardrail calls, then a summary. The process exits with 1 when any result is
 * `fail` or `error`, otherwise 0. `blocked` results are counted separately and
 * do not affect the exit code.
 *
 * @returns {Promise<void>} Never resolves normally; the function ends in process.exit().
 */
async function main() {
  console.log('\n╔══════════════════════════════════════════════════════════╗');
  console.log('║ SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT) ║');
  console.log('║ ' + new Date().toISOString().slice(0, 19) + ' ║');
  console.log('╚══════════════════════════════════════════════════════════╝\n');

  // 0. Health check
  try {
    const h = await get('/health');
    // A reply alone is not proof of health: treat any non-2xx status as unavailable.
    if (h.status < 200 || h.status >= 300) throw new Error(`HTTP ${h.status}`);
    const toolsLoaded = h.body.tools_loaded || 0;
    console.log(`✅ 健康检查通过 version=${h.body.version} tools_loaded=${toolsLoaded} dev_mode=${h.body.dev_mode}\n`);
  } catch (e) {
    console.log(`❌ R 服务不可用: ${e.message}\n`);
    process.exit(1);
  }

  // 0.1 Tool list (informational only, so a failure is reported and then skipped)
  try {
    const tl = await get('/api/v1/tools');
    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
  } catch (e) {
    console.log(`⚠️ 工具列表获取失败(已跳过): ${e.message}\n`);
  }

  const csvData = loadCSV();
  const ds = { type: 'inline', data: csvData };
  // One entry per executed case: { name, status, ms?, error?, ... }.
  const results = [];

  /**
   * Call one tool endpoint, print a one-line outcome and record it in `results`.
   *
   * @param {string} name - Display name of the case.
   * @param {string} toolCode - Tool code appended to /api/v1/skills/.
   * @param {Object} body - Request body posted to the tool.
   * @param {function(Object): Object} [checks] - Extracts extra fields to print from a successful reply.
   */
  async function run(name, toolCode, body, checks) {
    const t0 = Date.now();
    try {
      const res = await post(`/api/v1/skills/${toolCode}`, body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        const blockIssues = validateBlocks(d.report_blocks, toolCode);
        const extra = checks ? checks(d) : {};
        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
        const hasCode = !!d.reproducible_code;
        const blocksOk = blockIssues.length === 0;
        const icon = blocksOk ? '✅' : '⚠️';
        console.log(`${icon} ${name} (${ms}ms) blocks=${(d.report_blocks||[]).length} plots=${hasPlots?'✓':'✗'} code=${hasCode?'✓':'✗'} ${JSON.stringify(extra)}`);
        if (!blocksOk) blockIssues.forEach(iss => console.log(`${iss}`));
        // Block-protocol issues are shown as a warning; the case still counts as a pass.
        results.push({ name, status: 'pass', ms, blocksOk, extra });
      } else if (d.status === 'blocked') {
        console.log(`🔒 ${name} (${ms}ms) status=blocked message=${d.message}`);
        results.push({ name, status: 'blocked', ms });
      } else {
        console.log(`${name} (${ms}ms) error=${d.error_code||''} ${d.message||''}`);
        results.push({ name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${name} EXCEPTION: ${e.message}`);
      results.push({ name, status: 'error', error: e.message });
    }
  }

  // ========== Phase 2A tools (the original 7) ==========
  console.log('─'.repeat(60));
  console.log(' Phase 2A 工具(原有 7 个)');
  console.log('─'.repeat(60));
  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
    data_source: ds,
    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));
  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'age' },
    guardrails: { check_normality: true }
  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));
  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'bmi' }
  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));
  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
    data_source: ds,
    params: { var1: 'sex', var2: 'smoke' }
  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));
  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
    data_source: ds,
    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));
  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
    data_source: ds,
    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));
  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
    data_source: ds,
    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
    guardrails: { check_normality: true }
  }, d => ({ p: d.results?.p_value_fmt }));

  // ========== Phase Deploy tools (5 new ones, driven by JSON fixtures) ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' Phase Deploy 新工具5 个)');
  console.log('─'.repeat(60));
  const fisherData = loadJSON('test_fisher.json');
  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
    fisherData,
    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));
  const anovaData = loadJSON('test_anova_one.json');
  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
    anovaData,
    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));
  const wilcoxData = loadJSON('test_wilcoxon.json');
  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
    wilcoxData,
    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));
  const linearData = loadJSON('test_linear_reg.json');
  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
    linearData,
    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));
  const baselineData = loadJSON('test_baseline_table.json');
  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
    baselineData,
    d => ({
      sig_vars: d.results?.significant_vars?.length || 0,
      methods: d.results?.method_info?.length || 0,
      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
    }));

  // ========== JIT guardrails ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' JIT 护栏检查');
  console.log('─'.repeat(60));
  const jitTests = [
    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
  ];
  for (const jt of jitTests) {
    const t0 = Date.now();
    try {
      const res = await post('/api/v1/guardrails/jit', jt.body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        console.log(`${jt.name} (${ms}ms) checks=${d.checks?.length} all_passed=${d.all_checks_passed} suggested=${d.suggested_tool || 'none'}`);
        results.push({ name: jt.name, status: 'pass', ms });
      } else {
        console.log(`${jt.name} (${ms}ms) ${d.message || ''}`);
        // Record the message so the summary does not fall back to 'unknown'.
        results.push({ name: jt.name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${jt.name} EXCEPTION: ${e.message}`);
      results.push({ name: jt.name, status: 'error', error: e.message });
    }
  }

  // ========== Summary ==========
  console.log('\n' + '═'.repeat(60));
  console.log(' 测试汇总');
  console.log('═'.repeat(60));
  const failed = results.filter(r => r.status === 'fail' || r.status === 'error');
  const pass = results.filter(r => r.status === 'pass').length;
  const blocked = results.filter(r => r.status === 'blocked').length;
  const fail = failed.length;
  const total = results.length;
  console.log(` 通过: ${pass}/${total} 阻塞: ${blocked} 失败: ${fail}`);
  if (fail > 0) {
    console.log('\n 失败项:');
    failed.forEach(r => {
      console.log(`${r.name}: ${r.error || 'unknown'}`);
    });
  }
  // Average only over cases that actually recorded a duration (0 ms included);
  // when every case threw before timing, print N/A instead of NaN.
  const timed = results.filter(r => typeof r.ms === 'number');
  const avgText = timed.length > 0
    ? `${Math.round(timed.reduce((s, r) => s + r.ms, 0) / timed.length)}ms`
    : 'N/A';
  console.log(`\n 平均响应时间: ${avgText}`);
  console.log('═'.repeat(60));
  if (fail === 0) {
    console.log('🎉 全部测试通过R 统计引擎 12 工具 + JIT 护栏就绪。\n');
  } else {
    console.log('⚠️ 存在失败项,请检查 R 服务日志。\n');
  }
  process.exit(fail > 0 ? 1 : 0);
}
// Entry point: start the suite; a rejection that escapes main() is logged and the process exits with code 1.
main().catch(e => { console.error('测试脚本异常:', e); process.exit(1); });