Files
AIclinicalresearch/backend/tests/e2e-p1-chat-test.ts
HaHafeng 7c3cc12b2e feat(iit): Complete CRA Agent V3.0 P1 - ChatOrchestrator with LLM Function Calling
P1 Architecture: Lightweight ReAct (Function Calling loop, max 3 rounds)

Core changes:
- Add ToolDefinition/ToolCall types to LLM adapters (DeepSeek + CloseAI + Claude)
- Replace 6 old tools with 4 semantic tools: read_report, look_up_data, check_quality, search_knowledge
- Create ChatOrchestrator (~160 lines) replacing ChatService (1,442 lines)
- Wire WechatCallbackController to ChatOrchestrator, deprecate ChatService
- Fix nullable content (string | null) across 12+ LLM consumer files

E2E test results: 8/8 scenarios passed (100%)
- QC report query, critical issues, patient data, trend, on-demand QC
- Knowledge base search, project overview, data modification refusal

Net code reduction: ~1,100 lines
Tested: E2E P1 chat test 8/8 passed with DeepSeek API

Made-with: Cursor
2026-02-26 14:27:09 +08:00

155 lines
5.0 KiB
TypeScript

/**
* P1 ChatOrchestrator E2E Test
*
* Tests the Lightweight ReAct architecture (Function Calling loop, max 3 rounds)
* by sending 8 representative chat scenarios and validating responses.
*
* Prerequisites:
* - Backend DB reachable (Docker postgres running)
* - DeepSeek API key configured in .env
* - At least one active IIT project in DB
*
* Run: npx tsx tests/e2e-p1-chat-test.ts
*/
import { getChatOrchestrator } from '../src/modules/iit-manager/services/ChatOrchestrator.js';
import { logger } from '../src/common/logging/index.js';
/** User identifier passed as the first argument to `handleMessage` for every scenario. */
const TEST_USER = 'e2e-test-user';
/**
 * One chat scenario exercised by this script: the message to send and the
 * predicate that decides whether the reply counts as a pass.
 */
interface TestCase {
  /** Scenario number shown in the progress line and in the failure list. */
  id: number;

  /** Message text sent to the orchestrator. */
  input: string;

  /** Human-readable label printed when the scenario runs. */
  description: string;

  /** Returns `true` when the reply text is considered a pass. */
  validate: (response: string) => boolean;
}
/** Text whose presence in a reply marks the scenario as failed. */
const ERROR_MARKER = '系统处理出错';

/**
 * Builds a reply validator: the reply passes when it is strictly longer than
 * `minLength` characters and does not contain the error marker.
 */
const acceptsReply =
  (minLength: number) =>
  (reply: string): boolean => {
    // Length is checked first so evaluation order matches a plain `a && b`.
    if (!(reply.length > minLength)) {
      return false;
    }
    return !reply.includes(ERROR_MARKER);
  };

/**
 * The chat scenarios run by this script, in execution order. The first
 * scenario requires a reply longer than 10 characters; all others require a
 * reply longer than 5 characters.
 */
const testCases: TestCase[] = [
  {
    id: 1,
    input: '最新质控报告怎么样',
    description: 'General QC report query → expects read_report(summary)',
    validate: acceptsReply(10),
  },
  {
    id: 2,
    input: '有几条严重违规',
    description: 'Critical issues query → expects read_report(critical_issues)',
    validate: acceptsReply(5),
  },
  {
    id: 3,
    input: '003 的数据',
    description: 'Patient data lookup → expects look_up_data(003)',
    validate: acceptsReply(5),
  },
  {
    id: 4,
    input: '通过率比上周好了吗',
    description: 'Trend query → expects read_report(trend)',
    validate: acceptsReply(5),
  },
  {
    id: 5,
    input: '帮我检查一下 005',
    description: 'On-demand QC → expects check_quality(005)',
    validate: acceptsReply(5),
  },
  {
    id: 6,
    input: '入排标准是什么',
    description: 'Knowledge base search → expects search_knowledge',
    validate: acceptsReply(5),
  },
  {
    id: 7,
    input: '项目整体怎么样',
    description: 'Project overview → expects read_report(summary)',
    validate: acceptsReply(5),
  },
  {
    id: 8,
    input: '帮我修改 003 的数据',
    description: 'Data modification request → polite refusal, no tool call',
    validate: acceptsReply(5),
  },
];
/**
 * Turns a caught value of unknown type into printable text.
 * Falls back to `String(err)` when the value carries no `message`.
 */
function describeError(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'object' && err !== null && 'message' in err) {
    return String((err as { message: unknown }).message);
  }
  return String(err);
}

/**
 * Runs every scenario in `testCases` one after another against the
 * ChatOrchestrator, prints a per-scenario result followed by a summary, and
 * then terminates the process.
 *
 * Exit code is 0 when no scenario failed, and 1 when at least one scenario
 * failed or the orchestrator could not be initialized.
 */
async function runTests(): Promise<void> {
  type CaseResult = {
    id: number;
    desc: string;
    ok: boolean;
    response: string;
    duration: number;
    error?: string;
  };

  const banner = '='.repeat(60);
  const total = testCases.length;

  console.log(banner);
  console.log(' P1 ChatOrchestrator E2E Test');
  console.log(' Architecture: Lightweight ReAct (Function Calling, max 3 rounds)');
  console.log(banner);

  let orchestrator;
  try {
    console.log('\n🔧 Initializing ChatOrchestrator...');
    orchestrator = await getChatOrchestrator();
    console.log('✅ ChatOrchestrator initialized successfully\n');
  } catch (error: unknown) {
    console.error('❌ Failed to initialize ChatOrchestrator:', describeError(error));
    console.error(' Make sure DB is running and there is an active IIT project.');
    process.exit(1);
  }

  const results: CaseResult[] = [];

  // Scenarios are awaited one at a time, in declaration order.
  for (const tc of testCases) {
    // Denominator follows the actual suite size instead of a hard-coded 8.
    console.log(`\n📝 [${tc.id}/${total}] ${tc.description}`);
    console.log(` Input: "${tc.input}"`);

    const start = Date.now();
    try {
      const response = await orchestrator.handleMessage(TEST_USER, tc.input);
      const duration = Date.now() - start;
      const ok = tc.validate(response);

      if (ok) {
        console.log(` ✅ PASS (${duration}ms)`);
      } else {
        console.log(` ❌ FAIL (${duration}ms) — validation failed`);
      }
      console.log(` Response: ${response.substring(0, 150)}${response.length > 150 ? '...' : ''}`);

      results.push({ id: tc.id, desc: tc.description, ok, response: response.substring(0, 200), duration });
    } catch (error: unknown) {
      // A thrown scenario (including a validator that throws) counts as a failure.
      const duration = Date.now() - start;
      const message = describeError(error);
      console.log(` ❌ ERROR (${duration}ms) — ${message}`);
      results.push({ id: tc.id, desc: tc.description, ok: false, response: '', duration, error: message });
    }
  }

  const failures = results.filter((r) => !r.ok);
  const failCount = failures.length;
  const passCount = results.length - failCount;

  // Guard the divisions so an empty suite prints 0 rather than NaN.
  const passRate = total > 0 ? (passCount / total) * 100 : 0;
  const totalDuration = results.reduce((sum, r) => sum + r.duration, 0);
  const avgDuration = results.length > 0 ? totalDuration / results.length : 0;

  // Summary
  console.log('\n' + banner);
  console.log(' RESULTS');
  console.log(banner);
  console.log(`\n Total: ${total}`);
  console.log(` Pass: ${passCount}`);
  console.log(` Fail: ${failCount}`);
  console.log(` Rate: ${passRate.toFixed(0)}%`);
  console.log(` Avg RT: ${avgDuration.toFixed(0)}ms`);

  if (failCount > 0) {
    console.log('\n Failed cases:');
    for (const r of failures) {
      console.log(` - [${r.id}] ${r.desc}`);
      if (r.error) console.log(` Error: ${r.error}`);
    }
  }

  console.log('\n' + banner);
  process.exit(failCount > 0 ? 1 : 0);
}
// Script entry point. Any rejection that escapes runTests is reported and
// turned into a non-zero exit code. `void` marks the promise as intentionally
// not awaited.
void runTests().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});