AIclinicalresearch/backend/tests/e2e-p1-chat-test.ts

/**
 * P1 ChatOrchestrator E2E Test
 *
 * Tests the Lightweight ReAct architecture (Function Calling loop, max 3 rounds)
 * by sending 8 representative chat scenarios and validating responses.
 *
 * Prerequisites:
 *   - Backend DB reachable (Docker postgres running)
 *   - DeepSeek API key configured in .env
 *   - At least one active IIT project in DB; the project ID is read from the
 *     TEST_PROJECT_ID env var and defaults to 'test0102-pd-study'
 *
 * Run: npx tsx tests/e2e-p1-chat-test.ts
 */

import { getChatOrchestrator } from '../src/modules/iit-manager/services/ChatOrchestrator.js';
import { logger } from '../src/common/logging/index.js';

/** User identifier passed as the first argument of every `handleMessage` call made by this script. */
const TEST_USER = 'e2e-test-user';

/**
 * One chat scenario exercised by this script.
 */
interface TestCase {
  /** Number shown in the progress line and in the failure summary. */
  id: number;
  /** Human-readable note about the scenario; printed only, never asserted. */
  description: string;
  /** Message text sent to the orchestrator. */
  input: string;
  /** Returns `true` when the orchestrator's reply is acceptable for this scenario. */
  validate: (response: string) => boolean;
}

/** Text whose presence in a reply makes every validator below reject it. */
const SYSTEM_ERROR_MARKER = '系统处理出错';

/**
 * Builds one scenario whose validator accepts a reply only when it is strictly
 * longer than `minLength` characters and does not contain the error marker.
 */
function makeCase(id: number, input: string, description: string, minLength = 5): TestCase {
  return {
    id,
    input,
    description,
    validate: (reply) => reply.length > minLength && !reply.includes(SYSTEM_ERROR_MARKER),
  };
}

/**
 * The chat scenarios sent to the orchestrator, in execution order.
 *
 * NOTE: the "expects ..." part of each description is informational only; the
 * validators check reply length and the absence of the error marker, not which
 * tool was called or whether a request was refused.
 */
const testCases: TestCase[] = [
  // Case 1 is the only one that requires more than 10 characters; the rest require more than 5.
  makeCase(1, '最新质控报告怎么样', 'General QC report query → expects read_report(summary)', 10),
  makeCase(2, '有几条严重违规', 'Critical issues query → expects read_report(critical_issues)'),
  makeCase(3, '003 的数据', 'Patient data lookup → expects look_up_data(003)'),
  makeCase(4, '通过率比上周好了吗', 'Trend query → expects read_report(trend)'),
  makeCase(5, '帮我检查一下 005', 'On-demand QC → expects check_quality(005)'),
  makeCase(6, '入排标准是什么', 'Knowledge base search → expects search_knowledge'),
  makeCase(7, '项目整体怎么样', 'Project overview → expects read_report(summary)'),
  makeCase(8, '帮我修改 003 的数据', 'Data modification request → polite refusal, no tool call'),
];

/** Outcome recorded for one executed test case. */
interface CaseResult {
  id: number;
  desc: string;
  ok: boolean;
  /** Reply truncated to 200 characters; empty when the call threw. */
  response: string;
  /** Wall-clock time of the `handleMessage` call in milliseconds. */
  duration: number;
  /** Present only when the call (or the validator) threw. */
  error?: string;
}

/**
 * Turns a value caught in a `catch` clause into printable text.
 * Anything may be thrown in JavaScript, so non-Error values are stringified
 * instead of reading a `.message` property that may not exist.
 */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Runs every entry of `testCases` sequentially against a ChatOrchestrator,
 * prints per-case results and a summary, then terminates the process.
 *
 * Exit code is 1 when initialization fails or any case fails/throws, else 0.
 * This function never returns normally because it always calls `process.exit`.
 */
async function runTests(): Promise<void> {
  const total = testCases.length;

  console.log('='.repeat(60));
  console.log('  P1 ChatOrchestrator E2E Test');
  console.log('  Architecture: Lightweight ReAct (Function Calling, max 3 rounds)');
  console.log('='.repeat(60));

  let orchestrator;
  try {
    console.log('\n🔧 Initializing ChatOrchestrator...');
    // `||` rather than `??` so an empty TEST_PROJECT_ID also falls back to the default.
    const testProjectId = process.env.TEST_PROJECT_ID || 'test0102-pd-study';
    orchestrator = await getChatOrchestrator(testProjectId);
    console.log('✅ ChatOrchestrator initialized successfully\n');
  } catch (error: unknown) {
    console.error('❌ Failed to initialize ChatOrchestrator:', describeError(error));
    console.error('   Make sure DB is running and there is an active IIT project.');
    process.exit(1);
  }

  let passCount = 0;
  let failCount = 0;
  const results: CaseResult[] = [];

  // Cases run one at a time, in list order, all under the same TEST_USER.
  for (const tc of testCases) {
    // The denominator follows the list length so it stays right when cases are added or removed.
    console.log(`\n📝 [${tc.id}/${total}] ${tc.description}`);
    console.log(`   Input: "${tc.input}"`);

    const start = Date.now();
    try {
      const response = await orchestrator.handleMessage(TEST_USER, tc.input);
      const duration = Date.now() - start;

      const ok = tc.validate(response);
      if (ok) {
        passCount++;
        console.log(`   ✅ PASS (${duration}ms)`);
      } else {
        failCount++;
        console.log(`   ❌ FAIL (${duration}ms) — validation failed`);
      }
      console.log(`   Response: ${response.substring(0, 150)}${response.length > 150 ? '...' : ''}`);

      results.push({ id: tc.id, desc: tc.description, ok, response: response.substring(0, 200), duration });
    } catch (error: unknown) {
      // Reached when handleMessage rejects or when validate/substring throws on an unexpected reply.
      const duration = Date.now() - start;
      const message = describeError(error);
      failCount++;
      console.log(`   ❌ ERROR (${duration}ms) — ${message}`);
      results.push({ id: tc.id, desc: tc.description, ok: false, response: '', duration, error: message });
    }
  }

  // Summary
  console.log('\n' + '='.repeat(60));
  console.log('  RESULTS');
  console.log('='.repeat(60));
  console.log(`\n  Total:  ${total}`);
  console.log(`  Pass:   ${passCount}`);
  console.log(`  Fail:   ${failCount}`);

  // Guard both divisions so an empty case list prints 0 instead of NaN.
  const passRate = total > 0 ? (passCount / total) * 100 : 0;
  console.log(`  Rate:   ${passRate.toFixed(0)}%`);

  const totalDuration = results.reduce((sum, r) => sum + r.duration, 0);
  const avgDuration = results.length > 0 ? totalDuration / results.length : 0;
  console.log(`  Avg RT: ${avgDuration.toFixed(0)}ms`);

  if (failCount > 0) {
    console.log('\n  Failed cases:');
    for (const r of results.filter((r) => !r.ok)) {
      console.log(`    - [${r.id}] ${r.desc}`);
      if (r.error) console.log(`      Error: ${r.error}`);
    }
  }

  console.log('\n' + '='.repeat(60));
  process.exit(failCount > 0 ? 1 : 0);
}

/**
 * Last-resort handler for a rejection escaping `runTests`: logs the reason
 * and terminates the process with exit code 1.
 */
function reportFatalAndExit(reason: unknown): void {
  console.error('Fatal error:', reason);
  process.exit(1);
}

// Script entry point: start the run as soon as the module is loaded.
runTests().catch(reportFatalAndExit);