Files
AIclinicalresearch/backend/scripts/verify-llm-models.ts
HaHafeng beb7f7f559 feat(asl): Implement full-text screening core LLM service and validation system (Day 1-3)
Core Components:
- PDFStorageService with Dify/OSS adapters
- LLM12FieldsService with Nougat-first + dual-model + 3-layer JSON parsing
- PromptBuilder for dynamic prompt assembly
- MedicalLogicValidator with 5 rules + fault tolerance
- EvidenceChainValidator for citation integrity
- ConflictDetectionService for dual-model comparison

Prompt Engineering:
- System Prompt (6601 chars, Section-Aware strategy)
- User Prompt template (PICOS context injection)
- JSON Schema (12 fields constraints)
- Cochrane standards (not loaded in MVP)

Key Innovations:
- 3-layer JSON parsing (JSON.parse + json-repair + code block extraction)
- Promise.allSettled for dual-model fault tolerance
- safeGetFieldValue for robust field extraction
- Mixed CN/EN token calculation

Integration Tests:
- integration-test.ts (full test)
- quick-test.ts (quick test)
- cached-result-test.ts (fault tolerance test)

Documentation Updates:
- Development record (Day 2-3 summary)
- Quality assurance strategy (full-text screening)
- Development plan (progress update)
- Module status (v1.1 update)
- Technical debt (10 new items)

Test Results:
- JSON parsing success rate: 100%
- Medical logic validation: 5/5 passed
- Dual-model parallel processing: OK
- Cost per PDF: CNY 0.10

Files: 238 changed, 14383 insertions(+), 32 deletions(-)
Docs: docs/03-业务模块/ASL-AI智能文献/05-开发记录/2025-11-22_Day2-Day3_LLM服务与验证系统开发.md
2025-11-22 22:21:12 +08:00

106 lines
3.4 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * LLM model verification script.
 * Confirms which model version is actually wired up behind each configured
 * adapter by asking the model to identify itself.
 */
import { LLMFactory } from '../src/common/llm/adapters/LLMFactory.js';
import { logger } from '../src/common/logging/index.js';
// Self-identification prompt (Chinese): "Introduce yourself in one sentence,
// including your model name and version."
const TEST_PROMPT = "请用一句话简单介绍你自己,包括你的模型名称和版本。";
/**
 * Verify that a configured adapter actually talks to the expected model.
 *
 * Creates the adapter for `modelType`, sends a short self-identification
 * prompt, and compares the model name reported in the response against
 * `expectedModel`. All progress and results are printed to the console.
 *
 * @param modelType - Adapter key understood by LLMFactory (e.g. 'deepseek-v3').
 * @param expectedModel - Model identifier the provider is expected to report.
 * @returns true when the provider-reported model matches `expectedModel`;
 *          false on mismatch or on any error (errors are logged, not thrown).
 */
async function verifyModel(modelType: string, expectedModel: string): Promise<boolean> {
  console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`🔍 验证模型: ${modelType}`);
  console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  try {
    // NOTE(review): 'as any' bypasses the adapter-key union type. Widen this
    // function's parameter to LLMFactory's key type once it is exported,
    // then drop the cast.
    const adapter = LLMFactory.getAdapter(modelType as any);
    console.log(`✅ 适配器创建成功`);
    console.log(`   模型名称: ${adapter.modelName}`);
    console.log(`   期望模型: ${expectedModel}`);
    console.log(`   匹配状态: ${adapter.modelName === expectedModel ? '✅ 正确' : '❌ 不匹配'}`);

    console.log(`\n🚀 发送测试请求...`);
    const startTime = Date.now();
    const response = await adapter.chat([
      { role: 'user', content: TEST_PROMPT }
    ]);
    const duration = Date.now() - startTime;

    console.log(`\n📊 响应结果:`);
    console.log(`   实际返回模型: ${response.model}`);
    console.log(`   响应时间: ${duration}ms`);
    console.log(`   Token使用:`);
    console.log(`     - 输入: ${response.usage?.promptTokens || 0}`);
    console.log(`     - 输出: ${response.usage?.completionTokens || 0}`);
    console.log(`     - 总计: ${response.usage?.totalTokens || 0}`);
    console.log(`\n💬 模型回复:`);
    console.log(`   "${response.content}"`);

    // The pass/fail criterion: the model name the provider reports at runtime,
    // not the name configured on the adapter.
    if (response.model === expectedModel) {
      console.log(`\n✅ 验证通过!实际调用的就是 ${expectedModel}`);
      return true;
    } else {
      console.log(`\n⚠ 警告!期望 ${expectedModel},实际返回 ${response.model}`);
      return false;
    }
  } catch (error) {
    // Best-effort tool: report the failure and let main() aggregate results.
    console.error(`\n❌ 验证失败:`, error);
    return false;
  }
}
/**
 * Run verification for every configured model and print a pass/fail summary.
 *
 * Requests are issued sequentially with a 2s pause between models to avoid
 * provider rate limits. The pause is skipped after the last model so the
 * script does not waste time idling before the summary.
 */
async function main(): Promise<void> {
  console.log('\n🔬 ASL模块LLM模型验证工具');
  console.log('='.repeat(60));
  console.log('用途: 验证实际接入的模型版本是否正确\n');

  // Adapter key, the model id the provider should report, and a display label.
  const models = [
    { type: 'deepseek-v3', expected: 'deepseek-chat', description: 'DeepSeek-V3' },
    { type: 'qwen3-72b', expected: 'qwen-max', description: 'Qwen最新最强模型' },
  ];

  const results: { model: string; passed: boolean }[] = [];
  for (const [index, model] of models.entries()) {
    const passed = await verifyModel(model.type, model.expected);
    results.push({ model: model.description, passed });
    // Throttle between requests to avoid API rate limiting; no delay needed
    // after the final model.
    if (index < models.length - 1) {
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  }

  // Summary
  console.log('\n\n' + '='.repeat(60));
  console.log('📊 验证总结');
  console.log('='.repeat(60));
  results.forEach(r => {
    console.log(`${r.passed ? '✅' : '❌'} ${r.model}: ${r.passed ? '通过' : '未通过'}`);
  });

  const allPassed = results.every(r => r.passed);
  if (allPassed) {
    console.log('\n🎉 所有模型验证通过!');
  } else {
    console.log('\n⚠ 部分模型验证未通过,请检查配置!');
  }
  console.log('='.repeat(60) + '\n');
}
// Entry point: surface any unhandled rejection from the verification run.
main().catch(console.error);