Major Changes: - Database: Install pg_bigm/pgvector plugins, create test database - Python service: v1.0 -> v1.1, add pymupdf4llm/openpyxl/pypandoc - Node.js backend: v1.3 -> v1.7, fix pino-pretty and ES Module imports - Frontend: v1.2 -> v1.3, skip TypeScript check for deployment - Code recovery: Restore empty files from local backup Technical Fixes: - Fix pino-pretty error in production (conditional loading) - Fix ES Module import paths (add .js extensions) - Fix OSSAdapter TypeScript errors - Update Prisma Schema (63 models, 16 schemas) - Update environment variables (DATABASE_URL, EXTRACTION_SERVICE_URL, OSS) - Remove deprecated variables (REDIS_URL, DIFY_API_URL, DIFY_API_KEY) Documentation: - Create 0126 deployment folder with 8 documents - Update database development standards v2.0 - Update SAE deployment status records Deployment Status: - PostgreSQL: ai_clinical_research_test with plugins - Python: v1.1 @ 172.17.173.84:8000 - Backend: v1.7 @ 172.17.173.89:3001 - Frontend: v1.3 @ 172.17.173.90:80 Tested: All services running successfully on SAE
130 lines
3.6 KiB
TypeScript
130 lines
3.6 KiB
TypeScript
/**
 * Rerank re-ranking test
 *
 * Tests the quality improvement of vector search + Rerank.
 *
 * Run: npx tsx src/tests/test-rerank.ts
 */
|
||
|
||
import { config } from 'dotenv';
|
||
config();
|
||
|
||
import { PrismaClient } from '@prisma/client';
|
||
import { getVectorSearchService } from '../common/rag/index';
|
||
|
||
// Single Prisma client shared by the whole script: used for the test-document
// lookup, handed to the vector search service, and disconnected when the test ends.
const prisma = new PrismaClient();
|
||
|
||
/**
 * Manual smoke test comparing plain vector search against vector search + Rerank.
 *
 * Steps:
 *   1. Verify DASHSCOPE_API_KEY is configured.
 *   2. Look up the test document ('Dongen 2003.pdf') to obtain its knowledge-base id.
 *   3. Run a vector search (topK 10, minScore 0.2, query rewrite disabled).
 *   4. Rerank those hits (topK 5).
 *   5. Print both result lists and compare the top-1 hit of each.
 *
 * Failures are reported on stderr and signalled through `process.exitCode = 1`
 * rather than `process.exit(1)`, so the `finally` block always gets to
 * disconnect the Prisma client before the process ends.
 *
 * @returns {Promise<void>} Resolves once the test has finished and Prisma is disconnected.
 */
async function testRerank() {
  console.log('========================================');
  console.log('🎯 Rerank 重排序测试');
  console.log('========================================\n');

  // Check the API key before doing any work
  // (presumably required by the search service — confirm against its implementation).
  if (!process.env.DASHSCOPE_API_KEY) {
    console.error('❌ 错误: DASHSCOPE_API_KEY 未配置');
    process.exitCode = 1;
    return;
  }

  try {
    // Look up the test document; it must have been ingested beforehand.
    const document = await prisma.ekbDocument.findFirst({
      where: { filename: 'Dongen 2003.pdf' },
      select: { id: true, kbId: true, filename: true },
    });

    if (!document) {
      console.error('❌ 测试文档不存在');
      console.log(' 请先运行: npx tsx src/tests/test-pdf-ingest.ts <pdf路径>');
      process.exitCode = 1;
      return;
    }

    console.log(`✅ 找到测试文档: ${document.filename}`);
    console.log('');

    const searchService = getVectorSearchService(prisma);

    // Test query
    const testQuery = '银杏叶对老年痴呆的效果';

    console.log(`🔍 测试查询: "${testQuery}"`);
    console.log('='.repeat(60));
    console.log('');

    // Step 1: plain vector search
    console.log('📊 Step 1: 纯向量检索(无 Rerank)');
    console.log('-'.repeat(60));

    const vectorResults = await searchService.vectorSearch(testQuery, {
      topK: 10,
      minScore: 0.2,
      filter: { kbId: document.kbId },
      enableQueryRewrite: false,
    });

    console.log(`返回 ${vectorResults.length} 条结果:\n`);
    vectorResults.slice(0, 5).forEach((r, i) => {
      const preview = r.content.substring(0, 80).replace(/\n/g, ' ');
      console.log(`${i + 1}. [${r.score.toFixed(3)}] ${preview}...`);
    });
    console.log('');

    // Without any hits there is nothing to rerank or compare; stop with a
    // clear message instead of failing later on `vectorResults[0]`.
    if (vectorResults.length === 0) {
      console.error('❌ 向量检索未返回任何结果,无法进行 Rerank 对比');
      process.exitCode = 1;
      return;
    }

    // Step 2: vector search + Rerank
    console.log('🎯 Step 2: 向量检索 + Rerank 重排序');
    console.log('-'.repeat(60));

    const rerankedResults = await searchService.rerank(testQuery, vectorResults, {
      topK: 5,
    });

    console.log(`Rerank 后返回 ${rerankedResults.length} 条结果:\n`);
    rerankedResults.forEach((r, i) => {
      const preview = r.content.substring(0, 80).replace(/\n/g, ' ');
      console.log(`${i + 1}. [${r.score.toFixed(3)}] ${preview}...`);
    });
    console.log('');

    // Same guard for the reranked list before reading its first element.
    if (rerankedResults.length === 0) {
      console.error('❌ Rerank 未返回任何结果,无法进行对比');
      process.exitCode = 1;
      return;
    }

    // Comparison of the top-1 hit before and after reranking
    const topVector = vectorResults[0];
    const topReranked = rerankedResults[0];

    console.log('📈 对比分析');
    console.log('='.repeat(60));
    console.log('');
    console.log('向量检索 Top 1:');
    console.log(` 相似度: ${topVector.score.toFixed(3)}`);
    console.log(` 内容: ${topVector.content.substring(0, 100).replace(/\n/g, ' ')}...`);
    console.log('');
    console.log('Rerank Top 1:');
    console.log(` 相关性: ${topReranked.score.toFixed(3)}`);
    console.log(` 内容: ${topReranked.content.substring(0, 100).replace(/\n/g, ' ')}...`);
    console.log('');

    if (topReranked.chunkId !== topVector.chunkId) {
      console.log('✨ Rerank 改变了排序!Top 1 结果更准确');
    } else {
      console.log('✅ Rerank 确认了原排序(向量检索已经很准)');
    }

    console.log('');
    console.log('========================================');
    console.log('🎉 测试完成!');
    console.log('========================================');
  } catch (error) {
    console.error('❌ 测试失败:', error);
    // Set the exit code instead of calling process.exit() so `finally` still runs.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}
|
||
|
||
// Script entry point. The promise is handled explicitly so that any rejection
// escaping testRerank() is reported and yields a non-zero exit status instead
// of surfacing as an unhandled rejection.
testRerank().catch((error) => {
  console.error('❌ 测试失败:', error);
  process.exit(1);
});
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|