Files
AIclinicalresearch/backend/src/common/rag/RerankService.ts
HaHafeng 40c2f8e148 feat(rag): Complete RAG engine implementation with pgvector
Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
2026-01-21 20:24:29 +08:00

211 lines
5.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* RerankService - 重排序服务
*
* 使用阿里云 qwen3-rerank 模型
* 通过 OpenAI 兼容接口调用
*
* @see https://help.aliyun.com/zh/model-studio/text-rerank-api
*/
import { logger } from '../logging/index.js';
// ==================== 类型定义 ====================
/**
 * One candidate document submitted for reranking.
 */
export interface RerankDocument {
// Text that is sent to the rerank model.
text: string;
index?: number; // Optional: caller-side original index (RerankService.rerank does not read it)
// Arbitrary caller data; copied unchanged onto the matching RerankResult.
metadata?: Record<string, unknown>;
}
/**
 * One reranked document as returned by RerankService.rerank.
 */
export interface RerankResult {
// Text of the matched input document.
text: string;
index: number; // Index reported by the API; used to look the document up in the submitted list
relevanceScore: number; // Relevance score (0-1)
// Metadata of the matched input document, if it had any.
metadata?: Record<string, unknown>;
}
/**
 * Per-call options for RerankService.rerank.
 */
export interface RerankOptions {
topN?: number; // Number of results to return; defaults to 10
instruct?: string; // Task instruction forwarded to the model (optional)
}
/**
 * Constructor overrides for RerankService.
 * The constructor merges these over the values returned by getDefaultConfig().
 */
export interface RerankConfig {
// API key; default comes from DASHSCOPE_API_KEY.
apiKey?: string;
// API base URL; default comes from RERANK_BASE_URL.
baseUrl?: string;
// Model name; default comes from RERANK_MODEL.
model?: string;
}
// ==================== 默认配置 ====================
/**
 * Builds the default rerank configuration from environment variables.
 *
 * - DASHSCOPE_API_KEY: API key; falls back to an empty string when unset.
 * - RERANK_BASE_URL: API base URL (optional);
 *   falls back to https://dashscope.aliyuncs.com/compatible-api/v1.
 * - RERANK_MODEL: model name (optional); falls back to qwen3-rerank.
 *
 * A variable that is set to an empty string is treated the same as unset.
 * The environment is read on every call, not cached.
 */
function getDefaultConfig() {
  const { DASHSCOPE_API_KEY, RERANK_BASE_URL, RERANK_MODEL } = process.env;

  const apiKey = DASHSCOPE_API_KEY || '';
  const baseUrl = RERANK_BASE_URL || 'https://dashscope.aliyuncs.com/compatible-api/v1';
  const model = RERANK_MODEL || 'qwen3-rerank';

  return { apiKey, baseUrl, model };
}
// ==================== RerankService ====================
/**
 * Client for a rerank HTTP API (`POST {baseUrl}/reranks`).
 *
 * Sends a query plus candidate texts and maps the scored results back onto
 * the caller's documents, carrying their metadata along.
 */
export class RerankService {
  /** Upper bound on documents sent per request; extra documents are dropped. */
  private static readonly MAX_DOCUMENTS = 500;
  /** Price used for the cost figure in the completion log line (¥ per million tokens). */
  private static readonly PRICE_PER_MILLION_TOKENS = 0.8;

  private readonly apiKey: string;
  private readonly baseUrl: string;
  private readonly model: string;

  /**
   * @param config Optional overrides; any field left out (or explicitly
   *   `undefined`) falls back to the value from getDefaultConfig().
   * @throws Error when no API key is available from either source.
   */
  constructor(config: RerankConfig = {}) {
    const defaults = getDefaultConfig();
    // Resolve field by field: an object spread would let an explicitly
    // `undefined` override (e.g. `{ model: process.env.X }`) erase the default.
    const apiKey = config.apiKey ?? defaults.apiKey;
    const baseUrl = config.baseUrl ?? defaults.baseUrl;
    const model = config.model ?? defaults.model;

    if (!apiKey) {
      throw new Error('DASHSCOPE_API_KEY 未配置,请在环境变量中设置');
    }

    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    logger.info(`RerankService 初始化完成: model=${this.model}`);
  }

  /**
   * Reranks `documents` against `query`.
   *
   * Limits noted by the original author (only the document count is enforced here):
   * - a single query/document is at most 4000 tokens
   * - at most 500 documents
   * - at most 30000 tokens in total
   *
   * @param query Search query.
   * @param documents Candidates; only the first 500 are sent.
   * @param options `topN` (default 10, capped at the number of documents sent)
   *   and an optional `instruct` string forwarded to the API when non-empty.
   * @returns Results in the order the API returned them. `index` is the
   *   position of the document within the submitted `documents` array.
   * @throws Error on a non-2xx response, an unparsable or malformed response
   *   body, or a result index that does not match a submitted document.
   *   Failures are logged and then rethrown.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions = {}
  ): Promise<RerankResult[]> {
    if (documents.length === 0) {
      return [];
    }

    const { topN = 10, instruct } = options;
    const limitedDocs = documents.slice(0, RerankService.MAX_DOCUMENTS);
    // Tolerate a configured base URL that ends with "/" so the path never becomes "//reranks".
    const endpoint = `${this.baseUrl.replace(/\/+$/, '')}/reranks`;

    try {
      const requestBody = {
        model: this.model,
        query,
        documents: limitedDocs.map((doc) => doc.text),
        top_n: Math.min(topN, limitedDocs.length),
        // Only include the field when a non-empty instruction was given.
        ...(instruct ? { instruct } : {}),
      };
      const serializedBody = JSON.stringify(requestBody);

      logger.debug(`Rerank 请求: query="${query.substring(0, 30)}...", docs=${limitedDocs.length}, topN=${topN}`);
      logger.debug(`Rerank API URL: ${endpoint}`);
      logger.debug(`Rerank 请求体: ${serializedBody.substring(0, 200)}...`);

      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: serializedBody,
      });

      // Read the body as text first so it can be logged and quoted in errors
      // even when it is not valid JSON.
      const responseText = await response.text();
      logger.debug(`Rerank 响应状态: ${response.status}`);
      logger.debug(`Rerank 响应内容: ${responseText.substring(0, 500)}...`);

      if (!response.ok) {
        throw new Error(`Rerank API 返回错误: ${response.status} - ${responseText}`);
      }

      const payload = JSON.parse(responseText) as {
        results?: Array<{ index: number; relevance_score: number }>;
        usage?: { total_tokens?: number };
      } | null;

      // Fail with a descriptive message instead of a TypeError on `.length`.
      const rows = payload?.results;
      if (!Array.isArray(rows)) {
        throw new Error('Rerank API 响应格式异常: 缺少 results 数组');
      }

      const totalTokens = payload?.usage?.total_tokens ?? 0;
      const cost = (totalTokens * RerankService.PRICE_PER_MILLION_TOKENS) / 1_000_000;
      logger.info(`Rerank 完成: 返回 ${rows.length} 条, tokens=${totalTokens}, cost=¥${cost.toFixed(6)}`);

      // Map each scored row back to the submitted document to recover text and metadata.
      return rows.map((row) => {
        const doc = limitedDocs[row.index];
        if (!doc) {
          throw new Error(`Rerank API 返回了无效的文档索引: ${row.index}`);
        }
        return {
          text: doc.text,
          index: row.index,
          relevanceScore: row.relevance_score,
          metadata: doc.metadata,
        };
      });
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      const errorDetails = error instanceof Error ? error.stack : JSON.stringify(error);
      logger.error('Rerank 失败', {
        error: errorMessage,
        details: errorDetails,
        query: query.substring(0, 100),
        docCount: limitedDocs.length,
      });
      throw error;
    }
  }

  /**
   * Returns the effective model name and base URL (the API key is not exposed).
   */
  getConfig(): { model: string; baseUrl: string } {
    return {
      model: this.model,
      baseUrl: this.baseUrl,
    };
  }
}
// ==================== 单例导出 ====================
/** Shared instance; stays null until getRerankService() first succeeds. */
let _rerankService: RerankService | null = null;

/**
 * Returns the shared RerankService, creating it on first use.
 *
 * `config` is only consulted when the instance is created. Once an instance
 * exists it is returned as-is and the argument is ignored. If construction
 * throws, nothing is cached and the next call tries again.
 */
export function getRerankService(config?: RerankConfig): RerankService {
  if (_rerankService !== null) {
    return _rerankService;
  }

  const created = new RerankService(config);
  _rerankService = created;
  return created;
}
/**
 * Convenience wrapper: reranks `documents` against `query` using the shared
 * instance from getRerankService() (called without a config argument).
 *
 * Arguments and result are passed straight through to RerankService.rerank.
 */
export async function rerank(
  query: string,
  documents: RerankDocument[],
  options?: RerankOptions
): Promise<RerankResult[]> {
  return getRerankService().rerank(query, documents, options);
}
export default RerankService;