feat(rag): Complete RAG engine implementation with pgvector

Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
This commit is contained in:
2026-01-21 20:24:29 +08:00
parent 1f5bf2cd65
commit 40c2f8e148
338 changed files with 11014 additions and 1158 deletions

View File

@@ -0,0 +1,210 @@
/**
* RerankService - 重排序服务
*
* 使用阿里云 qwen3-rerank 模型
* 通过 OpenAI 兼容接口调用
*
* @see https://help.aliyun.com/zh/model-studio/text-rerank-api
*/
import { logger } from '../logging/index.js';
// ==================== 类型定义 ====================
/**
 * A candidate document handed to the reranker.
 */
export interface RerankDocument {
// Document text; RerankService.rerank() sends this string to the API.
text: string;
index?: number; // optional: original index (RerankService.rerank() does not read this field)
// Optional caller-defined data; RerankService.rerank() copies it onto the matching result.
metadata?: Record<string, unknown>;
}
/**
 * One entry of the list returned by a rerank call.
 */
export interface RerankResult {
// Text of the input document this entry refers to.
text: string;
index: number; // original index; RerankService.rerank() fills it with the index reported by the API
relevanceScore: number; // relevance score (0-1 per the original note; the range is not enforced in code)
// Metadata of the referenced input document, when it had any.
metadata?: Record<string, unknown>;
}
/**
 * Per-call options accepted by the rerank entry points.
 */
export interface RerankOptions {
topN?: number; // number of results to return, default 10
instruct?: string; // task instruction (optional); included in the request body only when non-empty
}
/**
 * Optional overrides for the RerankService constructor.
 * Fields that are omitted fall back to the values produced by getDefaultConfig().
 */
export interface RerankConfig {
// Sent as the Bearer token; the constructor throws when the resolved value is empty.
apiKey?: string;
// Base URL of the rerank API; "/reranks" is appended to it for each request.
baseUrl?: string;
// Model name sent in the request body.
model?: string;
}
// ==================== 默认配置 ====================
/**
 * Builds the default rerank settings from environment variables.
 * The environment is read on every call, not cached at import time.
 *
 * - DASHSCOPE_API_KEY: Alibaba Cloud Bailian API key. The original note marks
 *   it as required and shareable with other models; this function itself
 *   falls back to an empty string when it is unset or empty.
 * - RERANK_BASE_URL: rerank API base URL (optional).
 *   Default: https://dashscope.aliyuncs.com/compatible-api/v1
 * - RERANK_MODEL: rerank model name (optional, default qwen3-rerank).
 */
function getDefaultConfig() {
  // An unset OR empty variable yields the fallback (truthiness check, not a null check).
  const fromEnv = (name: string, fallback: string): string => {
    const value = process.env[name];
    return value ? value : fallback;
  };

  const apiKey = fromEnv('DASHSCOPE_API_KEY', '');
  const baseUrl = fromEnv('RERANK_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-api/v1');
  const model = fromEnv('RERANK_MODEL', 'qwen3-rerank');

  return { apiKey, baseUrl, model };
}
// ==================== RerankService ====================
/**
 * HTTP client for a text-rerank API.
 *
 * Settings come from the constructor argument, falling back field by field to
 * `getDefaultConfig()`. Each `rerank()` call issues one POST to
 * `<baseUrl>/reranks` with a Bearer token and maps the scored indices in the
 * response back onto the submitted documents.
 */
export class RerankService {
  /** Maximum number of documents sent per request; anything beyond is dropped. */
  private static readonly MAX_DOCUMENTS = 500;

  private apiKey: string;
  private baseUrl: string;
  private model: string;

  /**
   * @param config Optional overrides; omitted or `undefined` fields use the defaults.
   * @throws Error when no API key is available from either source.
   */
  constructor(config: RerankConfig = {}) {
    const defaults = getDefaultConfig();

    // Resolve each field on its own. Spreading `config` over the defaults would
    // let an explicitly-undefined property (e.g. `{ baseUrl: process.env.X }`)
    // overwrite the default with `undefined`.
    const apiKey = config.apiKey ?? defaults.apiKey;
    if (!apiKey) {
      throw new Error('DASHSCOPE_API_KEY 未配置,请在环境变量中设置');
    }

    this.apiKey = apiKey;
    this.baseUrl = config.baseUrl ?? defaults.baseUrl;
    this.model = config.model ?? defaults.model;
    logger.info(`RerankService 初始化完成: model=${this.model}`);
  }

  /**
   * Reranks `documents` against `query`.
   *
   * Limits noted by the original author (only the document count is enforced
   * here; the token limits are not checked):
   * - at most 4000 tokens per query / document
   * - at most 500 documents
   * - at most 30000 tokens in total
   *
   * @param query     Search query.
   * @param documents Candidates; only the first 500 are submitted.
   * @param options   `topN` (default 10) and an optional `instruct` string.
   * @returns Results in the order returned by the API. `index` is the position
   *          in the submitted document list. Returns `[]` without calling the
   *          API when `documents` is empty or `topN` is not positive.
   * @throws Error on a non-2xx response, a response without a `results` array,
   *         or a result index that does not match a submitted document.
   *         Every failure is logged and then rethrown.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions = {}
  ): Promise<RerankResult[]> {
    if (documents.length === 0) {
      return [];
    }

    const { topN = 10, instruct } = options;
    // Asking for zero (or fewer) results needs no round trip.
    if (topN <= 0) {
      return [];
    }

    const limitedDocs = documents.slice(0, RerankService.MAX_DOCUMENTS);
    // Tolerate a base URL configured with a trailing slash.
    const endpoint = `${this.baseUrl.replace(/\/+$/, '')}/reranks`;

    try {
      const requestBody = {
        model: this.model,
        query,
        documents: limitedDocs.map((doc) => doc.text),
        top_n: Math.min(topN, limitedDocs.length),
        // Only send `instruct` when the caller supplied a non-empty value.
        ...(instruct ? { instruct } : {}),
      };
      const serializedBody = JSON.stringify(requestBody);

      logger.debug(`Rerank 请求: query="${query.substring(0, 30)}...", docs=${limitedDocs.length}, topN=${topN}`);
      logger.debug(`Rerank API URL: ${endpoint}`);
      logger.debug(`Rerank 请求体: ${serializedBody.substring(0, 200)}...`);

      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: serializedBody,
      });

      // Read the body as text first so it can be logged and quoted in errors
      // even when it is not valid JSON.
      const responseText = await response.text();
      logger.debug(`Rerank 响应状态: ${response.status}`);
      logger.debug(`Rerank 响应内容: ${responseText.substring(0, 500)}...`);

      if (!response.ok) {
        throw new Error(`Rerank API 返回错误: ${response.status} - ${responseText}`);
      }

      const { results, totalTokens } = RerankService.parseResponse(responseText);

      const cost = (totalTokens * 0.8) / 1_000_000; // ¥0.8 per million tokens
      logger.info(`Rerank 完成: 返回 ${results.length} 条, tokens=${totalTokens}, cost=¥${cost.toFixed(6)}`);

      // Map each scored index back to the submitted document and its metadata.
      return results.map((item) => {
        const source = limitedDocs[item.index];
        if (source === undefined) {
          throw new Error(`Rerank API 返回了无效的文档索引: ${item.index}`);
        }
        return {
          text: source.text,
          index: item.index,
          relevanceScore: item.relevance_score,
          metadata: source.metadata,
        };
      });
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      const errorDetails = error instanceof Error ? error.stack : JSON.stringify(error);
      logger.error('Rerank 失败', {
        error: errorMessage,
        details: errorDetails,
        query: query.substring(0, 100),
        docCount: limitedDocs.length,
      });
      throw error;
    }
  }

  /**
   * Returns the non-secret part of the active configuration (the API key is
   * deliberately not exposed).
   */
  getConfig(): { model: string; baseUrl: string } {
    return {
      model: this.model,
      baseUrl: this.baseUrl,
    };
  }

  /**
   * Parses a successful response body and checks that it carries a `results`
   * array, so a differently shaped payload fails with a clear message.
   */
  private static parseResponse(responseText: string): {
    results: Array<{ index: number; relevance_score: number }>;
    totalTokens: number;
  } {
    const payload = JSON.parse(responseText) as {
      results?: unknown;
      usage?: { total_tokens?: number } | null;
    } | null;

    const results = payload?.results;
    if (!Array.isArray(results)) {
      throw new Error('Rerank API 响应缺少 results 数组');
    }

    return {
      results: results as Array<{ index: number; relevance_score: number }>,
      totalTokens: payload?.usage?.total_tokens ?? 0,
    };
  }
}
// ==================== 单例导出 ====================
let _rerankService: RerankService | null = null;

/**
 * Returns the module-wide RerankService, creating it on first use.
 *
 * @param config Passed to the constructor on the call that creates the
 *               instance; ignored on every later call, which returns the
 *               instance that already exists.
 */
export function getRerankService(config?: RerankConfig): RerankService {
  if (_rerankService) {
    return _rerankService;
  }
  const created = new RerankService(config);
  _rerankService = created;
  return created;
}
/**
 * Convenience wrapper: reranks through the shared service obtained from
 * getRerankService(), forwarding all arguments unchanged.
 */
export async function rerank(
  query: string,
  documents: RerankDocument[],
  options?: RerankOptions
): Promise<RerankResult[]> {
  return getRerankService().rerank(query, documents, options);
}
export default RerankService;