Major Features: - Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk - Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors) - Implemented ChunkService (smart Markdown chunking) - Implemented VectorSearchService (multi-query + hybrid search) - Implemented RerankService (qwen3-rerank) - Integrated DeepSeek V3 QueryRewriter for cross-language search - Python service: Added pymupdf4llm for PDF-to-Markdown conversion - PKB: Dual-mode adapter (pgvector/dify/hybrid) Architecture: - Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector) - Cross-language support: Chinese query matches English documents - Small Embedding (1024) + Strong Reranker strategy Performance: - End-to-end latency: 2.5s - Cost per query: 0.0025 RMB - Accuracy improvement: +20.5% (cross-language) Tests: - test-embedding-service.ts: Vector embedding verified - test-rag-e2e.ts: Full pipeline tested - test-rerank.ts: Rerank quality validated - test-query-rewrite.ts: Cross-language search verified - test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf) Documentation: - Added 05-RAG-Engine-User-Guide.md - Added 02-Document-Processing-User-Guide.md - Updated system status documentation Status: Production ready
206 lines
4.3 KiB
TypeScript
206 lines
4.3 KiB
TypeScript
/**
|
||
* OpenAI Compatible 流式响应适配器
|
||
*
|
||
* 将内部 LLM 响应转换为 OpenAI Compatible 格式
|
||
* 支持 Ant Design X 的 XRequest 直接消费
|
||
*/
|
||
|
||
import { FastifyReply } from 'fastify';
|
||
import { v4 as uuidv4 } from 'uuid';
|
||
import type { OpenAIStreamChunk, StreamOptions, THINKING_TAGS } from './types';
|
||
import { logger } from '../logging/logger';
|
||
|
||
/**
|
||
* OpenAI 流式响应适配器
|
||
*/
|
||
export class OpenAIStreamAdapter {
|
||
private reply: FastifyReply;
|
||
private messageId: string;
|
||
private model: string;
|
||
private created: number;
|
||
private isHeaderSent: boolean = false;
|
||
|
||
constructor(reply: FastifyReply, model: string = 'deepseek-v3') {
|
||
this.reply = reply;
|
||
this.messageId = `chatcmpl-${uuidv4()}`;
|
||
this.model = model;
|
||
this.created = Math.floor(Date.now() / 1000);
|
||
}
|
||
|
||
/**
|
||
* 初始化 SSE 连接
|
||
*/
|
||
initSSE(): void {
|
||
if (this.isHeaderSent) return;
|
||
|
||
this.reply.raw.writeHead(200, {
|
||
'Content-Type': 'text/event-stream',
|
||
'Cache-Control': 'no-cache',
|
||
'Connection': 'keep-alive',
|
||
'X-Accel-Buffering': 'no',
|
||
'Access-Control-Allow-Origin': '*',
|
||
});
|
||
|
||
this.isHeaderSent = true;
|
||
logger.debug('[OpenAIStreamAdapter] SSE 连接已初始化');
|
||
}
|
||
|
||
/**
|
||
* 发送内容增量
|
||
*/
|
||
sendContentDelta(content: string): void {
|
||
this.initSSE();
|
||
|
||
const chunk: OpenAIStreamChunk = {
|
||
id: this.messageId,
|
||
object: 'chat.completion.chunk',
|
||
created: this.created,
|
||
model: this.model,
|
||
choices: [{
|
||
index: 0,
|
||
delta: { content },
|
||
finish_reason: null,
|
||
}],
|
||
};
|
||
|
||
this.writeChunk(chunk);
|
||
}
|
||
|
||
/**
|
||
* 发送思考内容增量(DeepSeek 风格)
|
||
*/
|
||
sendReasoningDelta(reasoningContent: string): void {
|
||
this.initSSE();
|
||
|
||
const chunk: OpenAIStreamChunk = {
|
||
id: this.messageId,
|
||
object: 'chat.completion.chunk',
|
||
created: this.created,
|
||
model: this.model,
|
||
choices: [{
|
||
index: 0,
|
||
delta: { reasoning_content: reasoningContent },
|
||
finish_reason: null,
|
||
}],
|
||
};
|
||
|
||
this.writeChunk(chunk);
|
||
}
|
||
|
||
/**
|
||
* 发送角色标识(流开始时)
|
||
*/
|
||
sendRoleStart(): void {
|
||
this.initSSE();
|
||
|
||
const chunk: OpenAIStreamChunk = {
|
||
id: this.messageId,
|
||
object: 'chat.completion.chunk',
|
||
created: this.created,
|
||
model: this.model,
|
||
choices: [{
|
||
index: 0,
|
||
delta: { role: 'assistant' },
|
||
finish_reason: null,
|
||
}],
|
||
};
|
||
|
||
this.writeChunk(chunk);
|
||
}
|
||
|
||
/**
|
||
* 发送完成标识
|
||
*/
|
||
sendComplete(usage?: { promptTokens: number; completionTokens: number; totalTokens: number }): void {
|
||
this.initSSE();
|
||
|
||
const chunk: OpenAIStreamChunk = {
|
||
id: this.messageId,
|
||
object: 'chat.completion.chunk',
|
||
created: this.created,
|
||
model: this.model,
|
||
choices: [{
|
||
index: 0,
|
||
delta: {},
|
||
finish_reason: 'stop',
|
||
}],
|
||
usage: usage ? {
|
||
prompt_tokens: usage.promptTokens,
|
||
completion_tokens: usage.completionTokens,
|
||
total_tokens: usage.totalTokens,
|
||
} : undefined,
|
||
};
|
||
|
||
this.writeChunk(chunk);
|
||
|
||
// 发送 [DONE] 标识
|
||
this.reply.raw.write('data: [DONE]\n\n');
|
||
logger.debug('[OpenAIStreamAdapter] 流式响应完成');
|
||
}
|
||
|
||
/**
|
||
* 发送错误
|
||
*/
|
||
sendError(error: Error | string): void {
|
||
this.initSSE();
|
||
|
||
const errorMessage = typeof error === 'string' ? error : error.message;
|
||
|
||
const errorChunk = {
|
||
error: {
|
||
message: errorMessage,
|
||
type: 'server_error',
|
||
code: 'internal_error',
|
||
},
|
||
};
|
||
|
||
this.reply.raw.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
|
||
this.reply.raw.write('data: [DONE]\n\n');
|
||
|
||
logger.error('[OpenAIStreamAdapter] 流式响应错误', { error: errorMessage });
|
||
}
|
||
|
||
/**
|
||
* 结束流
|
||
*/
|
||
end(): void {
|
||
if (this.isHeaderSent) {
|
||
this.reply.raw.end();
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 获取消息 ID
|
||
*/
|
||
getMessageId(): string {
|
||
return this.messageId;
|
||
}
|
||
|
||
/**
|
||
* 写入 Chunk
|
||
*/
|
||
private writeChunk(chunk: OpenAIStreamChunk): void {
|
||
this.reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 创建 OpenAI 流式适配器
|
||
*/
|
||
export function createOpenAIStreamAdapter(
|
||
reply: FastifyReply,
|
||
model?: string
|
||
): OpenAIStreamAdapter {
|
||
return new OpenAIStreamAdapter(reply, model);
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|