Files
AIclinicalresearch/backend/src/common/streaming/OpenAIStreamAdapter.ts
HaHafeng 40c2f8e148 feat(rag): Complete RAG engine implementation with pgvector
Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
2026-01-21 20:24:29 +08:00

206 lines
4.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* OpenAI Compatible 流式响应适配器
*
* 将内部 LLM 响应转换为 OpenAI Compatible 格式
* 支持 Ant Design X 的 XRequest 直接消费
*/
import { FastifyReply } from 'fastify';
import { v4 as uuidv4 } from 'uuid';
import type { OpenAIStreamChunk, StreamOptions, THINKING_TAGS } from './types';
import { logger } from '../logging/logger';
/**
 * OpenAI-compatible streaming response adapter.
 *
 * Wraps a Fastify reply and emits Server-Sent Events in the OpenAI
 * `chat.completion.chunk` wire format, so clients built against the
 * OpenAI streaming API (e.g. Ant Design X's XRequest) can consume
 * internal LLM responses directly.
 */
export class OpenAIStreamAdapter {
  private reply: FastifyReply;
  // Stable message id shared by every chunk of this stream (OpenAI convention: `chatcmpl-<uuid>`).
  private messageId: string;
  private model: string;
  // Unix timestamp (seconds) captured once at construction; repeated in every chunk.
  private created: number;
  // Guards against writing the SSE headers more than once.
  private isHeaderSent: boolean = false;

  /**
   * @param reply Fastify reply whose raw Node response is used for SSE writes.
   * @param model Model name echoed in every chunk (default: 'deepseek-v3').
   */
  constructor(reply: FastifyReply, model: string = 'deepseek-v3') {
    this.reply = reply;
    this.messageId = `chatcmpl-${uuidv4()}`;
    this.model = model;
    this.created = Math.floor(Date.now() / 1000);
  }

  /**
   * Initialize the SSE connection by writing the response headers.
   * Idempotent: subsequent calls are no-ops.
   */
  initSSE(): void {
    if (this.isHeaderSent) return;
    this.reply.raw.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      // Disable nginx proxy buffering so chunks flush immediately.
      'X-Accel-Buffering': 'no',
      'Access-Control-Allow-Origin': '*',
    });
    this.isHeaderSent = true;
    logger.debug('[OpenAIStreamAdapter] SSE 连接已初始化');
  }

  /**
   * Send an incremental piece of assistant content.
   */
  sendContentDelta(content: string): void {
    this.initSSE();
    this.writeChunk(this.buildChunk({ content }));
  }

  /**
   * Send an incremental piece of reasoning content (DeepSeek-style
   * `reasoning_content` field).
   */
  sendReasoningDelta(reasoningContent: string): void {
    this.initSSE();
    this.writeChunk(this.buildChunk({ reasoning_content: reasoningContent }));
  }

  /**
   * Send the assistant role marker (emitted at the start of a stream,
   * per the OpenAI streaming convention).
   */
  sendRoleStart(): void {
    this.initSSE();
    this.writeChunk(this.buildChunk({ role: 'assistant' }));
  }

  /**
   * Send the terminal chunk (`finish_reason: 'stop'`), optionally with
   * token usage, followed by the `[DONE]` sentinel.
   *
   * @param usage Optional token accounting to report in the final chunk.
   */
  sendComplete(usage?: { promptTokens: number; completionTokens: number; totalTokens: number }): void {
    this.initSSE();
    const chunk: OpenAIStreamChunk = {
      ...this.buildChunk({}, 'stop'),
      usage: usage ? {
        prompt_tokens: usage.promptTokens,
        completion_tokens: usage.completionTokens,
        total_tokens: usage.totalTokens,
      } : undefined,
    };
    this.writeChunk(chunk);
    this.writeDone();
    logger.debug('[OpenAIStreamAdapter] 流式响应完成');
  }

  /**
   * Send an error payload to the client, then terminate with `[DONE]`.
   * Note: the error object is not a `chat.completion.chunk`; it follows
   * the OpenAI error-envelope shape instead.
   */
  sendError(error: Error | string): void {
    this.initSSE();
    const errorMessage = typeof error === 'string' ? error : error.message;
    const errorChunk = {
      error: {
        message: errorMessage,
        type: 'server_error',
        code: 'internal_error',
      },
    };
    this.reply.raw.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
    this.writeDone();
    logger.error('[OpenAIStreamAdapter] 流式响应错误', { error: errorMessage });
  }

  /**
   * End the underlying HTTP response. No-op if the SSE headers were
   * never sent (nothing was streamed).
   */
  end(): void {
    if (this.isHeaderSent) {
      this.reply.raw.end();
    }
  }

  /**
   * @returns The stable message id used for every chunk of this stream.
   */
  getMessageId(): string {
    return this.messageId;
  }

  /**
   * Build a chunk envelope around a delta, filling in the fields that
   * are identical for every chunk of this stream. Centralizes the
   * boilerplate previously duplicated across the send* methods.
   */
  private buildChunk(
    delta: OpenAIStreamChunk['choices'][number]['delta'],
    finishReason: OpenAIStreamChunk['choices'][number]['finish_reason'] = null
  ): OpenAIStreamChunk {
    return {
      id: this.messageId,
      object: 'chat.completion.chunk',
      created: this.created,
      model: this.model,
      choices: [{
        index: 0,
        delta,
        finish_reason: finishReason,
      }],
    };
  }

  /**
   * Serialize a chunk as an SSE `data:` event.
   */
  private writeChunk(chunk: OpenAIStreamChunk): void {
    this.reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`);
  }

  /**
   * Emit the OpenAI stream-termination sentinel.
   */
  private writeDone(): void {
    this.reply.raw.write('data: [DONE]\n\n');
  }
}
/**
 * Factory for {@link OpenAIStreamAdapter}.
 *
 * @param reply Fastify reply to stream SSE events over.
 * @param model Optional model name echoed in every chunk; the adapter
 *              falls back to its own default when omitted.
 * @returns A fresh adapter bound to the given reply.
 */
export function createOpenAIStreamAdapter(
  reply: FastifyReply,
  model?: string
): OpenAIStreamAdapter {
  const adapter = new OpenAIStreamAdapter(reply, model);
  return adapter;
}