AIclinicalresearch/backend/src/common/streaming/OpenAIStreamAdapter.ts

/**
 * OpenAI Compatible 流式响应适配器
 *
 * 将内部 LLM 响应转换为 OpenAI Compatible 格式
 * 支持 Ant Design X 的 XRequest 直接消费
 */

import { FastifyReply } from 'fastify';
import { v4 as uuidv4 } from 'uuid';
import type { OpenAIStreamChunk, StreamOptions, THINKING_TAGS } from './types';
import { logger } from '../logging/logger';

/**
 * OpenAI 流式响应适配器
 */
export class OpenAIStreamAdapter {
  private reply: FastifyReply;
  private messageId: string;
  private model: string;
  private created: number;
  private isHeaderSent: boolean = false;

  constructor(reply: FastifyReply, model: string = 'deepseek-v3') {
    this.reply = reply;
    this.messageId = `chatcmpl-${uuidv4()}`;
    this.model = model;
    this.created = Math.floor(Date.now() / 1000);
  }

  /**
   * 初始化 SSE 连接
   */
  initSSE(): void {
    if (this.isHeaderSent) return;

    this.reply.raw.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
      'Access-Control-Allow-Origin': '*',
    });

    this.isHeaderSent = true;
    logger.debug('[OpenAIStreamAdapter] SSE 连接已初始化');
  }

  /**
   * 发送内容增量
   */
  sendContentDelta(content: string): void {
    this.initSSE();

    const chunk: OpenAIStreamChunk = {
      id: this.messageId,
      object: 'chat.completion.chunk',
      created: this.created,
      model: this.model,
      choices: [{
        index: 0,
        delta: { content },
        finish_reason: null,
      }],
    };

    this.writeChunk(chunk);
  }

  /**
   * 发送思考内容增量（DeepSeek 风格）
   */
  sendReasoningDelta(reasoningContent: string): void {
    this.initSSE();

    const chunk: OpenAIStreamChunk = {
      id: this.messageId,
      object: 'chat.completion.chunk',
      created: this.created,
      model: this.model,
      choices: [{
        index: 0,
        delta: { reasoning_content: reasoningContent },
        finish_reason: null,
      }],
    };

    this.writeChunk(chunk);
  }

  /**
   * 发送角色标识（流开始时）
   */
  sendRoleStart(): void {
    this.initSSE();

    const chunk: OpenAIStreamChunk = {
      id: this.messageId,
      object: 'chat.completion.chunk',
      created: this.created,
      model: this.model,
      choices: [{
        index: 0,
        delta: { role: 'assistant' },
        finish_reason: null,
      }],
    };

    this.writeChunk(chunk);
  }

  /**
   * 发送完成标识
   */
  sendComplete(usage?: { promptTokens: number; completionTokens: number; totalTokens: number }): void {
    this.initSSE();

    const chunk: OpenAIStreamChunk = {
      id: this.messageId,
      object: 'chat.completion.chunk',
      created: this.created,
      model: this.model,
      choices: [{
        index: 0,
        delta: {},
        finish_reason: 'stop',
      }],
      usage: usage ? {
        prompt_tokens: usage.promptTokens,
        completion_tokens: usage.completionTokens,
        total_tokens: usage.totalTokens,
      } : undefined,
    };

    this.writeChunk(chunk);

    // 发送 [DONE] 标识
    this.reply.raw.write('data: [DONE]\n\n');
    logger.debug('[OpenAIStreamAdapter] 流式响应完成');
  }

  /**
   * 发送错误
   */
  sendError(error: Error | string): void {
    this.initSSE();

    const errorMessage = typeof error === 'string' ? error : error.message;

    const errorChunk = {
      error: {
        message: errorMessage,
        type: 'server_error',
        code: 'internal_error',
      },
    };

    this.reply.raw.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
    this.reply.raw.write('data: [DONE]\n\n');

    logger.error('[OpenAIStreamAdapter] 流式响应错误', { error: errorMessage });
  }

  /**
   * 结束流
   */
  end(): void {
    if (this.isHeaderSent) {
      this.reply.raw.end();
    }
  }

  /**
   * 获取消息 ID
   */
  getMessageId(): string {
    return this.messageId;
  }

  /**
   * 写入 Chunk
   */
  private writeChunk(chunk: OpenAIStreamChunk): void {
    this.reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`);
  }
}

/**
 * 创建 OpenAI 流式适配器
 */
export function createOpenAIStreamAdapter(
  reply: FastifyReply,
  model?: string
): OpenAIStreamAdapter {
  return new OpenAIStreamAdapter(reply, model);
}