From 5c5fec52c18d334db9cbab45b18384ed74b5b90e Mon Sep 17 00:00:00 2001 From: HaHafeng Date: Mon, 9 Mar 2026 18:45:12 +0800 Subject: [PATCH] fix(aia,ssa,asl,infra): harden SSE transport and stabilize attachment context Deliver SSE protocol hardening for SAE/HTTP2 paths, add graceful shutdown health behavior, and improve SSA retry UX for transient stream failures. For AIA, persist attachment extraction results in database with cache read-through fallback, plus production cache safety guard to prevent memory-cache drift in multi-instance deployments; also restore ASL SR page scrolling behavior. Made-with: Cursor --- .../migration.sql | 30 +++ backend/prisma/schema.prisma | 25 +++ backend/src/common/health/healthCheck.ts | 28 ++- backend/src/common/health/index.ts | 2 +- .../common/streaming/OpenAIStreamAdapter.ts | 1 - backend/src/config/env.ts | 5 + backend/src/index.ts | 19 +- .../src/legacy/controllers/chatController.ts | 2 +- .../controllers/conversationController.ts | 2 +- .../modules/aia/services/attachmentService.ts | 168 ++++++++++++++-- .../aia/services/conversationService.ts | 18 +- .../asl/controllers/researchController.ts | 2 +- .../controllers/ExtractionController.ts | 1 - .../tool-c/controllers/StreamAIController.ts | 3 +- .../modules/pkb/controllers/chatController.ts | 4 +- backend/src/modules/ssa/routes/chat.routes.ts | 3 +- .../src/modules/ssa/routes/session.routes.ts | 2 +- .../src/modules/ssa/routes/workflow.routes.ts | 4 +- .../SAE 生产环境 SSE 故障诊断与终极防御.md | 128 ++++++++++++ .../00-阿里云SAE最新真实状态记录.md | 75 ++++++-- docs/05-部署文档/03-待部署变更清单.md | 57 +++--- .../0309部署/01-数据库部署完成总结.md | 182 ++++++++++++++++++ frontend-v2/nginx.conf | 18 +- .../src/modules/asl/components/ASLLayout.tsx | 4 +- .../modules/ssa/components/SSAChatPane.tsx | 8 + .../src/modules/ssa/hooks/useSSAChat.ts | 77 +++++++- frontend-v2/src/modules/ssa/styles/ssa.css | 39 ++++ 27 files changed, 807 insertions(+), 100 deletions(-) create mode 100644 
backend/prisma/migrations/20260309_add_aia_attachments_persistence/migration.sql create mode 100644 docs/03-业务模块/SSA-智能统计分析/07-统计专家配置/SAE 生产环境 SSE 故障诊断与终极防御.md create mode 100644 docs/05-部署文档/0309部署/01-数据库部署完成总结.md diff --git a/backend/prisma/migrations/20260309_add_aia_attachments_persistence/migration.sql b/backend/prisma/migrations/20260309_add_aia_attachments_persistence/migration.sql new file mode 100644 index 00000000..d226187c --- /dev/null +++ b/backend/prisma/migrations/20260309_add_aia_attachments_persistence/migration.sql @@ -0,0 +1,30 @@ +-- AIA 附件持久化:数据库真相源 + 缓存加速 +CREATE TABLE IF NOT EXISTS "aia_schema"."attachments" ( + "id" TEXT NOT NULL, + "user_id" TEXT NOT NULL, + "conversation_id" TEXT NOT NULL, + "filename" TEXT NOT NULL, + "mime_type" TEXT, + "size" INTEGER NOT NULL, + "oss_url" TEXT NOT NULL, + "text_content" TEXT, + "extract_status" TEXT NOT NULL DEFAULT 'success', + "extract_error" TEXT, + "token_count" INTEGER NOT NULL DEFAULT 0, + "truncated" BOOLEAN NOT NULL DEFAULT false, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + CONSTRAINT "attachments_pkey" PRIMARY KEY ("id") +); + +CREATE INDEX IF NOT EXISTS "idx_aia_attachments_conversation_id" + ON "aia_schema"."attachments"("conversation_id"); + +CREATE INDEX IF NOT EXISTS "idx_aia_attachments_user_id" + ON "aia_schema"."attachments"("user_id"); + +CREATE INDEX IF NOT EXISTS "idx_aia_attachments_extract_status" + ON "aia_schema"."attachments"("extract_status"); + +CREATE INDEX IF NOT EXISTS "idx_aia_attachments_created_at" + ON "aia_schema"."attachments"("created_at"); diff --git a/backend/prisma/schema.prisma b/backend/prisma/schema.prisma index 6462a3b1..c1f4d73f 100644 --- a/backend/prisma/schema.prisma +++ b/backend/prisma/schema.prisma @@ -127,6 +127,31 @@ model Message { @@schema("aia_schema") } +/// AIA 附件持久化(数据库真相源,缓存仅加速) +model AiaAttachment { + id String @id + userId String @map("user_id") + conversationId String 
@map("conversation_id") + filename String + mimeType String? @map("mime_type") + size Int + ossUrl String @map("oss_url") + textContent String? @map("text_content") @db.Text + extractStatus String @default("success") @map("extract_status") // success | failed | empty + extractError String? @map("extract_error") + tokenCount Int @default(0) @map("token_count") + truncated Boolean @default(false) + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@index([conversationId], map: "idx_aia_attachments_conversation_id") + @@index([userId], map: "idx_aia_attachments_user_id") + @@index([extractStatus], map: "idx_aia_attachments_extract_status") + @@index([createdAt], map: "idx_aia_attachments_created_at") + @@map("attachments") + @@schema("aia_schema") +} + model KnowledgeBase { id String @id @default(uuid()) userId String @map("user_id") diff --git a/backend/src/common/health/healthCheck.ts b/backend/src/common/health/healthCheck.ts index 8b36d308..c3730ee5 100644 --- a/backend/src/common/health/healthCheck.ts +++ b/backend/src/common/health/healthCheck.ts @@ -2,11 +2,21 @@ import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify' import { prisma, getDatabaseConnectionCount } from '../../config/database.js' import os from 'os' +/** + * 停机标记:SIGTERM 后设为 true,liveness/readiness 立即返回 503 + * 让 CLB/SAE 不再向本 Pod 派发新请求 + */ +let isShuttingDown = false; + +export function markShuttingDown(): void { + isShuttingDown = true; +} + /** * 健康检查响应 */ export interface HealthCheckResponse { - status: 'ok' | 'error' | 'degraded' + status: 'ok' | 'error' | 'degraded' | 'shutting_down' timestamp: number uptime: number checks?: Record _request: FastifyRequest, reply: FastifyReply ) => { + if (isShuttingDown) { + return reply.status(503).send({ + status: 'shutting_down', + timestamp: Date.now(), + uptime: process.uptime(), + }); + } + const response: HealthCheckResponse = { status: 'ok', timestamp: Date.now(), @@ -68,6 
+86,14 @@ export async function registerHealthRoutes(app: FastifyInstance): Promise _request: FastifyRequest, reply: FastifyReply ) => { + if (isShuttingDown) { + return reply.status(503).send({ + status: 'shutting_down', + timestamp: Date.now(), + uptime: process.uptime(), + }); + } + const checks: Record = {} let overallStatus: 'ok' | 'error' | 'degraded' = 'ok' diff --git a/backend/src/common/health/index.ts b/backend/src/common/health/index.ts index 903d8f05..c9c27f52 100644 --- a/backend/src/common/health/index.ts +++ b/backend/src/common/health/index.ts @@ -21,7 +21,7 @@ * ``` */ -export { registerHealthRoutes } from './healthCheck.js' +export { registerHealthRoutes, markShuttingDown } from './healthCheck.js' export type { HealthCheckResponse } from './healthCheck.js' diff --git a/backend/src/common/streaming/OpenAIStreamAdapter.ts b/backend/src/common/streaming/OpenAIStreamAdapter.ts index d0318e5d..90487ba2 100644 --- a/backend/src/common/streaming/OpenAIStreamAdapter.ts +++ b/backend/src/common/streaming/OpenAIStreamAdapter.ts @@ -36,7 +36,6 @@ export class OpenAIStreamAdapter { this.reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', 'X-Accel-Buffering': 'no', 'Access-Control-Allow-Origin': '*', }); diff --git a/backend/src/config/env.ts b/backend/src/config/env.ts index 31cf5808..69faf90c 100644 --- a/backend/src/config/env.ts +++ b/backend/src/config/env.ts @@ -220,6 +220,11 @@ export function validateEnv(): void { } } + // 生产环境禁止内存缓存(多实例不共享,易导致状态不一致) + if (config.nodeEnv === 'production' && config.cacheType === 'memory') { + errors.push('CACHE_TYPE=memory is forbidden in production; use postgres or redis') + } + // 如果使用PgBoss队列,验证数据库配置 if (config.queueType === 'pgboss') { if (!config.databaseUrl) { diff --git a/backend/src/index.ts b/backend/src/index.ts index 2ad4773d..ac1dfbfd 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -15,7 +15,7 @@ import { aslRoutes } from 
'./modules/asl/routes/index.js'; import { registerDCRoutes, initDCModule } from './modules/dc/index.js'; import pkbRoutes from './modules/pkb/routes/index.js'; import { aiaRoutes } from './modules/aia/index.js'; -import { registerHealthRoutes } from './common/health/index.js'; +import { registerHealthRoutes, markShuttingDown } from './common/health/index.js'; import { logger } from './common/logging/index.js'; import { authRoutes, registerAuthPlugin } from './common/auth/index.js'; import { promptRoutes } from './common/prompt/index.js'; @@ -339,11 +339,24 @@ start(); // ============================================ // 🛡️ 优雅关闭处理(Graceful Shutdown) // ============================================ +const SHUTDOWN_TIMEOUT_MS = 30_000; + const gracefulShutdown = async (signal: string) => { console.log(`\n⚠️ 收到 ${signal} 信号,开始优雅关闭...`); - + + // 立即标记停机,健康检查返回 503,CLB 不再派发新请求 + markShuttingDown(); + console.log('🚫 健康检查已切换为 503,CLB 将停止路由新流量'); + + // 强制超时兜底:防止 SSE 长连接或死循环任务阻塞退出 + const forceTimer = setTimeout(() => { + console.error(`❌ 优雅关闭超时 (${SHUTDOWN_TIMEOUT_MS / 1000}s),强制退出`); + process.exit(1); + }, SHUTDOWN_TIMEOUT_MS); + forceTimer.unref(); + try { - // 1. 停止接收新请求 + // 1. 
停止接收新请求(已有 SSE 连接继续跑完) await fastify.close(); console.log('✅ HTTP 服务已停止'); diff --git a/backend/src/legacy/controllers/chatController.ts b/backend/src/legacy/controllers/chatController.ts index 06b72c76..31f0bc1a 100644 --- a/backend/src/legacy/controllers/chatController.ts +++ b/backend/src/legacy/controllers/chatController.ts @@ -399,8 +399,8 @@ export class ChatController { reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - Connection: 'keep-alive', 'Access-Control-Allow-Origin': '*', + 'X-Accel-Buffering': 'no', }); // 保存用户消息 diff --git a/backend/src/legacy/controllers/conversationController.ts b/backend/src/legacy/controllers/conversationController.ts index 384324ad..e66e829f 100644 --- a/backend/src/legacy/controllers/conversationController.ts +++ b/backend/src/legacy/controllers/conversationController.ts @@ -195,8 +195,8 @@ export class ConversationController { reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - Connection: 'keep-alive', 'Access-Control-Allow-Origin': '*', + 'X-Accel-Buffering': 'no', }); // 流式输出 diff --git a/backend/src/modules/aia/services/attachmentService.ts b/backend/src/modules/aia/services/attachmentService.ts index 9f4489d7..6a3855f7 100644 --- a/backend/src/modules/aia/services/attachmentService.ts +++ b/backend/src/modules/aia/services/attachmentService.ts @@ -12,6 +12,7 @@ import { logger } from '../../../common/logging/index.js'; import { storage } from '../../../common/storage/index.js'; import { cache } from '../../../common/cache/index.js'; import { ExtractionClient } from '../../../common/document/ExtractionClient.js'; +import { prisma } from '../../../config/database.js'; import type { Attachment } from '../types/index.js'; // 附件缓存前缀和过期时间(2小时) @@ -25,6 +26,24 @@ const MAX_ATTACHMENTS = 5; const MAX_TOKENS_PER_ATTACHMENT = 30000; // 单个附件最大 30k Token const ALLOWED_FILE_TYPES = ['pdf', 'docx', 'txt', 'xlsx', 'doc']; +interface 
AttachmentQueryScope { + userId?: string; + conversationId?: string; +} + +interface AiaAttachmentTextRecord { + id: string; + textContent: string | null; + extractStatus: 'success' | 'failed' | 'empty' | string; + extractError: string | null; +} + +interface AiaAttachmentInfoRecord { + id: string; + filename: string; + size: number; +} + // ==================== 附件上传 ==================== /** @@ -56,10 +75,17 @@ export async function uploadAttachment( // 3. 提取文本内容 let extractedText = ''; + let extractStatus: 'success' | 'failed' | 'empty' = 'success'; + let extractError: string | undefined; + let wasTruncated = false; try { // 对于 txt 文件,直接读取内容(不依赖 Python 服务) if (ext === 'txt') { extractedText = file.buffer.toString('utf-8'); + if (!extractedText.trim()) { + extractStatus = 'empty'; + extractedText = '[文档内容为空或无法提取]'; + } logger.info('[AIA:AttachmentService] TXT文件直接读取成功', { filename: file.filename, charCount: extractedText.length, @@ -89,6 +115,8 @@ export async function uploadAttachment( filename: file.filename, error: result.error, }); + extractStatus = 'empty'; + extractError = result.error || '文档内容为空或无法提取'; extractedText = '[文档内容为空或无法提取]'; } } @@ -99,6 +127,7 @@ export async function uploadAttachment( const ratio = MAX_TOKENS_PER_ATTACHMENT / tokens; const truncatedLength = Math.floor(extractedText.length * ratio); extractedText = extractedText.slice(0, truncatedLength) + '\n\n[内容已截断,超过30k Token限制]'; + wasTruncated = true; logger.info('[AIA:AttachmentService] 附件内容截断', { originalTokens: tokens, @@ -107,17 +136,19 @@ export async function uploadAttachment( }); } } catch (error) { + const message = error instanceof Error ? error.message : String(error); logger.error('[AIA:AttachmentService] 文本提取失败', { error, filename: file.filename, }); + extractStatus = 'failed'; + extractError = message; extractedText = '[文档内容提取失败]'; } // 5. 
构建附件对象 const attachmentId = `att-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; - const tokenCount = estimateTokens(extractedText); - const truncated = tokenCount > MAX_TOKENS_PER_ATTACHMENT; + const tokenCount = extractStatus === 'success' ? estimateTokens(extractedText) : 0; const attachment: Attachment = { id: attachmentId, @@ -127,12 +158,12 @@ export async function uploadAttachment( ossUrl: url, textContent: extractedText, tokenCount, - truncated, + truncated: wasTruncated, createdAt: new Date().toISOString(), }; // 6. 将提取的文本存储到缓存(供后续发送消息时使用) - if (extractedText && extractedText !== '[文档内容提取失败]' && extractedText !== '[文档内容为空或无法提取]') { + if (extractStatus === 'success' && extractedText) { await cache.set( `${ATTACHMENT_CACHE_PREFIX}${attachmentId}`, extractedText, @@ -150,6 +181,9 @@ export async function uploadAttachment( id: attachmentId, filename: file.filename, size: file.buffer.length, + mimeType: file.mimetype, + ossUrl: url, + extractStatus, }; await cache.set( `${ATTACHMENT_INFO_CACHE_PREFIX}${attachmentId}`, @@ -157,13 +191,48 @@ export async function uploadAttachment( ATTACHMENT_CACHE_TTL ); + // 8. 附件信息持久化到数据库(真相源) + await (prisma as any).aiaAttachment.upsert({ + where: { id: attachmentId }, + update: { + userId, + conversationId, + filename: file.filename, + mimeType: file.mimetype, + size: file.buffer.length, + ossUrl: url, + textContent: extractStatus === 'success' ? extractedText : null, + extractStatus, + extractError: extractError || null, + tokenCount, + truncated: wasTruncated, + }, + create: { + id: attachmentId, + userId, + conversationId, + filename: file.filename, + mimeType: file.mimetype, + size: file.buffer.length, + ossUrl: url, + textContent: extractStatus === 'success' ? 
extractedText : null, + extractStatus, + extractError: extractError || null, + tokenCount, + truncated: wasTruncated, + }, + }); + return attachment; } /** * 批量获取附件文本内容 */ -export async function getAttachmentsText(attachmentIds: string[]): Promise { +export async function getAttachmentsText( + attachmentIds: string[], + scope: AttachmentQueryScope = {}, +): Promise { if (!attachmentIds || attachmentIds.length === 0) { return ''; } @@ -174,11 +243,28 @@ export async function getAttachmentsText(attachmentIds: string[]): Promise [r.id, r])); for (const attachmentId of attachmentIds) { try { const cacheKey = `${ATTACHMENT_CACHE_PREFIX}${attachmentId}`; - const text = await cache.get(cacheKey); + const text = await cache.get(cacheKey); if (text) { texts.push(`【附件: ${attachmentId}】\n${text}`); @@ -187,8 +273,22 @@ export async function getAttachmentsText(attachmentIds: string[]): Promise> { if (!attachmentIds || attachmentIds.length === 0) { return []; } const details: Array<{ id: string; filename: string; size: number }> = []; + const missingIds: string[] = []; for (const attachmentId of attachmentIds) { try { const cacheKey = `${ATTACHMENT_INFO_CACHE_PREFIX}${attachmentId}`; - const infoJson = await cache.get(cacheKey); + const infoJson = await cache.get(cacheKey); if (infoJson) { const info = JSON.parse(infoJson); details.push(info); } else { - logger.warn('[AIA:AttachmentService] 附件信息不在缓存中', { attachmentId }); - // 如果缓存中没有,添加一个占位信息 + missingIds.push(attachmentId); + } + } catch (error) { + logger.error('[AIA:AttachmentService] 获取附件信息失败', { attachmentId, error }); + missingIds.push(attachmentId); + } + } + + if (missingIds.length > 0) { + const where: any = { + id: { in: missingIds }, + }; + if (scope.userId) where.userId = scope.userId; + if (scope.conversationId) where.conversationId = scope.conversationId; + + const dbRecords = await (prisma as any).aiaAttachment.findMany({ + where, + select: { + id: true, + filename: true, + size: true, + }, + }); + const 
typedDbRecords = dbRecords as AiaAttachmentInfoRecord[]; + const dbMap = new Map(typedDbRecords.map((r: AiaAttachmentInfoRecord) => [r.id, r])); + + for (const attachmentId of missingIds) { + const record = dbMap.get(attachmentId); + if (record) { + const info = { + id: record.id, + filename: record.filename, + size: record.size, + }; + details.push(info); + await cache.set( + `${ATTACHMENT_INFO_CACHE_PREFIX}${attachmentId}`, + JSON.stringify(info), + ATTACHMENT_CACHE_TTL, + ); + } else { + logger.warn('[AIA:AttachmentService] 附件信息缓存/数据库均未命中', { attachmentId }); details.push({ id: attachmentId, filename: '未知文件', size: 0, }); } - } catch (error) { - logger.error('[AIA:AttachmentService] 获取附件信息失败', { attachmentId, error }); } } diff --git a/backend/src/modules/aia/services/conversationService.ts b/backend/src/modules/aia/services/conversationService.ts index 8d4e2343..b9e547d7 100644 --- a/backend/src/modules/aia/services/conversationService.ts +++ b/backend/src/modules/aia/services/conversationService.ts @@ -227,7 +227,6 @@ export async function getMessages( return { messages: messages.map(m => { const attachmentsJson = m.attachments as any; - const attachmentIds = attachmentsJson?.ids as string[] | undefined; // 直接从 JSON 字段读取附件详情(不查询数据库) const attachmentDetails = attachmentsJson?.details as Array<{ id: string; filename: string; size: number }> | undefined; @@ -237,10 +236,10 @@ export async function getMessages( role: m.role as 'user' | 'assistant', content: m.content, thinkingContent: m.thinkingContent || undefined, - attachments: attachmentIds, attachmentDetails: attachmentDetails && attachmentDetails.length > 0 ? 
attachmentDetails : undefined, model: m.model || undefined, tokens: m.tokens || undefined, + isPinned: m.isPinned, createdAt: m.createdAt.toISOString(), }; }), @@ -287,7 +286,10 @@ export async function sendMessageStream( let attachmentsData = undefined; if (attachmentIds && attachmentIds.length > 0) { // 从缓存获取附件详情 - const attachmentDetails = await attachmentService.getAttachmentDetails(attachmentIds); + const attachmentDetails = await attachmentService.getAttachmentDetails(attachmentIds, { + userId, + conversationId, + }); attachmentsData = { ids: attachmentIds, details: attachmentDetails, @@ -309,7 +311,7 @@ export async function sendMessageStream( // 5. 处理附件文本(如果有) let userContent = content; if (attachmentIds && attachmentIds.length > 0) { - const attachmentText = await getAttachmentText(attachmentIds); + const attachmentText = await getAttachmentText(attachmentIds, userId, conversationId); if (attachmentText) { userContent = `${content}\n\n---\n附件内容:\n${attachmentText}`; } @@ -434,9 +436,13 @@ async function buildContextMessages( * 获取附件文本内容 * 从缓存中获取上传时提取的文本 */ -async function getAttachmentText(attachmentIds: string[]): Promise { +async function getAttachmentText( + attachmentIds: string[], + userId: string, + conversationId: string, +): Promise { logger.info('[AIA:ConversationService] 获取附件文本', { attachmentIds }); - return attachmentService.getAttachmentsText(attachmentIds); + return attachmentService.getAttachmentsText(attachmentIds, { userId, conversationId }); } /** diff --git a/backend/src/modules/asl/controllers/researchController.ts b/backend/src/modules/asl/controllers/researchController.ts index e52e1e47..b6c01746 100644 --- a/backend/src/modules/asl/controllers/researchController.ts +++ b/backend/src/modules/asl/controllers/researchController.ts @@ -38,8 +38,8 @@ export async function streamSearch( reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': 
'*', + 'X-Accel-Buffering': 'no', }); try { diff --git a/backend/src/modules/asl/extraction/controllers/ExtractionController.ts b/backend/src/modules/asl/extraction/controllers/ExtractionController.ts index f377f363..062853ab 100644 --- a/backend/src/modules/asl/extraction/controllers/ExtractionController.ts +++ b/backend/src/modules/asl/extraction/controllers/ExtractionController.ts @@ -202,7 +202,6 @@ export async function streamTaskLogs( reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - Connection: 'keep-alive', 'X-Accel-Buffering': 'no', }); diff --git a/backend/src/modules/dc/tool-c/controllers/StreamAIController.ts b/backend/src/modules/dc/tool-c/controllers/StreamAIController.ts index 81b45c83..c132cd24 100644 --- a/backend/src/modules/dc/tool-c/controllers/StreamAIController.ts +++ b/backend/src/modules/dc/tool-c/controllers/StreamAIController.ts @@ -61,8 +61,7 @@ export class StreamAIController { // 设置SSE响应头 reply.raw.setHeader('Content-Type', 'text/event-stream'); reply.raw.setHeader('Cache-Control', 'no-cache'); - reply.raw.setHeader('Connection', 'keep-alive'); - reply.raw.setHeader('X-Accel-Buffering', 'no'); // 禁用Nginx缓冲 + reply.raw.setHeader('X-Accel-Buffering', 'no'); // 发送步骤消息的辅助函数 const sendStep = (step: number, stepName: string, status: StreamMessage['status'], message: string, data?: any, error?: string, retryCount?: number) => { diff --git a/backend/src/modules/pkb/controllers/chatController.ts b/backend/src/modules/pkb/controllers/chatController.ts index e1e1cf64..7a505337 100644 --- a/backend/src/modules/pkb/controllers/chatController.ts +++ b/backend/src/modules/pkb/controllers/chatController.ts @@ -182,7 +182,7 @@ export async function sendMessageStream( reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', }); reply.raw.write(`data: ${JSON.stringify({ content: `\n\n⚠️ **Token数量超限**\n\n${errorMsg}`, 
@@ -223,8 +223,8 @@ export async function sendMessageStream( reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', + 'X-Accel-Buffering': 'no', }); // 流式输出 diff --git a/backend/src/modules/ssa/routes/chat.routes.ts b/backend/src/modules/ssa/routes/chat.routes.ts index e7ea6ffb..9ceda0bd 100644 --- a/backend/src/modules/ssa/routes/chat.routes.ts +++ b/backend/src/modules/ssa/routes/chat.routes.ts @@ -43,11 +43,10 @@ export default async function chatRoutes(app: FastifyInstance) { return reply.status(400).send({ error: '消息内容不能为空' }); } - // SSE 响应头 + // SSE 响应头(不设置 Connection,HTTP/2 中为禁止头部) reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', 'X-Accel-Buffering': 'no', }); diff --git a/backend/src/modules/ssa/routes/session.routes.ts b/backend/src/modules/ssa/routes/session.routes.ts index 5bc55635..38649e1b 100644 --- a/backend/src/modules/ssa/routes/session.routes.ts +++ b/backend/src/modules/ssa/routes/session.routes.ts @@ -266,8 +266,8 @@ export default async function sessionRoutes(app: FastifyInstance) { reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', + 'X-Accel-Buffering': 'no', }); const send = (type: string, data: any) => { diff --git a/backend/src/modules/ssa/routes/workflow.routes.ts b/backend/src/modules/ssa/routes/workflow.routes.ts index 610395fd..8e7ff57e 100644 --- a/backend/src/modules/ssa/routes/workflow.routes.ts +++ b/backend/src/modules/ssa/routes/workflow.routes.ts @@ -276,8 +276,8 @@ export default async function workflowRoutes(app: FastifyInstance) { reply.raw.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - 'Access-Control-Allow-Origin': '*' + 
'Access-Control-Allow-Origin': '*', + 'X-Accel-Buffering': 'no', }); // 发送初始连接确认 diff --git a/docs/03-业务模块/SSA-智能统计分析/07-统计专家配置/SAE 生产环境 SSE 故障诊断与终极防御.md b/docs/03-业务模块/SSA-智能统计分析/07-统计专家配置/SAE 生产环境 SSE 故障诊断与终极防御.md new file mode 100644 index 00000000..e46e06db --- /dev/null +++ b/docs/03-业务模块/SSA-智能统计分析/07-统计专家配置/SAE 生产环境 SSE 故障诊断与终极防御.md @@ -0,0 +1,128 @@ +# **SAE 生产环境 SSE 协议故障诊断与终极防御指南** + +**故障现象:** SAE 部署后,前端偶发 net::ERR\_HTTP2\_PROTOCOL\_ERROR,后端日志显示请求已接收甚至已完成。第二次请求恢复正常。 + +**故障定性:** 云原生环境下的经典长连接断裂与 HTTP/2 协议翻译冲突。 + +**核心认知:** 在 Serverless 容器中,不要试图“防止连接断开”(做不到),必须通过“前端智能重连”和“后端优雅停机”来容错。 + +## **一、 为什么你们的修复“治标不治本”?** + +你们已经做了非常出色的网络层修复: + +1. **去除了 Connection: keep-alive**:防止 HTTP/2 严格模式下因为禁用的连接专有头部(Connection-Specific Headers)导致强制 RST\_STREAM。 +2. **条件化了 Connection: Upgrade**:防止 Nginx 把普通的 SSE 长轮询当成 WebSocket 升级,导致协议错乱。 + +**为什么还是会偶发失败?** + +因为当 SAE 进行滚动更新(Rolling Update)时,旧的 Node.js Pod 会收到 SIGTERM 信号准备退出。此时,阿里云 CLB(负载均衡)的连接池中可能还有几十条保持活跃的 HTTP/2 物理连接。 + +如果浏览器复用了这条即将被回收的 HTTP/2 通道来发起新的 SSE 请求,或者旧 Pod 直接被底层硬杀(Kill \-9),Nginx 往后端转发时会遭遇 Connection Refused 或 Broken Pipe。Nginx 无法优雅地把这个错误包装成 HTTP 状态码,只能粗暴地向客户端发送一个 HTTP/2 GOAWAY 帧,浏览器收到后就会报出 ERR\_HTTP2\_PROTOCOL\_ERROR。 + +## **二、 终极防御三板斧(Cloud-Native Resilience)** + +要 100% 消除用户的报错感知,必须在前端、Nginx 和后端落地以下三个机制: + +### **🪓 第一斧:前端 SSE 智能断线重连 (Intelligent Retry)** + +这是解决问题的绝对核心!对于大模型对话(Chat)和分析执行(Execute),前端绝不能因为一次底层网络闪断就把红色错误拍在用户脸上。 + +**改造方案:** 在 useSSAChat.ts 或底层 SSE 请求库中(推荐使用微软的 @microsoft/fetch-event-source 库,它自带强大的重试机制),拦截网络层错误并静默重试。 + +import { fetchEventSource } from '@microsoft/fetch-event-source'; + +async function startSseStream(url: string, payload: any) { + let retryCount \= 0; + const MAX\_RETRIES \= 3; + + await fetchEventSource(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload), + + // 关键:拦截底层 HTTP/2 协议错误并决定是否重试 + async onopen(response) { + if (response.ok) retryCount \= 0; // 连接成功,重置计数器 + }, + + onerror(err) { + retryCount++; + if (retryCount \> 
MAX\_RETRIES) { + throw err; // 超过重试次数,才真正向用户报错 + } + // 记录日志,但不抛出异常 + console.warn(\`\[SSE\] 网络闪断,正在进行第 ${retryCount} 次重连...\`, err); + // 返回一个延迟时间 (指数退避) 告诉底层库多久后重连 + return Math.min(1000 \* (2 \*\* retryCount), 5000); + }, + + onmessage(msg) { + // 处理正常的 SSE 事件 + } + }); +} + +*效果:即便 SAE 正在滚动部署,CLB 断开了连接,浏览器会在 1 秒内静默发起第二次请求,此时 CLB 已经指向了新 Pod,用户毫无感知,只会觉得“这次思考慢了一秒”。* + +### **🪓 第二斧:Nginx 层彻底关闭缓存 (Disable Buffering)** + +如果你们的 Ingress 或前置 Nginx 开启了代理缓冲(Proxy Buffering),它是 SSE 流式输出的绝对天敌,也是加剧协议错误的元凶。 + +**改造方案:** 必须为 /api/v1/ssa/\*/chat 等 SSE 接口单独关闭 Nginx 缓冲。 + +location \~ ^/api/v1/ssa/.\*(?:chat|stream)$ { + proxy\_pass http://backend\_upstream; + + \# 以下三行是 SSE 救命神药 + proxy\_http\_version 1.1; + proxy\_buffering off; \# 严禁 Nginx 缓存 Chunk 数据 + proxy\_cache off; \# 严禁缓存 + chunked\_transfer\_encoding on; + + \# 彻底清理可能导致 H2 冲突的 Headers + proxy\_set\_header Connection ''; + + \# 延长超时时间(应对大模型深度思考) + proxy\_read\_timeout 120s; +} + +### **🪓 第三斧:Node.js 后端优雅停机 (Graceful Shutdown)** + +为什么部署期间连接断裂那么惨烈?因为 Node.js 默认在收到 SAE 的部署停止信号(SIGTERM)时会立刻自杀。 + +**改造方案:** 在 main.ts 或 app.module.ts 中实现 Graceful Shutdown。当收到退出信号时,拒绝新的请求,但**给已经建立的 SSE 长连接留出 30 秒的执行时间**。 + +// Node.js Express/NestJS 优雅停机示例 +let isShuttingDown \= false; + +// 1\. 探针接口:一旦开始停机,立刻告诉 SAE/CLB "我不健康了",不要再给我派发新请求 +app.get('/health', (req, res) \=\> { + if (isShuttingDown) { + return res.status(503).send('Shutting down'); + } + res.status(200).send('OK'); +}); + +// 2\. 
捕获系统终止信号 +process.on('SIGTERM', () \=\> { + console.log('\[System\] 收到 SIGTERM 信号,准备优雅停机...'); + isShuttingDown \= true; + + // 停止接收新连接 + server.close(() \=\> { + console.log('\[System\] 所有现有连接已处理完毕,安全退出。'); + process.exit(0); + }); + + // 强制超时机制:如果过了 30 秒还有 SSE 流没跑完(比如陷入死循环),强制退出 + setTimeout(() \=\> { + console.error('\[System\] 优雅停机超时 (30s),强制退出。'); + process.exit(1); + }, 30000); +}); + +## **三、 架构师总结** + +你们在本地开发环境(HTTP/1.1 直连,无负载均衡,无 Pod 销毁)永远无法复现这个问题。 + +**这不单纯是网络配置问题,这是 Serverless 架构下的“状态保持”冲突。** 请优先让前端团队加上 **@microsoft/fetch-event-source 的静默重连机制**,这不仅能解决 SAE 部署期间的报错,还能解决未来医院网络环境不稳定、用户切网(WIFI 换 5G)导致的所有诡异长连接断开问题。这是投入产出比(ROI)最高的一记绝杀。 \ No newline at end of file diff --git a/docs/05-部署文档/00-阿里云SAE最新真实状态记录.md b/docs/05-部署文档/00-阿里云SAE最新真实状态记录.md index 12f98f1a..f2018af1 100644 --- a/docs/05-部署文档/00-阿里云SAE最新真实状态记录.md +++ b/docs/05-部署文档/00-阿里云SAE最新真实状态记录.md @@ -1,7 +1,7 @@ # 🚀 AI临床研究平台 - 阿里云SAE最新真实状态记录 > **文档用途**:记录阿里云SAE服务器最新真实状态 + 每次部署记录 -> **最后更新**:2026-03-05 +> **最后更新**:2026-03-09 > **维护人员**:开发团队 > **说明**:本文档准确记录SAE上所有应用的当前状态,包括内网地址、镜像版本、用户名密码等关键资源信息 @@ -11,11 +11,11 @@ | 服务名称 | 部署状态 | 镜像版本 | 部署位置 | 最后更新时间 | |---------|---------|---------|---------|-------------| -| **PostgreSQL数据库** | ✅ 运行中 | PostgreSQL 15 + 插件 | RDS | 2026-03-05 | -| **前端Nginx服务** | ✅ 运行中 | **v2.5** | SAE | 2026-03-05 | +| **PostgreSQL数据库** | ✅ 运行中 | PostgreSQL 15 + 插件 | RDS | 2026-03-09 | +| **前端Nginx服务** | ✅ 运行中 | **v2.6** | SAE | 2026-03-09 | | **Python微服务** | ✅ 运行中 | **v1.2** | SAE | 2026-02-27 | -| **Node.js后端** | ✅ 运行中 | **v2.8** | SAE | 2026-03-05 | -| **R统计引擎** | ✅ 运行中 | **v1.0.1** | SAE | 2026-02-27 | +| **Node.js后端** | ✅ 运行中 | **v2.9** | SAE | 2026-03-09 | +| **R统计引擎** | ✅ 运行中 | **v1.0.2** | SAE | 2026-03-09 | | **Dify AI服务** | ⚠️ 已废弃 | - | - | 使用pgvector替代 | --- @@ -36,9 +36,9 @@ | 仓库名称 | 最新版本 | 镜像大小 | VPC地址 | |---------|---------|---------|---------| | **python-extraction** | **v1.2** | ~1.1GB | 
`crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.2` | -| **ssa-r-statistics** | **v1.0.1** | ~1.8GB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ssa-r-statistics:v1.0.1` | -| **ai-clinical_frontend-nginx** | **v2.5** | ~50MB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v2.5` | -| **backend-service** | **v2.8** | ~838MB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/backend-service:v2.8` | +| **ssa-r-statistics** | **v1.0.2** | ~2.1GB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ssa-r-statistics:v1.0.2` | +| **ai-clinical_frontend-nginx** | **v2.6** | ~96MB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v2.6` | +| **backend-service** | **v2.9** | ~897MB | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/backend-service:v2.9` | --- @@ -91,7 +91,7 @@ postgresql://airesearch:Xibahe%40fengzhibo117@pgm-2zex1m2y3r23hdn5.pg.rds.aliyun | 数据库 | 环境 | Schema数量 | 表数量 | 插件 | 用途 | |--------|------|-----------|-------|------|------| -| `ai_clinical_research_test` | 测试环境(当前) | 16 | **86** | pg_bigm, pgvector | SAE测试环境 | +| `ai_clinical_research_test` | 测试环境(当前) | 16 | **86** | pg_bigm, pgvector | SAE测试环境(24个迁移已应用) | | `ai_clinical_research` | 生产环境(备用) | 11 | ~34 | pg_bigm, pgvector | 未来正式上线 | **Schema架构**(16个业务Schema - 测试数据库): @@ -127,10 +127,10 @@ postgresql://airesearch:Xibahe%40fengzhibo117@pgm-2zex1m2y3r23hdn5.pg.rds.aliyun | 应用名称 | 状态 | 规格 | 实例数 | 端口 | 内网地址 | 镜像版本 | |---------|------|------|-------|------|---------|---------| -| **r-statistics-test** | ✅ 运行中 | 1核2GB | 1 | 8080 | `http://172.17.173.101:8080` | **v1.0.1** | +| **r-statistics-test** | ✅ 运行中 | 1核2GB | 1 | 8080 | `http://172.17.197.22:8080` | **v1.0.2** | | **python-extraction-test** | ✅ 运行中 | **2核4GB** | 1 | 8000 | 
`http://172.17.173.102:8000` | **v1.2** | -| **nodejs-backend-test** | ✅ 运行中 | **2核4GB** | 1 | 3001 | `http://172.17.173.106:3001` | **v2.8** | -| **frontend-nginx-service** | ✅ 运行中 | 0.5核1GB | 1 | 80 | `http://172.17.173.107:80` | **v2.5** | +| **nodejs-backend-test** | ✅ 运行中 | **2核4GB** | 1 | 3001 | `http://172.17.173.108:3001` | **v2.9** | +| **frontend-nginx-service** | ✅ 运行中 | 0.5核1GB | 1 | 80 | `http://172.17.197.23:80` | **v2.6** | **环境变量配置**: @@ -144,7 +144,7 @@ DATABASE_URL=postgresql://airesearch:Xibahe%40fengzhibo117@pgm-2zex1m2y3r23hdn5. EXTRACTION_SERVICE_URL=http://172.17.173.102:8000 # R统计引擎地址 -R_SERVICE_URL=http://172.17.173.101:8080 +R_SERVICE_URL=http://172.17.197.22:8080 # OSS配置 OSS_ACCESS_KEY_ID=LTAI5tB2Dt3NdvBL3G7nYGv7 @@ -191,7 +191,7 @@ LEGACY_MYSQL_DATABASE=xzyx_online **前端Nginx(frontend-nginx-service)**: ```bash -BACKEND_SERVICE_HOST=172.17.173.106 +BACKEND_SERVICE_HOST=172.17.173.108 BACKEND_SERVICE_PORT=3001 ``` @@ -364,6 +364,49 @@ AIclinicalresearch/extraction_service/ ## 🔄 四、部署历史记录 +### 2026-03-09(0309部署 - 数据库4迁移 + R/后端/前端全量更新) + +#### 部署概览 +- **部署时间**:2026-03-09 +- **部署范围**:数据库迁移(4项) + 种子数据(3项) + R统计引擎 + Node.js后端 + 前端Nginx +- **主要变更**:RVW 4维评审 + SSA Agent 优化 + IIT 去重护栏 + 认证互踢强一致 + 全局心跳 + +#### 数据库变更(7项) +- ✅ DB-1:modules 表 seed 新增 ASL_SR 模块 +- ✅ DB-2:RVW Prompt 新增 DATA_VALIDATION + CLINICAL +- ✅ DB-3:ReviewTask 新增 error_details JSONB 字段 +- ✅ DB-4:SSA execution_mode 默认改为 agent + 历史数据更新 +- ✅ DB-5:SSA Agent Prompt 种子数据(PLANNER + CODER) +- ✅ DB-6:IIT eQuery open 去重护栏 + 唯一索引 +- ✅ DB-7:users 表新增 token_version 列(单账号互踢强一致) + +#### R统计引擎更新(v1.0.1 → v1.0.2) +- ✅ 新增 `/api/v1/execute-code` 端点 + 结构化错误处理 + AST 语法预检 +- ✅ 镜像推送:`ssa-r-statistics:v1.0.2` +- ✅ 内网地址变更:`172.17.173.101` → `172.17.197.22` + +#### Node.js后端更新(v2.8 → v2.9) +- ✅ 13 项变更:Deep Research V2 + ASL_SR 中间件 + RVW 4维评审 + SSA Agent 优化 + IIT 幂等 + 认证互踢 +- ✅ 镜像推送:`backend-service:v2.9` +- ✅ 内网地址变更:`172.17.173.106` → `172.17.173.108` + +#### 前端Nginx更新(v2.5 → v2.6) +- ✅ 10 项变更:ASL 手风琴导航 + 
Deep Research 历史 + RVW 4Tab + SSA Agent UI + 全局心跳互踢 +- ✅ 镜像推送:`ai-clinical_frontend-nginx:v2.6` +- ✅ 内网地址变更:`172.17.173.107` → `172.17.197.23` + +#### 环境变量同步 +- ✅ `nodejs-backend-test` 的 `R_SERVICE_URL` 更新为 `http://172.17.197.22:8080` +- ✅ `frontend-nginx-service` 的 `BACKEND_SERVICE_HOST` 更新为 `172.17.173.108` +- ℹ️ CLB 由阿里云自动更新,无需手动操作 + +#### 文档产出 +- ✅ `0309部署/01-数据库部署完成总结.md`(完整版) +- ✅ `00-阿里云SAE最新真实状态记录.md`(更新) +- ✅ `03-待部署变更清单.md`(清零移入历史) + +--- + ### 2026-03-05(0305部署 - 登录踢人 + 权限体系升级 + SSA双通道 + UI优化) #### 部署概览 @@ -681,5 +724,5 @@ AIclinicalresearch/extraction_service/ --- > **提示**:本文档记录SAE服务器的最新真实状态,每次部署后必须更新! -> **最后更新**:2026-03-05 -> **当前版本**:前端v2.5 | 后端v2.8 | Python v1.2 | R统计v1.0.1 | PostgreSQL 15 +> **最后更新**:2026-03-09 +> **当前版本**:前端v2.6 | 后端v2.9 | Python v1.2 | R统计v1.0.2 | PostgreSQL 15 diff --git a/docs/05-部署文档/03-待部署变更清单.md b/docs/05-部署文档/03-待部署变更清单.md index cb02bbc2..43a06a7e 100644 --- a/docs/05-部署文档/03-待部署变更清单.md +++ b/docs/05-部署文档/03-待部署变更清单.md @@ -3,7 +3,7 @@ > **用途**: 开发过程中实时记录所有待部署的变更,下次部署时按此清单逐项执行 > **维护规则**: 每次修改 Schema / 新增依赖 / 改配置时,**立即**在此文档追加记录 > **Cursor Rule**: `.cursor/rules/deployment-change-tracking.mdc` 会自动提醒 -> **最后清零**: 2026-03-05(0305 部署完成后清零) +> **最后清零**: 2026-03-09(0309 部署完成后清零) --- @@ -15,46 +15,23 @@ | # | 变更内容 | 迁移文件 | 优先级 | 备注 | |---|---------|---------|--------|------| -| DB-1 | modules 表 seed 新增 ASL_SR 模块(系统综述项目) | `backend/scripts/seed-modules.js` | 高 | 部署后需执行 `node scripts/seed-modules.js`,并在运营管理端为目标用户/租户开通 | -| DB-2 | prompt_templates 表新增 RVW_DATA_VALIDATION + RVW_CLINICAL 两个 Prompt | `backend/scripts/migrate-rvw-prompts.ts` | 高 | 部署后需执行 `npx tsx scripts/migrate-rvw-prompts.ts`,运营管理端可配置修改 | -| DB-3 | ReviewTask 表新增 `error_details` JSONB 字段(存储 Skill 级失败详情) | `prisma/migrations/20260307_add_error_details_to_review_task/migration.sql` | 高 | 支持 partial_completed 状态,记录每个失败/超时 Skill 的名称和原因 | -| DB-4 | SSA execution_mode 默认值改为 `agent` + 已有 session 全部更新 | `prisma/migrations/20260308_default_agent_mode/migration.sql` | 高 | ALTER 
DEFAULT + UPDATE 旧数据;QPER UI 入口已移除 | -| DB-5 | SSA Agent Prompt 种子数据(SSA_AGENT_PLANNER / SSA_AGENT_CODER) | `prisma/seed-ssa-agent-prompts.ts` | 高 | 部署后执行 `npx tsx prisma/seed-ssa-agent-prompts.ts`;幂等可重复执行 | -| DB-6 | IIT eQuery open 集合去重护栏(历史收敛 + open 唯一索引) | `prisma/migrations/20260308_add_iit_equery_open_dedupe_guard/migration.sql` | 高 | 先自动将历史重复 open eQuery 收敛为 `auto_closed`,再建立部分唯一索引防止未来重复 | -| DB-7 | users 表新增 `token_version`(单账号互踢强一致) | `prisma/migrations/20260309_add_token_version_to_platform_users/migration.sql` | 高 | 登录原子递增版本号,Access/Refresh Token 带版本,旧会话立即失效 | +| DB-1 | AIA 新增 `attachments` 持久化表(附件文本真相源) | `prisma/migrations/20260309_add_aia_attachments_persistence/migration.sql` | 高 | 解决“附件仅缓存”导致偶发“内容已过期或不存在”,支持缓存 miss 回源数据库 | ### 后端变更 (Node.js) | # | 变更内容 | 涉及文件 | 需要操作 | 备注 | |---|---------|---------|---------|------| -| BE-1 | Deep Research V2.0 新增历史列表 + 删除接口 + getTask 鉴权修复 | `deepResearchController.ts`, `routes/index.ts` | 重新构建镜像 | GET /research/v2/tasks, DELETE /research/tasks/:taskId, getTask 增加 userId 校验 | -| BE-2 | SR 相关路由(projects/literatures/screening/fulltext-screening/extraction/charting/meta-analysis)增加 `requireModule('ASL_SR')` 中间件 | `asl/routes/index.ts`, `extraction/routes/index.ts`, `charting/routes/index.ts`, `meta-analysis/routes/index.ts` | 重新构建镜像 | 需先完成 DB-1 seed,否则无 ASL_SR 模块会 403 | -| BE-3 | Unifuncs DeepSearch API 从 S2 升级至 S3(新增 `language: "zh"`) | `unifuncsSseClient.ts`, `unifuncsAsyncClient.ts` | 重新构建镜像 | 通过 `UNIFUNCS_MODEL` 环境变量控制,默认 s3,设为 s2 可降级 | -| BE-4 | RVW 数据验证增加 LLM 核查通道(DataForensicsSkill 增强) | `DataForensicsSkill.ts`, `prompt.fallbacks.ts` | 重新构建镜像 | 规则验证完成后批量调用 LLM 核查所有表格,失败时降级为纯规则验证 | -| BE-5 | RVW 新增临床专业评估维度(ClinicalAssessmentSkill) | `clinicalService.ts`(新), `ClinicalAssessmentSkill.ts`(新), `library/index.ts`, `profile.ts`, `utils.ts`, `reviewWorker.ts`, `reviewService.ts` | 重新构建镜像 | 新增 clinical Agent + Skill,存储在 contextData.clinicalReview | -| BE-6 | RVW 稳定性增强:SkillExecutor Promise.allSettled + partial_completed 
状态 + errorDetails | `executor.ts`, `reviewWorker.ts`, `reviewService.ts`, `reviewController.ts`, `types/index.ts` | 重新构建镜像 | 并行 Skill 故障隔离,部分模块失败时仍返回成功模块结果,新增 `partial_completed` 任务状态 | -| BE-7 | DataForensicsSkill LLM 核查增加独立 60s 超时 | `DataForensicsSkill.ts` | 重新构建镜像 | LLM 核查超时不阻塞整体 Skill,graceful 降级为纯规则验证 | -| BE-8 | SSA Agent 通道体验优化(方案 B 左右职责分离 + 10 项 Bug 修复) | `ChatHandlerService.ts`, `AgentCoderService.ts`, `chat.routes.ts` | 重新构建镜像 | 视线牵引 Prompt + maxTokens 8000 + 重试流式生成 + consoleOutput 类型防御 + Prompt 铁律 + parseCode 健壮化 | -| BE-9 | Phase 5A:CoderAgent 防错护栏(4 项改动) | `AgentCoderService.ts`, `TokenTruncationService.ts`, `chat.routes.ts` | 重新构建镜像 | XML 标签提取 + 防御性编程 Prompt + 高保真 Schema 注入 + token 配额 2500 + 后端强制 Agent 模式 | -| BE-10 | SSA Agent 核心 Prompt 接入运营管理端(PlannerAgent + CoderAgent) | `AgentPlannerService.ts`, `AgentCoderService.ts`, `prompt.fallbacks.ts` | 重新构建镜像 | 硬编码 → `PromptService.get()` 动态加载;三级容灾:DB → 缓存 → fallback;需先完成 DB-5 | -| BE-11 | IIT eQuery 幂等写入 + 安全去重工具脚本 | `iitEqueryService.ts`, `scripts/dedupe_open_equeries.ts`, `package.json` | 重新构建镜像 | `createBatch` 改为 `ON CONFLICT DO NOTHING`(open 集合),新增 `npx tsx scripts/dedupe_open_equeries.ts [--apply]` | -| BE-12 | IIT 实时工作流事件名称友好化兜底 + AI 对话证据块强制补齐 | `iitQcCockpitController.ts`, `ChatOrchestrator.ts` | 重新构建镜像 | 时间线事件名采用 event_label/cachedRules/fallback 三层映射;回答含“证据:”时若无明细则自动补齐,避免空证据块 | -| BE-13 | 认证链路改造为数据库强一致互踢(去缓存版 tokenVersion) | `auth.service.ts`, `auth.middleware.ts`, `jwt.service.ts` | 重新构建镜像 | 修复并发登录竞态导致多端同时在线:鉴权改为 `tokenVersion === users.token_version` | +| BE-1 | 移除全部 SSE 端点 `Connection: keep-alive` 响应头(HTTP/2 禁止头部) | `chat.routes.ts`, `session.routes.ts`, `workflow.routes.ts`, `OpenAIStreamAdapter.ts`, `ExtractionController.ts`, `researchController.ts`, `conversationController.ts`, `chatController.ts`×2, `StreamAIController.ts` | 重新构建镜像 | 修复 SAE 环境下 SSE 流式响应 `ERR_HTTP2_PROTOCOL_ERROR` | +| BE-2 | 优雅停机增强:健康检查停机时返回 503 + 30s 强制超时兜底 | `healthCheck.ts`, `health/index.ts`, `index.ts` | 重新构建镜像 | CLB 
在滚动更新时不再向濒死 Pod 派发请求 | +| BE-3 | AIA 附件链路稳定性修复(上传落库 + 发送回源 + 错误分层) | `aia/services/attachmentService.ts`, `aia/services/conversationService.ts` | 重新构建镜像 | 上传阶段持久化附件文本与提取状态;发送时缓存未命中自动回源 DB 并回填,显著降低“对话中途上传附件无法识别”概率 | +| BE-4 | 生产环境缓存安全护栏:禁止 `CACHE_TYPE=memory` 启动 | `config/env.ts` | 重新构建镜像 | 防止多实例缓存不共享导致附件/会话等状态偶发丢失,符合云原生规范 | ### 前端变更 | # | 变更内容 | 涉及文件 | 需要操作 | 备注 | |---|---------|---------|---------|------| -| FE-1 | ASL 左侧导航栏重构为互斥手风琴(Deep Research 历史记录 + SR 工具导航) | `ASLLayout.tsx`, `asl-sidebar.css`(新建), `DeepResearchPage.tsx`, `asl/index.tsx` | 重新构建镜像 | Panel A: 智能文献检索历史;Panel B: 系统综述项目(5 工具) | -| FE-2 | Deep Research 历史记录功能(API 客户端 + 类型定义 + URL 任务恢复) | `asl/api/index.ts`, `types/deepResearch.ts`, `DeepResearchPage.tsx`, `asl/index.tsx` | 重新构建镜像 | 新增 listDeepResearchTasks / deleteDeepResearchTask API;新增 /research/deep/:taskId 路由 | -| FE-3 | Panel B SR 工具导航权限控制(hasModule('ASL_SR')) | `ASLLayout.tsx`, `asl-sidebar.css` | 重新构建镜像 | 未开通时显示"请联系管理员开通";已开通显示 5 个 SR 工具导航项 | -| FE-4 | RVW 数据验证报告增加 LLM 核查结果展示 | `ForensicsReport.tsx`, `rvw/types/index.ts` | 重新构建镜像 | 总览展示完整 LLM 报告,每个表格卡片展开后显示对应 AI 核查结果(Markdown) | -| FE-5 | RVW 新增临床专业评估 Tab + Agent 选择项 | `ClinicalReport.tsx`(新), `AgentModal.tsx`, `TaskDetail.tsx`, `rvw/types/index.ts` | 重新构建镜像 | 共 4 个 Tab:稿约规范性/方法学/数据验证/临床评估;Word 导出包含临床评估章节 | -| FE-6 | RVW 前端支持 partial_completed 状态(部分完成) | `TaskDetail.tsx`, `TaskTable.tsx`, `rvw/types/index.ts` | 重新构建镜像 | 琥珀色警告横幅展示失败模块详情,列表页显示"部分完成"标签,支持查看已完成模块的报告 | -| FE-7 | SSA Agent 通道体验优化(方案 B + 动态 UI) | `AgentCodePanel.tsx`, `SSAChatPane.tsx`, `SSAWorkspacePane.tsx`, `SSACodeModal.tsx`, `useSSAChat.ts`, `ssaStore.ts`, `ssa.css` | 重新构建镜像 | 左右职责分离 + JWT 刷新 + 重试代码展示 + 错误信息展示 + 进度条同步 + 导出/查看代码按钮恢复 + ExecutingProgress 组件 | -| FE-8 | SSA 默认 Agent 模式 + 查看代码修复 + 分析历史卡片 | `SSAChatPane.tsx`, `SSAWorkspacePane.tsx`, `useSSAChat.ts`, `ssaStore.ts` | 重新构建镜像 | 移除 ModeToggle + 默认 agent + 查看代码走 Modal + 分析完成后对话插入可点击结果卡片 + ChatIntentType 扩展 system | -| FE-9 | IIT D1 筛选入选表“不合规条目”规则名称友好显示 | 
`EligibilityTable.tsx` | 重新构建镜像 | 不合规条目由 ruleId 显示改为 ruleName 优先,减少技术标识符暴露 | -| FE-10 | 全局会话心跳(10s)提升异地登录互踢感知时效 | `framework/auth/useAuthHeartbeat.ts`, `App.tsx`, `framework/auth/index.ts` | 重新构建镜像 | 页面可见时心跳、隐藏时暂停、切回前台立即校验;旧端通常 10 秒内感知被踢 | +| FE-1 | Nginx `Connection` 头部条件化(`map $http_upgrade $connection_upgrade`) | `nginx.conf` | 重新构建镜像 | SSE 请求不再携带错误的 `Connection: upgrade`,WebSocket 不受影响 | +| FE-2 | SSA 对话网络错误友好提示 + 指数退避自动重试 2 次 + 手动重试按钮 | `useSSAChat.ts`, `SSAChatPane.tsx`, `ssa.css` | 重新构建镜像 | 瞬时网络错误自动重试 2 次(2s/4s 指数退避),失败后中文友好提示 + 蓝色重试按钮 | ### Python 微服务变更 @@ -66,15 +43,13 @@ | # | 变更内容 | 涉及文件 | 需要操作 | 备注 | |---|---------|---------|---------|------| -| R-1 | 新增 POST /api/v1/execute-code 端点(Agent 通道任意 R 代码执行) | `plumber.R` | 重新构建镜像 | 含超时 + 沙箱限制 | -| R-2 | Agent 结构化错误处理增强(20+ 模式匹配 + format_agent_error) | `plumber.R`, `utils/error_codes.R` | 重新构建镜像 | withCallingHandlers 捕获 warnings/messages + 行号提取 + 错误分类 + 修复建议 | -| R-3 | AST 语法预检(parse() 前置于 eval()) | `plumber.R` | 重新构建镜像 | 语法错误秒级返回 E_SYNTAX + 行号 + 上下文代码,不进入沙箱执行 | +| — | *暂无* | | | | ### 环境变量 / 配置变更 | # | 变更内容 | 服务 | 变量名 | 备注 | |---|---------|------|--------|------| -| ENV-1 | Unifuncs 模型版本控制(可选,不配置则默认 s3) | nodejs-backend | `UNIFUNCS_MODEL=s3` | 降级时改为 `s2`,无需重新部署代码 | +| — | *暂无* | | | | ### 基础设施变更 @@ -112,6 +87,18 @@ ## 历史(已部署,仅供追溯) +### 0309 部署已清零项 + +| # | 变更内容 | 部署日期 | 结果 | +|---|---------|---------|------| +| DB | 4 个 Prisma 迁移(error_details + eQuery 去重 + agent 默认 + token_version) | 2026-03-09 | ✅ | +| DB | 3 个种子脚本(modules ASL_SR + RVW Prompt×2 + SSA Agent Prompt×2) | 2026-03-09 | ✅ | +| R | R 统计引擎 v1.0.1 → v1.0.2(execute-code + 错误处理 + AST 预检,3 项变更) | 2026-03-09 | ✅ | +| BE | Node.js v2.8 → v2.9(Deep Research V2 + RVW 4维 + SSA Agent + IIT 幂等 + 认证互踢,13 项变更) | 2026-03-09 | ✅ | +| FE | 前端 v2.5 → v2.6(ASL 手风琴 + RVW 4Tab + SSA Agent UI + 全局心跳,10 项变更) | 2026-03-09 | ✅ | +| ENV | nodejs-backend-test: R_SERVICE_URL → http://172.17.197.22:8080 | 2026-03-09 | ✅ | +| ENV | frontend-nginx-service: BACKEND_SERVICE_HOST 
→ 172.17.173.108 | 2026-03-09 | ✅ | + ### 0305 部署已清零项 | # | 变更内容 | 部署日期 | 结果 | diff --git a/docs/05-部署文档/0309部署/01-数据库部署完成总结.md b/docs/05-部署文档/0309部署/01-数据库部署完成总结.md new file mode 100644 index 00000000..dbbb5e9b --- /dev/null +++ b/docs/05-部署文档/0309部署/01-数据库部署完成总结.md @@ -0,0 +1,182 @@ +# 2026年3月9日部署完成总结 + +> **部署日期**:2026-03-09 +> **部署范围**:数据库迁移(4项) + 种子数据(3项) + R统计引擎 + Node.js后端 + 前端Nginx +> **部署状态**:✅ 全部完成 +> **文档日期**:2026-03-09 + +--- + +## 部署成果一览 + +### 服务版本对比 + +| 服务 | 部署前 | 部署后 | 变更类型 | +|------|--------|--------|---------| +| PostgreSQL(RDS) | 20/24 迁移 | **24/24 迁移** | 4 个 Prisma 迁移 + 3 个种子脚本 | +| R统计引擎 | v1.0.1 | **v1.0.2** | 新增 execute-code + 错误处理 + AST 预检 | +| Node.js后端 | v2.8 | **v2.9** | 13 项后端变更(RVW/SSA/IIT/认证) | +| 前端Nginx | v2.5 | **v2.6** | 10 项前端变更(ASL/RVW/SSA/IIT/心跳) | +| Python微服务 | v1.2 | v1.2(不变) | 无变更 | + +### 内网地址变更 + +| 服务 | 部署前地址 | 部署后地址 | 状态 | +|------|-----------|-----------|------| +| R统计引擎 | `172.17.173.101:8080` | `172.17.197.22:8080` | ✅ 已变更 | +| Node.js后端 | `172.17.173.106:3001` | `172.17.173.108:3001` | ✅ 已变更 | +| 前端Nginx | `172.17.173.107:80` | `172.17.197.23:80` | ✅ 已变更 | +| Python微服务 | `172.17.173.102:8000` | `172.17.173.102:8000` | 不变 | + +--- + +## 一、数据库部署 + +### 1.1 部署前准备 + +| 项目 | 值 | +|------|---| +| 备份方式 | `pg_dump --format=custom` via Docker 容器 | +| 备份文件 | `backup_before_0309_deploy.dump` | +| 文件大小 | 46.9 MB | +| 备份时间 | 2026-03-09 08:05 | + +### 1.2 Prisma 迁移(4 项) + +使用 `npx prisma migrate deploy`(生产命令)执行。 + +| 序号 | 迁移名称 | 对应清单 | 变更内容 | 结果 | +|------|---------|---------|---------|------| +| 1 | `20260307_add_error_details_to_review_task` | DB-3 | `rvw_schema.review_tasks` 新增 `error_details` JSONB 列 | ✅ | +| 2 | `20260308_add_iit_equery_open_dedupe_guard` | DB-6 | 历史重复 open eQuery 收敛为 `auto_closed` + 部分唯一索引 | ✅ | +| 3 | `20260308_default_agent_mode` | DB-4 | `ssa_sessions.execution_mode` 默认值改为 `agent` + 21 条旧数据更新 | ✅ | +| 4 | `20260309_add_token_version_to_platform_users` | DB-7 | `platform_schema.users` 新增 `token_version` 
INTEGER 列(默认 0) | ✅ | + +### 1.3 种子数据(3 项) + +| 序号 | 脚本 | 对应清单 | 内容 | 结果 | +|------|------|---------|------|------| +| 1 | `npx tsx scripts/seed-modules.js` | DB-1 | upsert 11 个 modules(新增 ASL_SR) | ✅ | +| 2 | `npx tsx scripts/migrate-rvw-prompts.ts` | DB-2 | upsert 4 个 RVW Prompt(新增 DATA_VALIDATION + CLINICAL) | ✅ | +| 3 | `npx tsx prisma/seed-ssa-agent-prompts.ts` | DB-5 | upsert 2 个 SSA Agent Prompt(PLANNER + CODER) | ✅ | + +### 1.4 数据库最终状态 + +| 项目 | 值 | +|------|---| +| Prisma 迁移 | 24/24 ✅(本地与 RDS 完全同步) | +| Schema 数 | 16 | +| modules 模块数 | 11(含 ASL_SR) | +| RVW Prompt 模板 | 4(含 DATA_VALIDATION + CLINICAL) | +| SSA Agent Prompt | 2(PLANNER + CODER) | + +--- + +## 二、R 统计引擎更新(v1.0.1 → v1.0.2) + +| 项目 | 值 | +|------|---| +| ACR 仓库 | `ssa-r-statistics` | +| 镜像版本 | v1.0.1 → **v1.0.2** | +| Digest | `sha256:7c24b688ee7e5e1e61d6f2821902ab825efc5a4113d0f99f92d9c63deebcd79d` | +| 内网地址 | `http://172.17.197.22:8080` | + +变更内容(3 项): +- ✅ R-1:新增 POST `/api/v1/execute-code` 端点(Agent 通道任意 R 代码执行) +- ✅ R-2:Agent 结构化错误处理增强(20+ 模式匹配 + format_agent_error) +- ✅ R-3:AST 语法预检(parse() 前置于 eval()) + +--- + +## 三、Node.js 后端更新(v2.8 → v2.9) + +| 项目 | 值 | +|------|---| +| ACR 仓库 | `backend-service` | +| 镜像版本 | v2.8 → **v2.9** | +| Digest | `sha256:b28b14e4f7aec66102e7e039d6d910c1e957c7903329d1ba6b4ac20ebbd078f9` | +| 内网地址 | `http://172.17.173.108:3001` | + +变更内容(13 项): +- ✅ BE-1:Deep Research V2.0 历史列表 + 删除接口 + getTask 鉴权修复 +- ✅ BE-2:SR 相关路由增加 `requireModule('ASL_SR')` 中间件 +- ✅ BE-3:Unifuncs DeepSearch API S2 → S3(新增 `language: "zh"`) +- ✅ BE-4:RVW 数据验证增加 LLM 核查通道 +- ✅ BE-5:RVW 新增临床专业评估维度(ClinicalAssessmentSkill) +- ✅ BE-6:RVW 稳定性增强(Promise.allSettled + partial_completed) +- ✅ BE-7:DataForensicsSkill LLM 核查独立 60s 超时 +- ✅ BE-8:SSA Agent 通道体验优化(方案 B + 10 项 Bug 修复) +- ✅ BE-9:Phase 5A CoderAgent 防错护栏(4 项改动) +- ✅ BE-10:SSA Agent Prompt 接入运营管理端(三级容灾) +- ✅ BE-11:IIT eQuery 幂等写入 + 去重工具脚本 +- ✅ BE-12:IIT 事件名称友好化 + AI 对话证据块补齐 +- ✅ BE-13:认证链路改造为数据库强一致互踢(tokenVersion) + +--- + +## 四、前端 Nginx 更新(v2.5 → 
v2.6) + +| 项目 | 值 | +|------|---| +| ACR 仓库 | `ai-clinical_frontend-nginx` | +| 镜像版本 | v2.5 → **v2.6** | +| Digest | `sha256:da4c9fcfe135b25bcac5143e3f919d8a3a205f53d8b0e930e32f6b8325d2cb70` | +| 内网地址 | `http://172.17.197.23:80` | + +变更内容(10 项): +- ✅ FE-1:ASL 左侧导航栏重构为互斥手风琴 +- ✅ FE-2:Deep Research 历史记录功能 +- ✅ FE-3:Panel B SR 工具导航权限控制 +- ✅ FE-4:RVW 数据验证报告增加 LLM 核查结果展示 +- ✅ FE-5:RVW 新增临床专业评估 Tab + Agent 选择项 +- ✅ FE-6:RVW 前端支持 partial_completed 状态 +- ✅ FE-7:SSA Agent 通道体验优化(方案 B + 动态 UI) +- ✅ FE-8:SSA 默认 Agent 模式 + 查看代码修复 + 分析历史卡片 +- ✅ FE-9:IIT D1 筛选入选表规则名称友好显示 +- ✅ FE-10:全局会话心跳(10s)提升互踢感知时效 + +--- + +## 五、环境变量联动更新 + +| 服务 | 环境变量 | 旧值 | 新值 | +|------|---------|------|------| +| nodejs-backend-test | `R_SERVICE_URL` | `http://172.17.173.101:8080` | `http://172.17.197.22:8080` | +| frontend-nginx-service | `BACKEND_SERVICE_HOST` | `172.17.173.106` | `172.17.173.108` | + +> CLB 负载均衡器由阿里云自动更新,无需手动操作。 + +--- + +## 六、当前系统配置速查 + +### 服务内网地址 + +``` +R统计引擎: http://172.17.197.22:8080 (更新) +Python: http://172.17.173.102:8000 (不变) +后端: http://172.17.173.108:3001 (更新) +前端: http://172.17.197.23:80 (更新) +``` + +### ACR 镜像版本 + +| 仓库 | 版本 | +|------|-----| +| `ssa-r-statistics` | **v1.0.2** | +| `python-extraction` | v1.2 | +| `backend-service` | **v2.9** | +| `ai-clinical_frontend-nginx` | **v2.6** | + +### 公网访问 + +``` +CLB: http://8.140.53.236/ +域名: https://iit.xunzhengyixue.com/ +``` + +--- + +> **文档版本**:v2.0 +> **最后更新**:2026-03-09 +> **维护人员**:开发团队 diff --git a/frontend-v2/nginx.conf b/frontend-v2/nginx.conf index d4be55ea..c1319fc0 100644 --- a/frontend-v2/nginx.conf +++ b/frontend-v2/nginx.conf @@ -17,6 +17,13 @@ http { include /etc/nginx/mime.types; default_type application/octet-stream; + # WebSocket / SSE 兼容:仅当请求包含 Upgrade 头时设置 Connection: upgrade + # SSE 请求无 Upgrade 头 → Connection 为空,避免 HTTP/2 帧协议错误 + map $http_upgrade $connection_upgrade { + default upgrade; + '' ''; + } + # ⚠️ 文件上传大小限制(默认只有 1MB,太小会导致 413 错误) client_max_body_size 50M; # 允许上传最大 50MB 文件 @@ -144,14 +151,15 @@ 
http { proxy_send_timeout 300s; proxy_read_timeout 300s; - # 缓冲配置 - proxy_buffering off; # 关闭缓冲(实时流式响应) - proxy_request_buffering off; # 支持大文件上传 + # 缓冲配置(SSE 流式响应必须关闭所有缓冲/缓存) + proxy_buffering off; + proxy_cache off; + proxy_request_buffering off; - # WebSocket 支持(如果后续需要) + # WebSocket + SSE 兼容 proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; + proxy_set_header Connection $connection_upgrade; # 错误处理 proxy_next_upstream error timeout invalid_header http_500 http_502 http_503; diff --git a/frontend-v2/src/modules/asl/components/ASLLayout.tsx b/frontend-v2/src/modules/asl/components/ASLLayout.tsx index 845bf1d8..5ae9be30 100644 --- a/frontend-v2/src/modules/asl/components/ASLLayout.tsx +++ b/frontend-v2/src/modules/asl/components/ASLLayout.tsx @@ -151,7 +151,7 @@ const ASLLayout = () => { return ( -
+
{/* ── 侧边栏 ── */}
@@ -277,7 +277,7 @@ const ASLLayout = () => {
{/* ── 右侧内容区 ── */} -
+
diff --git a/frontend-v2/src/modules/ssa/components/SSAChatPane.tsx b/frontend-v2/src/modules/ssa/components/SSAChatPane.tsx index 5989847e..e6c05a52 100644 --- a/frontend-v2/src/modules/ssa/components/SSAChatPane.tsx +++ b/frontend-v2/src/modules/ssa/components/SSAChatPane.tsx @@ -80,6 +80,7 @@ export const SSAChatPane: React.FC = () => { loadHistory, abort: abortChat, clearMessages, + retryLastMessage, } = useSSAChat(); const [inputValue, setInputValue] = useState(''); @@ -458,6 +459,13 @@ export const SSAChatPane: React.FC = () => {
{msg.content} +
) : (
diff --git a/frontend-v2/src/modules/ssa/hooks/useSSAChat.ts b/frontend-v2/src/modules/ssa/hooks/useSSAChat.ts index 24fe474d..48be5867 100644 --- a/frontend-v2/src/modules/ssa/hooks/useSSAChat.ts +++ b/frontend-v2/src/modules/ssa/hooks/useSSAChat.ts @@ -77,6 +77,38 @@ export interface UseSSAChatReturn { loadHistory: (sessionId: string) => Promise; abort: () => void; clearMessages: () => void; + retryLastMessage: () => Promise; +} + +// ──────────────────────────────────────────── +// Helpers +// ──────────────────────────────────────────── + +const MAX_AUTO_RETRY = 2; + +function retryDelay(attempt: number): number { + return Math.min(1000 * (2 ** attempt), 5000); +} + +function toFriendlyError(err: any): string { + const msg = (err?.message || '').toLowerCase(); + + if (msg.includes('failed to fetch') || msg.includes('networkerror') || msg.includes('network')) + return '网络连接不稳定,请稍后重试。'; + + if (msg.includes('http2') || msg.includes('protocol')) + return '网络链路出现瞬时波动,请重新发送消息。'; + + if (msg.includes('timeout') || msg.includes('timed out')) + return '请求超时,服务器响应较慢,请稍后重试。'; + + if (msg.includes('502') || msg.includes('503') || msg.includes('504')) + return '服务暂时不可用,可能正在更新中,请稍后重试。'; + + if (msg.includes('401') || msg.includes('登录')) + return '登录已过期,请刷新页面重新登录。'; + + return err?.message || '请求失败,请重试。'; } // ──────────────────────────────────────────── @@ -95,6 +127,8 @@ export function useSSAChat(): UseSSAChatReturn { const [pendingQuestion, setPendingQuestion] = useState(null); const abortRef = useRef(null); + const lastRequestRef = useRef<{ sessionId: string; content: string; metadata?: Record } | null>(null); + const retryCountRef = useRef(0); const ensureFreshToken = useCallback(async (): Promise => { if (isTokenExpired()) { @@ -186,6 +220,7 @@ export function useSSAChat(): UseSSAChatReturn { * 发送消息并接收 SSE 流式响应 */ const sendChatMessage = useCallback(async (sessionId: string, content: string, metadata?: Record) => { + lastRequestRef.current = { sessionId, content, 
metadata }; setError(null); setIsGenerating(true); setThinkingContent(''); @@ -454,11 +489,32 @@ export function useSSAChat(): UseSSAChatReturn { ? { ...m, content: fullContent || '(已中断)', status: 'complete' } : m )); + retryCountRef.current = 0; } else { - const errMsg = err.message || '请求失败'; - setError(errMsg); + const isNetworkError = /failed to fetch|networkerror|network|http2|protocol/i.test(err.message || ''); + + // 瞬时网络错误自动重试(最多 MAX_AUTO_RETRY 次,指数退避) + if (isNetworkError && retryCountRef.current < MAX_AUTO_RETRY) { + retryCountRef.current += 1; + const delay = retryDelay(retryCountRef.current); + setChatMessages(prev => prev.map(m => + m.id === assistantMsgId + ? { ...m, content: `⏳ 网络波动,${(delay / 1000).toFixed(1)}s 后自动重试(第 ${retryCountRef.current}/${MAX_AUTO_RETRY} 次)...`, status: 'generating' } + : m + )); + setIsGenerating(false); + abortRef.current = null; + await new Promise(r => setTimeout(r, delay)); + return sendChatMessage(sessionId, content, metadata); + } + + retryCountRef.current = 0; + const friendlyMsg = toFriendlyError(err); + setError(friendlyMsg); setChatMessages(prev => prev.map(m => - m.id === assistantMsgId ? { ...m, content: errMsg, status: 'error' } : m + m.id === assistantMsgId + ? { ...m, content: `⚠️ ${friendlyMsg}`, status: 'error' } + : m )); } } finally { @@ -535,6 +591,20 @@ export function useSSAChat(): UseSSAChatReturn { await sendChatMessage(sessionId, '已跳过此问题', { askUserResponse: skipResponse }); }, [sendChatMessage]); + const retryLastMessage = useCallback(async () => { + const last = lastRequestRef.current; + if (!last) return; + retryCountRef.current = 0; + setChatMessages(prev => { + let lastErrIdx = -1; + for (let i = prev.length - 1; i >= 0; i--) { + if (prev[i].status === 'error') { lastErrIdx = i; break; } + } + return lastErrIdx >= 0 ? 
prev.filter((_: ChatMessage, i: number) => i !== lastErrIdx) : prev; + }); + await sendChatMessage(last.sessionId, last.content, last.metadata); + }, [sendChatMessage]); + return { chatMessages, isGenerating, @@ -552,6 +622,7 @@ export function useSSAChat(): UseSSAChatReturn { loadHistory, abort, clearMessages, + retryLastMessage, }; } diff --git a/frontend-v2/src/modules/ssa/styles/ssa.css b/frontend-v2/src/modules/ssa/styles/ssa.css index 2a19bf8e..68e0d55f 100644 --- a/frontend-v2/src/modules/ssa/styles/ssa.css +++ b/frontend-v2/src/modules/ssa/styles/ssa.css @@ -1818,3 +1818,42 @@ border-radius: 4px; font-size: 10px; } + +/* ── SSE 错误提示 + 重试按钮 ── */ + +.chat-error-msg { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 12px; + background: rgba(239, 68, 68, 0.08); + border: 1px solid rgba(239, 68, 68, 0.2); + border-radius: 8px; + color: #fca5a5; + font-size: 13px; + line-height: 1.5; +} + +.chat-retry-btn { + flex-shrink: 0; + margin-left: auto; + padding: 4px 14px; + background: rgba(59, 130, 246, 0.15); + border: 1px solid rgba(59, 130, 246, 0.3); + border-radius: 6px; + color: #93c5fd; + font-size: 12px; + cursor: pointer; + transition: all 0.2s; +} + +.chat-retry-btn:hover:not(:disabled) { + background: rgba(59, 130, 246, 0.25); + border-color: rgba(59, 130, 246, 0.5); + color: #bfdbfe; +} + +.chat-retry-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +}