feat(ssa): Implement dual-channel architecture Phase 1-3 (QPER + LLM Agent pipeline)
Completed: - Phase 1: DB schema (execution_mode + ssa_agent_executions), ModeToggle component, Session PATCH API - Phase 2: AgentPlannerService + AgentCoderService (streaming) + CodeRunnerService + R Docker /execute-code endpoint - Phase 3: AgentCodePanel (3-step confirmation UI), SSE event handling (7 agent events), streaming code display - Three-step confirmation pipeline: plan -> user confirm -> stream code -> user confirm -> execute R code -> results - R Docker sandbox /execute-code endpoint with 120s timeout + block_helpers preloaded - E2E dual-channel test script (8 tests) - Updated R engine architecture doc (v1.5) and SSA module status doc (v4.0) Technical details: - AgentCoderService uses LLM streaming (chatStream) for real-time code generation feedback - ReviewerAgent temporarily disabled, prioritizing Plan -> Code -> Execute flow - CodeRunnerService wraps user code with auto data loading (df variable injection) - Frontend handles agent_planning, agent_plan_ready, code_generating, code_generated, code_executing, code_result events - ask_user mechanism used for plan and code confirmation steps Files: 24 files (4 new services, 2 new components, 1 migration, 1 E2E test, 16 modified) Made-with: Cursor
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
-- AlterTable: ssa_sessions 新增 execution_mode 列
|
||||
ALTER TABLE "ssa_schema"."ssa_sessions" ADD COLUMN IF NOT EXISTS "execution_mode" TEXT NOT NULL DEFAULT 'qper';
|
||||
|
||||
-- CreateTable: ssa_agent_executions (Agent 通道执行记录)
|
||||
CREATE TABLE IF NOT EXISTS "ssa_schema"."ssa_agent_executions" (
|
||||
"id" TEXT NOT NULL,
|
||||
"session_id" TEXT NOT NULL,
|
||||
"query" TEXT NOT NULL,
|
||||
"plan_text" TEXT,
|
||||
"generated_code" TEXT,
|
||||
"review_result" JSONB,
|
||||
"execution_result" JSONB,
|
||||
"report_blocks" JSONB,
|
||||
"retry_count" INTEGER NOT NULL DEFAULT 0,
|
||||
"status" TEXT NOT NULL DEFAULT 'pending',
|
||||
"error_message" TEXT,
|
||||
"duration_ms" INTEGER,
|
||||
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
CONSTRAINT "ssa_agent_executions_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX IF NOT EXISTS "idx_ssa_agent_exec_session" ON "ssa_schema"."ssa_agent_executions"("session_id");
|
||||
|
||||
-- AddForeignKey
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_constraint WHERE conname = 'ssa_agent_executions_session_id_fkey'
|
||||
) THEN
|
||||
ALTER TABLE "ssa_schema"."ssa_agent_executions"
|
||||
ADD CONSTRAINT "ssa_agent_executions_session_id_fkey"
|
||||
FOREIGN KEY ("session_id") REFERENCES "ssa_schema"."ssa_sessions"("id")
|
||||
ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -2480,13 +2480,15 @@ model SsaSession {
|
||||
dataPayload Json? @map("data_payload") /// 真实数据(仅R可见)
|
||||
dataOssKey String? @map("data_oss_key") /// OSS 存储 key(大数据)
|
||||
dataProfile Json? @map("data_profile") /// 🆕 Python 生成的 DataProfile(Phase 2A)
|
||||
executionMode String @default("qper") @map("execution_mode") /// qper | agent
|
||||
status String @default("active") /// active | consult | completed | error
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
messages SsaMessage[]
|
||||
executionLogs SsaExecutionLog[]
|
||||
workflows SsaWorkflow[] /// 🆕 多步骤流程(Phase 2A)
|
||||
messages SsaMessage[]
|
||||
executionLogs SsaExecutionLog[]
|
||||
workflows SsaWorkflow[] /// 🆕 多步骤流程(Phase 2A)
|
||||
agentExecutions SsaAgentExecution[] /// Agent 模式执行记录
|
||||
|
||||
@@index([userId], map: "idx_ssa_session_user")
|
||||
@@index([status], map: "idx_ssa_session_status")
|
||||
@@ -2494,6 +2496,30 @@ model SsaSession {
|
||||
@@schema("ssa_schema")
|
||||
}
|
||||
|
||||
/// SSA Agent 执行记录(LLM 代码生成通道)
|
||||
model SsaAgentExecution {
|
||||
id String @id @default(uuid())
|
||||
sessionId String @map("session_id")
|
||||
query String
|
||||
planText String? @map("plan_text")
|
||||
generatedCode String? @map("generated_code")
|
||||
reviewResult Json? @map("review_result")
|
||||
executionResult Json? @map("execution_result")
|
||||
reportBlocks Json? @map("report_blocks")
|
||||
retryCount Int @default(0) @map("retry_count")
|
||||
status String @default("pending") /// pending | planning | coding | reviewing | executing | completed | error
|
||||
errorMessage String? @map("error_message")
|
||||
durationMs Int? @map("duration_ms")
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
session SsaSession @relation(fields: [sessionId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@index([sessionId], map: "idx_ssa_agent_exec_session")
|
||||
@@map("ssa_agent_executions")
|
||||
@@schema("ssa_schema")
|
||||
}
|
||||
|
||||
/// SSA 消息记录
|
||||
model SsaMessage {
|
||||
id String @id @default(uuid())
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
import { FastifyInstance, FastifyRequest } from 'fastify';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { conversationService } from '../services/ConversationService.js';
|
||||
import { intentRouterService } from '../services/IntentRouterService.js';
|
||||
import { chatHandlerService } from '../services/ChatHandlerService.js';
|
||||
@@ -114,6 +115,41 @@ export default async function chatRoutes(app: FastifyInstance) {
|
||||
}
|
||||
// ── H1 结束 ──
|
||||
|
||||
// 3. 读取 session 的执行模式
|
||||
const session = await (prisma.ssaSession as any).findUnique({
|
||||
where: { id: sessionId },
|
||||
select: { executionMode: true },
|
||||
});
|
||||
const executionMode = (session?.executionMode as string) || 'qper';
|
||||
|
||||
// ── Agent 通道分流 ──
|
||||
if (executionMode === 'agent') {
|
||||
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
|
||||
conversationId, 'chat',
|
||||
);
|
||||
|
||||
const metaEvent = JSON.stringify({
|
||||
type: 'intent_classified',
|
||||
intent: 'analyze',
|
||||
confidence: 1,
|
||||
source: 'agent_mode',
|
||||
guardTriggered: false,
|
||||
});
|
||||
writer.write(`data: ${metaEvent}\n\n`);
|
||||
|
||||
const result = await chatHandlerService.handleAgentMode(
|
||||
sessionId, conversationId, content.trim(), writer, placeholderMsgId,
|
||||
);
|
||||
|
||||
logger.info('[SSA:Chat] Agent mode request completed', {
|
||||
sessionId, success: result.success,
|
||||
});
|
||||
|
||||
writer.end();
|
||||
return;
|
||||
}
|
||||
// ── QPER 通道(现有逻辑) ──
|
||||
|
||||
// 3. 意图分类
|
||||
const intentResult = await intentRouterService.classify(content.trim(), sessionId);
|
||||
|
||||
|
||||
@@ -136,6 +136,33 @@ export default async function sessionRoutes(app: FastifyInstance) {
|
||||
return reply.send(session);
|
||||
});
|
||||
|
||||
/**
|
||||
* PATCH /sessions/:id/execution-mode
|
||||
* 切换双通道执行模式 (qper / agent)
|
||||
*/
|
||||
app.patch('/:id/execution-mode', async (req, reply) => {
|
||||
const { id } = req.params as { id: string };
|
||||
const { executionMode } = req.body as { executionMode: string };
|
||||
|
||||
if (!executionMode || !['qper', 'agent'].includes(executionMode)) {
|
||||
return reply.status(400).send({ error: 'executionMode must be "qper" or "agent"' });
|
||||
}
|
||||
|
||||
const session = await prisma.ssaSession.findUnique({ where: { id } });
|
||||
if (!session) {
|
||||
return reply.status(404).send({ error: 'Session not found' });
|
||||
}
|
||||
|
||||
await (prisma.ssaSession as any).update({
|
||||
where: { id },
|
||||
data: { executionMode },
|
||||
});
|
||||
|
||||
logger.info('[SSA:Session] Execution mode switched', { sessionId: id, executionMode });
|
||||
|
||||
return reply.send({ success: true, executionMode });
|
||||
});
|
||||
|
||||
// 获取消息历史
|
||||
app.get('/:id/messages', async (req, reply) => {
|
||||
const { id } = req.params as { id: string };
|
||||
|
||||
277
backend/src/modules/ssa/services/AgentCoderService.ts
Normal file
277
backend/src/modules/ssa/services/AgentCoderService.ts
Normal file
@@ -0,0 +1,277 @@
|
||||
/**
|
||||
* AgentCoderService — Agent 通道编码 Agent
|
||||
*
|
||||
* 职责:根据分析计划 + 数据上下文,通过 LLM 生成可执行的 R 代码。
|
||||
*
|
||||
* Prompt 约束(R 代码规范底线):
|
||||
* - 必须使用 block_helpers.R 的输出函数
|
||||
* - 必须使用 load_input_data() 加载数据
|
||||
* - 禁止安装新包(只能用预装包)
|
||||
* - 禁止访问网络和文件系统(沙箱限制)
|
||||
* - 代码最后必须 return 一个包含 report_blocks 的 list
|
||||
*/
|
||||
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import { tokenTruncationService } from './TokenTruncationService.js';
|
||||
import type { AgentPlan } from './AgentPlannerService.js';
|
||||
|
||||
const MODEL = 'deepseek-v3';
|
||||
|
||||
export interface GeneratedCode {
|
||||
code: string;
|
||||
explanation: string;
|
||||
requiredPackages: string[];
|
||||
}
|
||||
|
||||
export class AgentCoderService {
|
||||
|
||||
/**
|
||||
* 非流式生成(重试场景使用)
|
||||
*/
|
||||
async generateCode(
|
||||
sessionId: string,
|
||||
plan: AgentPlan,
|
||||
errorFeedback?: string,
|
||||
): Promise<GeneratedCode> {
|
||||
const dataContext = await this.buildDataContext(sessionId);
|
||||
const systemPrompt = this.buildSystemPrompt(dataContext);
|
||||
|
||||
const userMessage = errorFeedback
|
||||
? this.buildRetryMessage(plan, errorFeedback)
|
||||
: this.buildFirstMessage(plan);
|
||||
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userMessage },
|
||||
];
|
||||
|
||||
logger.info('[AgentCoder] Generating R code', {
|
||||
sessionId,
|
||||
planTitle: plan.title,
|
||||
isRetry: !!errorFeedback,
|
||||
});
|
||||
|
||||
const adapter = LLMFactory.getAdapter(MODEL as any);
|
||||
const response = await adapter.chat(messages, {
|
||||
temperature: 0.2,
|
||||
maxTokens: 4000,
|
||||
});
|
||||
|
||||
const content = response.content || '';
|
||||
const result = this.parseCode(content);
|
||||
|
||||
logger.info('[AgentCoder] R code generated', {
|
||||
sessionId,
|
||||
codeLength: result.code.length,
|
||||
packages: result.requiredPackages,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 流式生成 — 每积累 CHUNK_SIZE 字符回调一次,前端实时显示代码
|
||||
*/
|
||||
async generateCodeStream(
|
||||
sessionId: string,
|
||||
plan: AgentPlan,
|
||||
onProgress: (accumulated: string) => void,
|
||||
errorFeedback?: string,
|
||||
): Promise<GeneratedCode> {
|
||||
const dataContext = await this.buildDataContext(sessionId);
|
||||
const systemPrompt = this.buildSystemPrompt(dataContext);
|
||||
|
||||
const userMessage = errorFeedback
|
||||
? this.buildRetryMessage(plan, errorFeedback)
|
||||
: this.buildFirstMessage(plan);
|
||||
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userMessage },
|
||||
];
|
||||
|
||||
logger.info('[AgentCoder] Generating R code (stream)', {
|
||||
sessionId, planTitle: plan.title, isRetry: !!errorFeedback,
|
||||
});
|
||||
|
||||
const adapter = LLMFactory.getAdapter(MODEL as any);
|
||||
let fullContent = '';
|
||||
let lastSentLen = 0;
|
||||
const CHUNK_SIZE = 150;
|
||||
|
||||
for await (const chunk of adapter.chatStream(messages, {
|
||||
temperature: 0.2,
|
||||
maxTokens: 4000,
|
||||
})) {
|
||||
if (chunk.content) {
|
||||
fullContent += chunk.content;
|
||||
if (fullContent.length - lastSentLen >= CHUNK_SIZE) {
|
||||
onProgress(fullContent);
|
||||
lastSentLen = fullContent.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fullContent.length > lastSentLen) {
|
||||
onProgress(fullContent);
|
||||
}
|
||||
|
||||
const result = this.parseCode(fullContent);
|
||||
|
||||
logger.info('[AgentCoder] R code generated (stream)', {
|
||||
sessionId, codeLength: result.code.length, packages: result.requiredPackages,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async buildDataContext(sessionId: string): Promise<string> {
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
if (!blackboard) return '(无数据上下文)';
|
||||
|
||||
const truncated = tokenTruncationService.truncate(blackboard, {
|
||||
maxTokens: 1500,
|
||||
strategy: 'balanced',
|
||||
});
|
||||
return tokenTruncationService.toPromptString(truncated);
|
||||
}
|
||||
|
||||
private buildSystemPrompt(dataContext: string): string {
|
||||
return `你是一位 R 统计编程专家(Coder Agent)。你的职责是根据分析计划生成可在 R Docker 沙箱中执行的 R 代码。
|
||||
|
||||
## 数据上下文
|
||||
${dataContext}
|
||||
|
||||
## R 代码规范(铁律)
|
||||
|
||||
### 数据加载(重要!)
|
||||
数据已由执行环境**自动加载**到变量 \`df\` 中(data.frame 格式)。
|
||||
**禁止**自己调用 \`load_input_data()\`,直接使用 \`df\` 即可。
|
||||
|
||||
\`\`\`r
|
||||
# df 已存在,直接使用
|
||||
str(df) # 查看结构
|
||||
\`\`\`
|
||||
|
||||
### 输出规范
|
||||
代码最后必须返回一个 list,包含 report_blocks 字段:
|
||||
\`\`\`r
|
||||
# 使用 block_helpers.R 中的函数构造 Block
|
||||
blocks <- list()
|
||||
blocks[[length(blocks) + 1]] <- make_markdown_block("## 分析结果\\n...")
|
||||
blocks[[length(blocks) + 1]] <- make_table_block_from_df(result_df, title = "表1. 统计结果")
|
||||
blocks[[length(blocks) + 1]] <- make_image_block(base64_data, title = "图1. 可视化")
|
||||
blocks[[length(blocks) + 1]] <- make_kv_block(list("P值" = "0.023", "效应量" = "0.45"))
|
||||
|
||||
# 必须以此格式返回
|
||||
list(
|
||||
status = "success",
|
||||
method = "使用的统计方法",
|
||||
report_blocks = blocks
|
||||
)
|
||||
\`\`\`
|
||||
|
||||
### 可用辅助函数(由 block_helpers.R 提供)
|
||||
- \`make_markdown_block(content, title)\` — Markdown 文本块
|
||||
- \`make_table_block(headers, rows, title, footnote)\` — 表格块
|
||||
- \`make_table_block_from_df(df_arg, title, footnote, digits)\` — 从 data.frame 生成表格块(注意参数名不要与 df 变量冲突)
|
||||
- \`make_image_block(base64_data, title, alt)\` — 图片块
|
||||
- \`make_kv_block(items, title)\` — 键值对块
|
||||
|
||||
### 图表生成
|
||||
\`\`\`r
|
||||
library(base64enc)
|
||||
tmp_file <- tempfile(fileext = ".png")
|
||||
png(tmp_file, width = 800, height = 600, res = 120)
|
||||
# ... 绑图代码 ...
|
||||
dev.off()
|
||||
base64_data <- paste0("data:image/png;base64,", base64encode(tmp_file))
|
||||
unlink(tmp_file)
|
||||
\`\`\`
|
||||
|
||||
### 预装可用包(仅限以下包,禁止使用其他包)
|
||||
base, stats, utils, graphics, grDevices,
|
||||
ggplot2, dplyr, tidyr, broom, gtsummary, gt, scales, gridExtra,
|
||||
car, lmtest, survival, meta, base64enc, glue, jsonlite, cowplot
|
||||
|
||||
### 禁止事项
|
||||
1. 禁止 install.packages() — 只能用上面列出的预装包
|
||||
2. 禁止调用 load_input_data() — 数据已自动加载到 df
|
||||
3. 禁止访问外部网络 — 无 httr/curl 网络请求
|
||||
4. 禁止读写沙箱外文件 — 只能用 tempfile()
|
||||
5. 禁止 system() / shell() 命令
|
||||
6. 所有数字结果必须用 tryCatch 包裹,防止 NA/NaN 导致崩溃
|
||||
7. 禁止使用 pROC, nortest, exact2x2 等未安装的包
|
||||
|
||||
## 输出格式
|
||||
请在 \`\`\`r ... \`\`\` 代码块中输出完整 R 代码,代码块外简要说明。`;
|
||||
}
|
||||
|
||||
private buildFirstMessage(plan: AgentPlan): string {
|
||||
const planDesc = plan.steps
|
||||
.map(s => `${s.order}. ${s.method}: ${s.description} (${s.rationale})`)
|
||||
.join('\n');
|
||||
|
||||
return `请根据以下分析计划生成完整的 R 代码:
|
||||
|
||||
## 分析计划:${plan.title}
|
||||
- 研究设计:${plan.designType}
|
||||
- 结局变量:${plan.variables.outcome.join(', ') || '未指定'}
|
||||
- 预测变量:${plan.variables.predictors.join(', ') || '未指定'}
|
||||
- 分组变量:${plan.variables.grouping || '无'}
|
||||
- 混杂因素:${plan.variables.confounders.join(', ') || '无'}
|
||||
|
||||
## 分析步骤
|
||||
${planDesc}
|
||||
|
||||
## 需要验证的假设
|
||||
${plan.assumptions.join('\n') || '无特殊假设'}
|
||||
|
||||
请生成完整、可直接执行的 R 代码。`;
|
||||
}
|
||||
|
||||
private buildRetryMessage(plan: AgentPlan, errorFeedback: string): string {
|
||||
return `上一次生成的 R 代码执行失败,错误信息如下:
|
||||
|
||||
\`\`\`
|
||||
${errorFeedback}
|
||||
\`\`\`
|
||||
|
||||
请修复代码中的问题并重新生成完整的 R 代码。
|
||||
|
||||
原分析计划:${plan.title}
|
||||
研究设计:${plan.designType}
|
||||
结局变量:${plan.variables.outcome.join(', ') || '未指定'}
|
||||
分组变量:${plan.variables.grouping || '无'}
|
||||
|
||||
请确保:
|
||||
1. 修复上述错误
|
||||
2. 使用 tryCatch 包裹关键计算步骤
|
||||
3. 处理可能的 NA/NaN 值
|
||||
4. 保持 report_blocks 输出格式`;
|
||||
}
|
||||
|
||||
private parseCode(content: string): GeneratedCode {
|
||||
const codeMatch = content.match(/```r\s*([\s\S]*?)```/);
|
||||
const code = codeMatch ? codeMatch[1].trim() : content;
|
||||
|
||||
const packageRegex = /library\((\w+)\)/g;
|
||||
const packages: string[] = [];
|
||||
let match;
|
||||
while ((match = packageRegex.exec(code)) !== null) {
|
||||
packages.push(match[1]);
|
||||
}
|
||||
|
||||
const explanation = content
|
||||
.replace(/```r[\s\S]*?```/g, '')
|
||||
.trim()
|
||||
.slice(0, 500);
|
||||
|
||||
return { code, explanation, requiredPackages: packages };
|
||||
}
|
||||
}
|
||||
|
||||
export const agentCoderService = new AgentCoderService();
|
||||
175
backend/src/modules/ssa/services/AgentPlannerService.ts
Normal file
175
backend/src/modules/ssa/services/AgentPlannerService.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
/**
|
||||
* AgentPlannerService — Agent 通道规划 Agent
|
||||
*
|
||||
* 职责:接收用户自然语言需求 + 数据上下文,通过 LLM 生成结构化分析计划。
|
||||
* 计划以自然语言 + JSON 混合格式输出,供 CoderAgent 消费。
|
||||
*
|
||||
* Prompt 约束(统计规则底线):
|
||||
* - 必须声明研究设计类型
|
||||
* - 必须说明变量角色(结局 / 预测 / 混杂)
|
||||
* - 必须给出统计方法选择理由
|
||||
* - 禁止编造数据或预测结果
|
||||
*/
|
||||
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import { tokenTruncationService } from './TokenTruncationService.js';
|
||||
|
||||
const MODEL = 'deepseek-v3';
|
||||
|
||||
export interface AgentPlan {
|
||||
title: string;
|
||||
designType: string;
|
||||
variables: {
|
||||
outcome: string[];
|
||||
predictors: string[];
|
||||
grouping: string | null;
|
||||
confounders: string[];
|
||||
};
|
||||
steps: Array<{
|
||||
order: number;
|
||||
method: string;
|
||||
description: string;
|
||||
rationale: string;
|
||||
}>;
|
||||
assumptions: string[];
|
||||
rawText: string;
|
||||
}
|
||||
|
||||
export class AgentPlannerService {
|
||||
|
||||
async generatePlan(
|
||||
sessionId: string,
|
||||
userQuery: string,
|
||||
conversationHistory: LLMMessage[],
|
||||
): Promise<AgentPlan> {
|
||||
const dataContext = await this.buildDataContext(sessionId);
|
||||
|
||||
const systemPrompt = this.buildSystemPrompt(dataContext);
|
||||
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
...conversationHistory.filter(m => m.role !== 'system').slice(-10),
|
||||
{
|
||||
role: 'user',
|
||||
content: `请为以下分析需求制定统计分析计划:\n\n${userQuery}\n\n请严格按照指定的 JSON 格式输出。`,
|
||||
},
|
||||
];
|
||||
|
||||
logger.info('[AgentPlanner] Generating plan', { sessionId, queryLength: userQuery.length });
|
||||
|
||||
const adapter = LLMFactory.getAdapter(MODEL as any);
|
||||
const response = await adapter.chat(messages, {
|
||||
temperature: 0.3,
|
||||
maxTokens: 2000,
|
||||
});
|
||||
|
||||
const content = response.content || '';
|
||||
const plan = this.parsePlan(content);
|
||||
|
||||
logger.info('[AgentPlanner] Plan generated', {
|
||||
sessionId,
|
||||
title: plan.title,
|
||||
stepsCount: plan.steps.length,
|
||||
});
|
||||
|
||||
return plan;
|
||||
}
|
||||
|
||||
private async buildDataContext(sessionId: string): Promise<string> {
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
if (!blackboard) return '(暂无数据上下文,用户尚未上传数据)';
|
||||
|
||||
const truncated = tokenTruncationService.truncate(blackboard, {
|
||||
maxTokens: 2000,
|
||||
strategy: 'balanced',
|
||||
});
|
||||
|
||||
return tokenTruncationService.toPromptString(truncated);
|
||||
}
|
||||
|
||||
private buildSystemPrompt(dataContext: string): string {
|
||||
return `你是一位高级统计分析规划师(Planner Agent)。你的职责是根据用户的研究需求和数据特征,制定严谨的统计分析计划。
|
||||
|
||||
## 数据上下文
|
||||
${dataContext}
|
||||
|
||||
## 规划规则(铁律)
|
||||
1. 必须声明研究设计类型(横断面 / 队列 / 病例对照 / RCT / 前后对比等)
|
||||
2. 必须明确变量角色:结局变量(outcome)、预测变量(predictors)、分组变量(grouping)、混杂因素(confounders)
|
||||
3. 统计方法选择必须给出理由(数据类型、分布、样本量等)
|
||||
4. 连续变量需考虑正态性:正态→参数方法,非正态→非参数方法
|
||||
5. 分类变量的期望频数 < 5 时应选择 Fisher 精确检验而非卡方检验
|
||||
6. 多因素分析需考虑共线性和 EPV(Events Per Variable)
|
||||
7. 禁止编造任何数据或预测分析结果
|
||||
|
||||
## 输出格式
|
||||
请输出 JSON 格式的分析计划,结构如下:
|
||||
\`\`\`json
|
||||
{
|
||||
"title": "分析计划标题",
|
||||
"designType": "研究设计类型",
|
||||
"variables": {
|
||||
"outcome": ["结局变量名"],
|
||||
"predictors": ["预测变量名"],
|
||||
"grouping": "分组变量名或null",
|
||||
"confounders": ["混杂因素"]
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"order": 1,
|
||||
"method": "统计方法名称",
|
||||
"description": "这一步做什么",
|
||||
"rationale": "为什么选这个方法"
|
||||
}
|
||||
],
|
||||
"assumptions": ["需要验证的统计假设"]
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
在 JSON 代码块之后,可以用自然语言补充说明。`;
|
||||
}
|
||||
|
||||
private parsePlan(content: string): AgentPlan {
|
||||
const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
|
||||
if (jsonMatch) {
|
||||
try {
|
||||
const parsed = JSON.parse(jsonMatch[1].trim());
|
||||
return {
|
||||
title: parsed.title || '统计分析计划',
|
||||
designType: parsed.designType || '未指定',
|
||||
variables: {
|
||||
outcome: parsed.variables?.outcome || [],
|
||||
predictors: parsed.variables?.predictors || [],
|
||||
grouping: parsed.variables?.grouping || null,
|
||||
confounders: parsed.variables?.confounders || [],
|
||||
},
|
||||
steps: (parsed.steps || []).map((s: any, i: number) => ({
|
||||
order: s.order || i + 1,
|
||||
method: s.method || '未指定',
|
||||
description: s.description || '',
|
||||
rationale: s.rationale || '',
|
||||
})),
|
||||
assumptions: parsed.assumptions || [],
|
||||
rawText: content,
|
||||
};
|
||||
} catch (e) {
|
||||
logger.warn('[AgentPlanner] Failed to parse JSON plan, using raw text');
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
title: '统计分析计划',
|
||||
designType: '未指定',
|
||||
variables: { outcome: [], predictors: [], grouping: null, confounders: [] },
|
||||
steps: [{ order: 1, method: '综合分析', description: content.slice(0, 500), rationale: '' }],
|
||||
assumptions: [],
|
||||
rawText: content,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export const agentPlannerService = new AgentPlannerService();
|
||||
183
backend/src/modules/ssa/services/AgentReviewerService.ts
Normal file
183
backend/src/modules/ssa/services/AgentReviewerService.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
/**
|
||||
* AgentReviewerService — Agent 通道审核 Agent
|
||||
*
|
||||
* 职责:审核分析计划和生成的 R 代码,确保统计方法学正确性和代码安全性。
|
||||
* 充当独立的 "第二双眼睛",在代码执行前拦截潜在问题。
|
||||
*
|
||||
* 审核维度:
|
||||
* 1. 统计方法学合理性(方法选择 vs 数据特征)
|
||||
* 2. 变量角色正确性(结局/预测/混杂)
|
||||
* 3. 代码安全性(无危险操作)
|
||||
* 4. 输出格式合规性(使用 block_helpers)
|
||||
*/
|
||||
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import type { AgentPlan } from './AgentPlannerService.js';
|
||||
|
||||
const MODEL = 'deepseek-v3';
|
||||
|
||||
export interface ReviewResult {
|
||||
passed: boolean;
|
||||
score: number;
|
||||
comments: string[];
|
||||
issues: Array<{
|
||||
severity: 'critical' | 'warning' | 'info';
|
||||
category: 'methodology' | 'code_safety' | 'output_format' | 'variable_usage';
|
||||
message: string;
|
||||
}>;
|
||||
rawText: string;
|
||||
}
|
||||
|
||||
export class AgentReviewerService {
|
||||
|
||||
async review(
|
||||
plan: AgentPlan,
|
||||
code: string,
|
||||
dataContext?: string,
|
||||
): Promise<ReviewResult> {
|
||||
const systemPrompt = this.buildSystemPrompt();
|
||||
|
||||
const userMessage = this.buildReviewRequest(plan, code, dataContext);
|
||||
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userMessage },
|
||||
];
|
||||
|
||||
logger.info('[AgentReviewer] Reviewing plan + code', {
|
||||
planTitle: plan.title,
|
||||
codeLength: code.length,
|
||||
});
|
||||
|
||||
const adapter = LLMFactory.getAdapter(MODEL as any);
|
||||
const response = await adapter.chat(messages, {
|
||||
temperature: 0.1,
|
||||
maxTokens: 1500,
|
||||
});
|
||||
|
||||
const content = response.content || '';
|
||||
const result = this.parseReview(content);
|
||||
|
||||
logger.info('[AgentReviewer] Review complete', {
|
||||
passed: result.passed,
|
||||
score: result.score,
|
||||
issueCount: result.issues.length,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private buildSystemPrompt(): string {
|
||||
return `你是一位统计方法学审核专家(Reviewer Agent)。你的职责是审核统计分析计划和 R 代码,确保科学严谨性和代码安全性。
|
||||
|
||||
## 审核清单
|
||||
|
||||
### 1. 统计方法学(methodology)
|
||||
- 研究设计类型是否与分析方法匹配?
|
||||
- 变量类型(连续/分类)是否与所选方法的要求一致?
|
||||
- 是否考虑了正态性检验(连续变量)?
|
||||
- 分类变量的频数是否满足方法要求(如卡方检验的期望频数 ≥ 5)?
|
||||
- 多因素分析的 EPV 是否 ≥ 10?
|
||||
- 是否遗漏了重要的混杂因素控制?
|
||||
|
||||
### 2. 代码安全性(code_safety)
|
||||
- 是否包含 install.packages()?[critical]
|
||||
- 是否包含 system() / shell() 调用?[critical]
|
||||
- 是否有外部网络请求?[critical]
|
||||
- 是否有文件系统越权访问?[critical]
|
||||
|
||||
### 3. 输出格式(output_format)
|
||||
- 是否使用 block_helpers.R 的函数构造输出?
|
||||
- 最终是否返回包含 report_blocks 的 list?
|
||||
- 表格是否有标题和合理的列名?
|
||||
|
||||
### 4. 变量使用(variable_usage)
|
||||
- 代码中使用的变量名是否与数据中的列名一致?
|
||||
- 变量角色分配是否合理?
|
||||
|
||||
## 输出格式
|
||||
请输出 JSON:
|
||||
\`\`\`json
|
||||
{
|
||||
"passed": true/false,
|
||||
"score": 0-100,
|
||||
"issues": [
|
||||
{
|
||||
"severity": "critical|warning|info",
|
||||
"category": "methodology|code_safety|output_format|variable_usage",
|
||||
"message": "问题描述"
|
||||
}
|
||||
],
|
||||
"comments": ["审核意见1", "审核意见2"]
|
||||
}
|
||||
\`\`\`
|
||||
- passed: 没有 critical 级别问题时为 true
|
||||
- score: 综合评分(0-100)`;
|
||||
}
|
||||
|
||||
private buildReviewRequest(plan: AgentPlan, code: string, dataContext?: string): string {
|
||||
const planSummary = [
|
||||
`标题: ${plan.title}`,
|
||||
`设计: ${plan.designType}`,
|
||||
`结局变量: ${plan.variables.outcome.join(', ') || '未指定'}`,
|
||||
`分组变量: ${plan.variables.grouping || '无'}`,
|
||||
`步骤: ${plan.steps.map(s => `${s.order}. ${s.method}`).join(', ')}`,
|
||||
].join('\n');
|
||||
|
||||
let prompt = `请审核以下统计分析计划和 R 代码:
|
||||
|
||||
## 分析计划
|
||||
${planSummary}
|
||||
|
||||
## R 代码
|
||||
\`\`\`r
|
||||
${code}
|
||||
\`\`\``;
|
||||
|
||||
if (dataContext) {
|
||||
prompt += `\n\n## 数据上下文\n${dataContext}`;
|
||||
}
|
||||
|
||||
prompt += '\n\n请严格按照审核清单逐项检查,输出 JSON 格式的审核结果。';
|
||||
return prompt;
|
||||
}
|
||||
|
||||
private parseReview(content: string): ReviewResult {
|
||||
const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
|
||||
if (jsonMatch) {
|
||||
try {
|
||||
const parsed = JSON.parse(jsonMatch[1].trim());
|
||||
const issues = (parsed.issues || []).map((issue: any) => ({
|
||||
severity: issue.severity || 'info',
|
||||
category: issue.category || 'methodology',
|
||||
message: issue.message || '',
|
||||
}));
|
||||
|
||||
const hasCritical = issues.some((i: any) => i.severity === 'critical');
|
||||
|
||||
return {
|
||||
passed: hasCritical ? false : (parsed.passed ?? true),
|
||||
score: parsed.score ?? (hasCritical ? 30 : 80),
|
||||
comments: parsed.comments || [],
|
||||
issues,
|
||||
rawText: content,
|
||||
};
|
||||
} catch (e) {
|
||||
logger.warn('[AgentReviewer] Failed to parse review JSON');
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
passed: true,
|
||||
score: 70,
|
||||
comments: ['审核结果解析失败,默认放行(请人工检查)'],
|
||||
issues: [],
|
||||
rawText: content,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export const agentReviewerService = new AgentReviewerService();
|
||||
@@ -17,6 +17,10 @@ import { tokenTruncationService } from './TokenTruncationService.js';
|
||||
import { methodConsultService } from './MethodConsultService.js';
|
||||
import { askUserService, type AskUserResponse } from './AskUserService.js';
|
||||
import { toolOrchestratorService } from './ToolOrchestratorService.js';
|
||||
import { agentPlannerService } from './AgentPlannerService.js';
|
||||
import { agentCoderService } from './AgentCoderService.js';
|
||||
import { codeRunnerService } from './CodeRunnerService.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import type { IntentType } from './SystemPromptService.js';
|
||||
import type { IntentResult } from './IntentRouterService.js';
|
||||
|
||||
@@ -387,6 +391,377 @@ export class ChatHandlerService {
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Agent 通道入口(双通道架构 Phase 1 骨架)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Agent 模式入口 — 三步确认式管线
|
||||
*
|
||||
* 状态机:
|
||||
* 新请求 → agentGeneratePlan → plan_pending(等用户确认)
|
||||
* 用户确认计划 → agentStreamCode → code_pending(等用户确认)
|
||||
* 用户确认代码 → agentExecuteCode → completed
|
||||
*/
|
||||
async handleAgentMode(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
userContent: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
try {
|
||||
// 1. 检查是否有等待确认的执行记录
|
||||
const activeExec = await (prisma as any).ssaAgentExecution.findFirst({
|
||||
where: { sessionId, status: { in: ['plan_pending', 'code_pending'] } },
|
||||
orderBy: { createdAt: 'desc' },
|
||||
});
|
||||
|
||||
if (activeExec) {
|
||||
const action = this.parseAgentAction(userContent);
|
||||
|
||||
if (activeExec.status === 'plan_pending') {
|
||||
if (action === 'confirm') {
|
||||
return await this.agentStreamCode(activeExec, sessionId, conversationId, writer, placeholderMessageId);
|
||||
}
|
||||
if (action === 'cancel') {
|
||||
return await this.agentCancel(activeExec, sessionId, conversationId, writer, placeholderMessageId);
|
||||
}
|
||||
}
|
||||
|
||||
if (activeExec.status === 'code_pending') {
|
||||
if (action === 'confirm') {
|
||||
return await this.agentExecuteCode(activeExec, sessionId, conversationId, writer, placeholderMessageId);
|
||||
}
|
||||
if (action === 'cancel') {
|
||||
return await this.agentCancel(activeExec, sessionId, conversationId, writer, placeholderMessageId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 无挂起确认 — 检查是否是分析请求
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
const hasData = !!blackboard?.dataOverview;
|
||||
|
||||
if (hasData && this.looksLikeAnalysisRequest(userContent)) {
|
||||
return await this.agentGeneratePlan(sessionId, conversationId, userContent, writer, placeholderMessageId);
|
||||
}
|
||||
|
||||
return await this.handleAgentChat(sessionId, conversationId, writer, placeholderMessageId, blackboard);
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:ChatHandler] Agent mode error', { sessionId, error: error.message });
|
||||
await conversationService.markAssistantError(placeholderMessageId, error.message);
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
|
||||
/** 解析用户回复中的确认/取消意图 */
|
||||
private parseAgentAction(content: string): 'confirm' | 'cancel' | 'other' {
|
||||
const lc = content.toLowerCase();
|
||||
if (lc.includes('agent_confirm') || lc.includes('确认') || lc.includes('执行代码') || lc.includes('开始生成')) return 'confirm';
|
||||
if (lc.includes('agent_cancel') || lc.includes('取消')) return 'cancel';
|
||||
return 'other';
|
||||
}
|
||||
|
||||
// ── Agent Step 1: 生成分析计划 → 等用户确认 ──
|
||||
|
||||
private async agentGeneratePlan(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
userContent: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
const sendEvent = (type: string, data: Record<string, any>) => {
|
||||
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
|
||||
};
|
||||
|
||||
const execution = await (prisma as any).ssaAgentExecution.create({
|
||||
data: { sessionId, query: userContent, status: 'planning' },
|
||||
});
|
||||
|
||||
sendEvent('agent_planning', { executionId: execution.id, message: '正在制定分析计划...' });
|
||||
|
||||
const conversationHistory = await conversationService.buildContext(sessionId, conversationId, 'analyze');
|
||||
const plan = await agentPlannerService.generatePlan(sessionId, userContent, conversationHistory);
|
||||
|
||||
// planText 存原始文本, reviewResult(JSON) 存结构化计划以便恢复
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: {
|
||||
planText: plan.rawText,
|
||||
reviewResult: JSON.parse(JSON.stringify(plan)),
|
||||
status: 'plan_pending',
|
||||
},
|
||||
});
|
||||
|
||||
sendEvent('agent_plan_ready', {
|
||||
executionId: execution.id,
|
||||
plan: { title: plan.title, designType: plan.designType, steps: plan.steps },
|
||||
planText: plan.rawText,
|
||||
});
|
||||
|
||||
// 流式解释计划
|
||||
const hint = [
|
||||
`[系统指令] 你刚刚制定了分析计划「${plan.title}」,包含 ${plan.steps.length} 个步骤。`,
|
||||
'请用简洁的自然语言解释这个计划,然后告知用户可以确认后开始生成 R 代码。',
|
||||
'【禁止】不要编造数值或结果。',
|
||||
].join('\n');
|
||||
|
||||
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
|
||||
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 800 });
|
||||
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
|
||||
|
||||
sendEvent('ask_user', {
|
||||
questionId: `agent_plan_${execution.id}`,
|
||||
text: '请确认分析计划',
|
||||
options: [
|
||||
{ id: 'agent_confirm_plan', label: '确认方案,生成代码' },
|
||||
{ id: 'agent_cancel', label: '取消' },
|
||||
],
|
||||
});
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
// ── Agent Step 2: 流式生成代码 → 等用户确认 ──
|
||||
|
||||
private async agentStreamCode(
|
||||
execution: any,
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
const sendEvent = (type: string, data: Record<string, any>) => {
|
||||
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
|
||||
};
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { status: 'coding' },
|
||||
});
|
||||
|
||||
sendEvent('code_generating', { executionId: execution.id, partialCode: '', message: '正在生成 R 代码...' });
|
||||
|
||||
const plan = execution.reviewResult as any;
|
||||
|
||||
const generated = await agentCoderService.generateCodeStream(
|
||||
sessionId, plan,
|
||||
(accumulated: string) => {
|
||||
sendEvent('code_generating', { executionId: execution.id, partialCode: accumulated });
|
||||
},
|
||||
);
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { generatedCode: generated.code, status: 'code_pending' },
|
||||
});
|
||||
|
||||
sendEvent('code_generated', {
|
||||
executionId: execution.id,
|
||||
code: generated.code,
|
||||
explanation: generated.explanation,
|
||||
});
|
||||
|
||||
// 流式说明代码
|
||||
const hint = [
|
||||
`[系统指令] R 代码已生成(${generated.code.split('\n').length} 行),使用 ${generated.requiredPackages.join(', ') || '基础包'}。`,
|
||||
'请简要说明代码逻辑,告知用户可以确认执行。',
|
||||
'【禁止】不要在对话中贴代码(工作区已展示),不要编造结果。',
|
||||
].join('\n');
|
||||
|
||||
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
|
||||
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 600 });
|
||||
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
|
||||
|
||||
sendEvent('ask_user', {
|
||||
questionId: `agent_code_${execution.id}`,
|
||||
text: '代码已生成,请确认执行',
|
||||
options: [
|
||||
{ id: 'agent_confirm_code', label: '执行代码' },
|
||||
{ id: 'agent_cancel', label: '取消' },
|
||||
],
|
||||
});
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
// ── Agent Step 3: 执行代码 + 重试循环 ──
|
||||
|
||||
private async agentExecuteCode(
|
||||
execution: any,
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
const sendEvent = (type: string, data: Record<string, any>) => {
|
||||
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
|
||||
};
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { status: 'executing' },
|
||||
});
|
||||
|
||||
const plan = execution.reviewResult as any;
|
||||
let currentCode = execution.generatedCode as string;
|
||||
let lastError: string | null = null;
|
||||
|
||||
for (let attempt = 0; attempt <= codeRunnerService.maxRetries; attempt++) {
|
||||
sendEvent('code_executing', {
|
||||
executionId: execution.id,
|
||||
attempt: attempt + 1,
|
||||
message: attempt === 0 ? '正在执行 R 代码...' : `第 ${attempt + 1} 次重试执行...`,
|
||||
});
|
||||
|
||||
const execResult = await codeRunnerService.executeCode(sessionId, currentCode);
|
||||
|
||||
if (execResult.success) {
|
||||
const durationMs = execResult.durationMs || 0;
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: {
|
||||
executionResult: execResult as any,
|
||||
reportBlocks: execResult.reportBlocks as any,
|
||||
generatedCode: currentCode,
|
||||
status: 'completed',
|
||||
retryCount: attempt,
|
||||
durationMs,
|
||||
},
|
||||
});
|
||||
|
||||
sendEvent('code_result', {
|
||||
executionId: execution.id,
|
||||
reportBlocks: execResult.reportBlocks,
|
||||
code: currentCode,
|
||||
durationMs,
|
||||
});
|
||||
|
||||
// 流式总结结果
|
||||
const hint = [
|
||||
`[系统指令] R 代码执行完成(${durationMs}ms),生成 ${(execResult.reportBlocks || []).length} 个报告模块。`,
|
||||
'请简要解释结果的统计学意义。',
|
||||
'【禁止】不要编造数值(工作区已展示完整结果)。',
|
||||
].join('\n');
|
||||
|
||||
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
|
||||
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 800 });
|
||||
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
lastError = execResult.error || '执行失败';
|
||||
|
||||
if (attempt < codeRunnerService.maxRetries) {
|
||||
sendEvent('code_error', { executionId: execution.id, message: lastError, willRetry: true });
|
||||
sendEvent('code_retry', { executionId: execution.id, retryCount: attempt + 1, message: `错误: ${lastError},正在修复代码...` });
|
||||
|
||||
const retry = await agentCoderService.generateCode(sessionId, plan, lastError);
|
||||
currentCode = retry.code;
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { generatedCode: currentCode, retryCount: attempt + 1 },
|
||||
});
|
||||
|
||||
sendEvent('code_generated', { executionId: execution.id, code: currentCode });
|
||||
}
|
||||
}
|
||||
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { status: 'error', errorMessage: lastError, retryCount: codeRunnerService.maxRetries },
|
||||
});
|
||||
|
||||
sendEvent('code_error', {
|
||||
executionId: execution.id,
|
||||
message: `经过 ${codeRunnerService.maxRetries + 1} 次尝试仍然失败: ${lastError}`,
|
||||
willRetry: false,
|
||||
});
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
// ── Agent 取消 ──
|
||||
|
||||
private async agentCancel(
|
||||
execution: any,
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: execution.id },
|
||||
data: { status: 'error', errorMessage: '用户取消' },
|
||||
});
|
||||
|
||||
const msgs = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'analyze', '[系统指令] 用户取消了当前分析流程。请简短回复确认取消。',
|
||||
);
|
||||
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 200 });
|
||||
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
// ── Agent 自由对话 ──
|
||||
|
||||
private async handleAgentChat(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
blackboard: any,
|
||||
): Promise<HandleResult> {
|
||||
let toolOutputs = '';
|
||||
|
||||
if (blackboard) {
|
||||
const truncated = tokenTruncationService.truncate(blackboard, { maxTokens: 2000, strategy: 'balanced' });
|
||||
const parts: string[] = [];
|
||||
if (truncated.overview) parts.push(`数据概览:\n${truncated.overview}`);
|
||||
if (truncated.variables) parts.push(`变量列表:\n${truncated.variables}`);
|
||||
if (truncated.pico) parts.push(`PICO 推断:\n${truncated.pico}`);
|
||||
if (parts.length > 0) toolOutputs = parts.join('\n\n');
|
||||
}
|
||||
|
||||
const agentHint = [
|
||||
'[当前模式: Agent 代码生成通道]',
|
||||
'你是一位高级统计分析 Agent。你可以:',
|
||||
'1. 与临床研究者自由讨论统计分析需求',
|
||||
'2. 帮助理解数据特征、研究设计、统计方法选择',
|
||||
'3. 当用户明确分析需求后,自动制定计划并生成 R 代码执行',
|
||||
'',
|
||||
'注意:禁止编造或模拟任何分析结果和数值。',
|
||||
blackboard?.dataOverview
|
||||
? '用户已上传数据,你可以结合数据概览回答问题。当用户提出分析需求时,会自动触发分析流程。'
|
||||
: '用户尚未上传数据。请引导用户先上传研究数据文件。',
|
||||
].join('\n');
|
||||
|
||||
const fullToolOutputs = toolOutputs ? `${toolOutputs}\n\n${agentHint}` : agentHint;
|
||||
const messages = await conversationService.buildContext(sessionId, conversationId, 'analyze', fullToolOutputs);
|
||||
const result = await conversationService.streamToSSE(messages, writer, { temperature: 0.7, maxTokens: 2000 });
|
||||
await conversationService.finalizeAssistantMessage(placeholderMessageId, result.content, result.thinking, result.tokens);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
/** 简单启发式:判断用户消息是否像分析请求 */
|
||||
private looksLikeAnalysisRequest(content: string): boolean {
|
||||
const kw = [
|
||||
'分析', '比较', '检验', '回归', '相关', '差异', '统计',
|
||||
'生存', '卡方', 'logistic', 't检验', 'anova',
|
||||
'基线', '特征表', '描述性', '预测', '影响因素',
|
||||
'帮我做', '帮我跑', '开始分析', '执行分析',
|
||||
];
|
||||
const lc = content.toLowerCase();
|
||||
return kw.some(k => lc.includes(k));
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// ask_user 响应处理(Phase III)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
182
backend/src/modules/ssa/services/CodeRunnerService.ts
Normal file
182
backend/src/modules/ssa/services/CodeRunnerService.ts
Normal file
@@ -0,0 +1,182 @@
|
||||
/**
|
||||
* CodeRunnerService — Agent 通道代码执行器
|
||||
*
|
||||
* 职责:将 LLM 生成的 R 代码发送到 R Docker 沙箱执行,
|
||||
* 处理执行结果,失败时反馈错误给 CoderAgent 进行重试。
|
||||
*
|
||||
* 执行流程:
|
||||
* 1. 构建执行 payload(code + data_source)
|
||||
* 2. 调用 R Docker POST /api/v1/execute-code
|
||||
* 3. 成功 → 返回 report_blocks
|
||||
* 4. 失败 → 返回错误信息,供重试循环使用
|
||||
*
|
||||
* 安全约束:
|
||||
* - 最大重试次数 MAX_RETRIES = 3
|
||||
* - 单次执行超时 120 秒
|
||||
*/
|
||||
|
||||
import axios, { AxiosInstance } from 'axios';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
|
||||
const MAX_RETRIES = 3;
|
||||
const EXECUTE_TIMEOUT_MS = 130000;
|
||||
|
||||
export interface CodeExecutionResult {
|
||||
success: boolean;
|
||||
reportBlocks?: any[];
|
||||
consoleOutput?: string[];
|
||||
durationMs?: number;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export class CodeRunnerService {
|
||||
private client: AxiosInstance;
|
||||
|
||||
constructor() {
|
||||
const baseURL = process.env.R_SERVICE_URL || 'http://localhost:8082';
|
||||
this.client = axios.create({
|
||||
baseURL,
|
||||
timeout: EXECUTE_TIMEOUT_MS,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行 R 代码并返回结果
|
||||
*/
|
||||
async executeCode(
|
||||
sessionId: string,
|
||||
code: string,
|
||||
): Promise<CodeExecutionResult> {
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const dataSource = await this.buildDataSource(sessionId);
|
||||
|
||||
const payload = {
|
||||
code: this.wrapCode(code, dataSource),
|
||||
session_id: sessionId,
|
||||
timeout: 120,
|
||||
};
|
||||
|
||||
logger.info('[CodeRunner] Executing R code', {
|
||||
sessionId,
|
||||
codeLength: code.length,
|
||||
});
|
||||
|
||||
const response = await this.client.post('/api/v1/execute-code', payload);
|
||||
const durationMs = Date.now() - startTime;
|
||||
|
||||
if (response.data?.status === 'success') {
|
||||
const result = response.data.result || {};
|
||||
const reportBlocks = result.report_blocks || result.data?.report_blocks || [];
|
||||
|
||||
logger.info('[CodeRunner] Execution success', {
|
||||
sessionId,
|
||||
durationMs,
|
||||
blockCount: reportBlocks.length,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
reportBlocks,
|
||||
consoleOutput: response.data.console_output,
|
||||
durationMs,
|
||||
};
|
||||
}
|
||||
|
||||
const errorMsg = response.data?.message || response.data?.user_hint || 'R 执行返回非成功状态';
|
||||
logger.warn('[CodeRunner] Execution failed (R returned error)', {
|
||||
sessionId,
|
||||
durationMs,
|
||||
error: errorMsg,
|
||||
});
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: errorMsg,
|
||||
consoleOutput: response.data?.console_output,
|
||||
durationMs,
|
||||
};
|
||||
} catch (error: any) {
|
||||
const durationMs = Date.now() - startTime;
|
||||
const statusCode = error.response?.status;
|
||||
|
||||
if (statusCode === 502 || statusCode === 504) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'R 统计服务超时或崩溃,请检查代码是否有死循环或内存溢出',
|
||||
durationMs,
|
||||
};
|
||||
}
|
||||
|
||||
const errorMsg = error.response?.data?.message
|
||||
|| error.response?.data?.user_hint
|
||||
|| error.message
|
||||
|| 'R 服务调用失败';
|
||||
|
||||
logger.error('[CodeRunner] Execution error', {
|
||||
sessionId,
|
||||
durationMs,
|
||||
error: errorMsg,
|
||||
});
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: errorMsg,
|
||||
durationMs,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
get maxRetries(): number {
|
||||
return MAX_RETRIES;
|
||||
}
|
||||
|
||||
/**
|
||||
* 包装用户代码:注入 data_source 和 input 变量
|
||||
* URL 通过 Sys.getenv 读取(由 R 端点的 sandbox_env 设置),避免字符串拼接注入风险
|
||||
*/
|
||||
private wrapCode(userCode: string, dataSource: { type: string; oss_url?: string }): string {
|
||||
const escapedUrl = (dataSource.oss_url || '').replace(/\\/g, '\\\\').replace(/"/g, '\\"');
|
||||
return `
|
||||
# === 自动注入:数据加载 ===
|
||||
input <- list(
|
||||
data_source = list(
|
||||
type = "${dataSource.type}",
|
||||
oss_url = "${escapedUrl}"
|
||||
)
|
||||
)
|
||||
|
||||
# 加载数据到 df(LLM 代码直接使用 df,不需要再调用 load_input_data)
|
||||
df <- load_input_data(input)
|
||||
message(paste0("[Agent] Data loaded: ", nrow(df), " rows x ", ncol(df), " cols"))
|
||||
|
||||
# === 用户代码开始 ===
|
||||
${userCode}
|
||||
# === 用户代码结束 ===
|
||||
`.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建数据源(从 session 读取 OSS key → 预签名 URL)
|
||||
*/
|
||||
private async buildDataSource(sessionId: string): Promise<{ type: string; oss_url: string }> {
|
||||
const session = await prisma.ssaSession.findUnique({
|
||||
where: { id: sessionId },
|
||||
select: { dataOssKey: true },
|
||||
});
|
||||
|
||||
const ossKey = session?.dataOssKey;
|
||||
if (!ossKey) {
|
||||
throw new Error('请先上传数据文件');
|
||||
}
|
||||
|
||||
const signedUrl = await storage.getUrl(ossKey);
|
||||
return { type: 'oss', oss_url: signedUrl };
|
||||
}
|
||||
}
|
||||
|
||||
export const codeRunnerService = new CodeRunnerService();
|
||||
444
backend/tests/e2e-dual-channel-test.ts
Normal file
444
backend/tests/e2e-dual-channel-test.ts
Normal file
@@ -0,0 +1,444 @@
|
||||
/**
|
||||
* SSA 双通道架构 E2E 测试
|
||||
*
|
||||
* 测试 Phase 1~3:
|
||||
* T1. 数据库迁移验证 — execution_mode 字段 + ssa_agent_executions 表
|
||||
* T2. Session execution mode 切换 API
|
||||
* T3. R Docker /execute-code 端点
|
||||
* T4. Agent 模式对话(自由对话 + 分析请求判断)
|
||||
* T5. AgentPlannerService 规划能力
|
||||
* T6. AgentCoderService 代码生成能力
|
||||
* T7. AgentReviewerService 审核能力
|
||||
* T8. ModeToggle 前端集成点验证(API 层面)
|
||||
*
|
||||
* 前置条件:
|
||||
* - PostgreSQL 运行中(Docker Desktop)
|
||||
* - R Docker 运行中(可选,T3 跳过如不可用)
|
||||
* - DeepSeek API key 配置在 .env
|
||||
* - 至少有一个 SSA session(有上传数据)
|
||||
*
|
||||
* 运行: npx tsx tests/e2e-dual-channel-test.ts
|
||||
*/
|
||||
|
||||
import { prisma } from '../src/config/database.js';
|
||||
import { logger } from '../src/common/logging/index.js';
|
||||
import axios from 'axios';
|
||||
|
||||
const R_SERVICE_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';
|
||||
const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:3000';
|
||||
|
||||
interface TestResult {
|
||||
id: string;
|
||||
name: string;
|
||||
status: 'pass' | 'fail' | 'skip';
|
||||
duration: number;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
const results: TestResult[] = [];
|
||||
|
||||
async function runTest(
|
||||
id: string,
|
||||
name: string,
|
||||
fn: () => Promise<void>,
|
||||
): Promise<void> {
|
||||
const start = Date.now();
|
||||
try {
|
||||
await fn();
|
||||
const dur = Date.now() - start;
|
||||
results.push({ id, name, status: 'pass', duration: dur });
|
||||
console.log(` ✅ ${id} ${name} (${dur}ms)`);
|
||||
} catch (error: any) {
|
||||
const dur = Date.now() - start;
|
||||
if (error.message?.startsWith('SKIP:')) {
|
||||
results.push({ id, name, status: 'skip', duration: dur, message: error.message });
|
||||
console.log(` ⏭️ ${id} ${name} — ${error.message}`);
|
||||
} else {
|
||||
results.push({ id, name, status: 'fail', duration: dur, message: error.message });
|
||||
console.log(` ❌ ${id} ${name} — ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function assert(condition: boolean, message: string): void {
|
||||
if (!condition) throw new Error(message);
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T1: 数据库迁移验证
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t1_dbMigration() {
|
||||
// 检查 ssa_sessions 表有 execution_mode 列
|
||||
const colCheck = await prisma.$queryRaw<any[]>`
|
||||
SELECT column_name, column_default
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = 'ssa_schema'
|
||||
AND table_name = 'ssa_sessions'
|
||||
AND column_name = 'execution_mode'
|
||||
`;
|
||||
assert(colCheck.length === 1, 'execution_mode 列不存在于 ssa_sessions 表');
|
||||
assert(
|
||||
colCheck[0].column_default?.includes('qper'),
|
||||
`execution_mode 默认值应为 qper,实际为 ${colCheck[0].column_default}`,
|
||||
);
|
||||
|
||||
// 检查 ssa_agent_executions 表存在
|
||||
const tableCheck = await prisma.$queryRaw<any[]>`
|
||||
SELECT table_name
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = 'ssa_schema'
|
||||
AND table_name = 'ssa_agent_executions'
|
||||
`;
|
||||
assert(tableCheck.length === 1, 'ssa_agent_executions 表不存在');
|
||||
|
||||
// 检查索引
|
||||
const idxCheck = await prisma.$queryRaw<any[]>`
|
||||
SELECT indexname
|
||||
FROM pg_indexes
|
||||
WHERE schemaname = 'ssa_schema'
|
||||
AND tablename = 'ssa_agent_executions'
|
||||
AND indexname = 'idx_ssa_agent_exec_session'
|
||||
`;
|
||||
assert(idxCheck.length === 1, 'idx_ssa_agent_exec_session 索引不存在');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T2: Session execution mode CRUD
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t2_executionModeSwitch() {
|
||||
// 查找一个现有 session
|
||||
const session = await prisma.ssaSession.findFirst({
|
||||
where: { status: 'active' },
|
||||
orderBy: { createdAt: 'desc' },
|
||||
});
|
||||
|
||||
if (!session) throw new Error('SKIP: 无可用 session,请先创建一个 SSA 会话');
|
||||
|
||||
// 读取当前 mode(应为默认 qper)
|
||||
const current = await prisma.$queryRaw<any[]>`
|
||||
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
|
||||
`;
|
||||
assert(current.length === 1, 'Session 不存在');
|
||||
const originalMode = current[0].execution_mode;
|
||||
console.log(` 当前 mode: ${originalMode}`);
|
||||
|
||||
// 切换到 agent
|
||||
await prisma.$executeRaw`
|
||||
UPDATE ssa_schema.ssa_sessions SET execution_mode = 'agent' WHERE id = ${session.id}
|
||||
`;
|
||||
|
||||
const after = await prisma.$queryRaw<any[]>`
|
||||
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
|
||||
`;
|
||||
assert(after[0].execution_mode === 'agent', '切换到 agent 失败');
|
||||
|
||||
// 切回 qper
|
||||
await prisma.$executeRaw`
|
||||
UPDATE ssa_schema.ssa_sessions SET execution_mode = 'qper' WHERE id = ${session.id}
|
||||
`;
|
||||
|
||||
const restored = await prisma.$queryRaw<any[]>`
|
||||
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
|
||||
`;
|
||||
assert(restored[0].execution_mode === 'qper', '切回 qper 失败');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T3: R Docker /execute-code 端点
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t3_rExecuteCode() {
|
||||
// 先检查 R 服务是否可用
|
||||
try {
|
||||
await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
|
||||
} catch {
|
||||
throw new Error('SKIP: R Docker 服务不可用');
|
||||
}
|
||||
|
||||
// 执行一段简单的 R 代码
|
||||
const response = await axios.post(`${R_SERVICE_URL}/api/v1/execute-code`, {
|
||||
code: `
|
||||
blocks <- list()
|
||||
blocks[[1]] <- make_markdown_block("## 测试结果\\n双通道 E2E 测试通过", title = "测试")
|
||||
blocks[[2]] <- make_kv_block(items = list("状态" = "成功", "时间" = as.character(Sys.time())), title = "概况")
|
||||
list(status = "success", report_blocks = blocks)
|
||||
`,
|
||||
session_id: 'e2e-test',
|
||||
timeout: 30,
|
||||
}, { timeout: 35000 });
|
||||
|
||||
assert(response.data?.status === 'success', `R 执行状态不是 success: ${response.data?.status}`);
|
||||
assert(
|
||||
Array.isArray(response.data?.result?.report_blocks),
|
||||
'report_blocks 不是数组',
|
||||
);
|
||||
assert(
|
||||
response.data.result.report_blocks.length === 2,
|
||||
`预期 2 个 block,实际 ${response.data.result.report_blocks.length}`,
|
||||
);
|
||||
|
||||
const markdownBlock = response.data.result.report_blocks[0];
|
||||
assert(markdownBlock.type === 'markdown', `Block 0 类型应为 markdown,实际 ${markdownBlock.type}`);
|
||||
|
||||
console.log(` R 执行耗时: ${response.data.duration_ms}ms, blocks: ${response.data.result.report_blocks.length}`);
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T4: R Docker 错误处理
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t4_rExecuteCodeError() {
|
||||
try {
|
||||
await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
|
||||
} catch {
|
||||
throw new Error('SKIP: R Docker 服务不可用');
|
||||
}
|
||||
|
||||
const response = await axios.post(`${R_SERVICE_URL}/api/v1/execute-code`, {
|
||||
code: 'stop("这是一个故意的错误")',
|
||||
session_id: 'e2e-test-error',
|
||||
timeout: 10,
|
||||
}, { timeout: 15000 });
|
||||
|
||||
assert(response.data?.status === 'error', '错误代码应返回 error 状态');
|
||||
assert(
|
||||
response.data?.message?.includes('故意的错误'),
|
||||
`错误消息应包含原始错误: ${response.data?.message}`,
|
||||
);
|
||||
|
||||
console.log(` 错误捕获正确: "${response.data.message}"`);
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T5: AgentPlannerService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t5_agentPlanner() {
|
||||
const { agentPlannerService } = await import('../src/modules/ssa/services/AgentPlannerService.js');
|
||||
|
||||
// 查找有数据的 session
|
||||
const session = await prisma.ssaSession.findFirst({
|
||||
where: { status: 'active', dataOssKey: { not: null } },
|
||||
orderBy: { createdAt: 'desc' },
|
||||
});
|
||||
|
||||
if (!session) throw new Error('SKIP: 无有数据的 session');
|
||||
|
||||
const plan = await agentPlannerService.generatePlan(
|
||||
session.id,
|
||||
'帮我做一个基线特征表,比较两组的差异',
|
||||
[],
|
||||
);
|
||||
|
||||
assert(!!plan.title, '计划标题为空');
|
||||
assert(plan.steps.length > 0, '计划步骤为空');
|
||||
assert(!!plan.rawText, 'rawText 为空');
|
||||
|
||||
console.log(` 计划标题: ${plan.title}`);
|
||||
console.log(` 设计类型: ${plan.designType}`);
|
||||
console.log(` 步骤数: ${plan.steps.length}`);
|
||||
plan.steps.forEach(s => console.log(` ${s.order}. ${s.method}: ${s.description}`));
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T6: AgentCoderService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t6_agentCoder() {
|
||||
const { agentCoderService } = await import('../src/modules/ssa/services/AgentCoderService.js');
|
||||
|
||||
const session = await prisma.ssaSession.findFirst({
|
||||
where: { status: 'active', dataOssKey: { not: null } },
|
||||
orderBy: { createdAt: 'desc' },
|
||||
});
|
||||
|
||||
if (!session) throw new Error('SKIP: 无有数据的 session');
|
||||
|
||||
const mockPlan = {
|
||||
title: '基线特征表分析',
|
||||
designType: '横断面研究',
|
||||
variables: {
|
||||
outcome: [],
|
||||
predictors: [],
|
||||
grouping: 'group',
|
||||
confounders: [],
|
||||
},
|
||||
steps: [
|
||||
{ order: 1, method: '描述性统计', description: '生成基线特征表', rationale: '了解数据分布' },
|
||||
],
|
||||
assumptions: [],
|
||||
rawText: '基线特征表分析计划',
|
||||
};
|
||||
|
||||
const generated = await agentCoderService.generateCode(session.id, mockPlan);
|
||||
|
||||
assert(generated.code.length > 50, `生成代码太短: ${generated.code.length} chars`);
|
||||
assert(generated.code.includes('load_input_data') || generated.code.includes('df'), '代码中未包含数据加载');
|
||||
|
||||
console.log(` 代码长度: ${generated.code.length} chars`);
|
||||
console.log(` 依赖包: ${generated.requiredPackages.join(', ') || '(无额外依赖)'}`);
|
||||
console.log(` 代码前 100 字符: ${generated.code.slice(0, 100).replace(/\n/g, ' ')}...`);
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T7: AgentReviewerService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t7_agentReviewer() {
|
||||
const { agentReviewerService } = await import('../src/modules/ssa/services/AgentReviewerService.js');
|
||||
|
||||
const safePlan = {
|
||||
title: '基线分析',
|
||||
designType: '队列研究',
|
||||
variables: {
|
||||
outcome: ['death'],
|
||||
predictors: ['age', 'sex'],
|
||||
grouping: 'treatment',
|
||||
confounders: [],
|
||||
},
|
||||
steps: [{ order: 1, method: '基线特征表', description: '描述统计', rationale: '基线比较' }],
|
||||
assumptions: [],
|
||||
rawText: '',
|
||||
};
|
||||
|
||||
const safeCode = `
|
||||
df <- load_input_data(input)
|
||||
library(gtsummary)
|
||||
tbl <- df %>%
|
||||
tbl_summary(by = treatment, include = c(age, sex, death)) %>%
|
||||
add_p()
|
||||
blocks <- list()
|
||||
blocks[[1]] <- make_markdown_block("## 基线特征表")
|
||||
list(status = "success", report_blocks = blocks)
|
||||
`;
|
||||
|
||||
const review = await agentReviewerService.review(safePlan, safeCode);
|
||||
|
||||
assert(typeof review.passed === 'boolean', 'passed 应为 boolean');
|
||||
assert(typeof review.score === 'number', 'score 应为 number');
|
||||
assert(Array.isArray(review.comments), 'comments 应为 array');
|
||||
|
||||
console.log(` 审核通过: ${review.passed}`);
|
||||
console.log(` 评分: ${review.score}/100`);
|
||||
console.log(` 问题数: ${review.issues.length}`);
|
||||
review.comments.forEach(c => console.log(` - ${c}`));
|
||||
|
||||
// 测试危险代码审核
|
||||
const dangerCode = `
|
||||
install.packages("hacker_pkg")
|
||||
system("rm -rf /")
|
||||
df <- load_input_data(input)
|
||||
list(status = "success", report_blocks = list())
|
||||
`;
|
||||
|
||||
const dangerReview = await agentReviewerService.review(safePlan, dangerCode);
|
||||
console.log(` 危险代码审核通过: ${dangerReview.passed} (预期 false)`);
|
||||
console.log(` 危险代码问题数: ${dangerReview.issues.length}`);
|
||||
|
||||
if (dangerReview.passed) {
|
||||
console.log(' ⚠️ 警告: 危险代码未被拦截,Prompt 需要加强');
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T8: Agent Execution 记录 CRUD
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function t8_agentExecutionCrud() {
|
||||
const session = await prisma.ssaSession.findFirst({
|
||||
where: { status: 'active' },
|
||||
orderBy: { createdAt: 'desc' },
|
||||
});
|
||||
|
||||
if (!session) throw new Error('SKIP: 无可用 session');
|
||||
|
||||
// 创建
|
||||
const exec = await (prisma as any).ssaAgentExecution.create({
|
||||
data: {
|
||||
sessionId: session.id,
|
||||
query: 'E2E 测试查询',
|
||||
status: 'pending',
|
||||
},
|
||||
});
|
||||
assert(!!exec.id, '创建执行记录失败');
|
||||
console.log(` 创建记录: ${exec.id}`);
|
||||
|
||||
// 更新
|
||||
await (prisma as any).ssaAgentExecution.update({
|
||||
where: { id: exec.id },
|
||||
data: {
|
||||
status: 'completed',
|
||||
planText: '测试计划',
|
||||
generatedCode: 'print("hello")',
|
||||
durationMs: 1234,
|
||||
},
|
||||
});
|
||||
|
||||
const updated = await (prisma as any).ssaAgentExecution.findUnique({
|
||||
where: { id: exec.id },
|
||||
});
|
||||
assert(updated.status === 'completed', `状态应为 completed,实际 ${updated.status}`);
|
||||
assert(updated.durationMs === 1234, `耗时应为 1234,实际 ${updated.durationMs}`);
|
||||
|
||||
// 删除(清理)
|
||||
await (prisma as any).ssaAgentExecution.delete({ where: { id: exec.id } });
|
||||
console.log(' CRUD 全流程通过');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// Main
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
async function main() {
|
||||
console.log('\n╔══════════════════════════════════════════════╗');
|
||||
console.log('║ SSA 双通道架构 E2E 测试 (Phase 1~3) ║');
|
||||
console.log('╚══════════════════════════════════════════════╝\n');
|
||||
|
||||
console.log('📦 Phase 1: 基础设施');
|
||||
await runTest('T1', '数据库迁移验证(execution_mode + agent_executions)', t1_dbMigration);
|
||||
await runTest('T2', 'Session execution mode 切换', t2_executionModeSwitch);
|
||||
await runTest('T3', 'R Docker /execute-code 正常执行', t3_rExecuteCode);
|
||||
await runTest('T4', 'R Docker /execute-code 错误处理', t4_rExecuteCodeError);
|
||||
|
||||
console.log('\n🤖 Phase 2: Agent 服务');
|
||||
await runTest('T5', 'AgentPlannerService 规划能力', t5_agentPlanner);
|
||||
await runTest('T6', 'AgentCoderService R 代码生成', t6_agentCoder);
|
||||
await runTest('T7', 'AgentReviewerService 审核能力', t7_agentReviewer);
|
||||
await runTest('T8', 'Agent Execution 记录 CRUD', t8_agentExecutionCrud);
|
||||
|
||||
// 汇总
|
||||
console.log('\n' + '═'.repeat(50));
|
||||
const passed = results.filter(r => r.status === 'pass').length;
|
||||
const failed = results.filter(r => r.status === 'fail').length;
|
||||
const skipped = results.filter(r => r.status === 'skip').length;
|
||||
const totalMs = results.reduce((s, r) => s + r.duration, 0);
|
||||
|
||||
console.log(`\n📊 测试结果: ${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 (总耗时 ${totalMs}ms)`);
|
||||
|
||||
if (failed > 0) {
|
||||
console.log('\n❌ 失败详情:');
|
||||
results.filter(r => r.status === 'fail').forEach(r => {
|
||||
console.log(` ${r.id} ${r.name}: ${r.message}`);
|
||||
});
|
||||
}
|
||||
|
||||
if (skipped > 0) {
|
||||
console.log('\n⏭️ 跳过详情:');
|
||||
results.filter(r => r.status === 'skip').forEach(r => {
|
||||
console.log(` ${r.id} ${r.name}: ${r.message}`);
|
||||
});
|
||||
}
|
||||
|
||||
console.log('\n' + (failed === 0 ? '🎉 所有测试通过!' : '⚠️ 有测试失败,请检查。'));
|
||||
|
||||
await prisma.$disconnect();
|
||||
process.exit(failed > 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
main().catch(async (err) => {
|
||||
console.error('测试执行异常:', err);
|
||||
await prisma.$disconnect();
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user