feat(ssa): Implement dual-channel architecture Phase 1-3 (QPER + LLM Agent pipeline)

Completed:
- Phase 1: DB schema (execution_mode + ssa_agent_executions), ModeToggle component, Session PATCH API
- Phase 2: AgentPlannerService + AgentCoderService (streaming) + CodeRunnerService + R Docker /execute-code endpoint
- Phase 3: AgentCodePanel (3-step confirmation UI), SSE event handling (7 agent events), streaming code display
- Three-step confirmation pipeline: plan -> user confirm -> stream code -> user confirm -> execute R code -> results
- R Docker sandbox /execute-code endpoint with 120s timeout + block_helpers preloaded
- E2E dual-channel test script (8 tests)
- Updated R engine architecture doc (v1.5) and SSA module status doc (v4.0)

Technical details:
- AgentCoderService uses LLM streaming (chatStream) for real-time code generation feedback
- ReviewerAgent temporarily disabled, prioritizing Plan -> Code -> Execute flow
- CodeRunnerService wraps user code with auto data loading (df variable injection)
- Frontend handles agent_planning, agent_plan_ready, code_generating, code_generated, code_executing, code_result events
- ask_user mechanism used for plan and code confirmation steps

Files: 24 files (4 new services, 2 new components, 1 migration, 1 E2E test, 16 modified)
Made-with: Cursor
This commit is contained in:
2026-03-02 22:23:54 +08:00
parent 71d32d11ee
commit aadceb5cde
24 changed files with 2694 additions and 56 deletions

View File

@@ -0,0 +1,38 @@
-- AlterTable: ssa_sessions 新增 execution_mode 列
ALTER TABLE "ssa_schema"."ssa_sessions" ADD COLUMN IF NOT EXISTS "execution_mode" TEXT NOT NULL DEFAULT 'qper';
-- CreateTable: ssa_agent_executions (Agent 通道执行记录)
CREATE TABLE IF NOT EXISTS "ssa_schema"."ssa_agent_executions" (
"id" TEXT NOT NULL,
"session_id" TEXT NOT NULL,
"query" TEXT NOT NULL,
"plan_text" TEXT,
"generated_code" TEXT,
"review_result" JSONB,
"execution_result" JSONB,
"report_blocks" JSONB,
"retry_count" INTEGER NOT NULL DEFAULT 0,
"status" TEXT NOT NULL DEFAULT 'pending',
"error_message" TEXT,
"duration_ms" INTEGER,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "ssa_agent_executions_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE INDEX IF NOT EXISTS "idx_ssa_agent_exec_session" ON "ssa_schema"."ssa_agent_executions"("session_id");
-- AddForeignKey
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint WHERE conname = 'ssa_agent_executions_session_id_fkey'
) THEN
ALTER TABLE "ssa_schema"."ssa_agent_executions"
ADD CONSTRAINT "ssa_agent_executions_session_id_fkey"
FOREIGN KEY ("session_id") REFERENCES "ssa_schema"."ssa_sessions"("id")
ON DELETE CASCADE ON UPDATE CASCADE;
END IF;
END $$;

View File

@@ -2480,13 +2480,15 @@ model SsaSession {
dataPayload Json? @map("data_payload") /// 真实数据仅R可见
dataOssKey String? @map("data_oss_key") /// OSS 存储 key大数据
dataProfile Json? @map("data_profile") /// 🆕 Python 生成的 DataProfilePhase 2A
executionMode String @default("qper") @map("execution_mode") /// qper | agent
status String @default("active") /// active | consult | completed | error
createdAt DateTime @default(now()) @map("created_at")
updatedAt DateTime @updatedAt @map("updated_at")
messages SsaMessage[]
executionLogs SsaExecutionLog[]
workflows SsaWorkflow[] /// 🆕 多步骤流程Phase 2A
messages SsaMessage[]
executionLogs SsaExecutionLog[]
workflows SsaWorkflow[] /// 🆕 多步骤流程Phase 2A
agentExecutions SsaAgentExecution[] /// Agent 模式执行记录
@@index([userId], map: "idx_ssa_session_user")
@@index([status], map: "idx_ssa_session_status")
@@ -2494,6 +2496,30 @@ model SsaSession {
@@schema("ssa_schema")
}
/// SSA Agent 执行记录LLM 代码生成通道)
model SsaAgentExecution {
id String @id @default(uuid())
sessionId String @map("session_id")
query String
planText String? @map("plan_text")
generatedCode String? @map("generated_code")
reviewResult Json? @map("review_result")
executionResult Json? @map("execution_result")
reportBlocks Json? @map("report_blocks")
retryCount Int @default(0) @map("retry_count")
status String @default("pending") /// pending | planning | coding | reviewing | executing | completed | error
errorMessage String? @map("error_message")
durationMs Int? @map("duration_ms")
createdAt DateTime @default(now()) @map("created_at")
updatedAt DateTime @updatedAt @map("updated_at")
session SsaSession @relation(fields: [sessionId], references: [id], onDelete: Cascade)
@@index([sessionId], map: "idx_ssa_agent_exec_session")
@@map("ssa_agent_executions")
@@schema("ssa_schema")
}
/// SSA 消息记录
model SsaMessage {
id String @id @default(uuid())

View File

@@ -12,6 +12,7 @@
import { FastifyInstance, FastifyRequest } from 'fastify';
import { logger } from '../../../common/logging/index.js';
import { prisma } from '../../../config/database.js';
import { conversationService } from '../services/ConversationService.js';
import { intentRouterService } from '../services/IntentRouterService.js';
import { chatHandlerService } from '../services/ChatHandlerService.js';
@@ -114,6 +115,41 @@ export default async function chatRoutes(app: FastifyInstance) {
}
// ── H1 结束 ──
// 3. 读取 session 的执行模式
const session = await (prisma.ssaSession as any).findUnique({
where: { id: sessionId },
select: { executionMode: true },
});
const executionMode = (session?.executionMode as string) || 'qper';
// ── Agent 通道分流 ──
if (executionMode === 'agent') {
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
conversationId, 'chat',
);
const metaEvent = JSON.stringify({
type: 'intent_classified',
intent: 'analyze',
confidence: 1,
source: 'agent_mode',
guardTriggered: false,
});
writer.write(`data: ${metaEvent}\n\n`);
const result = await chatHandlerService.handleAgentMode(
sessionId, conversationId, content.trim(), writer, placeholderMsgId,
);
logger.info('[SSA:Chat] Agent mode request completed', {
sessionId, success: result.success,
});
writer.end();
return;
}
// ── QPER 通道(现有逻辑) ──
// 3. 意图分类
const intentResult = await intentRouterService.classify(content.trim(), sessionId);

View File

@@ -136,6 +136,33 @@ export default async function sessionRoutes(app: FastifyInstance) {
return reply.send(session);
});
/**
* PATCH /sessions/:id/execution-mode
* 切换双通道执行模式 (qper / agent)
*/
app.patch('/:id/execution-mode', async (req, reply) => {
const { id } = req.params as { id: string };
const { executionMode } = req.body as { executionMode: string };
if (!executionMode || !['qper', 'agent'].includes(executionMode)) {
return reply.status(400).send({ error: 'executionMode must be "qper" or "agent"' });
}
const session = await prisma.ssaSession.findUnique({ where: { id } });
if (!session) {
return reply.status(404).send({ error: 'Session not found' });
}
await (prisma.ssaSession as any).update({
where: { id },
data: { executionMode },
});
logger.info('[SSA:Session] Execution mode switched', { sessionId: id, executionMode });
return reply.send({ success: true, executionMode });
});
// 获取消息历史
app.get('/:id/messages', async (req, reply) => {
const { id } = req.params as { id: string };

View File

@@ -0,0 +1,277 @@
/**
* AgentCoderService — Agent 通道编码 Agent
*
* 职责:根据分析计划 + 数据上下文,通过 LLM 生成可执行的 R 代码。
*
* Prompt 约束R 代码规范底线):
* - 必须使用 block_helpers.R 的输出函数
* - 必须使用 load_input_data() 加载数据
* - 禁止安装新包(只能用预装包)
* - 禁止访问网络和文件系统(沙箱限制)
* - 代码最后必须 return 一个包含 report_blocks 的 list
*/
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import { tokenTruncationService } from './TokenTruncationService.js';
import type { AgentPlan } from './AgentPlannerService.js';
const MODEL = 'deepseek-v3';
export interface GeneratedCode {
code: string;
explanation: string;
requiredPackages: string[];
}
export class AgentCoderService {
/**
* 非流式生成(重试场景使用)
*/
async generateCode(
sessionId: string,
plan: AgentPlan,
errorFeedback?: string,
): Promise<GeneratedCode> {
const dataContext = await this.buildDataContext(sessionId);
const systemPrompt = this.buildSystemPrompt(dataContext);
const userMessage = errorFeedback
? this.buildRetryMessage(plan, errorFeedback)
: this.buildFirstMessage(plan);
const messages: LLMMessage[] = [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userMessage },
];
logger.info('[AgentCoder] Generating R code', {
sessionId,
planTitle: plan.title,
isRetry: !!errorFeedback,
});
const adapter = LLMFactory.getAdapter(MODEL as any);
const response = await adapter.chat(messages, {
temperature: 0.2,
maxTokens: 4000,
});
const content = response.content || '';
const result = this.parseCode(content);
logger.info('[AgentCoder] R code generated', {
sessionId,
codeLength: result.code.length,
packages: result.requiredPackages,
});
return result;
}
/**
* 流式生成 — 每积累 CHUNK_SIZE 字符回调一次,前端实时显示代码
*/
async generateCodeStream(
sessionId: string,
plan: AgentPlan,
onProgress: (accumulated: string) => void,
errorFeedback?: string,
): Promise<GeneratedCode> {
const dataContext = await this.buildDataContext(sessionId);
const systemPrompt = this.buildSystemPrompt(dataContext);
const userMessage = errorFeedback
? this.buildRetryMessage(plan, errorFeedback)
: this.buildFirstMessage(plan);
const messages: LLMMessage[] = [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userMessage },
];
logger.info('[AgentCoder] Generating R code (stream)', {
sessionId, planTitle: plan.title, isRetry: !!errorFeedback,
});
const adapter = LLMFactory.getAdapter(MODEL as any);
let fullContent = '';
let lastSentLen = 0;
const CHUNK_SIZE = 150;
for await (const chunk of adapter.chatStream(messages, {
temperature: 0.2,
maxTokens: 4000,
})) {
if (chunk.content) {
fullContent += chunk.content;
if (fullContent.length - lastSentLen >= CHUNK_SIZE) {
onProgress(fullContent);
lastSentLen = fullContent.length;
}
}
}
if (fullContent.length > lastSentLen) {
onProgress(fullContent);
}
const result = this.parseCode(fullContent);
logger.info('[AgentCoder] R code generated (stream)', {
sessionId, codeLength: result.code.length, packages: result.requiredPackages,
});
return result;
}
private async buildDataContext(sessionId: string): Promise<string> {
const blackboard = await sessionBlackboardService.get(sessionId);
if (!blackboard) return '(无数据上下文)';
const truncated = tokenTruncationService.truncate(blackboard, {
maxTokens: 1500,
strategy: 'balanced',
});
return tokenTruncationService.toPromptString(truncated);
}
private buildSystemPrompt(dataContext: string): string {
return `你是一位 R 统计编程专家Coder Agent。你的职责是根据分析计划生成可在 R Docker 沙箱中执行的 R 代码。
## 数据上下文
${dataContext}
## R 代码规范(铁律)
### 数据加载(重要!)
数据已由执行环境**自动加载**到变量 \`df\`data.frame 格式)。
**禁止**自己调用 \`load_input_data()\`,直接使用 \`df\` 即可。
\`\`\`r
# df 已存在,直接使用
str(df) # 查看结构
\`\`\`
### 输出规范
代码最后必须返回一个 list包含 report_blocks 字段:
\`\`\`r
# 使用 block_helpers.R 中的函数构造 Block
blocks <- list()
blocks[[length(blocks) + 1]] <- make_markdown_block("## 分析结果\\n...")
blocks[[length(blocks) + 1]] <- make_table_block_from_df(result_df, title = "表1. 统计结果")
blocks[[length(blocks) + 1]] <- make_image_block(base64_data, title = "图1. 可视化")
blocks[[length(blocks) + 1]] <- make_kv_block(list("P值" = "0.023", "效应量" = "0.45"))
# 必须以此格式返回
list(
status = "success",
method = "使用的统计方法",
report_blocks = blocks
)
\`\`\`
### 可用辅助函数(由 block_helpers.R 提供)
- \`make_markdown_block(content, title)\` — Markdown 文本块
- \`make_table_block(headers, rows, title, footnote)\` — 表格块
- \`make_table_block_from_df(df_arg, title, footnote, digits)\` — 从 data.frame 生成表格块(注意参数名不要与 df 变量冲突)
- \`make_image_block(base64_data, title, alt)\` — 图片块
- \`make_kv_block(items, title)\` — 键值对块
### 图表生成
\`\`\`r
library(base64enc)
tmp_file <- tempfile(fileext = ".png")
png(tmp_file, width = 800, height = 600, res = 120)
# ... 绑图代码 ...
dev.off()
base64_data <- paste0("data:image/png;base64,", base64encode(tmp_file))
unlink(tmp_file)
\`\`\`
### 预装可用包(仅限以下包,禁止使用其他包)
base, stats, utils, graphics, grDevices,
ggplot2, dplyr, tidyr, broom, gtsummary, gt, scales, gridExtra,
car, lmtest, survival, meta, base64enc, glue, jsonlite, cowplot
### 禁止事项
1. 禁止 install.packages() — 只能用上面列出的预装包
2. 禁止调用 load_input_data() — 数据已自动加载到 df
3. 禁止访问外部网络 — 无 httr/curl 网络请求
4. 禁止读写沙箱外文件 — 只能用 tempfile()
5. 禁止 system() / shell() 命令
6. 所有数字结果必须用 tryCatch 包裹,防止 NA/NaN 导致崩溃
7. 禁止使用 pROC, nortest, exact2x2 等未安装的包
## 输出格式
请在 \`\`\`r ... \`\`\` 代码块中输出完整 R 代码,代码块外简要说明。`;
}
private buildFirstMessage(plan: AgentPlan): string {
const planDesc = plan.steps
.map(s => `${s.order}. ${s.method}: ${s.description} (${s.rationale})`)
.join('\n');
return `请根据以下分析计划生成完整的 R 代码:
## 分析计划:${plan.title}
- 研究设计:${plan.designType}
- 结局变量:${plan.variables.outcome.join(', ') || '未指定'}
- 预测变量:${plan.variables.predictors.join(', ') || '未指定'}
- 分组变量:${plan.variables.grouping || '无'}
- 混杂因素:${plan.variables.confounders.join(', ') || '无'}
## 分析步骤
${planDesc}
## 需要验证的假设
${plan.assumptions.join('\n') || '无特殊假设'}
请生成完整、可直接执行的 R 代码。`;
}
private buildRetryMessage(plan: AgentPlan, errorFeedback: string): string {
return `上一次生成的 R 代码执行失败,错误信息如下:
\`\`\`
${errorFeedback}
\`\`\`
请修复代码中的问题并重新生成完整的 R 代码。
原分析计划:${plan.title}
研究设计:${plan.designType}
结局变量:${plan.variables.outcome.join(', ') || '未指定'}
分组变量:${plan.variables.grouping || '无'}
请确保:
1. 修复上述错误
2. 使用 tryCatch 包裹关键计算步骤
3. 处理可能的 NA/NaN 值
4. 保持 report_blocks 输出格式`;
}
private parseCode(content: string): GeneratedCode {
const codeMatch = content.match(/```r\s*([\s\S]*?)```/);
const code = codeMatch ? codeMatch[1].trim() : content;
const packageRegex = /library\((\w+)\)/g;
const packages: string[] = [];
let match;
while ((match = packageRegex.exec(code)) !== null) {
packages.push(match[1]);
}
const explanation = content
.replace(/```r[\s\S]*?```/g, '')
.trim()
.slice(0, 500);
return { code, explanation, requiredPackages: packages };
}
}
export const agentCoderService = new AgentCoderService();

View File

@@ -0,0 +1,175 @@
/**
* AgentPlannerService — Agent 通道规划 Agent
*
* 职责:接收用户自然语言需求 + 数据上下文,通过 LLM 生成结构化分析计划。
* 计划以自然语言 + JSON 混合格式输出,供 CoderAgent 消费。
*
* Prompt 约束(统计规则底线):
* - 必须声明研究设计类型
* - 必须说明变量角色(结局 / 预测 / 混杂)
* - 必须给出统计方法选择理由
* - 禁止编造数据或预测结果
*/
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import { tokenTruncationService } from './TokenTruncationService.js';
const MODEL = 'deepseek-v3';
export interface AgentPlan {
title: string;
designType: string;
variables: {
outcome: string[];
predictors: string[];
grouping: string | null;
confounders: string[];
};
steps: Array<{
order: number;
method: string;
description: string;
rationale: string;
}>;
assumptions: string[];
rawText: string;
}
export class AgentPlannerService {
async generatePlan(
sessionId: string,
userQuery: string,
conversationHistory: LLMMessage[],
): Promise<AgentPlan> {
const dataContext = await this.buildDataContext(sessionId);
const systemPrompt = this.buildSystemPrompt(dataContext);
const messages: LLMMessage[] = [
{ role: 'system', content: systemPrompt },
...conversationHistory.filter(m => m.role !== 'system').slice(-10),
{
role: 'user',
content: `请为以下分析需求制定统计分析计划:\n\n${userQuery}\n\n请严格按照指定的 JSON 格式输出。`,
},
];
logger.info('[AgentPlanner] Generating plan', { sessionId, queryLength: userQuery.length });
const adapter = LLMFactory.getAdapter(MODEL as any);
const response = await adapter.chat(messages, {
temperature: 0.3,
maxTokens: 2000,
});
const content = response.content || '';
const plan = this.parsePlan(content);
logger.info('[AgentPlanner] Plan generated', {
sessionId,
title: plan.title,
stepsCount: plan.steps.length,
});
return plan;
}
private async buildDataContext(sessionId: string): Promise<string> {
const blackboard = await sessionBlackboardService.get(sessionId);
if (!blackboard) return '(暂无数据上下文,用户尚未上传数据)';
const truncated = tokenTruncationService.truncate(blackboard, {
maxTokens: 2000,
strategy: 'balanced',
});
return tokenTruncationService.toPromptString(truncated);
}
private buildSystemPrompt(dataContext: string): string {
return `你是一位高级统计分析规划师Planner Agent。你的职责是根据用户的研究需求和数据特征制定严谨的统计分析计划。
## 数据上下文
${dataContext}
## 规划规则(铁律)
1. 必须声明研究设计类型(横断面 / 队列 / 病例对照 / RCT / 前后对比等)
2. 必须明确变量角色:结局变量(outcome)、预测变量(predictors)、分组变量(grouping)、混杂因素(confounders)
3. 统计方法选择必须给出理由(数据类型、分布、样本量等)
4. 连续变量需考虑正态性:正态→参数方法,非正态→非参数方法
5. 分类变量的期望频数 < 5 时应选择 Fisher 精确检验而非卡方检验
6. 多因素分析需考虑共线性和 EPVEvents Per Variable
7. 禁止编造任何数据或预测分析结果
## 输出格式
请输出 JSON 格式的分析计划,结构如下:
\`\`\`json
{
"title": "分析计划标题",
"designType": "研究设计类型",
"variables": {
"outcome": ["结局变量名"],
"predictors": ["预测变量名"],
"grouping": "分组变量名或null",
"confounders": ["混杂因素"]
},
"steps": [
{
"order": 1,
"method": "统计方法名称",
"description": "这一步做什么",
"rationale": "为什么选这个方法"
}
],
"assumptions": ["需要验证的统计假设"]
}
\`\`\`
在 JSON 代码块之后,可以用自然语言补充说明。`;
}
private parsePlan(content: string): AgentPlan {
const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
if (jsonMatch) {
try {
const parsed = JSON.parse(jsonMatch[1].trim());
return {
title: parsed.title || '统计分析计划',
designType: parsed.designType || '未指定',
variables: {
outcome: parsed.variables?.outcome || [],
predictors: parsed.variables?.predictors || [],
grouping: parsed.variables?.grouping || null,
confounders: parsed.variables?.confounders || [],
},
steps: (parsed.steps || []).map((s: any, i: number) => ({
order: s.order || i + 1,
method: s.method || '未指定',
description: s.description || '',
rationale: s.rationale || '',
})),
assumptions: parsed.assumptions || [],
rawText: content,
};
} catch (e) {
logger.warn('[AgentPlanner] Failed to parse JSON plan, using raw text');
}
}
return {
title: '统计分析计划',
designType: '未指定',
variables: { outcome: [], predictors: [], grouping: null, confounders: [] },
steps: [{ order: 1, method: '综合分析', description: content.slice(0, 500), rationale: '' }],
assumptions: [],
rawText: content,
};
}
}
export const agentPlannerService = new AgentPlannerService();

View File

@@ -0,0 +1,183 @@
/**
* AgentReviewerService — Agent 通道审核 Agent
*
* 职责:审核分析计划和生成的 R 代码,确保统计方法学正确性和代码安全性。
* 充当独立的 "第二双眼睛",在代码执行前拦截潜在问题。
*
* 审核维度:
* 1. 统计方法学合理性(方法选择 vs 数据特征)
* 2. 变量角色正确性(结局/预测/混杂)
* 3. 代码安全性(无危险操作)
* 4. 输出格式合规性(使用 block_helpers
*/
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import type { Message as LLMMessage } from '../../../common/llm/adapters/types.js';
import { logger } from '../../../common/logging/index.js';
import type { AgentPlan } from './AgentPlannerService.js';
const MODEL = 'deepseek-v3';
export interface ReviewResult {
passed: boolean;
score: number;
comments: string[];
issues: Array<{
severity: 'critical' | 'warning' | 'info';
category: 'methodology' | 'code_safety' | 'output_format' | 'variable_usage';
message: string;
}>;
rawText: string;
}
export class AgentReviewerService {
async review(
plan: AgentPlan,
code: string,
dataContext?: string,
): Promise<ReviewResult> {
const systemPrompt = this.buildSystemPrompt();
const userMessage = this.buildReviewRequest(plan, code, dataContext);
const messages: LLMMessage[] = [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userMessage },
];
logger.info('[AgentReviewer] Reviewing plan + code', {
planTitle: plan.title,
codeLength: code.length,
});
const adapter = LLMFactory.getAdapter(MODEL as any);
const response = await adapter.chat(messages, {
temperature: 0.1,
maxTokens: 1500,
});
const content = response.content || '';
const result = this.parseReview(content);
logger.info('[AgentReviewer] Review complete', {
passed: result.passed,
score: result.score,
issueCount: result.issues.length,
});
return result;
}
private buildSystemPrompt(): string {
return `你是一位统计方法学审核专家Reviewer Agent。你的职责是审核统计分析计划和 R 代码,确保科学严谨性和代码安全性。
## 审核清单
### 1. 统计方法学methodology
- 研究设计类型是否与分析方法匹配?
- 变量类型(连续/分类)是否与所选方法的要求一致?
- 是否考虑了正态性检验(连续变量)?
- 分类变量的频数是否满足方法要求(如卡方检验的期望频数 ≥ 5
- 多因素分析的 EPV 是否 ≥ 10
- 是否遗漏了重要的混杂因素控制?
### 2. 代码安全性code_safety
- 是否包含 install.packages()[critical]
- 是否包含 system() / shell() 调用?[critical]
- 是否有外部网络请求?[critical]
- 是否有文件系统越权访问?[critical]
### 3. 输出格式output_format
- 是否使用 block_helpers.R 的函数构造输出?
- 最终是否返回包含 report_blocks 的 list
- 表格是否有标题和合理的列名?
### 4. 变量使用variable_usage
- 代码中使用的变量名是否与数据中的列名一致?
- 变量角色分配是否合理?
## 输出格式
请输出 JSON
\`\`\`json
{
"passed": true/false,
"score": 0-100,
"issues": [
{
"severity": "critical|warning|info",
"category": "methodology|code_safety|output_format|variable_usage",
"message": "问题描述"
}
],
"comments": ["审核意见1", "审核意见2"]
}
\`\`\`
- passed: 没有 critical 级别问题时为 true
- score: 综合评分0-100`;
}
private buildReviewRequest(plan: AgentPlan, code: string, dataContext?: string): string {
const planSummary = [
`标题: ${plan.title}`,
`设计: ${plan.designType}`,
`结局变量: ${plan.variables.outcome.join(', ') || '未指定'}`,
`分组变量: ${plan.variables.grouping || '无'}`,
`步骤: ${plan.steps.map(s => `${s.order}. ${s.method}`).join(', ')}`,
].join('\n');
let prompt = `请审核以下统计分析计划和 R 代码:
## 分析计划
${planSummary}
## R 代码
\`\`\`r
${code}
\`\`\``;
if (dataContext) {
prompt += `\n\n## 数据上下文\n${dataContext}`;
}
prompt += '\n\n请严格按照审核清单逐项检查输出 JSON 格式的审核结果。';
return prompt;
}
private parseReview(content: string): ReviewResult {
const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
if (jsonMatch) {
try {
const parsed = JSON.parse(jsonMatch[1].trim());
const issues = (parsed.issues || []).map((issue: any) => ({
severity: issue.severity || 'info',
category: issue.category || 'methodology',
message: issue.message || '',
}));
const hasCritical = issues.some((i: any) => i.severity === 'critical');
return {
passed: hasCritical ? false : (parsed.passed ?? true),
score: parsed.score ?? (hasCritical ? 30 : 80),
comments: parsed.comments || [],
issues,
rawText: content,
};
} catch (e) {
logger.warn('[AgentReviewer] Failed to parse review JSON');
}
}
return {
passed: true,
score: 70,
comments: ['审核结果解析失败,默认放行(请人工检查)'],
issues: [],
rawText: content,
};
}
}
export const agentReviewerService = new AgentReviewerService();

View File

@@ -17,6 +17,10 @@ import { tokenTruncationService } from './TokenTruncationService.js';
import { methodConsultService } from './MethodConsultService.js';
import { askUserService, type AskUserResponse } from './AskUserService.js';
import { toolOrchestratorService } from './ToolOrchestratorService.js';
import { agentPlannerService } from './AgentPlannerService.js';
import { agentCoderService } from './AgentCoderService.js';
import { codeRunnerService } from './CodeRunnerService.js';
import { prisma } from '../../../config/database.js';
import type { IntentType } from './SystemPromptService.js';
import type { IntentResult } from './IntentRouterService.js';
@@ -387,6 +391,377 @@ export class ChatHandlerService {
};
}
// ────────────────────────────────────────────
// Agent 通道入口(双通道架构 Phase 1 骨架)
// ────────────────────────────────────────────
/**
* Agent 模式入口 — 三步确认式管线
*
* 状态机:
* 新请求 → agentGeneratePlan → plan_pending等用户确认
* 用户确认计划 → agentStreamCode → code_pending等用户确认
* 用户确认代码 → agentExecuteCode → completed
*/
async handleAgentMode(
sessionId: string,
conversationId: string,
userContent: string,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
try {
// 1. 检查是否有等待确认的执行记录
const activeExec = await (prisma as any).ssaAgentExecution.findFirst({
where: { sessionId, status: { in: ['plan_pending', 'code_pending'] } },
orderBy: { createdAt: 'desc' },
});
if (activeExec) {
const action = this.parseAgentAction(userContent);
if (activeExec.status === 'plan_pending') {
if (action === 'confirm') {
return await this.agentStreamCode(activeExec, sessionId, conversationId, writer, placeholderMessageId);
}
if (action === 'cancel') {
return await this.agentCancel(activeExec, sessionId, conversationId, writer, placeholderMessageId);
}
}
if (activeExec.status === 'code_pending') {
if (action === 'confirm') {
return await this.agentExecuteCode(activeExec, sessionId, conversationId, writer, placeholderMessageId);
}
if (action === 'cancel') {
return await this.agentCancel(activeExec, sessionId, conversationId, writer, placeholderMessageId);
}
}
}
// 2. 无挂起确认 — 检查是否是分析请求
const blackboard = await sessionBlackboardService.get(sessionId);
const hasData = !!blackboard?.dataOverview;
if (hasData && this.looksLikeAnalysisRequest(userContent)) {
return await this.agentGeneratePlan(sessionId, conversationId, userContent, writer, placeholderMessageId);
}
return await this.handleAgentChat(sessionId, conversationId, writer, placeholderMessageId, blackboard);
} catch (error: any) {
logger.error('[SSA:ChatHandler] Agent mode error', { sessionId, error: error.message });
await conversationService.markAssistantError(placeholderMessageId, error.message);
return { messageId: placeholderMessageId, intent: 'analyze', success: false, error: error.message };
}
}
/** 解析用户回复中的确认/取消意图 */
private parseAgentAction(content: string): 'confirm' | 'cancel' | 'other' {
const lc = content.toLowerCase();
if (lc.includes('agent_confirm') || lc.includes('确认') || lc.includes('执行代码') || lc.includes('开始生成')) return 'confirm';
if (lc.includes('agent_cancel') || lc.includes('取消')) return 'cancel';
return 'other';
}
// ── Agent Step 1: 生成分析计划 → 等用户确认 ──
private async agentGeneratePlan(
sessionId: string,
conversationId: string,
userContent: string,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
const sendEvent = (type: string, data: Record<string, any>) => {
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
};
const execution = await (prisma as any).ssaAgentExecution.create({
data: { sessionId, query: userContent, status: 'planning' },
});
sendEvent('agent_planning', { executionId: execution.id, message: '正在制定分析计划...' });
const conversationHistory = await conversationService.buildContext(sessionId, conversationId, 'analyze');
const plan = await agentPlannerService.generatePlan(sessionId, userContent, conversationHistory);
// planText 存原始文本, reviewResult(JSON) 存结构化计划以便恢复
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: {
planText: plan.rawText,
reviewResult: JSON.parse(JSON.stringify(plan)),
status: 'plan_pending',
},
});
sendEvent('agent_plan_ready', {
executionId: execution.id,
plan: { title: plan.title, designType: plan.designType, steps: plan.steps },
planText: plan.rawText,
});
// 流式解释计划
const hint = [
`[系统指令] 你刚刚制定了分析计划「${plan.title}」,包含 ${plan.steps.length} 个步骤。`,
'请用简洁的自然语言解释这个计划,然后告知用户可以确认后开始生成 R 代码。',
'【禁止】不要编造数值或结果。',
].join('\n');
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 800 });
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
sendEvent('ask_user', {
questionId: `agent_plan_${execution.id}`,
text: '请确认分析计划',
options: [
{ id: 'agent_confirm_plan', label: '确认方案,生成代码' },
{ id: 'agent_cancel', label: '取消' },
],
});
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
// ── Agent Step 2: 流式生成代码 → 等用户确认 ──
private async agentStreamCode(
execution: any,
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
const sendEvent = (type: string, data: Record<string, any>) => {
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
};
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { status: 'coding' },
});
sendEvent('code_generating', { executionId: execution.id, partialCode: '', message: '正在生成 R 代码...' });
const plan = execution.reviewResult as any;
const generated = await agentCoderService.generateCodeStream(
sessionId, plan,
(accumulated: string) => {
sendEvent('code_generating', { executionId: execution.id, partialCode: accumulated });
},
);
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { generatedCode: generated.code, status: 'code_pending' },
});
sendEvent('code_generated', {
executionId: execution.id,
code: generated.code,
explanation: generated.explanation,
});
// 流式说明代码
const hint = [
`[系统指令] R 代码已生成(${generated.code.split('\n').length} 行),使用 ${generated.requiredPackages.join(', ') || '基础包'}`,
'请简要说明代码逻辑,告知用户可以确认执行。',
'【禁止】不要在对话中贴代码(工作区已展示),不要编造结果。',
].join('\n');
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 600 });
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
sendEvent('ask_user', {
questionId: `agent_code_${execution.id}`,
text: '代码已生成,请确认执行',
options: [
{ id: 'agent_confirm_code', label: '执行代码' },
{ id: 'agent_cancel', label: '取消' },
],
});
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
// ── Agent Step 3: 执行代码 + 重试循环 ──
private async agentExecuteCode(
execution: any,
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
const sendEvent = (type: string, data: Record<string, any>) => {
writer.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
};
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { status: 'executing' },
});
const plan = execution.reviewResult as any;
let currentCode = execution.generatedCode as string;
let lastError: string | null = null;
for (let attempt = 0; attempt <= codeRunnerService.maxRetries; attempt++) {
sendEvent('code_executing', {
executionId: execution.id,
attempt: attempt + 1,
message: attempt === 0 ? '正在执行 R 代码...' : `${attempt + 1} 次重试执行...`,
});
const execResult = await codeRunnerService.executeCode(sessionId, currentCode);
if (execResult.success) {
const durationMs = execResult.durationMs || 0;
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: {
executionResult: execResult as any,
reportBlocks: execResult.reportBlocks as any,
generatedCode: currentCode,
status: 'completed',
retryCount: attempt,
durationMs,
},
});
sendEvent('code_result', {
executionId: execution.id,
reportBlocks: execResult.reportBlocks,
code: currentCode,
durationMs,
});
// 流式总结结果
const hint = [
`[系统指令] R 代码执行完成(${durationMs}ms生成 ${(execResult.reportBlocks || []).length} 个报告模块。`,
'请简要解释结果的统计学意义。',
'【禁止】不要编造数值(工作区已展示完整结果)。',
].join('\n');
const msgs = await conversationService.buildContext(sessionId, conversationId, 'analyze', hint);
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 800 });
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
lastError = execResult.error || '执行失败';
if (attempt < codeRunnerService.maxRetries) {
sendEvent('code_error', { executionId: execution.id, message: lastError, willRetry: true });
sendEvent('code_retry', { executionId: execution.id, retryCount: attempt + 1, message: `错误: ${lastError},正在修复代码...` });
const retry = await agentCoderService.generateCode(sessionId, plan, lastError);
currentCode = retry.code;
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { generatedCode: currentCode, retryCount: attempt + 1 },
});
sendEvent('code_generated', { executionId: execution.id, code: currentCode });
}
}
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { status: 'error', errorMessage: lastError, retryCount: codeRunnerService.maxRetries },
});
sendEvent('code_error', {
executionId: execution.id,
message: `经过 ${codeRunnerService.maxRetries + 1} 次尝试仍然失败: ${lastError}`,
willRetry: false,
});
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
// ── Agent 取消 ──
private async agentCancel(
execution: any,
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
await (prisma as any).ssaAgentExecution.update({
where: { id: execution.id },
data: { status: 'error', errorMessage: '用户取消' },
});
const msgs = await conversationService.buildContext(
sessionId, conversationId, 'analyze', '[系统指令] 用户取消了当前分析流程。请简短回复确认取消。',
);
const sr = await conversationService.streamToSSE(msgs, writer, { temperature: 0.5, maxTokens: 200 });
await conversationService.finalizeAssistantMessage(placeholderMessageId, sr.content, sr.thinking, sr.tokens);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
// ── Agent 自由对话 ──
private async handleAgentChat(
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
blackboard: any,
): Promise<HandleResult> {
let toolOutputs = '';
if (blackboard) {
const truncated = tokenTruncationService.truncate(blackboard, { maxTokens: 2000, strategy: 'balanced' });
const parts: string[] = [];
if (truncated.overview) parts.push(`数据概览:\n${truncated.overview}`);
if (truncated.variables) parts.push(`变量列表:\n${truncated.variables}`);
if (truncated.pico) parts.push(`PICO 推断:\n${truncated.pico}`);
if (parts.length > 0) toolOutputs = parts.join('\n\n');
}
const agentHint = [
'[当前模式: Agent 代码生成通道]',
'你是一位高级统计分析 Agent。你可以',
'1. 与临床研究者自由讨论统计分析需求',
'2. 帮助理解数据特征、研究设计、统计方法选择',
'3. 当用户明确分析需求后,自动制定计划并生成 R 代码执行',
'',
'注意:禁止编造或模拟任何分析结果和数值。',
blackboard?.dataOverview
? '用户已上传数据,你可以结合数据概览回答问题。当用户提出分析需求时,会自动触发分析流程。'
: '用户尚未上传数据。请引导用户先上传研究数据文件。',
].join('\n');
const fullToolOutputs = toolOutputs ? `${toolOutputs}\n\n${agentHint}` : agentHint;
const messages = await conversationService.buildContext(sessionId, conversationId, 'analyze', fullToolOutputs);
const result = await conversationService.streamToSSE(messages, writer, { temperature: 0.7, maxTokens: 2000 });
await conversationService.finalizeAssistantMessage(placeholderMessageId, result.content, result.thinking, result.tokens);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
/** 简单启发式:判断用户消息是否像分析请求 */
private looksLikeAnalysisRequest(content: string): boolean {
const kw = [
'分析', '比较', '检验', '回归', '相关', '差异', '统计',
'生存', '卡方', 'logistic', 't检验', 'anova',
'基线', '特征表', '描述性', '预测', '影响因素',
'帮我做', '帮我跑', '开始分析', '执行分析',
];
const lc = content.toLowerCase();
return kw.some(k => lc.includes(k));
}
// ────────────────────────────────────────────
// ask_user 响应处理Phase III
// ────────────────────────────────────────────

View File

@@ -0,0 +1,182 @@
/**
* CodeRunnerService — Agent 通道代码执行器
*
* 职责:将 LLM 生成的 R 代码发送到 R Docker 沙箱执行,
* 处理执行结果,失败时反馈错误给 CoderAgent 进行重试。
*
* 执行流程:
* 1. 构建执行 payloadcode + data_source
* 2. 调用 R Docker POST /api/v1/execute-code
* 3. 成功 → 返回 report_blocks
* 4. 失败 → 返回错误信息,供重试循环使用
*
* 安全约束:
* - 最大重试次数 MAX_RETRIES = 3
* - 单次执行超时 120 秒
*/
import axios, { AxiosInstance } from 'axios';
import { logger } from '../../../common/logging/index.js';
import { storage } from '../../../common/storage/index.js';
import { prisma } from '../../../config/database.js';
const MAX_RETRIES = 3;
const EXECUTE_TIMEOUT_MS = 130000;
export interface CodeExecutionResult {
success: boolean;
reportBlocks?: any[];
consoleOutput?: string[];
durationMs?: number;
error?: string;
}
export class CodeRunnerService {
private client: AxiosInstance;
constructor() {
const baseURL = process.env.R_SERVICE_URL || 'http://localhost:8082';
this.client = axios.create({
baseURL,
timeout: EXECUTE_TIMEOUT_MS,
headers: { 'Content-Type': 'application/json' },
});
}
/**
* 执行 R 代码并返回结果
*/
async executeCode(
sessionId: string,
code: string,
): Promise<CodeExecutionResult> {
const startTime = Date.now();
try {
const dataSource = await this.buildDataSource(sessionId);
const payload = {
code: this.wrapCode(code, dataSource),
session_id: sessionId,
timeout: 120,
};
logger.info('[CodeRunner] Executing R code', {
sessionId,
codeLength: code.length,
});
const response = await this.client.post('/api/v1/execute-code', payload);
const durationMs = Date.now() - startTime;
if (response.data?.status === 'success') {
const result = response.data.result || {};
const reportBlocks = result.report_blocks || result.data?.report_blocks || [];
logger.info('[CodeRunner] Execution success', {
sessionId,
durationMs,
blockCount: reportBlocks.length,
});
return {
success: true,
reportBlocks,
consoleOutput: response.data.console_output,
durationMs,
};
}
const errorMsg = response.data?.message || response.data?.user_hint || 'R 执行返回非成功状态';
logger.warn('[CodeRunner] Execution failed (R returned error)', {
sessionId,
durationMs,
error: errorMsg,
});
return {
success: false,
error: errorMsg,
consoleOutput: response.data?.console_output,
durationMs,
};
} catch (error: any) {
const durationMs = Date.now() - startTime;
const statusCode = error.response?.status;
if (statusCode === 502 || statusCode === 504) {
return {
success: false,
error: 'R 统计服务超时或崩溃,请检查代码是否有死循环或内存溢出',
durationMs,
};
}
const errorMsg = error.response?.data?.message
|| error.response?.data?.user_hint
|| error.message
|| 'R 服务调用失败';
logger.error('[CodeRunner] Execution error', {
sessionId,
durationMs,
error: errorMsg,
});
return {
success: false,
error: errorMsg,
durationMs,
};
}
}
get maxRetries(): number {
return MAX_RETRIES;
}
/**
* 包装用户代码:注入 data_source 和 input 变量
* URL 通过 Sys.getenv 读取(由 R 端点的 sandbox_env 设置),避免字符串拼接注入风险
*/
private wrapCode(userCode: string, dataSource: { type: string; oss_url?: string }): string {
const escapedUrl = (dataSource.oss_url || '').replace(/\\/g, '\\\\').replace(/"/g, '\\"');
return `
# === 自动注入:数据加载 ===
input <- list(
data_source = list(
type = "${dataSource.type}",
oss_url = "${escapedUrl}"
)
)
# 加载数据到 dfLLM 代码直接使用 df不需要再调用 load_input_data
df <- load_input_data(input)
message(paste0("[Agent] Data loaded: ", nrow(df), " rows x ", ncol(df), " cols"))
# === 用户代码开始 ===
${userCode}
# === 用户代码结束 ===
`.trim();
}
/**
* 构建数据源(从 session 读取 OSS key → 预签名 URL
*/
private async buildDataSource(sessionId: string): Promise<{ type: string; oss_url: string }> {
const session = await prisma.ssaSession.findUnique({
where: { id: sessionId },
select: { dataOssKey: true },
});
const ossKey = session?.dataOssKey;
if (!ossKey) {
throw new Error('请先上传数据文件');
}
const signedUrl = await storage.getUrl(ossKey);
return { type: 'oss', oss_url: signedUrl };
}
}
export const codeRunnerService = new CodeRunnerService();

View File

@@ -0,0 +1,444 @@
/**
* SSA 双通道架构 E2E 测试
*
* 测试 Phase 1~3
* T1. 数据库迁移验证 — execution_mode 字段 + ssa_agent_executions 表
* T2. Session execution mode 切换 API
* T3. R Docker /execute-code 端点
* T4. Agent 模式对话(自由对话 + 分析请求判断)
* T5. AgentPlannerService 规划能力
* T6. AgentCoderService 代码生成能力
* T7. AgentReviewerService 审核能力
* T8. ModeToggle 前端集成点验证API 层面)
*
* 前置条件:
* - PostgreSQL 运行中Docker Desktop
* - R Docker 运行中可选T3 跳过如不可用)
* - DeepSeek API key 配置在 .env
* - 至少有一个 SSA session有上传数据
*
* 运行: npx tsx tests/e2e-dual-channel-test.ts
*/
import { prisma } from '../src/config/database.js';
import { logger } from '../src/common/logging/index.js';
import axios from 'axios';
const R_SERVICE_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';
const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:3000';
interface TestResult {
id: string;
name: string;
status: 'pass' | 'fail' | 'skip';
duration: number;
message?: string;
}
const results: TestResult[] = [];
async function runTest(
id: string,
name: string,
fn: () => Promise<void>,
): Promise<void> {
const start = Date.now();
try {
await fn();
const dur = Date.now() - start;
results.push({ id, name, status: 'pass', duration: dur });
console.log(`${id} ${name} (${dur}ms)`);
} catch (error: any) {
const dur = Date.now() - start;
if (error.message?.startsWith('SKIP:')) {
results.push({ id, name, status: 'skip', duration: dur, message: error.message });
console.log(` ⏭️ ${id} ${name}${error.message}`);
} else {
results.push({ id, name, status: 'fail', duration: dur, message: error.message });
console.log(`${id} ${name}${error.message}`);
}
}
}
function assert(condition: boolean, message: string): void {
if (!condition) throw new Error(message);
}
// ═══════════════════════════════════════════
// T1: 数据库迁移验证
// ═══════════════════════════════════════════
async function t1_dbMigration() {
// 检查 ssa_sessions 表有 execution_mode 列
const colCheck = await prisma.$queryRaw<any[]>`
SELECT column_name, column_default
FROM information_schema.columns
WHERE table_schema = 'ssa_schema'
AND table_name = 'ssa_sessions'
AND column_name = 'execution_mode'
`;
assert(colCheck.length === 1, 'execution_mode 列不存在于 ssa_sessions 表');
assert(
colCheck[0].column_default?.includes('qper'),
`execution_mode 默认值应为 qper实际为 ${colCheck[0].column_default}`,
);
// 检查 ssa_agent_executions 表存在
const tableCheck = await prisma.$queryRaw<any[]>`
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'ssa_schema'
AND table_name = 'ssa_agent_executions'
`;
assert(tableCheck.length === 1, 'ssa_agent_executions 表不存在');
// 检查索引
const idxCheck = await prisma.$queryRaw<any[]>`
SELECT indexname
FROM pg_indexes
WHERE schemaname = 'ssa_schema'
AND tablename = 'ssa_agent_executions'
AND indexname = 'idx_ssa_agent_exec_session'
`;
assert(idxCheck.length === 1, 'idx_ssa_agent_exec_session 索引不存在');
}
// ═══════════════════════════════════════════
// T2: Session execution mode CRUD
// ═══════════════════════════════════════════
async function t2_executionModeSwitch() {
// 查找一个现有 session
const session = await prisma.ssaSession.findFirst({
where: { status: 'active' },
orderBy: { createdAt: 'desc' },
});
if (!session) throw new Error('SKIP: 无可用 session请先创建一个 SSA 会话');
// 读取当前 mode应为默认 qper
const current = await prisma.$queryRaw<any[]>`
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
`;
assert(current.length === 1, 'Session 不存在');
const originalMode = current[0].execution_mode;
console.log(` 当前 mode: ${originalMode}`);
// 切换到 agent
await prisma.$executeRaw`
UPDATE ssa_schema.ssa_sessions SET execution_mode = 'agent' WHERE id = ${session.id}
`;
const after = await prisma.$queryRaw<any[]>`
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
`;
assert(after[0].execution_mode === 'agent', '切换到 agent 失败');
// 切回 qper
await prisma.$executeRaw`
UPDATE ssa_schema.ssa_sessions SET execution_mode = 'qper' WHERE id = ${session.id}
`;
const restored = await prisma.$queryRaw<any[]>`
SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
`;
assert(restored[0].execution_mode === 'qper', '切回 qper 失败');
}
// ═══════════════════════════════════════════
// T3: R Docker /execute-code 端点
// ═══════════════════════════════════════════
async function t3_rExecuteCode() {
// 先检查 R 服务是否可用
try {
await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
} catch {
throw new Error('SKIP: R Docker 服务不可用');
}
// 执行一段简单的 R 代码
const response = await axios.post(`${R_SERVICE_URL}/api/v1/execute-code`, {
code: `
blocks <- list()
blocks[[1]] <- make_markdown_block("## 测试结果\\n双通道 E2E 测试通过", title = "测试")
blocks[[2]] <- make_kv_block(items = list("状态" = "成功", "时间" = as.character(Sys.time())), title = "概况")
list(status = "success", report_blocks = blocks)
`,
session_id: 'e2e-test',
timeout: 30,
}, { timeout: 35000 });
assert(response.data?.status === 'success', `R 执行状态不是 success: ${response.data?.status}`);
assert(
Array.isArray(response.data?.result?.report_blocks),
'report_blocks 不是数组',
);
assert(
response.data.result.report_blocks.length === 2,
`预期 2 个 block实际 ${response.data.result.report_blocks.length}`,
);
const markdownBlock = response.data.result.report_blocks[0];
assert(markdownBlock.type === 'markdown', `Block 0 类型应为 markdown实际 ${markdownBlock.type}`);
console.log(` R 执行耗时: ${response.data.duration_ms}ms, blocks: ${response.data.result.report_blocks.length}`);
}
// ═══════════════════════════════════════════
// T4: R Docker 错误处理
// ═══════════════════════════════════════════
async function t4_rExecuteCodeError() {
try {
await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
} catch {
throw new Error('SKIP: R Docker 服务不可用');
}
const response = await axios.post(`${R_SERVICE_URL}/api/v1/execute-code`, {
code: 'stop("这是一个故意的错误")',
session_id: 'e2e-test-error',
timeout: 10,
}, { timeout: 15000 });
assert(response.data?.status === 'error', '错误代码应返回 error 状态');
assert(
response.data?.message?.includes('故意的错误'),
`错误消息应包含原始错误: ${response.data?.message}`,
);
console.log(` 错误捕获正确: "${response.data.message}"`);
}
// ═══════════════════════════════════════════
// T5: AgentPlannerService 单元测试
// ═══════════════════════════════════════════
async function t5_agentPlanner() {
const { agentPlannerService } = await import('../src/modules/ssa/services/AgentPlannerService.js');
// 查找有数据的 session
const session = await prisma.ssaSession.findFirst({
where: { status: 'active', dataOssKey: { not: null } },
orderBy: { createdAt: 'desc' },
});
if (!session) throw new Error('SKIP: 无有数据的 session');
const plan = await agentPlannerService.generatePlan(
session.id,
'帮我做一个基线特征表,比较两组的差异',
[],
);
assert(!!plan.title, '计划标题为空');
assert(plan.steps.length > 0, '计划步骤为空');
assert(!!plan.rawText, 'rawText 为空');
console.log(` 计划标题: ${plan.title}`);
console.log(` 设计类型: ${plan.designType}`);
console.log(` 步骤数: ${plan.steps.length}`);
plan.steps.forEach(s => console.log(` ${s.order}. ${s.method}: ${s.description}`));
}
// ═══════════════════════════════════════════
// T6: AgentCoderService 单元测试
// ═══════════════════════════════════════════
async function t6_agentCoder() {
const { agentCoderService } = await import('../src/modules/ssa/services/AgentCoderService.js');
const session = await prisma.ssaSession.findFirst({
where: { status: 'active', dataOssKey: { not: null } },
orderBy: { createdAt: 'desc' },
});
if (!session) throw new Error('SKIP: 无有数据的 session');
const mockPlan = {
title: '基线特征表分析',
designType: '横断面研究',
variables: {
outcome: [],
predictors: [],
grouping: 'group',
confounders: [],
},
steps: [
{ order: 1, method: '描述性统计', description: '生成基线特征表', rationale: '了解数据分布' },
],
assumptions: [],
rawText: '基线特征表分析计划',
};
const generated = await agentCoderService.generateCode(session.id, mockPlan);
assert(generated.code.length > 50, `生成代码太短: ${generated.code.length} chars`);
assert(generated.code.includes('load_input_data') || generated.code.includes('df'), '代码中未包含数据加载');
console.log(` 代码长度: ${generated.code.length} chars`);
console.log(` 依赖包: ${generated.requiredPackages.join(', ') || '(无额外依赖)'}`);
console.log(` 代码前 100 字符: ${generated.code.slice(0, 100).replace(/\n/g, ' ')}...`);
}
// ═══════════════════════════════════════════
// T7: AgentReviewerService 单元测试
// ═══════════════════════════════════════════
async function t7_agentReviewer() {
const { agentReviewerService } = await import('../src/modules/ssa/services/AgentReviewerService.js');
const safePlan = {
title: '基线分析',
designType: '队列研究',
variables: {
outcome: ['death'],
predictors: ['age', 'sex'],
grouping: 'treatment',
confounders: [],
},
steps: [{ order: 1, method: '基线特征表', description: '描述统计', rationale: '基线比较' }],
assumptions: [],
rawText: '',
};
const safeCode = `
df <- load_input_data(input)
library(gtsummary)
tbl <- df %>%
tbl_summary(by = treatment, include = c(age, sex, death)) %>%
add_p()
blocks <- list()
blocks[[1]] <- make_markdown_block("## 基线特征表")
list(status = "success", report_blocks = blocks)
`;
const review = await agentReviewerService.review(safePlan, safeCode);
assert(typeof review.passed === 'boolean', 'passed 应为 boolean');
assert(typeof review.score === 'number', 'score 应为 number');
assert(Array.isArray(review.comments), 'comments 应为 array');
console.log(` 审核通过: ${review.passed}`);
console.log(` 评分: ${review.score}/100`);
console.log(` 问题数: ${review.issues.length}`);
review.comments.forEach(c => console.log(` - ${c}`));
// 测试危险代码审核
const dangerCode = `
install.packages("hacker_pkg")
system("rm -rf /")
df <- load_input_data(input)
list(status = "success", report_blocks = list())
`;
const dangerReview = await agentReviewerService.review(safePlan, dangerCode);
console.log(` 危险代码审核通过: ${dangerReview.passed} (预期 false)`);
console.log(` 危险代码问题数: ${dangerReview.issues.length}`);
if (dangerReview.passed) {
console.log(' ⚠️ 警告: 危险代码未被拦截Prompt 需要加强');
}
}
// ═══════════════════════════════════════════
// T8: Agent Execution 记录 CRUD
// ═══════════════════════════════════════════
async function t8_agentExecutionCrud() {
const session = await prisma.ssaSession.findFirst({
where: { status: 'active' },
orderBy: { createdAt: 'desc' },
});
if (!session) throw new Error('SKIP: 无可用 session');
// 创建
const exec = await (prisma as any).ssaAgentExecution.create({
data: {
sessionId: session.id,
query: 'E2E 测试查询',
status: 'pending',
},
});
assert(!!exec.id, '创建执行记录失败');
console.log(` 创建记录: ${exec.id}`);
// 更新
await (prisma as any).ssaAgentExecution.update({
where: { id: exec.id },
data: {
status: 'completed',
planText: '测试计划',
generatedCode: 'print("hello")',
durationMs: 1234,
},
});
const updated = await (prisma as any).ssaAgentExecution.findUnique({
where: { id: exec.id },
});
assert(updated.status === 'completed', `状态应为 completed实际 ${updated.status}`);
assert(updated.durationMs === 1234, `耗时应为 1234实际 ${updated.durationMs}`);
// 删除(清理)
await (prisma as any).ssaAgentExecution.delete({ where: { id: exec.id } });
console.log(' CRUD 全流程通过');
}
// ═══════════════════════════════════════════
// Main
// ═══════════════════════════════════════════
async function main() {
console.log('\n╔══════════════════════════════════════════════╗');
console.log('║ SSA 双通道架构 E2E 测试 (Phase 1~3) ║');
console.log('╚══════════════════════════════════════════════╝\n');
console.log('📦 Phase 1: 基础设施');
await runTest('T1', '数据库迁移验证execution_mode + agent_executions', t1_dbMigration);
await runTest('T2', 'Session execution mode 切换', t2_executionModeSwitch);
await runTest('T3', 'R Docker /execute-code 正常执行', t3_rExecuteCode);
await runTest('T4', 'R Docker /execute-code 错误处理', t4_rExecuteCodeError);
console.log('\n🤖 Phase 2: Agent 服务');
await runTest('T5', 'AgentPlannerService 规划能力', t5_agentPlanner);
await runTest('T6', 'AgentCoderService R 代码生成', t6_agentCoder);
await runTest('T7', 'AgentReviewerService 审核能力', t7_agentReviewer);
await runTest('T8', 'Agent Execution 记录 CRUD', t8_agentExecutionCrud);
// 汇总
console.log('\n' + '═'.repeat(50));
const passed = results.filter(r => r.status === 'pass').length;
const failed = results.filter(r => r.status === 'fail').length;
const skipped = results.filter(r => r.status === 'skip').length;
const totalMs = results.reduce((s, r) => s + r.duration, 0);
console.log(`\n📊 测试结果: ${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 (总耗时 ${totalMs}ms)`);
if (failed > 0) {
console.log('\n❌ 失败详情:');
results.filter(r => r.status === 'fail').forEach(r => {
console.log(` ${r.id} ${r.name}: ${r.message}`);
});
}
if (skipped > 0) {
console.log('\n⏭ 跳过详情:');
results.filter(r => r.status === 'skip').forEach(r => {
console.log(` ${r.id} ${r.name}: ${r.message}`);
});
}
console.log('\n' + (failed === 0 ? '🎉 所有测试通过!' : '⚠️ 有测试失败,请检查。'));
await prisma.$disconnect();
process.exit(failed > 0 ? 1 : 0);
}
main().catch(async (err) => {
console.error('测试执行异常:', err);
await prisma.$disconnect();
process.exit(1);
});