feat(ssa): Complete T-test end-to-end testing with 9 bug fixes - Phase 1 core 85% complete. R service: missing value auto-filter. Backend: error handling, variable matching, dynamic filename. Frontend: module activation, session isolation, error propagation. Full flow verified.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-19 20:57:00 +08:00
parent 8137e3cde2
commit 49b5c37cb1
86 changed files with 21207 additions and 252 deletions

View File

@@ -0,0 +1,138 @@
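/**
 * R service client.
 * Calls the R Docker service to run statistical analyses.
 *
 * Conventions followed:
 * - Uses the shared logging service (@/common/logging)
 * - Uses the shared storage service (@/common/storage, OSS storage convention)
 */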
import axios, { AxiosInstance } from 'axios';
import { logger } from '../../../common/logging/index.js';
import { storage } from '../../../common/storage/index.js';
import { prisma } from '../../../config/database.js';
/**
 * Client for the R statistics service.
 *
 * Posts an analysis plan together with a pre-signed data URL to
 * `/api/v1/skills/{tool_code}` and records each completed call in
 * `ssaExecutionLog`.
 */
export class RClientService {
  private client: AxiosInstance;

  /**
   * Creates the axios instance. The base URL is read from `R_SERVICE_URL`
   * and defaults to `http://localhost:8082`; requests time out after 120 s.
   */
  constructor() {
    const baseURL = process.env.R_SERVICE_URL || 'http://localhost:8082';
    this.client = axios.create({
      baseURL,
      timeout: 120000, // 120 seconds
      headers: {
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Runs one analysis tool on the R service.
   *
   * @param sessionId - Session id, used for logging and the execution-log row.
   * @param plan - Plan object; `tool_code`, `params` and optional `guardrails` are read.
   * @param session - Session record; `id` and `dataOssKey` are read.
   * @returns The R service response body with `executionMs` added.
   * @throws Error carrying a user-facing message when no data file is attached,
   *   when the R service times out / answers 502 or 504, or when it returns a
   *   `user_hint`; otherwise `R service error: <message>`.
   */
  async execute(sessionId: string, plan: any, session: any): Promise<any> {
    const startTime = Date.now();

    // Build the request body (data is handed over as a pre-signed storage URL).
    const requestBody = {
      data_source: await this.buildDataSource(session),
      params: plan.params,
      guardrails: plan.guardrails || {
        check_normality: true,
        auto_fix: true
      }
    };

    // Encode the tool code so a crafted value cannot address another path on the R service.
    const endpoint = `/api/v1/skills/${encodeURIComponent(String(plan.tool_code))}`;

    try {
      logger.info('[SSA:RClient] Calling R service', {
        sessionId,
        toolCode: plan.tool_code,
        endpoint,
        // The pre-signed URL grants access to the data file, so it must not reach the logs.
        requestBody: {
          ...requestBody,
          data_source: { type: requestBody.data_source.type, oss_url: '[redacted]' }
        }
      });

      const response = await this.client.post(endpoint, requestBody);
      const executionMs = Date.now() - startTime;

      logger.info('[SSA:RClient] R service response', {
        sessionId,
        status: response.data?.status,
        hasResults: !!response.data?.results,
        executionMs
      });

      // Persist the execution log; a failure here must not block the main flow.
      try {
        await prisma.ssaExecutionLog.create({
          data: {
            sessionId,
            toolCode: plan.tool_code,
            inputParams: plan.params,
            outputStatus: response.data.status,
            outputResult: response.data.results,
            traceLog: response.data.trace_log || [],
            executionMs
          }
        });
      } catch (logError) {
        logger.warn('[SSA:RClient] Failed to save execution log', { error: logError });
      }

      // Return the R response with the measured duration attached.
      return {
        ...response.data,
        executionMs
      };
    } catch (error: any) {
      logger.error('[SSA:RClient] R service call failed', {
        sessionId,
        toolCode: plan.tool_code,
        error: error.message
      });

      // 502/504 (R service crashed or gateway timeout) and axios client-side
      // timeouts (which carry no HTTP status) share one friendly message.
      const statusCode = error.response?.status;
      const timedOut = error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT';
      if (statusCode === 502 || statusCode === 504 || timedOut) {
        throw new Error('统计服务繁忙或数据异常,请稍后重试');
      }

      // Prefer the user-friendly hint supplied by the R service.
      const userHint = error.response?.data?.user_hint;
      if (userHint) {
        throw new Error(userHint);
      }

      throw new Error(`R service error: ${error.message}`);
    }
  }

  /**
   * Builds the data-source descriptor (OSS only): resolves the session's
   * `dataOssKey` to a URL via the storage service.
   *
   * @throws Error when the session has no uploaded data file.
   */
  private async buildDataSource(session: any): Promise<{ type: string; oss_url: string }> {
    const ossKey = session.dataOssKey;
    if (!ossKey) {
      logger.error('[SSA:RClient] No data uploaded', { sessionId: session.id });
      throw new Error('请先上传数据文件');
    }

    logger.info('[SSA:RClient] Building OSS data source', { sessionId: session.id, ossKey });
    const signedUrl = await storage.getUrl(ossKey);

    return {
      type: 'oss',
      oss_url: signedUrl
    };
  }

  /**
   * Probes `GET /health`.
   *
   * @returns `true` only when the response body has `status === 'ok'`; any error yields `false`.
   */
  async healthCheck(): Promise<boolean> {
    try {
      const res = await this.client.get('/health');
      return res.data.status === 'ok';
    } catch {
      return false;
    }
  }
}

View File

@@ -0,0 +1,28 @@
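/**
 * SSA-Pro intelligent statistical analysis module entry point.
 * @module ssa
 *
 * Conventions followed:
 * - Uses the authenticate middleware (module authentication convention)
 * - Uses the shared logging service
 */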
import { FastifyInstance } from 'fastify';
import { authenticate } from '../../common/auth/auth.middleware.js';
import sessionRoutes from './routes/session.routes.js';
import analysisRoutes from './routes/analysis.routes.js';
import consultRoutes from './routes/consult.routes.js';
import configRoutes from './routes/config.routes.js';
/**
 * Fastify plugin that mounts the whole SSA module.
 *
 * Installs the `authenticate` preHandler hook first, then registers the four
 * sub-route plugins under their prefixes (session and analysis routes share
 * `/sessions`).
 */
export async function ssaRoutes(app: FastifyInstance) {
  // Authentication hook for the module.
  app.addHook('preHandler', authenticate);

  // Sub-route plugins, registered in this order.
  const mounts = [
    [sessionRoutes, '/sessions'],
    [analysisRoutes, '/sessions'],
    [consultRoutes, '/consult'],
    [configRoutes, '/config']
  ] as const;

  for (const [plugin, prefix] of mounts) {
    app.register(plugin, { prefix });
  }
}
export default ssaRoutes;

View File

@@ -0,0 +1,349 @@
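/**
 * SSA analysis execution routes.
 *
 * Conventions followed:
 * - Uses getUserId (module authentication convention)
 * - Uses storage (OSS storage convention)
 * - Uses logger (logging service)
 */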
import { FastifyInstance, FastifyRequest } from 'fastify';
import crypto from 'crypto';
import { RClientService } from '../executor/RClientService.js';
import { prisma } from '../../../config/database.js';
import { storage } from '../../../common/storage/index.js';
import { logger } from '../../../common/logging/index.js';
/**
 * Reads the authenticated user's id from `request.user`.
 *
 * @throws Error('User not authenticated') when `request.user.userId` is missing or empty.
 */
function getUserId(request: FastifyRequest): string {
  const { user } = request as any;
  const userId = user?.userId;
  if (userId) {
    return userId;
  }
  throw new Error('User not authenticated');
}
/**
 * Reads the tenant id from `request.user`, falling back to `'default'`
 * when it is missing or empty.
 */
function getTenantId(request: FastifyRequest): string {
  const tenantId = (request as any).user?.tenantId;
  return tenantId ? tenantId : 'default';
}
/** Column entry of `ssaSession.dataSchema.columns`; only the fields read by the planner. */
type PlanColumn = { name: string; type: string };

/**
 * Derives a lower-case extension for the storage key.
 * Falls back to `csv` when the name has no extension or the extension holds
 * anything other than letters/digits, so it can never add path segments.
 */
function storageExtension(filename: string): string {
  const dot = filename.lastIndexOf('.');
  const ext = dot >= 0 ? filename.slice(dot + 1).toLowerCase() : '';
  return /^[a-z0-9]{1,10}$/.test(ext) ? ext : 'csv';
}

/**
 * Picks the grouping (categorical) and value (numeric) variable for the T-test
 * plan by keyword-matching column names against the user's query, then
 * filling gaps from the remaining columns.
 */
function pickTTestVariables(
  columns: PlanColumn[],
  query: string
): { groupVar: string; valueVar: string; mentioned: PlanColumn[] } {
  const queryLower = query.toLowerCase();
  const isCategorical = (c: PlanColumn) => c.type === 'categorical';
  const isNumeric = (c: PlanColumn) => c.type === 'numeric';

  // Columns whose name literally appears in the query.
  const mentioned = columns.filter((col) => queryLower.includes(col.name.toLowerCase()));

  // Mentioned columns first: categorical -> group, numeric -> value.
  let groupVar = mentioned.find(isCategorical)?.name || '';
  let valueVar = mentioned.find(isNumeric)?.name || '';

  // Only one kind was mentioned: complete the pair, preferring unmentioned columns.
  if (mentioned.length > 0) {
    const unmentioned = columns.filter((c) => !mentioned.some((m) => m.name === c.name));
    if (!groupVar) {
      groupVar = unmentioned.find(isCategorical)?.name || columns.find(isCategorical)?.name || '';
    }
    if (!valueVar) {
      valueVar = unmentioned.find(isNumeric)?.name || columns.find(isNumeric)?.name || '';
    }
  }

  // Nothing matched at all: default strategy, avoiding the same column twice.
  if (!groupVar) {
    groupVar =
      columns.find((c) => isCategorical(c) && c.name !== valueVar)?.name || columns[0]?.name || 'group';
  }
  if (!valueVar) {
    valueVar =
      columns.find((c) => isNumeric(c) && c.name !== groupVar)?.name || columns[1]?.name || 'value';
  }

  // Final check: the two variables must differ when more than one column exists.
  if (groupVar === valueVar && columns.length > 1) {
    const other = columns.find((c) => c.name !== groupVar);
    if (other) {
      if (other.type === 'numeric') {
        valueVar = other.name;
      } else {
        groupVar = other.name;
      }
    }
  }

  return { groupVar, valueVar, mentioned };
}

/**
 * Builds a `Content-Disposition` value with an ASCII `filename` fallback and
 * an RFC 5987 `filename*` parameter so non-ASCII names survive the download.
 */
function attachmentDisposition(filename: string): string {
  const asciiFallback = filename.replace(/[^\x20-\x7e]/g, '_').replace(/["\\]/g, '_');
  const encoded = encodeURIComponent(filename).replace(
    /['()*]/g,
    (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  );
  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
}

/**
 * Analysis routes: data upload, plan generation, execution on the R service,
 * R-code download and an R-service health probe.
 *
 * Every `/:id/...` route resolves the session through an ownership check and
 * answers 404 when the session does not exist or belongs to another user.
 */
export default async function analysisRoutes(app: FastifyInstance) {
  const rClient = new RClientService();

  // Loads a session only when it belongs to `userId`.
  const findOwnedSession = async (id: string, userId: string) => {
    const session = await prisma.ssaSession.findUnique({ where: { id } });
    return session && session.userId === userId ? session : null;
  };

  // Upload data (stored under the tenant/user OSS directory layout).
  app.post('/:id/upload', async (req, reply) => {
    const { id } = req.params as { id: string };
    const userId = getUserId(req);
    const tenantId = getTenantId(req);

    // Verify the target session before writing anything to storage.
    const session = await findOwnedSession(id, userId);
    if (!session) {
      return reply.status(404).send({ error: 'Session not found' });
    }

    const data = await req.file();
    if (!data) {
      return reply.status(400).send({ error: 'No file uploaded' });
    }
    const buffer = await data.toBuffer();
    const filename = data.filename;

    // Storage key: tenants/<tenant>/users/<user>/ssa/<16 hex chars>.<ext>
    const uuid = crypto.randomUUID().replace(/-/g, '').substring(0, 16);
    const ext = storageExtension(filename);
    const storageKey = `tenants/${tenantId}/users/${userId}/ssa/${uuid}.${ext}`;

    await storage.upload(storageKey, buffer);
    logger.info('[SSA:Analysis] Data uploaded', { sessionId: id, storageKey });

    await prisma.ssaSession.update({
      where: { id },
      data: { dataOssKey: storageKey }
    });

    return reply.send({
      success: true,
      message: 'Data uploaded successfully',
      sessionId: id,
      storageKey
    });
  });

  // Generate an analysis plan.
  app.post('/:id/plan', async (req, reply) => {
    const { id } = req.params as { id: string };
    const { query } = (req.body ?? {}) as { query?: unknown };
    if (typeof query !== 'string' || query.trim() === '') {
      return reply.status(400).send({ error: 'Invalid request: query is required' });
    }

    const session = await findOwnedSession(id, getUserId(req));
    if (!session) {
      return reply.status(404).send({ error: 'Session not found' });
    }

    // TODO: call PlannerService to recommend a method from query and schema
    // const plan = await plannerService.generatePlan(id, query, session?.dataSchema);

    // Column names from the stored schema drive the keyword matching; entries
    // without a usable name are ignored (an empty name would match every query).
    const schema = session.dataSchema as any;
    const columns: PlanColumn[] = (Array.isArray(schema?.columns) ? schema.columns : []).filter(
      (c: any) => c && typeof c.name === 'string' && c.name !== ''
    );
    const columnNames = columns.map((c) => c.name.toLowerCase());

    const { groupVar, valueVar, mentioned } = pickTTestVariables(columns, query);

    logger.info('[SSA:Analysis] Mentioned columns', {
      query,
      mentionedColumns: mentioned.map((c) => ({ name: c.name, type: c.type }))
    });
    logger.info('[SSA:Analysis] Variable matching', {
      query,
      matchedGroupVar: groupVar,
      matchedValueVar: valueVar,
      availableColumns: columnNames
    });

    // AnalysisPlan in the camelCase shape the frontend expects.
    const mockPlan = {
      id: `plan_${Date.now()}`,
      toolCode: 'ST_T_TEST_IND',
      toolName: '独立样本 T 检验',
      description: `根据您的数据特征和分析需求"${query}",推荐使用独立样本 T 检验,比较 ${groupVar} 分组下 ${valueVar} 的差异。`,
      parameters: {
        group_var: groupVar,
        value_var: valueVar
      },
      guardrails: [
        { checkName: '正态性检验', checkCode: 'NORMALITY', actionType: 'Switch', actionTarget: 'WILCOXON' },
        { checkName: '样本量检查', checkCode: 'SAMPLE_SIZE', threshold: 'n >= 30', actionType: 'Warn' }
      ],
      confidence: 0.85
    };

    logger.info('[SSA:Analysis] Plan generated', {
      sessionId: id,
      query,
      toolCode: mockPlan.toolCode,
      params: mockPlan.parameters
    });
    return reply.send(mockPlan);
  });

  // Execute the analysis.
  app.post('/:id/execute', async (req, reply) => {
    const { id } = req.params as { id: string };
    const { plan } = (req.body ?? {}) as { plan: any };
    logger.info('[SSA:Analysis] Execute request', { sessionId: id, plan });

    try {
      // Validate the plan.
      if (!plan || !plan.tool_code) {
        logger.error('[SSA:Analysis] Invalid plan', { plan });
        return reply.status(400).send({
          error: 'Invalid plan: missing tool_code',
          user_hint: '分析计划无效,请重新生成'
        });
      }

      // Load the session (must belong to the caller).
      const session = await findOwnedSession(id, getUserId(req));
      if (!session) {
        logger.error('[SSA:Analysis] Session not found', { sessionId: id });
        return reply.status(404).send({ error: 'Session not found' });
      }
      if (!session.dataOssKey) {
        logger.error('[SSA:Analysis] No data uploaded', { sessionId: id });
        return reply.status(400).send({
          error: 'No data uploaded',
          user_hint: '请先上传数据文件'
        });
      }

      logger.info('[SSA:Analysis] Calling R service', {
        sessionId: id,
        toolCode: plan.tool_code,
        dataOssKey: session.dataOssKey
      });

      const result = await rClient.execute(id, plan, session);

      logger.info('[SSA:Analysis] R service returned', {
        sessionId: id,
        status: result?.status,
        hasResults: !!result?.results,
        message: result?.message,
        userHint: result?.user_hint
      });

      // The R service reports business errors inside a successful HTTP response.
      if (result?.status === 'error') {
        logger.warn('[SSA:Analysis] R service returned error', {
          sessionId: id,
          errorCode: result.error_code,
          message: result.message
        });

        // Keep the error in the message history.
        await prisma.ssaMessage.create({
          data: {
            sessionId: id,
            role: 'assistant',
            contentType: 'error',
            content: result
          }
        });

        // 422: the data does not satisfy the business rules.
        return reply.status(422).send({
          status: 'error',
          error: result.message || '分析执行失败',
          error_code: result.error_code,
          user_hint: result.user_hint || result.message
        });
      }

      // Store the successful result message.
      await prisma.ssaMessage.create({
        data: {
          sessionId: id,
          role: 'assistant',
          contentType: 'result',
          content: result
        }
      });

      return reply.send(result);
    } catch (error: any) {
      logger.error('[SSA:Analysis] Execute failed', {
        sessionId: id,
        error: error.message,
        stack: error.stack
      });
      return reply.status(500).send({
        error: error.message,
        user_hint: '分析执行失败,请检查 R 服务是否正常运行'
      });
    }
  });

  // Download the reproducible R code of the latest result.
  app.get('/:id/download-code', async (req, reply) => {
    const { id } = req.params as { id: string };

    // Session info is used for the file name.
    const session = await findOwnedSession(id, getUserId(req));
    if (!session) {
      return reply.status(404).send({ error: 'Session not found' });
    }

    // Latest execution result message.
    const latestMessage = await prisma.ssaMessage.findFirst({
      where: {
        sessionId: id,
        contentType: 'result'
      },
      orderBy: { createdAt: 'desc' }
    });

    let code = `# SSA-Pro 生成的 R 代码\n# Session: ${id}\n# 暂无可用代码\n`;
    let toolName = 'analysis';
    if (latestMessage?.content) {
      const content = latestMessage.content as any;

      // reproducible_code may arrive in snake_case or camelCase.
      const reproducibleCode = content.reproducible_code || content.reproducibleCode;
      if (reproducibleCode) {
        code = reproducibleCode;
      }

      // Tool name from the method label; keep the default when nothing usable remains.
      const method = content.results?.method;
      if (typeof method === 'string') {
        toolName = method.replace(/\s+/g, '_').replace(/[^a-zA-Z0-9_]/g, '') || toolName;
      }
    }

    // File name: <tool>_<data file>_<MMDD>_<HHmm>.R
    const now = new Date();
    const pad = (n: number) => String(n).padStart(2, '0');
    const dateStr = `${pad(now.getMonth() + 1)}${pad(now.getDate())}`;
    const timeStr = `${pad(now.getHours())}${pad(now.getMinutes())}`;

    // Data file name from the session title (extension and special characters removed).
    let dataName = 'data';
    if (session.title) {
      dataName = session.title
        .replace(/\.(csv|xlsx|xls)$/i, '')
        .replace(/[^a-zA-Z0-9\u4e00-\u9fa5_-]/g, '_')
        .substring(0, 20);
    }

    const filename = `${toolName}_${dataName}_${dateStr}_${timeStr}.R`;
    logger.info('[SSA:Analysis] Download code', { sessionId: id, filename, hasCode: code.length > 50 });

    reply.header('Content-Type', 'text/plain; charset=utf-8');
    reply.header('Content-Disposition', attachmentDisposition(filename));
    return reply.send(code);
  });

  // R service health probe.
  app.get('/r-service/health', async (req, reply) => {
    const healthy = await rClient.healthCheck();
    return reply.send({
      r_service: healthy ? 'ok' : 'unavailable'
    });
  });
}

View File

@@ -0,0 +1,130 @@
/**
 * SSA configuration center routes.
 *
 * Conventions followed:
 * - Admin endpoints require permission checks
 * - Uses logger (logging service)
 */
import { FastifyInstance, FastifyRequest } from 'fastify';
import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
/**
 * Returns `request.user.userId`.
 *
 * @throws Error('User not authenticated') when no user id is attached to the request.
 */
function getUserId(request: FastifyRequest): string {
  const id = (request as any).user?.userId;
  if (id) return id;
  throw new Error('User not authenticated');
}
/**
 * Configuration-center routes. Every handler is currently a stub: upload
 * endpoints only check that a multipart file is present, list endpoints
 * return fixed data.
 */
export default async function configRoutes(app: FastifyInstance) {
  // Registers a POST stub that replies 400 without a file, else `payload()`.
  const uploadStub = (path: string, payload: () => object) => {
    app.post(path, async (req, reply) => {
      const upload = await req.file();
      if (upload) {
        return reply.send(payload());
      }
      return reply.status(400).send({ error: 'No file uploaded' });
    });
  };

  // Registers a GET stub that always replies with an empty list.
  const emptyList = (path: string) => {
    app.get(path, async (_req, reply) => reply.send([]));
  };

  // Import decision table — TODO: parse the Excel file and import it
  uploadStub('/decision-table', () => ({
    success: true,
    message: 'Decision table imported successfully'
  }));

  // Get decision table — TODO: load from the database
  emptyList('/decision-table');

  // Upload R script — TODO: save the script to the database
  uploadStub('/r-scripts', () => ({
    success: true,
    message: 'R script uploaded successfully'
  }));

  // List scripts — TODO: load from the database
  emptyList('/r-scripts');

  // Import tool configuration — TODO: parse the Excel file and import it
  uploadStub('/tool-config', () => ({
    success: true,
    message: 'Tool config imported successfully'
  }));

  // List tools — TODO: read from the configuration cache
  app.get('/tools', async (_req, reply) => {
    const tools = [
      {
        tool_code: 'ST_T_TEST_IND',
        name: '独立样本 T 检验',
        description: '比较两组独立样本的均值差异',
        category: '假设检验'
      }
    ];
    return reply.send(tools);
  });

  // Parameter mapping of one tool — TODO: load from the database
  app.get('/tools/:code/params', async (req, reply) => {
    const code = (req.params as { code: string }).code;
    return reply.send([]);
  });

  // Guardrail rules of one tool — TODO: load from the database
  app.get('/tools/:code/guardrails', async (req, reply) => {
    const code = (req.params as { code: string }).code;
    return reply.send([]);
  });

  // Hot reload — TODO: reload all configuration into the cache
  app.post('/reload', async (_req, reply) => {
    const timestamp = new Date().toISOString();
    return reply.send({ success: true, timestamp });
  });

  // Validate a configuration file — TODO: validate only, do not import
  uploadStub('/validate', () => ({ valid: true }));
}

View File

@@ -0,0 +1,186 @@
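/**
 * SSA consultation-mode routes.
 *
 * Conventions followed:
 * - Uses StreamingService (streaming response service)
 * - Uses LLMFactory (LLM gateway)
 * - Uses getUserId (module authentication convention)
 */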
import { FastifyInstance, FastifyRequest } from 'fastify';
import { prisma } from '../../../config/database.js';
import { createStreamingService } from '../../../common/streaming/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { logger } from '../../../common/logging/index.js';
/**
 * Extracts the caller's user id from the request's `user` object.
 *
 * @throws Error('User not authenticated') when the id is absent or empty.
 */
function getUserId(request: FastifyRequest): string {
  const authenticated = (request as any).user;
  const userId = authenticated?.userId;
  if (!userId) {
    throw new Error('User not authenticated');
  }
  return userId;
}
/**
 * Consultation-mode routes: create a data-less consult session, chat
 * (plain and streaming), and SAP generation/download stubs.
 *
 * Chat routes answer 404 when the session does not exist or belongs to
 * another user, and 400 when the message is missing or blank.
 */
export default async function consultRoutes(app: FastifyInstance) {
  // Number of most recent messages sent to the model as conversation context.
  const HISTORY_LIMIT = 20;

  // Loads a session only when it belongs to `userId`.
  const findOwnedSession = async (id: string, userId: string) => {
    const session = await prisma.ssaSession.findUnique({ where: { id } });
    return session && session.userId === userId ? session : null;
  };

  // Narrowing check for a usable chat message.
  const isUsableMessage = (value: unknown): value is string =>
    typeof value === 'string' && value.trim() !== '';

  // Create a consult session (no data file).
  app.post('/', async (req, reply) => {
    const userId = getUserId(req);
    const session = await prisma.ssaSession.create({
      data: {
        userId,
        title: '统计咨询',
        status: 'consult'
      }
    });
    return reply.send(session);
  });

  // Consult chat (non-streaming).
  app.post('/:id/chat', async (req, reply) => {
    const { id } = req.params as { id: string };
    const { message } = (req.body ?? {}) as { message?: unknown };
    const userId = getUserId(req);

    if (!isUsableMessage(message)) {
      return reply.status(400).send({ error: 'Message cannot be empty' });
    }
    if (!(await findOwnedSession(id, userId))) {
      return reply.status(404).send({ error: 'Session not found' });
    }

    // Store the user message.
    await prisma.ssaMessage.create({
      data: {
        sessionId: id,
        role: 'user',
        contentType: 'text',
        content: { text: message }
      }
    });

    // TODO: call ConsultService to generate the reply
    const response = `感谢您的咨询。根据您描述的研究设计,我建议考虑以下统计方法...`;

    // Store the assistant reply.
    await prisma.ssaMessage.create({
      data: {
        sessionId: id,
        role: 'assistant',
        contentType: 'text',
        content: { text: response }
      }
    });

    return reply.send({ response });
  });

  // Consult chat (streaming) via StreamingService.
  app.post('/:id/chat/stream', async (req, reply) => {
    const { id } = req.params as { id: string };
    const { message } = (req.body ?? {}) as { message?: unknown };
    const userId = getUserId(req);

    if (!isUsableMessage(message)) {
      return reply.status(400).send({ error: 'Message cannot be empty' });
    }
    if (!(await findOwnedSession(id, userId))) {
      return reply.status(404).send({ error: 'Session not found' });
    }

    logger.info('[SSA:Consult] Stream chat started', { sessionId: id, userId });

    // Store the user message.
    await prisma.ssaMessage.create({
      data: {
        sessionId: id,
        role: 'user',
        contentType: 'text',
        content: { text: message }
      }
    });

    // Fetch the LATEST messages (newest first), then restore chronological
    // order. Ordering ascending with `take` would return the oldest messages
    // and drop the message just stored once the chat grows past the limit.
    const recent = await prisma.ssaMessage.findMany({
      where: { sessionId: id },
      orderBy: { createdAt: 'desc' },
      take: HISTORY_LIMIT
    });
    const history = [...recent].reverse();

    const systemPrompt = [
      '你是一个专业的生物统计咨询师。请根据用户的研究设计和需求提供统计分析建议并帮助用户制定统计分析计划SAP',
      '要点:',
      '1. 理解研究设计(实验/观察、独立/配对、随机/分层等)',
      '2. 明确研究假设和主要终点',
      '3. 推荐合适的统计方法',
      '4. 提示统计前提条件和注意事项'
    ].join('\n');

    // Message list: system prompt followed by the conversation history.
    const messages = [
      {
        role: 'system' as const,
        content: systemPrompt
      },
      ...history.map((m) => ({
        role: m.role as 'user' | 'assistant',
        content: (m.content as any)?.text || ''
      }))
    ];

    // Stream the completion.
    const streamingService = createStreamingService(reply, {
      model: 'deepseek-v3',
      temperature: 0.7,
      maxTokens: 4096,
      enableDeepThinking: true,
      userId,
      conversationId: id,
    });

    await streamingService.streamGenerate(messages, {
      onComplete: async (content, thinking) => {
        // Store the assistant reply once streaming has finished.
        await prisma.ssaMessage.create({
          data: {
            sessionId: id,
            role: 'assistant',
            contentType: 'text',
            content: { text: content, thinking }
          }
        });
        logger.info('[SSA:Consult] Stream chat completed', { sessionId: id });
      }
    });
  });

  // Generate the SAP document (stub).
  app.post('/:id/generate-sap', async (req, reply) => {
    const { id } = req.params as { id: string };

    // TODO: call SAPGeneratorService
    const sap = {
      title: '统计分析计划',
      sections: [
        { heading: '研究背景', content: '...' },
        { heading: '数据描述', content: '...' },
        { heading: '统计假设', content: '...' },
        { heading: '分析方法', content: '...' },
        { heading: '结果解读指南', content: '...' },
        { heading: '注意事项', content: '...' }
      ],
      recommendedTools: ['ST_T_TEST_IND'],
      metadata: {
        generatedAt: new Date().toISOString(),
        sessionId: id,
        version: '1.0'
      }
    };
    return reply.send(sap);
  });

  // Download the SAP document (stub).
  app.get('/:id/download-sap', async (req, reply) => {
    const { id } = req.params as { id: string };
    const { format } = req.query as { format?: string };

    // TODO: generate Word or Markdown output
    const content = `# 统计分析计划\n\n## 研究背景\n...`;
    if (format === 'word') {
      // TODO: build a real Word file with the docx library; for now the body
      // is still Markdown text even though the headers announce .docx.
      reply.header('Content-Type', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
      reply.header('Content-Disposition', `attachment; filename="SAP_${id}.docx"`);
    } else {
      reply.header('Content-Type', 'text/markdown');
      reply.header('Content-Disposition', `attachment; filename="SAP_${id}.md"`);
    }
    return reply.send(content);
  });
}

View File

@@ -0,0 +1,138 @@
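/**
 * SSA session management routes.
 *
 * Conventions followed:
 * - Uses getUserId (module authentication convention)
 * - Uses logger (logging service)
 * - Uses storage (OSS storage convention)
 */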
import { FastifyInstance, FastifyRequest } from 'fastify';
import crypto from 'crypto';
import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
import { storage } from '../../../common/storage/index.js';
import { DataParserService } from '../services/DataParserService.js';
/**
 * Resolves the authenticated user's id from `request.user.userId`.
 *
 * @throws Error('User not authenticated') when it is missing or empty.
 */
function getUserId(request: FastifyRequest): string {
  const { user } = request as any;
  if (!user?.userId) {
    throw new Error('User not authenticated');
  }
  return user.userId;
}
/**
 * Resolves the tenant id from `request.user.tenantId`; a missing or empty
 * value yields `'default'`.
 */
function getTenantId(request: FastifyRequest): string {
  const { user } = request as any;
  if (user?.tenantId) {
    return user.tenantId;
  }
  return 'default';
}
/**
 * Derives a lower-case extension for the storage key.
 * Falls back to `csv` when the name has no extension or the extension holds
 * anything other than letters/digits, so it can never add path segments.
 */
function storageExtension(filename: string): string {
  const dot = filename.lastIndexOf('.');
  const ext = dot >= 0 ? filename.slice(dot + 1).toLowerCase() : '';
  return /^[a-z0-9]{1,10}$/.test(ext) ? ext : 'csv';
}

/**
 * Session routes: create a session (optionally with a data file), list the
 * caller's sessions, and read one session or its message history.
 *
 * Detail and message routes answer 404 when the session does not exist or
 * belongs to another user.
 */
export default async function sessionRoutes(app: FastifyInstance) {
  // Create a session (a multipart request may carry the data file).
  app.post('/', async (req, reply) => {
    const userId = getUserId(req);
    const tenantId = getTenantId(req);

    // Detect a file upload from the content type.
    const contentType = req.headers['content-type'] || '';
    const isMultipart = contentType.includes('multipart/form-data');

    let dataOssKey: string | null = null;
    let dataSchema: any = null;
    let title = '新分析会话';

    if (isMultipart) {
      const data = await req.file();
      if (data) {
        const buffer = await data.toBuffer();
        const filename = data.filename;
        title = filename;

        // Storage key: tenants/<tenant>/users/<user>/ssa/<16 hex chars>.<ext>
        const uuid = crypto.randomUUID().replace(/-/g, '').substring(0, 16);
        const ext = storageExtension(filename);
        dataOssKey = `tenants/${tenantId}/users/${userId}/ssa/${uuid}.${ext}`;

        // Upload to OSS.
        await storage.upload(dataOssKey, buffer);
        logger.info('[SSA:Session] File uploaded to OSS', { dataOssKey, filename });

        // Parse the data schema; a failure leaves the session without a schema.
        try {
          const parser = new DataParserService();
          dataSchema = await parser.parseSchema(buffer, ext);
          logger.info('[SSA:Session] Data schema parsed', {
            columns: dataSchema.columns.length,
            rowCount: dataSchema.rowCount
          });
        } catch (parseError) {
          logger.warn('[SSA:Session] Schema parsing failed, continuing without schema', { error: parseError });
        }
      }
    }

    // Create the session row.
    const session = await prisma.ssaSession.create({
      data: {
        userId,
        title,
        status: 'active',
        dataOssKey,
        dataSchema
      }
    });

    logger.info('[SSA:Session] Session created', {
      sessionId: session.id,
      hasFile: !!dataOssKey
    });

    // Response shape expected by the frontend.
    return reply.send({
      sessionId: session.id,
      schema: dataSchema || { columns: [], rowCount: 0 }
    });
  });

  // List the caller's sessions (latest 20).
  app.get('/', async (req, reply) => {
    const userId = getUserId(req);
    const sessions = await prisma.ssaSession.findMany({
      where: { userId },
      orderBy: { createdAt: 'desc' },
      take: 20
    });
    return reply.send(sessions);
  });

  // Session detail including messages.
  app.get('/:id', async (req, reply) => {
    const { id } = req.params as { id: string };
    const userId = getUserId(req);
    const session = await prisma.ssaSession.findUnique({
      where: { id },
      include: { messages: true }
    });
    // Foreign sessions are reported as missing so ids cannot be probed.
    if (!session || session.userId !== userId) {
      return reply.status(404).send({ error: 'Session not found' });
    }
    return reply.send(session);
  });

  // Message history of one session.
  app.get('/:id/messages', async (req, reply) => {
    const { id } = req.params as { id: string };
    const userId = getUserId(req);
    const owner = await prisma.ssaSession.findUnique({
      where: { id },
      select: { userId: true }
    });
    if (!owner || owner.userId !== userId) {
      return reply.status(404).send({ error: 'Session not found' });
    }
    const messages = await prisma.ssaMessage.findMany({
      where: { sessionId: id },
      orderBy: { createdAt: 'asc' }
    });
    return reply.send(messages);
  });
}

View File

@@ -0,0 +1,189 @@
/**
* SSA 数据解析服务
*
* 功能:解析 CSV/Excel 文件,提取 schema 信息
*/
import { logger } from '../../../common/logging/index.js';
/**
 * Summary of one column of an uploaded data file, as produced by the parser
 * in this file.
 */
interface ColumnSchema {
// Column header text.
name: string;
// Inferred column type.
type: 'numeric' | 'categorical' | 'datetime' | 'text';
// Number of distinct non-empty values among the sampled rows.
uniqueValues?: number;
// Number of sampled rows whose cell is missing or empty.
nullCount?: number;
}
/**
 * Schema of an uploaded data file: per-column summaries plus the row count.
 */
interface DataSchema {
// One entry per header column, in file order.
columns: ColumnSchema[];
// Number of rows after the header row.
rowCount: number;
// Optional preview rows; not populated by the parser visible in this file.
preview?: any[];
}
/**
 * Parses uploaded CSV/Excel files and extracts a column schema
 * (name, inferred type, distinct-value count, empty-cell count).
 *
 * Limitation: the CSV reader splits on line breaks, so quoted fields that
 * contain embedded newlines are not supported.
 */
export class DataParserService {
  /** Maximum number of data rows sampled for type inference. */
  private static readonly SAMPLE_ROWS = 1000;

  /**
   * Accepted date shapes. Matching is pattern-only: `Date.parse` is not used
   * because its handling of non-ISO text is implementation-defined and can
   * accept labels such as "Group 1" as dates.
   */
  private static readonly DATE_PATTERNS: readonly RegExp[] = [
    /^\d{4}-\d{1,2}-\d{1,2}$/,
    /^\d{1,2}\/\d{1,2}\/\d{4}$/,
    /^\d{4}\/\d{1,2}\/\d{1,2}$/,
    /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}(:\d{2}(\.\d+)?)?(Z|[+-]\d{2}:?\d{2})?$/
  ];

  /**
   * Parses the schema of a data file.
   *
   * @param buffer - Raw file content.
   * @param ext - Lower-case file extension: `csv`, `xlsx` or `xls`.
   * @throws Error for unsupported extensions, empty files, or unreadable Excel files.
   */
  async parseSchema(buffer: Buffer, ext: string): Promise<DataSchema> {
    if (ext === 'csv') {
      return this.parseCsvSchema(buffer);
    } else if (ext === 'xlsx' || ext === 'xls') {
      return this.parseExcelSchema(buffer);
    }
    throw new Error(`Unsupported file format: ${ext}`);
  }

  /**
   * Parses a CSV file. Blank lines are ignored, so `rowCount` counts only
   * lines that carry data.
   */
  private async parseCsvSchema(buffer: Buffer): Promise<DataSchema> {
    const content = buffer.toString('utf-8').replace(/^\uFEFF/, ''); // drop a UTF-8 BOM
    const lines = content.split(/\r?\n/).filter((line) => line.trim() !== '');
    if (lines.length === 0) {
      throw new Error('CSV file is empty');
    }

    // Header row.
    const headers = this.parseCsvLine(lines[0]);

    // Data rows (at most SAMPLE_ROWS are read for type inference).
    const rows = lines
      .slice(1, 1 + DataParserService.SAMPLE_ROWS)
      .map((line) => this.parseCsvLine(line));

    // Infer column types.
    const columns: ColumnSchema[] = headers.map((name, index) => {
      const values = rows.map((row) => row[index]).filter((v) => v !== undefined && v !== '');
      return {
        name,
        type: this.inferType(values),
        uniqueValues: new Set(values).size,
        nullCount: rows.length - values.length
      };
    });

    return {
      columns,
      rowCount: lines.length - 1
    };
  }

  /**
   * Parses the first sheet of an Excel workbook.
   *
   * @throws Error('Failed to parse Excel file') when the workbook cannot be read,
   *   Error('Excel file is empty') when the first sheet has no rows.
   */
  private async parseExcelSchema(buffer: Buffer): Promise<DataSchema> {
    let data: any[][];
    try {
      const xlsx = await import('xlsx');
      const workbook = xlsx.read(buffer, { type: 'buffer' });
      const sheetName = workbook.SheetNames[0];
      const sheet = workbook.Sheets[sheetName];
      data = xlsx.utils.sheet_to_json(sheet, { header: 1 }) as any[][];
    } catch (error) {
      logger.error('[DataParser] Excel parsing failed', { error });
      throw new Error('Failed to parse Excel file');
    }

    // Checked outside the try block so the specific message is not replaced
    // by the generic parse failure.
    if (data.length === 0) {
      throw new Error('Excel file is empty');
    }

    const headers = data[0] as string[];
    const rows = data.slice(1, 1 + DataParserService.SAMPLE_ROWS);

    const columns: ColumnSchema[] = headers.map((name, index) => {
      const values = rows
        .map((row) => row[index])
        .filter((v) => v !== undefined && v !== null && v !== '');
      return {
        name: String(name),
        type: this.inferType(values.map(String)),
        uniqueValues: new Set(values).size,
        nullCount: rows.length - values.length
      };
    });

    return {
      columns,
      rowCount: data.length - 1
    };
  }

  /**
   * Splits one CSV line into trimmed fields. Commas inside double quotes do
   * not split, and a doubled quote (`""`) inside a quoted field yields one
   * literal quote character.
   */
  private parseCsvLine(line: string): string[] {
    const result: string[] = [];
    let current = '';
    let inQuotes = false;

    for (let i = 0; i < line.length; i++) {
      const char = line[i];
      if (char === '"') {
        if (inQuotes && line[i + 1] === '"') {
          // Escaped quote inside a quoted field.
          current += '"';
          i++;
        } else {
          inQuotes = !inQuotes;
        }
      } else if (char === ',' && !inQuotes) {
        result.push(current.trim());
        current = '';
      } else {
        current += char;
      }
    }
    result.push(current.trim());
    return result;
  }

  /**
   * Infers a column type from up to the first 100 values.
   *
   * Rule priority:
   * 1. distinct values <= 10 and distinct ratio < 20% -> categorical (even if numeric)
   * 2. more than 90% numeric -> numeric (categorical when only <= 3 distinct values)
   * 3. more than 90% matching a date pattern -> datetime
   * 4. distinct ratio < 50% -> categorical
   * 5. otherwise -> text
   */
  private inferType(values: string[]): 'numeric' | 'categorical' | 'datetime' | 'text' {
    if (values.length === 0) return 'text';

    const sample = values.slice(0, 100);
    const uniqueCount = new Set(sample).size;
    const uniqueRatio = uniqueCount / sample.length;

    // Rule 1: very few distinct values at a low ratio, e.g. 0/1, yes/no, A/B/C.
    if (uniqueCount <= 10 && uniqueRatio < 0.2) {
      return 'categorical';
    }

    // Rule 2: numeric values.
    const numericCount = sample.filter((v) => !isNaN(Number(v)) && v !== '').length;
    if (numericCount / sample.length > 0.9) {
      // Numbers with only 2-3 distinct values are treated as binary/ternary categories.
      if (uniqueCount <= 3) {
        return 'categorical';
      }
      return 'numeric';
    }

    // Rule 3: dates, by explicit pattern only.
    const dateCount = sample.filter((v) =>
      DataParserService.DATE_PATTERNS.some((p) => p.test(v))
    ).length;
    if (dateCount / sample.length > 0.9) {
      return 'datetime';
    }

    // Rule 4: low distinct ratio.
    if (uniqueRatio < 0.5) {
      return 'categorical';
    }

    return 'text';
  }
}

View File

@@ -0,0 +1,111 @@
/**
* SSA 模块类型定义
*/
/** Analysis mode: data-driven analysis or consultation. */
export type SSAMode = 'analysis' | 'consult';
/**
 * Session status. In the routes of this module, sessions are created as
 * `'active'` (analysis sessions) or `'consult'` (consultation sessions).
 */
export type SessionStatus = 'active' | 'consult' | 'completed' | 'error';
/**
 * Analysis plan in snake_case form. The execute route and the R client read
 * `tool_code`, `params` and `guardrails` from the submitted plan.
 *
 * NOTE(review): the plan-generation route replies with a camelCase object
 * (`toolCode`, `toolName`, `description`, `parameters`) — presumably the
 * client converts between the two shapes; confirm.
 */
export interface AnalysisPlan {
// Tool identifier, e.g. 'ST_T_TEST_IND'.
tool_code: string;
// Display name of the tool.
tool_name: string;
// Explanation of why this tool was chosen.
reasoning: string;
// Tool parameters, forwarded as-is to the R service.
params: Record<string, any>;
// Guardrail checks attached to the plan.
guardrails: GuardrailConfig[];
// Recommendation confidence.
confidence: number;
}
/** Configuration and current state of one guardrail check. */
export interface GuardrailConfig {
// Display name of the check.
name: string;
// Code identifying the check.
check_code: string;
// Optional threshold expression.
threshold?: string;
// Action attached to the check.
action_type: 'Block' | 'Warn' | 'Switch';
// Optional target of the action — presumably the replacement tool for 'Switch'; confirm.
action_target?: string;
// Evaluation state of the check.
status: 'pending' | 'passed' | 'failed' | 'switched';
}
/**
 * Result of one analysis execution.
 *
 * NOTE(review): the execute route also reads `error_code` and `user_hint`
 * from the R service response, and the R client adds `executionMs`; those
 * fields are not declared here.
 */
export interface ExecutionResult {
// Overall outcome.
status: 'success' | 'error' | 'blocked';
// Message describing the outcome.
message: string;
// Optional warnings.
warnings?: string[];
// Statistical output.
results?: {
// Name of the statistical method; also used to build the download file name.
method: string;
// Test statistic.
statistic: number;
// Raw p-value.
p_value: number;
// Pre-formatted p-value text.
p_value_fmt: string;
// Confidence interval bounds.
conf_int?: number[];
// Per-group descriptive statistics.
group_stats?: GroupStats[];
};
// Plot payloads; the encoding is not visible here.
plots?: string[];
// Execution trace lines.
trace_log?: string[];
// R code that reproduces the analysis (served by the download-code route).
reproducible_code?: string;
}
/** Descriptive statistics of one group. */
export interface GroupStats {
// Group label.
group: string;
// Sample size.
n: number;
// Mean.
mean: number;
// Standard deviation.
sd: number;
}
/** Schema of an uploaded data set. */
export interface DataSchema {
// Number of data rows.
rowCount: number;
// Column descriptions.
columns: ColumnInfo[];
}
/**
 * Description of one data column.
 *
 * NOTE(review): the data parser in this module also emits the type `'text'`,
 * reports `uniqueValues` as a number and uses `nullCount` rather than
 * `missing` — this interface does not match that output; confirm which
 * shape is authoritative.
 */
export interface ColumnInfo {
// Column name.
name: string;
// Column type.
type: 'numeric' | 'categorical' | 'datetime';
// Minimum value.
min?: number;
// Maximum value.
max?: number;
// Mean value.
mean?: number;
// Number of missing values.
missing?: number;
// Distinct values.
uniqueValues?: string[];
// Number of distinct values.
uniqueCount?: number;
// Privacy-protection flag; semantics are not visible here.
privacyProtected?: boolean;
}
/** Statistical analysis plan (SAP) document. */
export interface SAPDocument {
// Document title.
title: string;
// Ordered sections of the document.
sections: Array<{
heading: string;
content: string;
}>;
// Tool codes recommended by the plan.
recommendedTools: string[];
// Generation metadata.
metadata: {
// ISO-8601 timestamp (the generate-sap route uses `toISOString()`).
generatedAt: string;
// Id of the session the document belongs to.
sessionId: string;
// Document version string.
version: string;
};
}
/**
 * One row of the decision table.
 * Presumably maps a goal/variable/design combination to a tool — the
 * consuming code is not visible here; confirm.
 */
export interface DecisionTableEntry {
goalType: string;
yType: string;
xType?: string;
designType: string;
// Code of the selected tool.
toolCode: string;
// Code of an alternative tool.
altToolCode?: string;
// Priority of the entry; the ordering direction is not visible here.
priority: number;
}
/** Mapping of one JSON parameter key to an R parameter of a tool. */
export interface ParamMapping {
// Tool the mapping belongs to.
toolCode: string;
// Key in the JSON parameters.
jsonKey: string;
// Parameter name on the R side.
rParamName: string;
// Expected data type of the value.
dataType: 'string' | 'number' | 'boolean';
// Whether the parameter is required.
isRequired: boolean;
// Default value, kept as text.
defaultValue?: string;
// Validation rule; the format is not visible here.
validationRule?: string;
}

View File

@@ -0,0 +1,42 @@
/**
* SSA 计划验证 Schema (Zod)
*/
import { z } from 'zod';
/**
 * Zod schema for one guardrail configuration.
 * `action_type` defaults to 'Warn' and `status` to 'pending'; `check_code`,
 * `threshold` and `action_target` are optional.
 */
export const guardrailConfigSchema = z.object({
name: z.string(),
check_code: z.string().optional(),
threshold: z.string().optional(),
action_type: z.enum(['Block', 'Warn', 'Switch']).default('Warn'),
action_target: z.string().optional(),
status: z.enum(['pending', 'passed', 'failed', 'switched']).default('pending')
});
/**
 * Zod schema for an analysis plan (snake_case form).
 * `tool_code` must be `ST_` followed by uppercase letters and underscores
 * only (digits are rejected by the pattern); `guardrails` and `confidence`
 * are optional, and `confidence` must lie in [0, 1].
 */
export const analysisPlanSchema = z.object({
tool_code: z.string().regex(/^ST_[A-Z_]+$/, 'tool_code must match ST_XXX pattern'),
tool_name: z.string().min(1),
reasoning: z.string(),
params: z.record(z.any()),
guardrails: z.array(guardrailConfigSchema).optional(),
confidence: z.number().min(0).max(1).optional()
});
/**
 * Zod schema for an execute request body: the plan plus a `confirm` flag
 * that defaults to `true`.
 */
export const executeRequestSchema = z.object({
plan: analysisPlanSchema,
confirm: z.boolean().default(true)
});
/** Zod schema for a consult chat request body: a non-empty `message` string. */
export const consultMessageSchema = z.object({
message: z.string().min(1, 'Message cannot be empty')
});
// Types inferred from the schemas above (output types, i.e. after defaults are applied).
export type GuardrailConfig = z.infer<typeof guardrailConfigSchema>;
export type AnalysisPlan = z.infer<typeof analysisPlanSchema>;
export type ExecuteRequest = z.infer<typeof executeRequestSchema>;
export type ConsultMessage = z.infer<typeof consultMessageSchema>;