fix(ssa): Fix 7 integration bugs and refactor frontend unified state management

Bug fixes:
- Fix garbled error messages in chat (TypeWriter rendering issue)
- Fix R engine NA crash in descriptive.R (defensive isTRUE/is.na checks)
- Fix intent misclassification for statistical significance queries
- Fix step 2 results not displayed (accept warning status alongside success)
- Fix incomplete R code download (only step 1 included)
- Fix multi-task state confusion (clicking old card shows new results)
- Add R engine and backend parameter logging for debugging

Refactor - Unified Record Architecture:
- Replace 12 global singleton fields with AnalysisRecord as single source of truth
- Remove isWorkflowMode branching across all components
- One Analysis = One Record = N Steps paradigm
- selectRecord only sets currentRecordId, all rendering derives from currentRecord
- Fix cross-hook-instance issue: executeWorkflow falls back to the store's currentRecordId

Updated files: ssaStore, useWorkflow, useAnalysis, SSAChatPane, SSAWorkspacePane,
SSACodeModal, WorkflowTimeline, QueryService, WorkflowExecutorService, descriptive.R

Tested: Manual integration test passed - multi-task switching, R code completeness
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-21 22:58:59 +08:00
parent 371e1c069c
commit 11676f2840
17 changed files with 1573 additions and 1829 deletions

View File

@@ -79,7 +79,15 @@ const SSA_INTENT_PROMPT = `你是一个临床统计分析意图理解引擎。
{"goal":"regression","outcome_var":"Death","outcome_type":"binary","predictor_vars":["Age","BMI","Smoking","Stage"],"predictor_types":["continuous","continuous","binary","categorical"],"grouping_var":null,"design":"independent","confidence":0.8,"reasoning":"用户想分析影响死亡率的因素Death是二分类结局其余变量作为预测因素纳入logistic回归"}
\`\`\`
### 示例 4模糊表达 — 需要追问
### 示例 4统计学意义/检验
用户: "Yqol和bmi是否有统计学意义"
数据画像中有: Yqol [numeric], bmi [numeric], sex [categorical], age [numeric]
输出:
\`\`\`json
{"goal":"correlation","outcome_var":"Yqol","outcome_type":"continuous","predictor_vars":["bmi"],"predictor_types":["continuous"],"grouping_var":null,"design":"independent","confidence":0.85,"reasoning":"用户想了解Yqol和bmi之间是否存在统计学显著关系两者都是连续变量适合相关分析或回归分析"}
\`\`\`
### 示例 5模糊表达 — 需要追问
用户: "帮我分析一下这份数据"
数据画像中有: 10个变量
输出:
@@ -87,7 +95,7 @@ const SSA_INTENT_PROMPT = `你是一个临床统计分析意图理解引擎。
{"goal":"descriptive","outcome_var":null,"outcome_type":null,"predictor_vars":[],"predictor_types":[],"grouping_var":null,"design":"independent","confidence":0.35,"reasoning":"用户没有指定任何分析目标和变量,只能先做描述性统计,建议追问具体分析目的"}
\`\`\`
### 示例 5:队列研究
### 示例 6:队列研究
用户: "我想做一个完整的队列研究分析,看看新药对预后的影响"
数据画像中有: Drug [categorical, 2个水平], Outcome [categorical, 2个水平: 0/1], Age [numeric], Gender [categorical], BMI [numeric], Comorbidity [categorical]
输出:
@@ -127,7 +135,7 @@ async function main() {
content: SSA_INTENT_PROMPT,
model_config: { model: 'deepseek-v3', temperature: 0.3, maxTokens: 2048 },
status: 'ACTIVE',
changelog: `Phase Q v1.0: 5 组 Few-Shot + Confidence Rubric 客观化`,
changelog: `Phase Q v1.1: 6 组 Few-Shot (增加统计学意义示例) + Confidence Rubric 客观化`,
created_by: 'system-seed',
}
});

View File

@@ -411,7 +411,9 @@ export class QueryService {
if (query.includes('比较') || query.includes('差异') || query.includes('不同') || query.includes('有没有效')) {
goal = 'comparison';
} else if (query.includes('相关') || query.includes('关系') || query.includes('关联')) {
} else if (query.includes('相关') || query.includes('关系') || query.includes('关联')
|| query.includes('统计学意义') || query.includes('显著') || query.includes('检验')
|| query.includes('p值') || query.includes('有无差别')) {
goal = 'correlation';
} else if (query.includes('影响') || query.includes('因素') || query.includes('预测') || query.includes('回归')) {
goal = 'regression';

View File

@@ -395,6 +395,11 @@ export class WorkflowExecutorService extends EventEmitter {
});
// 调用 R 服务
logger.info('[SSA:Executor] Calling R service', {
step: step.stepOrder,
toolCode: step.toolCode,
inputParams: step.inputParams,
});
const response = await this.rClient.post(`/api/v1/skills/${step.toolCode}`, {
data_source: dataSource,
params: step.inputParams,
@@ -410,6 +415,16 @@ export class WorkflowExecutorService extends EventEmitter {
if (response.data.status === 'error' || response.data.status === 'blocked') {
const rMsg = response.data.message || '执行失败';
const classified = classifyRError(rMsg);
logger.warn('[SSA:Executor] R tool returned error', {
step: step.stepOrder,
toolCode: step.toolCode,
rMessage: rMsg,
rErrorCode: response.data.error_code,
rUserHint: response.data.user_hint,
classifiedCode: classified.code,
});
return {
stepOrder: step.stepOrder,
toolCode: step.toolCode,

View File

@@ -74,13 +74,47 @@ export interface PrunedProfile {
// 2. LLM 原始输出的 Zod Schema (静态版本)
// ────────────────────────────────────────────
/** Canonical variable types accepted by the intent schema. */
const VALID_VAR_TYPES = ['continuous', 'binary', 'categorical', 'ordinal', 'datetime'] as const;

/**
 * Lower-case synonyms mapped onto the canonical variable types.
 * Keys must already be lower-case and trimmed, because lookups are done on
 * the normalized form of the input.
 */
const VAR_TYPE_ALIAS: Record<string, VariableType> = {
  // numeric-like spellings
  numeric: 'continuous',
  integer: 'continuous',
  int: 'continuous',
  float: 'continuous',
  double: 'continuous',
  number: 'continuous',
  real: 'continuous',
  // category / text-like spellings
  factor: 'categorical',
  string: 'categorical',
  text: 'categorical',
  character: 'categorical',
  char: 'categorical',
  nominal: 'categorical',
  // two-level spellings
  boolean: 'binary',
  bool: 'binary',
  logical: 'binary',
  dichotomous: 'binary',
  // date / time spellings
  date: 'datetime',
  time: 'datetime',
  timestamp: 'datetime',
};

/**
 * Coerces an arbitrary value into a canonical variable type.
 *
 * The input is lower-cased and trimmed, then matched against the canonical
 * list first and the alias table second.
 *
 * @param val - Raw value to normalize; anything that is not a string is
 *   treated as unrecognized.
 * @returns The matching canonical type, or `'continuous'` when the value is
 *   not a string or matches neither a canonical name nor an alias.
 */
function normalizeVarType(val: unknown): VariableType {
  if (typeof val !== 'string') return 'continuous';

  const key = val.toLowerCase().trim();
  if ((VALID_VAR_TYPES as readonly string[]).includes(key)) return key as VariableType;

  // Only consult own keys: a bare `VAR_TYPE_ALIAS[key]` would also resolve
  // inherited members such as "constructor" or "toString" and hand back a
  // function instead of falling through to the default.
  const aliased = Object.prototype.hasOwnProperty.call(VAR_TYPE_ALIAS, key)
    ? VAR_TYPE_ALIAS[key]
    : undefined;
  return aliased ?? 'continuous';
}
// Runs the raw value through normalizeVarType before validating it against the canonical enum.
const varTypeSchema = z.preprocess(normalizeVarType, z.enum(VALID_VAR_TYPES));
/** LLM 直接输出的 JSON 结构 (Zod 校验用) */
export const LLMIntentOutputSchema = z.object({
goal: z.enum(['comparison', 'correlation', 'regression', 'descriptive', 'cohort_study']),
outcome_var: z.string().nullable().default(null),
outcome_type: z.enum(['continuous', 'binary', 'categorical', 'ordinal', 'datetime']).nullable().default(null),
outcome_type: varTypeSchema.nullable().default(null),
predictor_vars: z.array(z.string()).default([]),
predictor_types: z.array(z.enum(['continuous', 'binary', 'categorical', 'ordinal', 'datetime'])).default([]),
predictor_types: z.array(varTypeSchema).default([]),
grouping_var: z.string().nullable().default(null),
design: z.enum(['independent', 'paired', 'longitudinal', 'cross_sectional']).default('independent'),
confidence: z.number().min(0).max(1).default(0.5),