feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers

Implement the full QPER intelligent analysis pipeline: - Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement - Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging - Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output - Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification End-to-end test: 40/40 passed across two complete analysis scenarios. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 18:15:53 +08:00
parent 428a22adf2
commit 371e1c069c
73 changed files with 9242 additions and 706 deletions
--- a/backend/src/modules/ssa/services/ReflectionService.ts
+++ b/backend/src/modules/ssa/services/ReflectionService.ts
@@ -0,0 +1,341 @@
+/**
+ * SSA ReflectionService — Phase R 核心服务
+ *
+ * 职责：StepResult[] → LLM 论文级结论 → ConclusionReport
+ *
+ * 三层防御：
+ *   1. 统计量槽位注入（LLM 只生成叙述框架，数值从 R 输出渲染）
+ *   2. jsonrepair + Zod 强校验 LLM 输出结构
+ *   3. 降级到 ConclusionGeneratorService（规则拼接）
+ *
+ * 交付策略：完整 JSON 收集 + Zod 校验 → 一次性 SSE 推送（不做字符流）
+ */
+
+import { logger } from '../../../common/logging/index.js';
+import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
+import { getPromptService } from '../../../common/prompt/index.js';
+import { prisma } from '../../../config/database.js';
+import { cache } from '../../../common/cache/index.js';
+import { jsonrepair } from 'jsonrepair';
+import type { Message } from '../../../common/llm/adapters/types.js';
+import type { StepResult } from './WorkflowExecutorService.js';
+import { conclusionGeneratorService } from './ConclusionGeneratorService.js';
+import {
+  LLMConclusionSchema,
+  type ConclusionReport,
+  type StepFinding,
+} from '../types/reflection.types.js';
+
+const CACHE_TTL = 3600; // 1 hour
+const LLM_MODEL = 'deepseek-v3';
+const LLM_TEMPERATURE = 0.3;
+const LLM_MAX_TOKENS = 4096;
+
+interface PlannedTraceInput {
+  matchedRule?: string;
+  primaryTool?: string;
+  fallbackTool?: string | null;
+  switchCondition?: string | null;
+  reasoning?: string;
+  epvWarning?: string | null;
+}
+
+interface ReflectInput {
+  workflowId: string;
+  goal: string;
+  title?: string;
+  methodology?: string;
+  sampleInfo?: string;
+  plannedTrace?: PlannedTraceInput;
+}
+
+export class ReflectionService {
+
+  /**
+   * 生成论文级结论（主入口）
+   *
+   * 流程：缓存检查 → 提取 keyFindings → 组装 Prompt → LLM 调用 → Zod 校验 → fallback
+   */
+  async reflect(
+    input: ReflectInput,
+    results: StepResult[],
+  ): Promise<ConclusionReport> {
+    const { workflowId, goal } = input;
+
+    logger.info('[SSA:Reflection] Starting reflection', {
+      workflowId,
+      goal,
+      stepCount: results.length,
+    });
+
+    // 0. Cache hit check
+    const cacheKey = `ssa:conclusion:${workflowId}`;
+    try {
+      const cached = await cache.get<ConclusionReport>(cacheKey);
+      if (cached) {
+        logger.info('[SSA:Reflection] Cache hit', { workflowId });
+        return cached;
+      }
+    } catch {
+      // cache miss, continue
+    }
+
+    // 1. Extract key findings from step results (slot injection)
+    const findings = this.extractKeyFindings(results);
+
+    // 2. Build prompt via PromptService
+    const prompt = await this.buildPrompt(input, findings);
+    if (!prompt) {
+      logger.warn('[SSA:Reflection] Failed to build prompt, falling back to rule-based');
+      return this.fallback(workflowId, results, goal);
+    }
+
+    // 3. Call LLM (full collection, no streaming)
+    try {
+      const llm = LLMFactory.getAdapter(LLM_MODEL);
+      const messages: Message[] = [
+        { role: 'system', content: 'You are a senior biostatistician. Output only valid JSON.' },
+        { role: 'user', content: prompt },
+      ];
+
+      logger.info('[SSA:Reflection] Calling LLM', { model: LLM_MODEL });
+      const response = await llm.chat(messages, {
+        temperature: LLM_TEMPERATURE,
+        maxTokens: LLM_MAX_TOKENS,
+      });
+
+      const rawOutput = response.content;
+      logger.info('[SSA:Reflection] LLM response received', {
+        contentLength: rawOutput.length,
+        usage: response.usage,
+      });
+
+      // 4. jsonrepair + Zod validation
+      const report = this.parseAndValidate(rawOutput, workflowId, input, findings, results);
+
+      // 5. Cache the result
+      try {
+        await cache.set(cacheKey, report, CACHE_TTL);
+      } catch (cacheErr) {
+        logger.warn('[SSA:Reflection] Cache set failed', { error: String(cacheErr) });
+      }
+
+      logger.info('[SSA:Reflection] LLM conclusion generated successfully', {
+        workflowId,
+        source: 'llm',
+        keyFindingsCount: report.key_findings.length,
+      });
+
+      return report;
+
+    } catch (error: any) {
+      logger.warn('[SSA:Reflection] LLM call failed, falling back to rule-based', {
+        workflowId,
+        error: error.message,
+      });
+      return this.fallback(workflowId, results, goal);
+    }
+  }
+
+  /**
+   * 从 StepResult[] 中提取关键统计量（槽位注入数据源）
+   */
+  extractKeyFindings(results: StepResult[]): StepFinding[] {
+    const findings: StepFinding[] = [];
+
+    for (const r of results) {
+      if (r.status !== 'success' && r.status !== 'warning') continue;
+
+      const data = r.result || {};
+      const finding: StepFinding = {
+        step_number: r.stepOrder,
+        tool_name: r.toolName,
+        tool_code: r.toolCode,
+        method: data.method || r.toolName,
+        is_significant: data.p_value != null && data.p_value < 0.05,
+        raw_result: data,
+      };
+
+      // P value
+      if (data.p_value != null) {
+        finding.p_value_num = data.p_value;
+        finding.p_value = data.p_value_fmt || this.formatPValue(data.p_value);
+      }
+
+      // Statistic
+      if (data.statistic != null) {
+        finding.statistic = String(Number(data.statistic).toFixed(3));
+        finding.statistic_name = this.getStatisticName(r.toolCode);
+      }
+      if (data.statistic_U != null) {
+        finding.statistic = String(Number(data.statistic_U).toFixed(1));
+        finding.statistic_name = 'U';
+      }
+
+      // Effect size
+      if (data.effect_size?.cohens_d != null) {
+        finding.effect_size = String(Number(data.effect_size.cohens_d).toFixed(3));
+        finding.effect_size_name = "Cohen's d";
+      } else if (data.effect_size?.cramers_v != null) {
+        finding.effect_size = String(Number(data.effect_size.cramers_v).toFixed(3));
+        finding.effect_size_name = "Cramér's V";
+      } else if (data.effect_size?.r_squared != null) {
+        finding.effect_size = String(Number(data.effect_size.r_squared).toFixed(3));
+        finding.effect_size_name = 'R²';
+      }
+
+      // Confidence interval
+      if (data.conf_int && Array.isArray(data.conf_int) && data.conf_int.length >= 2) {
+        finding.ci_lower = String(Number(data.conf_int[0]).toFixed(3));
+        finding.ci_upper = String(Number(data.conf_int[1]).toFixed(3));
+      }
+
+      // Group stats
+      if (data.group_stats && Array.isArray(data.group_stats)) {
+        finding.group_stats = data.group_stats.map((g: any) => ({
+          group: g.group || g.level || 'unknown',
+          n: g.n || 0,
+          mean: g.mean != null ? Number(Number(g.mean).toFixed(2)) : undefined,
+          sd: g.sd != null ? Number(Number(g.sd).toFixed(2)) : undefined,
+          median: g.median != null ? Number(Number(g.median).toFixed(2)) : undefined,
+        }));
+      }
+
+      findings.push(finding);
+    }
+
+    return findings;
+  }
+
+  /**
+   * 构建 Prompt（通过 PromptService 从数据库加载模板）
+   */
+  private async buildPrompt(
+    input: ReflectInput,
+    findings: StepFinding[],
+  ): Promise<string | null> {
+    try {
+      const promptService = getPromptService(prisma);
+      const rendered = await promptService.get('SSA_REFLECTION', {
+        goal: input.goal,
+        title: input.title || `统计分析：${input.goal}`,
+        methodology: input.methodology || '系统自动选择',
+        sampleInfo: input.sampleInfo || '见各步骤详情',
+        decision_trace: {
+          matched_rule: input.plannedTrace?.matchedRule || '默认规则',
+          primary_tool: input.plannedTrace?.primaryTool || '',
+          fallback_tool: input.plannedTrace?.fallbackTool || null,
+          switch_condition: input.plannedTrace?.switchCondition || null,
+          reasoning: input.plannedTrace?.reasoning || '',
+          epv_warning: input.plannedTrace?.epvWarning || null,
+        },
+        findings: findings.map(f => ({
+          ...f,
+          group_stats: f.group_stats || [],
+        })),
+      });
+
+      return rendered.content;
+    } catch (error: any) {
+      logger.error('[SSA:Reflection] Failed to build prompt', { error: error.message });
+      return null;
+    }
+  }
+
+  /**
+   * 解析 LLM 输出 → jsonrepair → Zod 校验
+   */
+  private parseAndValidate(
+    rawOutput: string,
+    workflowId: string,
+    input: ReflectInput,
+    findings: StepFinding[],
+    results: StepResult[],
+  ): ConclusionReport {
+    // Strip markdown code fences if present
+    let cleaned = rawOutput.trim();
+    if (cleaned.startsWith('```')) {
+      cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
+    }
+
+    // Layer 1: jsonrepair
+    const repaired = jsonrepair(cleaned);
+
+    // Layer 2: JSON.parse
+    const parsed = JSON.parse(repaired);
+
+    // Layer 3: Zod validation
+    const validated = LLMConclusionSchema.parse(parsed);
+
+    // Assemble full ConclusionReport
+    return {
+      workflow_id: workflowId,
+      title: input.title || `统计分析报告：${input.goal}`,
+      executive_summary: validated.executive_summary,
+      key_findings: validated.key_findings,
+      statistical_summary: validated.statistical_summary,
+      step_summaries: this.buildStepSummaries(findings),
+      recommendations: validated.recommendations || [],
+      limitations: validated.limitations,
+      generated_at: new Date().toISOString(),
+      source: 'llm',
+    };
+  }
+
+  /**
+   * 降级到规则拼接
+   */
+  private fallback(
+    workflowId: string,
+    results: StepResult[],
+    goal: string,
+  ): ConclusionReport {
+    logger.info('[SSA:Reflection] Using rule-based fallback', { workflowId });
+    return conclusionGeneratorService.generateConclusion(results, goal, workflowId);
+  }
+
+  /**
+   * 从 findings 构建 step_summaries
+   */
+  private buildStepSummaries(findings: StepFinding[]): ConclusionReport['step_summaries'] {
+    return findings.map(f => ({
+      step_number: f.step_number,
+      tool_name: f.tool_name,
+      summary: this.buildStepSummaryText(f),
+      p_value: f.p_value_num,
+      is_significant: f.is_significant,
+    }));
+  }
+
+  private buildStepSummaryText(f: StepFinding): string {
+    const parts: string[] = [];
+    if (f.statistic) parts.push(`${f.statistic_name || '统计量'} = ${f.statistic}`);
+    if (f.p_value) parts.push(`P ${f.p_value}`);
+    if (f.effect_size) parts.push(`${f.effect_size_name || '效应量'} = ${f.effect_size}`);
+    return parts.length > 0 ? parts.join(', ') : `${f.tool_name} 分析完成`;
+  }
+
+  private formatPValue(p: number): string {
+    if (p < 0.001) return '< 0.001';
+    if (p < 0.01) return `= ${p.toFixed(3)}`;
+    return `= ${p.toFixed(3)}`;
+  }
+
+  private getStatisticName(toolCode: string): string {
+    const map: Record<string, string> = {
+      'ST_T_TEST_IND': 't',
+      'ST_T_TEST_PAIRED': 't',
+      'ST_MANN_WHITNEY': 'U',
+      'ST_WILCOXON': 'W',
+      'ST_CHI_SQUARE': 'χ²',
+      'ST_FISHER': 'OR',
+      'ST_ANOVA_ONE': 'F',
+      'ST_CORRELATION': 'r',
+      'ST_LINEAR_REG': 'F',
+      'ST_LOGISTIC_BINARY': 'χ²',
+    };
+    return map[toolCode] || '统计量';
+  }
+}
+
+export const reflectionService = new ReflectionService();