feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions

View File

@@ -0,0 +1,156 @@
/**
 * Phase I — PICO Inference Service
 *
 * Calls the LLM (SSA_PICO_INFERENCE prompt) to infer a PICO structure
 * from the data overview, then writes it to SessionBlackboard.picoInference,
 * marked as ai_inferred.
 *
 * Safety measures:
 * - Zod validation of the LLM output
 * - jsonrepair fault tolerance for malformed JSON
 * - H3: observational studies may have intervention/comparison set to null
 */
import { logger } from '../../../common/logging/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { jsonrepair } from 'jsonrepair';
import type { Message } from '../../../common/llm/adapters/types.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import {
PicoInferenceSchema,
type PicoInference,
type DataOverview,
type VariableDictEntry,
} from '../types/session-blackboard.types.js';
// Number of additional LLM attempts after the first failure (so 2 calls total).
const MAX_RETRIES = 1;
export class PicoInferenceService {
  /**
   * Infer a PICO structure from a DataOverview via the LLM and persist it
   * to the session blackboard.
   *
   * Safety measures:
   * - Zod validation of the LLM output (PicoInferenceSchema)
   * - jsonrepair fallback for malformed JSON
   * - intervention/comparison may be null (observational studies)
   *
   * @param sessionId  session whose blackboard receives the result
   * @param overview   data profile used to build the prompt context
   * @param dictionary variable dictionary (ID-like columns are skipped)
   * @returns the validated PicoInference, or null when inference fails;
   *          errors are logged and never thrown to the caller
   */
  async inferFromOverview(
    sessionId: string,
    overview: DataOverview,
    dictionary: VariableDictEntry[],
  ): Promise<PicoInference | null> {
    try {
      logger.info('[SSA:PICO] Starting inference', { sessionId });
      const promptService = getPromptService(prisma);
      const dataOverviewSummary = this.buildOverviewSummary(overview);
      const variableList = this.buildVariableList(dictionary);
      const rendered = await promptService.get('SSA_PICO_INFERENCE', {
        dataOverviewSummary,
        variableList,
      });
      // Model name comes from the prompt's model config; falls back to deepseek-v3.
      const adapter = LLMFactory.getAdapter(
        (rendered.modelConfig?.model as any) || 'deepseek-v3'
      );
      const messages: Message[] = [
        { role: 'system', content: rendered.content },
        { role: 'user', content: '请根据以上数据概览推断 PICO 结构。' },
      ];
      let pico: PicoInference | null = null;
      // One initial attempt plus MAX_RETRIES retries. The final failure is
      // rethrown so the outer catch logs it and returns null.
      for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
        try {
          const response = await adapter.chat(messages, {
            temperature: rendered.modelConfig?.temperature ?? 0.3,
            maxTokens: rendered.modelConfig?.maxTokens ?? 1024,
          });
          const raw = this.robustJsonParse(response.content);
          // Force status to 'ai_inferred' regardless of what the LLM returned.
          const validated = PicoInferenceSchema.parse({
            ...raw,
            status: 'ai_inferred',
          });
          pico = validated;
          break;
        } catch (err) {
          logger.warn('[SSA:PICO] LLM attempt failed', {
            attempt,
            error: err instanceof Error ? err.message : String(err),
          });
          if (attempt === MAX_RETRIES) throw err;
        }
      }
      if (pico) {
        // NOTE(review): confirmPico is used to persist an *ai_inferred*
        // result — verify it does not mark the PICO as user-confirmed.
        await sessionBlackboardService.confirmPico(sessionId, {
          population: pico.population,
          intervention: pico.intervention,
          comparison: pico.comparison,
          outcome: pico.outcome,
        });
        logger.info('[SSA:PICO] Inference complete', {
          sessionId,
          confidence: pico.confidence,
          hasIntervention: pico.intervention !== null,
        });
      }
      return pico;
    } catch (error) {
      logger.error('[SSA:PICO] Inference failed', {
        sessionId,
        error: error instanceof Error ? error.message : String(error),
      });
      return null;
    }
  }

  /**
   * Render a human-readable dataset summary for the prompt.
   * Fix: the rows/columns line was missing the "列" (columns) label,
   * producing e.g. "数据集: 100 行, 12" instead of "…, 12 列".
   */
  private buildOverviewSummary(overview: DataOverview): string {
    const s = overview.profile.summary;
    const lines = [
      `数据集: ${s.totalRows} 行, ${s.totalColumns} 列`,
      `类型分布: 数值型 ${s.numericColumns}, 分类型 ${s.categoricalColumns}, 日期型 ${s.datetimeColumns}, 文本型 ${s.textColumns}`,
      `整体缺失率: ${s.overallMissingRate}%`,
      `完整病例数: ${overview.completeCaseCount}`,
    ];
    // List non-normally distributed variables when normality tests exist.
    const nonNormal = overview.normalityTests
      ?.filter(t => !t.isNormal)
      .map(t => t.variable);
    if (nonNormal && nonNormal.length > 0) {
      lines.push(`非正态分布变量: ${nonNormal.join(', ')}`);
    }
    return lines.join('\n');
  }

  /**
   * Render the variable dictionary as a bullet list for the prompt,
   * skipping ID-like columns. A user-confirmed type takes precedence
   * over the inferred one.
   */
  private buildVariableList(dict: VariableDictEntry[]): string {
    return dict
      .filter(v => !v.isIdLike)
      .map(v => {
        const type = v.confirmedType ?? v.inferredType;
        const label = v.label ? ` (${v.label})` : '';
        return `- ${v.name}: ${type}${label}`;
      })
      .join('\n');
  }

  /**
   * Parse possibly-messy LLM output as JSON:
   * 1. strip an optional ```json fenced block,
   * 2. try strict JSON.parse,
   * 3. fall back to jsonrepair for relaxed/truncated JSON.
   * @throws when even jsonrepair cannot recover valid JSON
   */
  private robustJsonParse(text: string): any {
    let cleaned = text.trim();
    const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (fenceMatch) {
      cleaned = fenceMatch[1].trim();
    }
    try {
      return JSON.parse(cleaned);
    } catch {
      return JSON.parse(jsonrepair(cleaned));
    }
  }
}
// Module-level singleton instance of the service.
export const picoInferenceService = new PicoInferenceService();