P1 Architecture: Lightweight ReAct (Function Calling loop, max 3 rounds) Core changes: - Add ToolDefinition/ToolCall types to LLM adapters (DeepSeek + CloseAI + Claude) - Replace 6 old tools with 4 semantic tools: read_report, look_up_data, check_quality, search_knowledge - Create ChatOrchestrator (~160 lines) replacing ChatService (1,442 lines) - Wire WechatCallbackController to ChatOrchestrator, deprecate ChatService - Fix nullable content (string | null) across 12+ LLM consumer files E2E test results: 8/8 scenarios passed (100%) - QC report query, critical issues, patient data, trend, on-demand QC - Knowledge base search, project overview, data modification refusal Net code reduction: ~1,100 lines Tested: E2E P1 chat test 8/8 passed with DeepSeek API Made-with: Cursor
342 lines
11 KiB
TypeScript
342 lines
11 KiB
TypeScript
/**
|
||
* SSA ReflectionService — Phase R 核心服务
|
||
*
|
||
* 职责:StepResult[] → LLM 论文级结论 → ConclusionReport
|
||
*
|
||
* 三层防御:
|
||
* 1. 统计量槽位注入(LLM 只生成叙述框架,数值从 R 输出渲染)
|
||
* 2. jsonrepair + Zod 强校验 LLM 输出结构
|
||
* 3. 降级到 ConclusionGeneratorService(规则拼接)
|
||
*
|
||
* 交付策略:完整 JSON 收集 + Zod 校验 → 一次性 SSE 推送(不做字符流)
|
||
*/
|
||
|
||
import { logger } from '../../../common/logging/index.js';
|
||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||
import { getPromptService } from '../../../common/prompt/index.js';
|
||
import { prisma } from '../../../config/database.js';
|
||
import { cache } from '../../../common/cache/index.js';
|
||
import { jsonrepair } from 'jsonrepair';
|
||
import type { Message } from '../../../common/llm/adapters/types.js';
|
||
import type { StepResult } from './WorkflowExecutorService.js';
|
||
import { conclusionGeneratorService } from './ConclusionGeneratorService.js';
|
||
import {
|
||
LLMConclusionSchema,
|
||
type ConclusionReport,
|
||
type StepFinding,
|
||
} from '../types/reflection.types.js';
|
||
|
||
const CACHE_TTL = 3600; // 1 hour
|
||
const LLM_MODEL = 'deepseek-v3';
|
||
const LLM_TEMPERATURE = 0.3;
|
||
const LLM_MAX_TOKENS = 4096;
|
||
|
||
interface PlannedTraceInput {
|
||
matchedRule?: string;
|
||
primaryTool?: string;
|
||
fallbackTool?: string | null;
|
||
switchCondition?: string | null;
|
||
reasoning?: string;
|
||
epvWarning?: string | null;
|
||
}
|
||
|
||
interface ReflectInput {
|
||
workflowId: string;
|
||
goal: string;
|
||
title?: string;
|
||
methodology?: string;
|
||
sampleInfo?: string;
|
||
plannedTrace?: PlannedTraceInput;
|
||
}
|
||
|
||
export class ReflectionService {
|
||
|
||
/**
|
||
* 生成论文级结论(主入口)
|
||
*
|
||
* 流程:缓存检查 → 提取 keyFindings → 组装 Prompt → LLM 调用 → Zod 校验 → fallback
|
||
*/
|
||
async reflect(
|
||
input: ReflectInput,
|
||
results: StepResult[],
|
||
): Promise<ConclusionReport> {
|
||
const { workflowId, goal } = input;
|
||
|
||
logger.info('[SSA:Reflection] Starting reflection', {
|
||
workflowId,
|
||
goal,
|
||
stepCount: results.length,
|
||
});
|
||
|
||
// 0. Cache hit check
|
||
const cacheKey = `ssa:conclusion:${workflowId}`;
|
||
try {
|
||
const cached = await cache.get<ConclusionReport>(cacheKey);
|
||
if (cached) {
|
||
logger.info('[SSA:Reflection] Cache hit', { workflowId });
|
||
return cached;
|
||
}
|
||
} catch {
|
||
// cache miss, continue
|
||
}
|
||
|
||
// 1. Extract key findings from step results (slot injection)
|
||
const findings = this.extractKeyFindings(results);
|
||
|
||
// 2. Build prompt via PromptService
|
||
const prompt = await this.buildPrompt(input, findings);
|
||
if (!prompt) {
|
||
logger.warn('[SSA:Reflection] Failed to build prompt, falling back to rule-based');
|
||
return this.fallback(workflowId, results, goal);
|
||
}
|
||
|
||
// 3. Call LLM (full collection, no streaming)
|
||
try {
|
||
const llm = LLMFactory.getAdapter(LLM_MODEL);
|
||
const messages: Message[] = [
|
||
{ role: 'system', content: 'You are a senior biostatistician. Output only valid JSON.' },
|
||
{ role: 'user', content: prompt },
|
||
];
|
||
|
||
logger.info('[SSA:Reflection] Calling LLM', { model: LLM_MODEL });
|
||
const response = await llm.chat(messages, {
|
||
temperature: LLM_TEMPERATURE,
|
||
maxTokens: LLM_MAX_TOKENS,
|
||
});
|
||
|
||
const rawOutput = response.content ?? '';
|
||
logger.info('[SSA:Reflection] LLM response received', {
|
||
contentLength: rawOutput.length,
|
||
usage: response.usage,
|
||
});
|
||
|
||
// 4. jsonrepair + Zod validation
|
||
const report = this.parseAndValidate(rawOutput, workflowId, input, findings, results);
|
||
|
||
// 5. Cache the result
|
||
try {
|
||
await cache.set(cacheKey, report, CACHE_TTL);
|
||
} catch (cacheErr) {
|
||
logger.warn('[SSA:Reflection] Cache set failed', { error: String(cacheErr) });
|
||
}
|
||
|
||
logger.info('[SSA:Reflection] LLM conclusion generated successfully', {
|
||
workflowId,
|
||
source: 'llm',
|
||
keyFindingsCount: report.key_findings.length,
|
||
});
|
||
|
||
return report;
|
||
|
||
} catch (error: any) {
|
||
logger.warn('[SSA:Reflection] LLM call failed, falling back to rule-based', {
|
||
workflowId,
|
||
error: error.message,
|
||
});
|
||
return this.fallback(workflowId, results, goal);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 从 StepResult[] 中提取关键统计量(槽位注入数据源)
|
||
*/
|
||
extractKeyFindings(results: StepResult[]): StepFinding[] {
|
||
const findings: StepFinding[] = [];
|
||
|
||
for (const r of results) {
|
||
if (r.status !== 'success' && r.status !== 'warning') continue;
|
||
|
||
const data = r.result || {};
|
||
const finding: StepFinding = {
|
||
step_number: r.stepOrder,
|
||
tool_name: r.toolName,
|
||
tool_code: r.toolCode,
|
||
method: data.method || r.toolName,
|
||
is_significant: data.p_value != null && data.p_value < 0.05,
|
||
raw_result: data,
|
||
};
|
||
|
||
// P value
|
||
if (data.p_value != null) {
|
||
finding.p_value_num = data.p_value;
|
||
finding.p_value = data.p_value_fmt || this.formatPValue(data.p_value);
|
||
}
|
||
|
||
// Statistic
|
||
if (data.statistic != null) {
|
||
finding.statistic = String(Number(data.statistic).toFixed(3));
|
||
finding.statistic_name = this.getStatisticName(r.toolCode);
|
||
}
|
||
if (data.statistic_U != null) {
|
||
finding.statistic = String(Number(data.statistic_U).toFixed(1));
|
||
finding.statistic_name = 'U';
|
||
}
|
||
|
||
// Effect size
|
||
if (data.effect_size?.cohens_d != null) {
|
||
finding.effect_size = String(Number(data.effect_size.cohens_d).toFixed(3));
|
||
finding.effect_size_name = "Cohen's d";
|
||
} else if (data.effect_size?.cramers_v != null) {
|
||
finding.effect_size = String(Number(data.effect_size.cramers_v).toFixed(3));
|
||
finding.effect_size_name = "Cramér's V";
|
||
} else if (data.effect_size?.r_squared != null) {
|
||
finding.effect_size = String(Number(data.effect_size.r_squared).toFixed(3));
|
||
finding.effect_size_name = 'R²';
|
||
}
|
||
|
||
// Confidence interval
|
||
if (data.conf_int && Array.isArray(data.conf_int) && data.conf_int.length >= 2) {
|
||
finding.ci_lower = String(Number(data.conf_int[0]).toFixed(3));
|
||
finding.ci_upper = String(Number(data.conf_int[1]).toFixed(3));
|
||
}
|
||
|
||
// Group stats
|
||
if (data.group_stats && Array.isArray(data.group_stats)) {
|
||
finding.group_stats = data.group_stats.map((g: any) => ({
|
||
group: g.group || g.level || 'unknown',
|
||
n: g.n || 0,
|
||
mean: g.mean != null ? Number(Number(g.mean).toFixed(2)) : undefined,
|
||
sd: g.sd != null ? Number(Number(g.sd).toFixed(2)) : undefined,
|
||
median: g.median != null ? Number(Number(g.median).toFixed(2)) : undefined,
|
||
}));
|
||
}
|
||
|
||
findings.push(finding);
|
||
}
|
||
|
||
return findings;
|
||
}
|
||
|
||
/**
|
||
* 构建 Prompt(通过 PromptService 从数据库加载模板)
|
||
*/
|
||
private async buildPrompt(
|
||
input: ReflectInput,
|
||
findings: StepFinding[],
|
||
): Promise<string | null> {
|
||
try {
|
||
const promptService = getPromptService(prisma);
|
||
const rendered = await promptService.get('SSA_REFLECTION', {
|
||
goal: input.goal,
|
||
title: input.title || `统计分析:${input.goal}`,
|
||
methodology: input.methodology || '系统自动选择',
|
||
sampleInfo: input.sampleInfo || '见各步骤详情',
|
||
decision_trace: {
|
||
matched_rule: input.plannedTrace?.matchedRule || '默认规则',
|
||
primary_tool: input.plannedTrace?.primaryTool || '',
|
||
fallback_tool: input.plannedTrace?.fallbackTool || null,
|
||
switch_condition: input.plannedTrace?.switchCondition || null,
|
||
reasoning: input.plannedTrace?.reasoning || '',
|
||
epv_warning: input.plannedTrace?.epvWarning || null,
|
||
},
|
||
findings: findings.map(f => ({
|
||
...f,
|
||
group_stats: f.group_stats || [],
|
||
})),
|
||
});
|
||
|
||
return rendered.content;
|
||
} catch (error: any) {
|
||
logger.error('[SSA:Reflection] Failed to build prompt', { error: error.message });
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 解析 LLM 输出 → jsonrepair → Zod 校验
|
||
*/
|
||
private parseAndValidate(
|
||
rawOutput: string,
|
||
workflowId: string,
|
||
input: ReflectInput,
|
||
findings: StepFinding[],
|
||
results: StepResult[],
|
||
): ConclusionReport {
|
||
// Strip markdown code fences if present
|
||
let cleaned = rawOutput.trim();
|
||
if (cleaned.startsWith('```')) {
|
||
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
|
||
}
|
||
|
||
// Layer 1: jsonrepair
|
||
const repaired = jsonrepair(cleaned);
|
||
|
||
// Layer 2: JSON.parse
|
||
const parsed = JSON.parse(repaired);
|
||
|
||
// Layer 3: Zod validation
|
||
const validated = LLMConclusionSchema.parse(parsed);
|
||
|
||
// Assemble full ConclusionReport
|
||
return {
|
||
workflow_id: workflowId,
|
||
title: input.title || `统计分析报告:${input.goal}`,
|
||
executive_summary: validated.executive_summary,
|
||
key_findings: validated.key_findings,
|
||
statistical_summary: validated.statistical_summary,
|
||
step_summaries: this.buildStepSummaries(findings),
|
||
recommendations: validated.recommendations || [],
|
||
limitations: validated.limitations,
|
||
generated_at: new Date().toISOString(),
|
||
source: 'llm',
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 降级到规则拼接
|
||
*/
|
||
private fallback(
|
||
workflowId: string,
|
||
results: StepResult[],
|
||
goal: string,
|
||
): ConclusionReport {
|
||
logger.info('[SSA:Reflection] Using rule-based fallback', { workflowId });
|
||
return conclusionGeneratorService.generateConclusion(results, goal, workflowId);
|
||
}
|
||
|
||
/**
|
||
* 从 findings 构建 step_summaries
|
||
*/
|
||
private buildStepSummaries(findings: StepFinding[]): ConclusionReport['step_summaries'] {
|
||
return findings.map(f => ({
|
||
step_number: f.step_number,
|
||
tool_name: f.tool_name,
|
||
summary: this.buildStepSummaryText(f),
|
||
p_value: f.p_value_num,
|
||
is_significant: f.is_significant,
|
||
}));
|
||
}
|
||
|
||
private buildStepSummaryText(f: StepFinding): string {
|
||
const parts: string[] = [];
|
||
if (f.statistic) parts.push(`${f.statistic_name || '统计量'} = ${f.statistic}`);
|
||
if (f.p_value) parts.push(`P ${f.p_value}`);
|
||
if (f.effect_size) parts.push(`${f.effect_size_name || '效应量'} = ${f.effect_size}`);
|
||
return parts.length > 0 ? parts.join(', ') : `${f.tool_name} 分析完成`;
|
||
}
|
||
|
||
private formatPValue(p: number): string {
|
||
if (p < 0.001) return '< 0.001';
|
||
if (p < 0.01) return `= ${p.toFixed(3)}`;
|
||
return `= ${p.toFixed(3)}`;
|
||
}
|
||
|
||
private getStatisticName(toolCode: string): string {
|
||
const map: Record<string, string> = {
|
||
'ST_T_TEST_IND': 't',
|
||
'ST_T_TEST_PAIRED': 't',
|
||
'ST_MANN_WHITNEY': 'U',
|
||
'ST_WILCOXON': 'W',
|
||
'ST_CHI_SQUARE': 'χ²',
|
||
'ST_FISHER': 'OR',
|
||
'ST_ANOVA_ONE': 'F',
|
||
'ST_CORRELATION': 'r',
|
||
'ST_LINEAR_REG': 'F',
|
||
'ST_LOGISTIC_BINARY': 'χ²',
|
||
};
|
||
return map[toolCode] || '统计量';
|
||
}
|
||
}
|
||
|
||
export const reflectionService = new ReflectionService();
|