Files
AIclinicalresearch/backend/src/modules/ssa/services/PicoInferenceService.ts
HaHafeng 3446909ff7 feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development
Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-22 18:53:39 +08:00

157 lines
4.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Phase I — PICO 推断服务
*
* 调用 LLM (SSA_PICO_INFERENCE prompt) 从数据概览推断 PICO 结构。
* 写入 SessionBlackboard.picoInference,标记为 ai_inferred。
*
* 安全措施:
* - Zod 校验 LLM 输出
* - jsonrepair 容错
* - H3: 观察性研究允许 intervention/comparison 为 null
*/
import { logger } from '../../../common/logging/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { jsonrepair } from 'jsonrepair';
import type { Message } from '../../../common/llm/adapters/types.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import {
PicoInferenceSchema,
type PicoInference,
type DataOverview,
type VariableDictEntry,
} from '../types/session-blackboard.types.js';
// Number of retries after the initial LLM attempt (total attempts = MAX_RETRIES + 1).
const MAX_RETRIES = 1;
export class PicoInferenceService {
/**
* 从 DataOverview 推断 PICO 结构并写入黑板。
*/
async inferFromOverview(
sessionId: string,
overview: DataOverview,
dictionary: VariableDictEntry[],
): Promise<PicoInference | null> {
try {
logger.info('[SSA:PICO] Starting inference', { sessionId });
const promptService = getPromptService(prisma);
const dataOverviewSummary = this.buildOverviewSummary(overview);
const variableList = this.buildVariableList(dictionary);
const rendered = await promptService.get('SSA_PICO_INFERENCE', {
dataOverviewSummary,
variableList,
});
const adapter = LLMFactory.getAdapter(
(rendered.modelConfig?.model as any) || 'deepseek-v3'
);
const messages: Message[] = [
{ role: 'system', content: rendered.content },
{ role: 'user', content: '请根据以上数据概览推断 PICO 结构。' },
];
let pico: PicoInference | null = null;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
const response = await adapter.chat(messages, {
temperature: rendered.modelConfig?.temperature ?? 0.3,
maxTokens: rendered.modelConfig?.maxTokens ?? 1024,
});
const raw = this.robustJsonParse(response.content);
const validated = PicoInferenceSchema.parse({
...raw,
status: 'ai_inferred',
});
pico = validated;
break;
} catch (err: any) {
logger.warn('[SSA:PICO] LLM attempt failed', {
attempt, error: err.message,
});
if (attempt === MAX_RETRIES) throw err;
}
}
if (pico) {
await sessionBlackboardService.confirmPico(sessionId, {
population: pico.population,
intervention: pico.intervention,
comparison: pico.comparison,
outcome: pico.outcome,
});
logger.info('[SSA:PICO] Inference complete', {
sessionId,
confidence: pico.confidence,
hasIntervention: pico.intervention !== null,
});
}
return pico;
} catch (error: any) {
logger.error('[SSA:PICO] Inference failed', {
sessionId, error: error.message,
});
return null;
}
}
private buildOverviewSummary(overview: DataOverview): string {
const s = overview.profile.summary;
const lines = [
`数据集: ${s.totalRows} 行, ${s.totalColumns}`,
`类型分布: 数值型 ${s.numericColumns}, 分类型 ${s.categoricalColumns}, 日期型 ${s.datetimeColumns}, 文本型 ${s.textColumns}`,
`整体缺失率: ${s.overallMissingRate}%`,
`完整病例数: ${overview.completeCaseCount}`,
];
const nonNormal = overview.normalityTests
?.filter(t => !t.isNormal)
.map(t => t.variable);
if (nonNormal && nonNormal.length > 0) {
lines.push(`非正态分布变量: ${nonNormal.join(', ')}`);
}
return lines.join('\n');
}
private buildVariableList(dict: VariableDictEntry[]): string {
return dict
.filter(v => !v.isIdLike)
.map(v => {
const type = v.confirmedType ?? v.inferredType;
const label = v.label ? ` (${v.label})` : '';
return `- ${v.name}: ${type}${label}`;
})
.join('\n');
}
private robustJsonParse(text: string): any {
let cleaned = text.trim();
const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) {
cleaned = fenceMatch[1].trim();
}
try {
return JSON.parse(cleaned);
} catch {
return JSON.parse(jsonrepair(cleaned));
}
}
}
// Module-level singleton shared by the SSA services.
export const picoInferenceService = new PicoInferenceService();