Bug fixes: - Fix garbled error messages in chat (TypeWriter rendering issue) - Fix R engine NA crash in descriptive.R (defensive isTRUE/is.na checks) - Fix intent misclassification for statistical significance queries - Fix step 2 results not displayed (accept warning status alongside success) - Fix incomplete R code download (only step 1 included) - Fix multi-task state confusion (clicking old card shows new results) - Add R engine and backend parameter logging for debugging Refactor - Unified Record Architecture: - Replace 12 global singleton fields with AnalysisRecord as single source of truth - Remove isWorkflowMode branching across all components - One Analysis = One Record = N Steps paradigm - selectRecord only sets currentRecordId, all rendering derives from currentRecord - Fix cross-hook-instance issue: executeWorkflow fallback to store currentRecordId Updated files: ssaStore, useWorkflow, useAnalysis, SSAChatPane, SSAWorkspacePane, SSACodeModal, WorkflowTimeline, QueryService, WorkflowExecutorService, descriptive.R Tested: Manual integration test passed - multi-task switching, R code completeness Co-authored-by: Cursor <cursoragent@cursor.com>
181 lines
7.9 KiB
TypeScript
181 lines
7.9 KiB
TypeScript
/**
 * SSA Intent Prompt seed script
 *
 * Writes the SSA_QUERY_INTENT prompt into capability_schema.prompt_templates
 * Run: npx tsx scripts/seed-ssa-intent-prompt.ts
 */
|
||
|
||
import { PrismaClient, PromptStatus } from '@prisma/client';
|
||
|
||
// Prisma client shared by the whole script: main() issues its queries through
// it, and the entry-point promise chain at the bottom of the file disconnects it.
const prisma = new PrismaClient();
|
||
|
||
const SSA_INTENT_PROMPT = `你是一个临床统计分析意图理解引擎。你的任务是根据用户的自然语言描述和数据画像,解析出结构化的分析意图。
|
||
|
||
## 输入信息
|
||
|
||
### 用户请求
|
||
{{userQuery}}
|
||
|
||
### 数据画像
|
||
{{dataProfile}}
|
||
|
||
### 可用统计工具
|
||
{{availableTools}}
|
||
|
||
## 你的任务
|
||
|
||
请分析用户的请求,输出一个 JSON 对象(不要输出任何其他内容,只输出 JSON):
|
||
|
||
\`\`\`json
|
||
{
|
||
"goal": "comparison | correlation | regression | descriptive | cohort_study",
|
||
"outcome_var": "结局变量名(Y),必须是数据画像中存在的列名,如果无法确定则为 null",
|
||
"outcome_type": "continuous | binary | categorical | ordinal | datetime | null",
|
||
"predictor_vars": ["自变量名列表(X),必须是数据画像中存在的列名"],
|
||
"predictor_types": ["对应每个自变量的类型"],
|
||
"grouping_var": "分组变量名,必须是数据画像中存在的列名,如果无法确定则为 null",
|
||
"design": "independent | paired | longitudinal | cross_sectional",
|
||
"confidence": 0.0到1.0之间的数字,
|
||
"reasoning": "你的推理过程,用1-2句话说明为什么这样解析"
|
||
}
|
||
\`\`\`
|
||
|
||
## 关键规则
|
||
|
||
1. **变量名必须精确匹配数据画像中的列名**,不要翻译、缩写或改写。如果数据里是 "Blood_Pressure",你就输出 "Blood_Pressure",不要输出 "BP"。
|
||
2. 如果用户没有明确指出变量,请根据数据画像中的变量类型合理推断,但 confidence 应相应降低。
|
||
3. goal 为 "descriptive" 时,不需要 outcome_var 和 predictor_vars。
|
||
|
||
## Confidence 评分准则(严格按此打分)
|
||
|
||
- **0.9 - 1.0**: 用户的原话中明确指定了结局变量(Y)和至少一个自变量(X),且这些变量在数据画像中存在。
|
||
- **0.7 - 0.8**: 用户指出了 Y 变量,但 X 需要根据数据类型推断;或用户的意图清晰但有轻微歧义。
|
||
- **0.5 - 0.6**: 用户意图大致清楚(如"帮我比较一下"),但没有具体指出任何变量名。
|
||
- **< 0.5**: 用户只说了"帮我分析一下"这样的模糊表达,既没有明确 Y 也没有明确 X,必须追问。
|
||
|
||
## Few-Shot 示例
|
||
|
||
### 示例 1:明确的差异比较
|
||
用户: "帮我比较 Treatment 组和 Control 组的 SBP 有没有差异"
|
||
数据画像中有: Group [categorical, 2个水平: Treatment/Control], SBP [numeric]
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"comparison","outcome_var":"SBP","outcome_type":"continuous","predictor_vars":["Group"],"predictor_types":["binary"],"grouping_var":"Group","design":"independent","confidence":0.95,"reasoning":"用户明确指定了分组变量Group和结局变量SBP,要求比较两组差异"}
|
||
\`\`\`
|
||
|
||
### 示例 2:相关分析
|
||
用户: "年龄和血压有关系吗?"
|
||
数据画像中有: Age [numeric], Blood_Pressure [numeric], Gender [categorical]
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"correlation","outcome_var":"Blood_Pressure","outcome_type":"continuous","predictor_vars":["Age"],"predictor_types":["continuous"],"grouping_var":null,"design":"independent","confidence":0.85,"reasoning":"用户想了解Age和Blood_Pressure的关系,两者都是连续变量,适合相关分析"}
|
||
\`\`\`
|
||
|
||
### 示例 3:多因素回归
|
||
用户: "什么因素影响患者的死亡率?"
|
||
数据画像中有: Death [categorical, 2个水平: 0/1], Age [numeric], BMI [numeric], Smoking [categorical, 2个水平: Yes/No], Stage [categorical, 4个水平]
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"regression","outcome_var":"Death","outcome_type":"binary","predictor_vars":["Age","BMI","Smoking","Stage"],"predictor_types":["continuous","continuous","binary","categorical"],"grouping_var":null,"design":"independent","confidence":0.8,"reasoning":"用户想分析影响死亡率的因素,Death是二分类结局,其余变量作为预测因素纳入logistic回归"}
|
||
\`\`\`
|
||
|
||
### 示例 4:统计学意义/检验
|
||
用户: "Yqol和bmi是否有统计学意义?"
|
||
数据画像中有: Yqol [numeric], bmi [numeric], sex [categorical], age [numeric]
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"correlation","outcome_var":"Yqol","outcome_type":"continuous","predictor_vars":["bmi"],"predictor_types":["continuous"],"grouping_var":null,"design":"independent","confidence":0.85,"reasoning":"用户想了解Yqol和bmi之间是否存在统计学显著关系,两者都是连续变量,适合相关分析或回归分析"}
|
||
\`\`\`
|
||
|
||
### 示例 5:模糊表达 — 需要追问
|
||
用户: "帮我分析一下这份数据"
|
||
数据画像中有: 10个变量
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"descriptive","outcome_var":null,"outcome_type":null,"predictor_vars":[],"predictor_types":[],"grouping_var":null,"design":"independent","confidence":0.35,"reasoning":"用户没有指定任何分析目标和变量,只能先做描述性统计,建议追问具体分析目的"}
|
||
\`\`\`
|
||
|
||
### 示例 6:队列研究
|
||
用户: "我想做一个完整的队列研究分析,看看新药对预后的影响"
|
||
数据画像中有: Drug [categorical, 2个水平], Outcome [categorical, 2个水平: 0/1], Age [numeric], Gender [categorical], BMI [numeric], Comorbidity [categorical]
|
||
输出:
|
||
\`\`\`json
|
||
{"goal":"cohort_study","outcome_var":"Outcome","outcome_type":"binary","predictor_vars":["Drug","Age","Gender","BMI","Comorbidity"],"predictor_types":["binary","continuous","binary","continuous","categorical"],"grouping_var":"Drug","design":"independent","confidence":0.85,"reasoning":"用户明确要做队列研究分析,Drug是暴露因素/分组变量,Outcome是结局,其余为协变量"}
|
||
\`\`\`
|
||
|
||
请只输出 JSON,不要输出其他内容。`;
|
||
|
||
/**
 * Seeds the SSA_QUERY_INTENT prompt into the prompt tables.
 *
 * - Template already present: looks up the highest existing version number,
 *   then archives every ACTIVE version and inserts the next version as ACTIVE.
 *   Both writes are submitted through one `prisma.$transaction([...])` so a
 *   failed insert cannot leave the template without any ACTIVE version.
 * - Template absent: creates the template row, then version 1 as ACTIVE.
 *
 * Relies on the module-level `prisma` client and `SSA_INTENT_PROMPT` string.
 *
 * @returns {Promise<void>} Resolves once all writes have completed.
 * @throws Propagates any error raised by the Prisma calls.
 */
async function main() {
  console.log('🚀 开始写入 SSA Intent Prompt...\n');

  // Single definition so both branches store the same model settings.
  const modelConfig = { model: 'deepseek-v3', temperature: 0.3, maxTokens: 2048 };

  const existing = await prisma.prompt_templates.findUnique({
    where: { code: 'SSA_QUERY_INTENT' },
  });

  if (existing) {
    console.log('⚠️ SSA_QUERY_INTENT 已存在 (id=%d),创建新版本...', existing.id);

    const latestVersion = await prisma.prompt_versions.findFirst({
      where: { template_id: existing.id },
      orderBy: { version: 'desc' },
    });

    // No version rows yet -> start at 1.
    const newVersion = (latestVersion?.version ?? 0) + 1;

    // Archive the old ACTIVE versions and insert the new one atomically:
    // the two operations are handed to $transaction un-awaited, so either
    // both are applied or neither is.
    await prisma.$transaction([
      prisma.prompt_versions.updateMany({
        where: { template_id: existing.id, status: 'ACTIVE' },
        data: { status: 'ARCHIVED' },
      }),
      prisma.prompt_versions.create({
        data: {
          template_id: existing.id,
          version: newVersion,
          content: SSA_INTENT_PROMPT,
          model_config: modelConfig,
          status: 'ACTIVE',
          changelog: 'Phase Q v1.1: 6 组 Few-Shot (增加统计学意义示例) + Confidence Rubric 客观化',
          created_by: 'system-seed',
        },
      }),
    ]);

    console.log(' ✅ 新版本 v%d 已创建并设为 ACTIVE', newVersion);
  } else {
    console.log('📝 创建 SSA_QUERY_INTENT 模板...');

    const template = await prisma.prompt_templates.create({
      data: {
        code: 'SSA_QUERY_INTENT',
        name: 'SSA 意图理解 Prompt',
        module: 'SSA',
        description: 'Phase Q — 将用户自然语言转化为结构化的统计分析意图 (ParsedQuery)',
        variables: ['userQuery', 'dataProfile', 'availableTools'],
      },
    });

    // If this insert fails the template row stays behind; re-running the
    // script then takes the "existing" branch above, which creates version 1.
    await prisma.prompt_versions.create({
      data: {
        template_id: template.id,
        version: 1,
        content: SSA_INTENT_PROMPT,
        model_config: modelConfig,
        status: 'ACTIVE',
        // NOTE(review): this changelog says 5 Few-Shot groups while the other
        // branch says 6 — confirm which one matches SSA_INTENT_PROMPT.
        changelog: 'Phase Q v1.0: 初始版本,5 组 Few-Shot + Confidence Rubric',
        created_by: 'system-seed',
      },
    });

    console.log(' ✅ 模板 id=%d + 版本 v1 已创建', template.id);
  }

  console.log('\n✅ SSA Intent Prompt 写入完成!');
}
|
||
|
||
// Script entry point: run the seed, signal failure through the exit code,
// and always close the Prisma connection afterwards.
main()
  .catch((e) => {
    console.error('❌ 写入失败:', e);
    // Set exitCode instead of calling process.exit(1): exit() terminates the
    // process immediately, which would skip the $disconnect() in finally().
    process.exitCode = 1;
  })
  .finally(() => prisma.$disconnect());
|