feat(asl): Complete Deep Research V2.0 core development

Backend:
- Add SSE streaming client (unifuncsSseClient) replacing async polling
- Add paragraph-based reasoning parser with mergeConsecutiveThinking
- Add requirement expansion service (DeepSeek-V3 PICOS+MeSH)
- Add Word export service with Pandoc, inline hyperlinks, reference link expansion
- Add deep research V2 worker with 2s log flush and Chinese source prompt
- Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals)
- Add 4 API endpoints (generate-requirement/tasks/task-status/export-word)
- Update Prisma schema with 6 new V2.0 fields on AslResearchTask
- Add DB migration for V2.0 fields
- Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section)

Frontend:
- Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal)
- Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView
- Add react-markdown + remark-gfm for report rendering
- Add custom link component showing visible URLs after references
- Add useDeepResearchTask polling hook
- Add deep research TypeScript types

Tests:
- Add E2E test, smoke test, and Chinese data source test scripts

Docs:
- Update ASL module status (v2.0 - core features complete)
- Update system status (v6.1 - ASL V2.0 milestone)
- Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results)
- Update module auth specification (test script guidelines)
- Update V2.0 development plan

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-23 13:21:52 +08:00
parent b06daecacd
commit 8f06d4f929
39 changed files with 5605 additions and 417 deletions

View File

@@ -0,0 +1,10 @@
-- Deep Research V2.0 migration: extend asl_schema.research_tasks with six columns.
-- Backward compatible: no column carries NOT NULL or a DEFAULT, so rows that
-- already exist simply read NULL for the new fields.
-- IF NOT EXISTS on every ADD COLUMN lets the statement be re-run without
-- failing on columns that are already present.
ALTER TABLE "asl_schema"."research_tasks"
    -- Data sources selected for the task (JSON array of source URLs).
    ADD COLUMN IF NOT EXISTS "target_sources"        JSONB,
    -- Natural-language retrieval instruction after the user has reviewed it.
    ADD COLUMN IF NOT EXISTS "confirmed_requirement" TEXT,
    -- Structured intent summary (PICOS + MeSH).
    ADD COLUMN IF NOT EXISTS "ai_intent_summary"     JSONB,
    -- Terminal log entries, stored as a JSON array.
    ADD COLUMN IF NOT EXISTS "execution_logs"        JSONB,
    -- AI-written synthesis report (Markdown text).
    ADD COLUMN IF NOT EXISTS "synthesis_report"      TEXT,
    -- Structured list of literature metadata.
    ADD COLUMN IF NOT EXISTS "result_list"           JSONB;

View File

@@ -1,100 +0,0 @@
-- =====================================================
-- Phase 2A: SSA intelligence core - database migration script
-- Date: 2026-02-20
-- Description: add the workflow tables and the data-profile column
-- Note: ssa_sessions.id is of type TEXT (it stores UUID strings)
-- =====================================================

-- 1. Add the data_profile column to ssa_sessions (skipped when it already exists)
ALTER TABLE ssa_schema.ssa_sessions
    ADD COLUMN IF NOT EXISTS data_profile JSONB;

-- Catalog description for the new column; the text is stored verbatim.
COMMENT ON COLUMN ssa_schema.ssa_sessions.data_profile
    IS 'Python Tool C 生成的数据画像 (Phase 2A)';
-- 2. Create the ssa_workflows table (one row per multi-step analysis workflow)
CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflows (
    -- Identity: the key is a UUID rendered as TEXT.
    id              TEXT PRIMARY KEY DEFAULT gen_random_uuid()::TEXT,

    -- Ownership: session_id is TEXT so it can reference ssa_sessions.id.
    session_id      TEXT NOT NULL,
    message_id      TEXT,

    -- Progress tracking.
    status          VARCHAR(20) NOT NULL DEFAULT 'pending',
    total_steps     INTEGER NOT NULL,
    completed_steps INTEGER NOT NULL DEFAULT 0,

    -- Planner output.
    workflow_plan   JSONB NOT NULL,
    reasoning       TEXT,

    -- Lifecycle timestamps (stored without time zone).
    created_at      TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT NOW(),
    started_at      TIMESTAMP WITHOUT TIME ZONE,
    completed_at    TIMESTAMP WITHOUT TIME ZONE,

    -- Deleting a session also deletes every workflow that belongs to it.
    CONSTRAINT fk_ssa_workflow_session
        FOREIGN KEY (session_id)
        REFERENCES ssa_schema.ssa_sessions(id)
        ON DELETE CASCADE
);

-- ssa_workflows indexes: lookup by owning session and by status
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_session
    ON ssa_schema.ssa_workflows(session_id);

CREATE INDEX IF NOT EXISTS idx_ssa_workflow_status
    ON ssa_schema.ssa_workflows(status);

-- ssa_workflows table and column descriptions (literal text is stored as-is)
COMMENT ON TABLE ssa_schema.ssa_workflows
    IS 'SSA 多步骤分析工作流 (Phase 2A)';
COMMENT ON COLUMN ssa_schema.ssa_workflows.status
    IS 'pending | running | completed | partial | error';
COMMENT ON COLUMN ssa_schema.ssa_workflows.workflow_plan
    IS 'LLM 生成的原始工作流计划 JSON';
COMMENT ON COLUMN ssa_schema.ssa_workflows.reasoning
    IS 'LLM 规划理由说明';
-- 3. Create the ssa_workflow_steps table (one row per step inside a workflow)
CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflow_steps (
    -- Identity: the key is a UUID rendered as TEXT.
    id               TEXT PRIMARY KEY DEFAULT gen_random_uuid()::TEXT,

    -- Position of the step within its parent workflow.
    workflow_id      TEXT NOT NULL,
    step_order       INTEGER NOT NULL,

    -- Tool invoked by this step.
    tool_code        VARCHAR(50) NOT NULL,
    tool_name        VARCHAR(100) NOT NULL,

    -- Execution state and payloads.
    status           VARCHAR(20) NOT NULL DEFAULT 'pending',
    input_params     JSONB,
    guardrail_checks JSONB,
    output_result    JSONB,
    error_info       JSONB,

    -- Timing; execution_ms is presumably the run duration in milliseconds.
    execution_ms     INTEGER,
    started_at       TIMESTAMP WITHOUT TIME ZONE,
    completed_at     TIMESTAMP WITHOUT TIME ZONE,

    -- Deleting a workflow also deletes all of its steps.
    CONSTRAINT fk_ssa_workflow_step_workflow
        FOREIGN KEY (workflow_id)
        REFERENCES ssa_schema.ssa_workflows(id)
        ON DELETE CASCADE
);

-- ssa_workflow_steps indexes: lookup by parent workflow and by status
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_workflow
    ON ssa_schema.ssa_workflow_steps(workflow_id);

CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_status
    ON ssa_schema.ssa_workflow_steps(status);

-- ssa_workflow_steps table and column descriptions (literal text is stored as-is)
COMMENT ON TABLE ssa_schema.ssa_workflow_steps
    IS 'SSA 工作流单步执行记录 (Phase 2A)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.status
    IS 'pending | running | success | warning | error | skipped';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.guardrail_checks
    IS 'R Service JIT 护栏检验结果 (正态性、方差齐性等)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.output_result
    IS '工具执行结果 (已裁剪,符合 LLM 上下文限制)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.error_info
    IS '错误信息 (用于容错管道的部分成功场景)';
-- =====================================================
-- Verification script
-- Each query returns one row: a label (item) and a status string that
-- reports whether the object is present in the catalog.
-- =====================================================

-- Check: column ssa_sessions.data_profile exists
SELECT
    'ssa_sessions.data_profile 字段' AS item,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM information_schema.columns
            WHERE table_schema = 'ssa_schema'
              AND table_name = 'ssa_sessions'
              AND column_name = 'data_profile'
        ) THEN '✅ 已创建'
        ELSE '❌ 未创建'
    END AS status;

-- Check: table ssa_workflows exists
SELECT
    'ssa_workflows 表' AS item,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM information_schema.tables
            WHERE table_schema = 'ssa_schema'
              AND table_name = 'ssa_workflows'
        ) THEN '✅ 已创建'
        ELSE '❌ 未创建'
    END AS status;

-- Check: table ssa_workflow_steps exists
SELECT
    'ssa_workflow_steps 表' AS item,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM information_schema.tables
            WHERE table_schema = 'ssa_schema'
              AND table_name = 'ssa_workflow_steps'
        ) THEN '✅ 已创建'
        ELSE '❌ 未创建'
    END AS status;

View File

@@ -477,7 +477,7 @@ model AslFulltextScreeningTask {
@@schema("asl_schema")
}
/// 智能文献检索任务DeepSearch
/// 智能文献检索任务DeepSearch V1.x + V2.0
model AslResearchTask {
id String @id @default(uuid())
@@ -486,23 +486,23 @@ model AslResearchTask {
userId String @map("user_id")
// 检索输入
query String // 用户的自然语言查询
filters Json? // 🔜 后续:高级筛选 { yearFrom, yearTo, articleTypes }
query String // 用户的自然语言查询V1.x 原始输入 / V2.0 Step 1 粗略想法)
filters Json? // 高级筛选 { yearRange, targetCount, requireOpenAccess }
// unifuncs 任务
externalTaskId String? @map("external_task_id")
// 状态
status String @default("pending") // pending/processing/completed/failed
// 状态: draft → pending → running → completed / failed
status String @default("pending")
errorMessage String? @map("error_message")
// 结果
// V1.x 结果字段(保留向后兼容)
resultCount Int? @map("result_count")
rawResult String? @map("raw_result") @db.Text
reasoningContent String? @map("reasoning_content") @db.Text // AI思考过程
literatures Json? // 解析后的文献列表
reasoningContent String? @map("reasoning_content") @db.Text
literatures Json?
// 统计(🔜 后续展示)
// 统计
tokenUsage Json? @map("token_usage")
searchCount Int? @map("search_count")
readCount Int? @map("read_count")
@@ -513,6 +513,15 @@ model AslResearchTask {
updatedAt DateTime @updatedAt @map("updated_at")
completedAt DateTime? @map("completed_at")
// ── V2.0 新增字段 ──────────────────────────────
targetSources Json? @map("target_sources") // 选中的数据源 ["https://pubmed.ncbi.nlm.nih.gov/", ...]
confirmedRequirement String? @map("confirmed_requirement") @db.Text // 用户核验后的自然语言检索指令书
aiIntentSummary Json? @map("ai_intent_summary") // PICOS + MeSH 结构化摘要
executionLogs Json? @map("execution_logs") // 终端日志数组 [{type, title, text, ts}]
synthesisReport String? @map("synthesis_report") @db.Text // AI综合报告Markdown
resultList Json? @map("result_list") // 结构化文献元数据列表
// ── 索引 ────────────────────────────
@@index([projectId], map: "idx_research_tasks_project_id")
@@index([userId], map: "idx_research_tasks_user_id")
@@index([status], map: "idx_research_tasks_status")