feat(asl): Complete Deep Research V2.0 core development

Backend: - Add SSE streaming client (unifuncsSseClient) replacing async polling - Add paragraph-based reasoning parser with mergeConsecutiveThinking - Add requirement expansion service (DeepSeek-V3 PICOS+MeSH) - Add Word export service with Pandoc, inline hyperlinks, reference link expansion - Add deep research V2 worker with 2s log flush and Chinese source prompt - Add 5 curated data sources config (PubMed/ClinicalTrials/Cochrane/CNKI/MedJournals) - Add 4 API endpoints (generate-requirement/tasks/task-status/export-word) - Update Prisma schema with 6 new V2.0 fields on AslResearchTask - Add DB migration for V2.0 fields - Simplify ASL_DEEP_RESEARCH_EXPANSION prompt (remove strategy section) Frontend: - Add waterfall-flow DeepResearchPage (phase 0-4 progressive reveal) - Add LandingView, SetupPanel, StrategyConfirm, AgentTerminal, ResultsView - Add react-markdown + remark-gfm for report rendering - Add custom link component showing visible URLs after references - Add useDeepResearchTask polling hook - Add deep research TypeScript types Tests: - Add E2E test, smoke test, and Chinese data source test scripts Docs: - Update ASL module status (v2.0 - core features complete) - Update system status (v6.1 - ASL V2.0 milestone) - Update Unifuncs DeepSearch API guide (v2.0 - SSE mode + Chinese source results) - Update module auth specification (test script guidelines) - Update V2.0 development plan Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-23 13:21:52 +08:00
parent b06daecacd
commit 8f06d4f929
39 changed files with 5605 additions and 417 deletions
--- a/backend/prisma/migrations/20260223_add_deep_research_v2_fields/migration.sql
+++ b/backend/prisma/migrations/20260223_add_deep_research_v2_fields/migration.sql
@@ -0,0 +1,10 @@
+-- Deep Research V2.0: add six columns to asl_schema.research_tasks.
+-- Backward compatible: every column is nullable with no default; IF NOT EXISTS skips columns that already exist.
+
+ALTER TABLE "asl_schema"."research_tasks"
+ADD COLUMN IF NOT EXISTS "target_sources" JSONB, -- selected data-source URLs (JSON array)
+ADD COLUMN IF NOT EXISTS "confirmed_requirement" TEXT, -- user-verified natural-language search brief
+ADD COLUMN IF NOT EXISTS "ai_intent_summary" JSONB, -- PICOS + MeSH structured summary
+ADD COLUMN IF NOT EXISTS "execution_logs" JSONB, -- terminal log entries [{type, title, text, ts}]
+ADD COLUMN IF NOT EXISTS "synthesis_report" TEXT, -- AI synthesis report (Markdown)
+ADD COLUMN IF NOT EXISTS "result_list" JSONB; -- structured literature metadata list
--- a/backend/prisma/migrations/manual/20260220_add_ssa_workflow_tables.sql
+++ b/backend/prisma/migrations/manual/20260220_add_ssa_workflow_tables.sql
@@ -1,100 +0,0 @@
-- =====================================================
-- Phase 2A: SSA 智能化核心 - 数据库迁移脚本
-- 日期: 2026-02-20
-- 描述: 添加工作流表和数据画像字段
-- 注意: ssa_sessions.id 是 TEXT 类型（存储 UUID 字符串）
-- =====================================================
-
-- 1. 给 ssa_sessions 表添加 data_profile 字段（如果不存在）
-ALTER TABLE ssa_schema.ssa_sessions 
-ADD COLUMN IF NOT EXISTS data_profile JSONB;
-
-COMMENT ON COLUMN ssa_schema.ssa_sessions.data_profile IS 'Python Tool C 生成的数据画像 (Phase 2A)';
-
-- 2. 创建 ssa_workflows 表（多步骤分析流程）
-CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflows (
-    id              TEXT PRIMARY KEY DEFAULT gen_random_uuid()::TEXT,
-    session_id      TEXT NOT NULL,
-    message_id      TEXT,
-    status          VARCHAR(20) NOT NULL DEFAULT 'pending',
-    total_steps     INTEGER NOT NULL,
-    completed_steps INTEGER NOT NULL DEFAULT 0,
-    workflow_plan   JSONB NOT NULL,
-    reasoning       TEXT,
-    created_at      TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT NOW(),
-    started_at      TIMESTAMP WITHOUT TIME ZONE,
-    completed_at    TIMESTAMP WITHOUT TIME ZONE,
-    
-    CONSTRAINT fk_ssa_workflow_session 
-        FOREIGN KEY (session_id) 
-        REFERENCES ssa_schema.ssa_sessions(id) 
-        ON DELETE CASCADE
-);
-
-- ssa_workflows 索引
-CREATE INDEX IF NOT EXISTS idx_ssa_workflow_session 
-    ON ssa_schema.ssa_workflows(session_id);
-CREATE INDEX IF NOT EXISTS idx_ssa_workflow_status 
-    ON ssa_schema.ssa_workflows(status);
-
-- ssa_workflows 字段注释
-COMMENT ON TABLE ssa_schema.ssa_workflows IS 'SSA 多步骤分析工作流 (Phase 2A)';
-COMMENT ON COLUMN ssa_schema.ssa_workflows.status IS 'pending | running | completed | partial | error';
-COMMENT ON COLUMN ssa_schema.ssa_workflows.workflow_plan IS 'LLM 生成的原始工作流计划 JSON';
-COMMENT ON COLUMN ssa_schema.ssa_workflows.reasoning IS 'LLM 规划理由说明';
-
-- 3. 创建 ssa_workflow_steps 表（流程中的每个步骤）
-CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflow_steps (
-    id               TEXT PRIMARY KEY DEFAULT gen_random_uuid()::TEXT,
-    workflow_id      TEXT NOT NULL,
-    step_order       INTEGER NOT NULL,
-    tool_code        VARCHAR(50) NOT NULL,
-    tool_name        VARCHAR(100) NOT NULL,
-    status           VARCHAR(20) NOT NULL DEFAULT 'pending',
-    input_params     JSONB,
-    guardrail_checks JSONB,
-    output_result    JSONB,
-    error_info       JSONB,
-    execution_ms     INTEGER,
-    started_at       TIMESTAMP WITHOUT TIME ZONE,
-    completed_at     TIMESTAMP WITHOUT TIME ZONE,
-    
-    CONSTRAINT fk_ssa_workflow_step_workflow 
-        FOREIGN KEY (workflow_id) 
-        REFERENCES ssa_schema.ssa_workflows(id) 
-        ON DELETE CASCADE
-);
-
-- ssa_workflow_steps 索引
-CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_workflow 
-    ON ssa_schema.ssa_workflow_steps(workflow_id);
-CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_status 
-    ON ssa_schema.ssa_workflow_steps(status);
-
-- ssa_workflow_steps 字段注释
-COMMENT ON TABLE ssa_schema.ssa_workflow_steps IS 'SSA 工作流单步执行记录 (Phase 2A)';
-COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.status IS 'pending | running | success | warning | error | skipped';
-COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.guardrail_checks IS 'R Service JIT 护栏检验结果 (正态性、方差齐性等)';
-COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.output_result IS '工具执行结果 (已裁剪，符合 LLM 上下文限制)';
-COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.error_info IS '错误信息 (用于容错管道的部分成功场景)';
-
-- =====================================================
-- 验证脚本
-- =====================================================
-SELECT 'ssa_sessions.data_profile 字段' as item, 
-       CASE WHEN EXISTS (
-           SELECT 1 FROM information_schema.columns 
-           WHERE table_schema = 'ssa_schema' AND table_name = 'ssa_sessions' AND column_name = 'data_profile'
-       ) THEN '✅ 已创建' ELSE '❌ 未创建' END as status;
-
-SELECT 'ssa_workflows 表' as item,
-       CASE WHEN EXISTS (
-           SELECT 1 FROM information_schema.tables 
-           WHERE table_schema = 'ssa_schema' AND table_name = 'ssa_workflows'
-       ) THEN '✅ 已创建' ELSE '❌ 未创建' END as status;
-
-SELECT 'ssa_workflow_steps 表' as item,
-       CASE WHEN EXISTS (
-           SELECT 1 FROM information_schema.tables 
-           WHERE table_schema = 'ssa_schema' AND table_name = 'ssa_workflow_steps'
-       ) THEN '✅ 已创建' ELSE '❌ 未创建' END as status;
--- a/backend/prisma/schema.prisma
+++ b/backend/prisma/schema.prisma
@@ -477,7 +477,7 @@ model AslFulltextScreeningTask {
  @@schema("asl_schema")
 }

-/// 智能文献检索任务（DeepSearch）
+/// 智能文献检索任务（DeepSearch V1.x + V2.0）
 model AslResearchTask {
  id               String   @id @default(uuid())
  
@@ -486,23 +486,23 @@ model AslResearchTask {
  userId           String   @map("user_id")
  
  // 检索输入
-  query            String                           // 用户的自然语言查询
-  filters          Json?                            // 🔜 后续：高级筛选 { yearFrom, yearTo, articleTypes }
+  query            String                           // 用户的自然语言查询（V1.x 原始输入 / V2.0 Step 1 粗略想法）
+  filters          Json?                            // 高级筛选 { yearRange, targetCount, requireOpenAccess }
  
  // unifuncs 任务
  externalTaskId   String?  @map("external_task_id")
  
-  // 状态
-  status           String   @default("pending")     // pending/processing/completed/failed
+  // 状态: draft → pending → running → completed / failed
+  status           String   @default("pending")
  errorMessage     String?  @map("error_message")
  
-  // 结果
+  // V1.x 结果字段（保留向后兼容）
  resultCount      Int?     @map("result_count")
  rawResult        String?  @map("raw_result") @db.Text
-  reasoningContent String?  @map("reasoning_content") @db.Text  // AI思考过程
-  literatures      Json?                            // 解析后的文献列表
+  reasoningContent String?  @map("reasoning_content") @db.Text
+  literatures      Json?
  
-  // 统计（🔜 后续展示）
+  // 统计
  tokenUsage       Json?    @map("token_usage")
  searchCount      Int?     @map("search_count")
  readCount        Int?     @map("read_count")
@@ -513,6 +513,15 @@ model AslResearchTask {
  updatedAt        DateTime @updatedAt @map("updated_at")
  completedAt      DateTime? @map("completed_at")

+  // ── V2.0 新增字段 ──────────────────────────────
+  targetSources         Json?    @map("target_sources")          // 选中的数据源 ["https://pubmed.ncbi.nlm.nih.gov/", ...]
+  confirmedRequirement  String?  @map("confirmed_requirement") @db.Text  // 用户核验后的自然语言检索指令书
+  aiIntentSummary       Json?    @map("ai_intent_summary")       // PICOS + MeSH 结构化摘要
+  executionLogs         Json?    @map("execution_logs")          // 终端日志数组 [{type, title, text, ts}]
+  synthesisReport       String?  @map("synthesis_report") @db.Text // AI综合报告（Markdown）
+  resultList            Json?    @map("result_list")             // 结构化文献元数据列表
+
+  // ── 索引 ────────────────────────────
  @@index([projectId], map: "idx_research_tasks_project_id")
  @@index([userId], map: "idx_research_tasks_user_id")
  @@index([status], map: "idx_research_tasks_status")