feat(asl): Add Deep Research V2.0 development plan and Unifuncs API site coverage testing
Completed: - Unifuncs DeepSearch API site coverage test (18 medical sites, 9 tier-1 available) - ClinicalTrials.gov dedicated test (4 strategies, English query + depth>=10 works best) - Deep Research V2.0 development plan (5-day phased delivery) - DeepResearch engine capability guide (docs/02-common-capability/) - Test scripts: test-unifuncs-site-coverage.ts, test-unifuncs-clinicaltrials.ts Key findings: - Tier-1 sites: PubMed(28), ClinicalTrials(38), NCBI(18), Scholar(10), Cochrane(4), CNKI(7), SinoMed(9), GeenMedical(5), VIP(1) - Paid databases (WoS/Embase/Scopus/Ovid) cannot be accessed (no credential support) - ClinicalTrials.gov requires English queries with max_depth>=10 Updated: ASL module status doc, system status doc, common capability list Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
215
backend/scripts/test-unifuncs-clinicaltrials.ts
Normal file
215
backend/scripts/test-unifuncs-clinicaltrials.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
/**
|
||||
* Unifuncs API — ClinicalTrials.gov 专项测试
|
||||
*
|
||||
* ClinicalTrials.gov 在上一轮测试中超时,本脚本用多种策略针对性测试
|
||||
*
|
||||
* 运行方式:
|
||||
* cd backend
|
||||
* npx tsx scripts/test-unifuncs-clinicaltrials.ts
|
||||
*/
|
||||
|
||||
/**
 * Unifuncs credential, taken from the `UNIFUNCS_API_KEY` environment variable so
 * that no secret is committed with this script. Export the variable (or load
 * `backend/.env`) before running.
 */
const API_KEY = process.env.UNIFUNCS_API_KEY ?? '';
if (!API_KEY) {
  console.warn('⚠️ UNIFUNCS_API_KEY is not set; requests will be sent without a valid credential.');
}

/** Root of the DeepSearch v1 endpoints (`create_task`, `query_task`). */
const BASE_URL = 'https://api.unifuncs.com/deepsearch/v1';

/** Delay between two `query_task` polls, in milliseconds. */
const POLL_INTERVAL = 10000;

/** Per-task wait budget in milliseconds: 15 minutes (the previous run timed out at 10 minutes). */
const MAX_WAIT = 900000;
|
||||
|
||||
/** One query configuration submitted to DeepSearch and reported on separately. */
interface TestStrategy {
  /** Short label printed in the log header for this strategy. */
  id: string;
  /** Human-readable description printed next to the label. */
  name: string;
  /** Text sent as the single user message of the task. */
  query: string;
  /** Sent as `domain_scope`; an empty array means the field is omitted from the request. */
  domainScope: string[];
  /** Sent as `max_depth`. */
  maxDepth: number;
  /** Sent as `introduction`. */
  introduction: string;
}
|
||||
|
||||
/** The four query variants compared against ClinicalTrials.gov, all run by `main`. */
const STRATEGIES: TestStrategy[] = [
  // A — English query, scope restricted to clinicaltrials.gov.
  {
    id: 'A',
    name: '英文查询 + 限定 clinicaltrials.gov',
    query: 'Statin therapy for cardiovascular disease prevention, randomized controlled trials, recent 5 years',
    domainScope: ['https://clinicaltrials.gov/'],
    maxDepth: 10,
    introduction: 'You are a clinical research expert. Search ClinicalTrials.gov for relevant clinical trials. Return trial NCT numbers, titles, status, and links.',
  },
  // B — English query, no domain restriction.
  {
    id: 'B',
    name: '英文查询 + 不限域名(观察是否自然覆盖 clinicaltrials.gov)',
    query: 'Find clinical trials on statins for cardiovascular disease prevention registered on ClinicalTrials.gov, list NCT numbers and trial details',
    domainScope: [],
    maxDepth: 10,
    introduction: 'You are a clinical research expert. Focus on finding registered clinical trials from ClinicalTrials.gov. Return NCT numbers, trial titles, status, and direct links to clinicaltrials.gov.',
  },
  // C — Chinese query, restricted scope, higher depth.
  {
    id: 'C',
    name: '中文查询 + 限定 clinicaltrials.gov + 高深度',
    query: '他汀类药物预防心血管疾病的临床试验,请在 ClinicalTrials.gov 上查找注册的 RCT,返回 NCT 编号和试验详情',
    domainScope: ['https://clinicaltrials.gov/'],
    maxDepth: 15,
    introduction: '你是临床试验检索专家。请在 ClinicalTrials.gov 上搜索相关临床试验,返回 NCT 编号、试验标题、状态、链接。',
  },
  // D — short English query, restricted scope, low depth.
  {
    id: 'D',
    name: '简短英文查询 + 限定 clinicaltrials.gov + 低深度',
    query: 'statin cardiovascular RCT',
    domainScope: ['https://clinicaltrials.gov/'],
    maxDepth: 5,
    introduction: 'Search ClinicalTrials.gov for statin cardiovascular trials. Return NCT IDs and links.',
  },
];
|
||||
|
||||
/**
 * Submits one strategy to the DeepSearch `create_task` endpoint.
 *
 * Never rejects: transport failures, non-JSON bodies and API-level errors are all
 * folded into the `{ error }` variant so the caller can branch on the result.
 *
 * @param strategy Search configuration; `domain_scope` is only sent when
 *   `strategy.domainScope` is non-empty.
 * @returns `{ taskId }` when the body has `code === 0` and a task id, otherwise `{ error }`.
 */
async function createTask(strategy: TestStrategy): Promise<{ taskId: string } | { error: string }> {
  const payload: Record<string, unknown> = {
    model: 's2',
    messages: [{ role: 'user', content: strategy.query }],
    introduction: strategy.introduction,
    max_depth: strategy.maxDepth,
    reference_style: 'link',
    generate_summary: true,
  };
  if (strategy.domainScope.length > 0) {
    payload.domain_scope = strategy.domainScope;
  }

  try {
    const res = await fetch(`${BASE_URL}/create_task`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });

    // Read the body as text first so that a non-JSON error page is reported with
    // its HTTP status instead of surfacing as an opaque JSON parse error.
    const raw = await res.text();
    const httpPrefix = res.ok ? '' : `HTTP ${res.status} `;

    let body: { code?: number; message?: string; data?: { task_id?: string } } | null;
    try {
      body = JSON.parse(raw);
    } catch {
      return { error: `API error: ${httpPrefix}${raw.slice(0, 200)}` };
    }

    if (body?.code === 0 && body.data?.task_id) {
      return { taskId: body.data.task_id };
    }
    return { error: `API error: ${httpPrefix}${body?.message || JSON.stringify(body)}` };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    return { error: `Request failed: ${reason}` };
  }
}
|
||||
|
||||
/**
 * Fetches the current state of a task from the `query_task` endpoint.
 *
 * @param taskId Identifier returned by `create_task`.
 * @returns The parsed JSON body, untyped. Rejects if the request or JSON parsing fails.
 */
async function queryTask(taskId: string): Promise<any> {
  const search = new URLSearchParams({ task_id: taskId }).toString();
  const endpoint = `${BASE_URL}/query_task?${search}`;
  const response = await fetch(endpoint, {
    headers: { 'Authorization': `Bearer ${API_KEY}` },
  });
  return response.json();
}
|
||||
|
||||
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Pulls URLs and NCT identifiers out of free text.
 *
 * @param content Text to scan (report body or reasoning trace).
 * @returns De-duplicated lists, in first-seen order:
 *   - `ctLinks`: URLs whose host is `clinicaltrials.gov` or one of its subdomains;
 *   - `nctIds`: NCT identifiers, upper-cased so case variants collapse into one entry;
 *   - `otherLinks`: every other URL.
 */
function extractLinks(content: string): { ctLinks: string[]; nctIds: string[]; otherLinks: string[] } {
  // Full-width brackets and punctuation also end a URL: in Chinese prose they are
  // written directly after a link with no separating whitespace.
  const urlPattern = /https?:\/\/[^\s)\]"'>),。;、】》]+/gi;

  // Sentence punctuation glued to the end of a link is not part of the URL; without
  // trimming it, "…/NCT1" and "…/NCT1." would be counted as two different links.
  const rawUrls = content.match(urlPattern) ?? [];
  const allUrls = [...new Set(rawUrls.map((u) => u.replace(/[.,;:!?]+$/, '')))];

  // Classify by parsed hostname: a substring test would also accept URLs that only
  // mention the domain in their path or query string.
  const isClinicalTrialsHost = (url: string): boolean => {
    try {
      const host = new URL(url).hostname.toLowerCase();
      return host === 'clinicaltrials.gov' || host.endsWith('.clinicaltrials.gov');
    } catch {
      return false;
    }
  };

  const ctLinks = allUrls.filter((u) => isClinicalTrialsHost(u));
  const otherLinks = allUrls.filter((u) => !isClinicalTrialsHost(u));

  const nctPattern = /NCT\d{6,}/gi;
  const nctIds = [...new Set((content.match(nctPattern) ?? []).map((id) => id.toUpperCase()))];

  return { ctLinks, nctIds, otherLinks };
}
|
||||
|
||||
/**
 * Runs one strategy end to end: creates the task, polls it every `POLL_INTERVAL`
 * until it completes, fails or `MAX_WAIT` elapses, and prints a summary.
 *
 * Polling is best-effort: a failed poll or a response without `data` does not
 * abort the run, but it is logged (once per distinct message) so that a bad
 * credential or an outage is visible instead of looking like a silent hang.
 *
 * @param strategy Strategy to execute.
 */
async function testStrategy(strategy: TestStrategy): Promise<void> {
  const startTime = Date.now();
  const elapsedSec = (): string => ((Date.now() - startTime) / 1000).toFixed(0);
  const rule = '━'.repeat(80);

  console.log(`\n${rule}`);
  console.log(`策略 ${strategy.id}: ${strategy.name}`);
  console.log(`查询: "${strategy.query}"`);
  console.log(`domain_scope: ${strategy.domainScope.length > 0 ? strategy.domainScope.join(', ') : '不限'}`);
  console.log(`max_depth: ${strategy.maxDepth}`);
  console.log(rule);

  const createResult = await createTask(strategy);
  if ('error' in createResult) {
    console.log(` ❌ 创建失败: ${createResult.error}`);
    return;
  }

  console.log(` → task_id: ${createResult.taskId}`);

  const deadline = Date.now() + MAX_WAIT;
  let lastProgress = '';

  // Prints a line only when it differs from the previously printed one, so an
  // unchanged status (or a repeating error) does not flood the log.
  const report = (line: string): void => {
    if (line === lastProgress) return;
    console.log(` [${elapsedSec()}s] ${line}`);
    lastProgress = line;
  };

  while (Date.now() < deadline) {
    await sleep(POLL_INTERVAL);

    try {
      const json = await queryTask(createResult.taskId);
      const data = json?.data;
      if (!data) {
        report(`⚠️ 响应缺少 data: ${json?.message || JSON.stringify(json)}`);
        continue;
      }

      const progress = data.progress ? `${data.progress.current}/${data.progress.total}` : '?';
      const stats = data.statistics || {};
      report(`${data.status} (${progress}) 搜索${stats.search_count || 0} 阅读${stats.read_count || 0} 迭代${stats.iterations || 0}`);

      if (data.status === 'completed') {
        const content = data.result?.content || '';
        const reasoning = data.result?.reasoning_content || '';
        const elapsed = elapsedSec();

        const { ctLinks, nctIds, otherLinks } = extractLinks(content);
        const { ctLinks: rCtLinks, nctIds: rNctIds } = extractLinks(reasoning);

        console.log(`\n ✅ 完成 (${elapsed}s)`);
        console.log(` ├─ 内容长度: ${content.length} 字符`);
        console.log(` ├─ 推理长度: ${reasoning.length} 字符`);
        console.log(` ├─ 搜索/阅读: ${stats.search_count || 0}/${stats.read_count || 0}`);
        console.log(` ├─ ClinicalTrials 链接 (content): ${ctLinks.length} 个`);
        console.log(` ├─ ClinicalTrials 链接 (reasoning): ${rCtLinks.length} 个`);
        console.log(` ├─ NCT 编号 (content): ${nctIds.length} 个 → ${nctIds.slice(0, 10).join(', ')}${nctIds.length > 10 ? '...' : ''}`);
        console.log(` ├─ NCT 编号 (reasoning): ${rNctIds.length} 个`);
        console.log(` ├─ 其他链接: ${otherLinks.length} 个`);
        console.log(` └─ Token: ${JSON.stringify(stats.token_usage || {})}`);

        if (ctLinks.length > 0) {
          console.log(`\n 📎 ClinicalTrials.gov 链接示例:`);
          ctLinks.slice(0, 8).forEach((link, i) => console.log(` ${i + 1}. ${link}`));
        }

        // Nothing recognisable was found: show the start of the report for manual inspection.
        if (ctLinks.length === 0 && nctIds.length === 0) {
          console.log(`\n ⚠️ 内容预览 (前 500 字):`);
          console.log(` ${content.slice(0, 500).replace(/\n/g, '\n ')}`);
        }

        return;
      }

      if (data.status === 'failed') {
        console.log(` ❌ 失败: ${data.result?.content || '未知错误'}`);
        return;
      }
    } catch (err: unknown) {
      // Keep retrying until the deadline, but surface the reason.
      const reason = err instanceof Error ? err.message : String(err);
      report(`⚠️ 轮询出错,继续重试: ${reason}`);
    }
  }

  console.log(` ⏰ 超时 (${MAX_WAIT / 1000}s)`);
}
|
||||
|
||||
/** Prints the banner, starts every entry of `STRATEGIES` concurrently and waits for all of them. */
async function main() {
  const banner = [
    '╔════════════════════════════════════════════════════════════╗',
    '║ ClinicalTrials.gov 专项测试 — 4 种策略并行 ║',
    '╚════════════════════════════════════════════════════════════╝',
    `超时: ${MAX_WAIT / 1000}s | 轮询: ${POLL_INTERVAL / 1000}s`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Every strategy is started before any of them is awaited, so they run in parallel.
  const runs: Promise<void>[] = [];
  for (const strategy of STRATEGIES) {
    runs.push(testStrategy(strategy));
  }
  await Promise.all(runs);

  const rule = '═'.repeat(80);
  console.log(`\n${rule}`);
  console.log('所有策略测试完成!');
  console.log(`${rule}`);
}
|
||||
|
||||
// Script entry point: any rejection escaping main() is reported and turned into exit code 1.
main().catch((failure) => {
  console.error('脚本执行失败:', failure);
  process.exit(1);
});
|
||||
374
backend/scripts/test-unifuncs-site-coverage.ts
Normal file
374
backend/scripts/test-unifuncs-site-coverage.ts
Normal file
@@ -0,0 +1,374 @@
|
||||
/**
|
||||
* Unifuncs API 网站覆盖能力测试脚本
|
||||
*
|
||||
* 测试 unifuncs DeepSearch 对中国医生常用医学期刊网站的搜索能力
|
||||
* 使用异步模式(create_task + query_task)并行测试所有站点
|
||||
*
|
||||
* 运行方式:
|
||||
* cd backend
|
||||
* npx tsx scripts/test-unifuncs-site-coverage.ts
|
||||
*/
|
||||
|
||||
// ========== 配置 ==========
|
||||
/**
 * Unifuncs credential, taken from the `UNIFUNCS_API_KEY` environment variable so
 * that no secret is committed with this script. Export the variable (or load
 * `backend/.env`) before running.
 */
const API_KEY = process.env.UNIFUNCS_API_KEY ?? '';
if (!API_KEY) {
  console.warn('⚠️ UNIFUNCS_API_KEY is not set; requests will be sent without a valid credential.');
}

/** Root of the DeepSearch v1 endpoints (`create_task`, `query_task`). */
const BASE_URL = 'https://api.unifuncs.com/deepsearch/v1';

/** `max_depth` sent with every task; kept low for the test so runs finish faster. */
const MAX_DEPTH = 5;

/** Delay between two `query_task` polls: 10 seconds, in milliseconds. */
const POLL_INTERVAL = 10000;

/** Longest wait for a single task: 10 minutes, in milliseconds. */
const MAX_WAIT = 600000;

/** The single query sent to every site under test. */
const QUERY = '他汀类药物预防心血管疾病的随机对照试验和Meta分析,近5年高质量研究';
|
||||
|
||||
// ========== 测试站点列表 ==========
|
||||
/** One website whose DeepSearch coverage is measured. */
interface TestSite {
  /** Sequence number shown in log lines and in the report table. */
  id: number;
  /** Display name of the site. */
  name: string;
  /** Site root; sent as the only `domain_scope` entry and used to recognise on-site links. */
  url: string;
  /** Report group: `'top7'` sites are listed in the first table, `'other'` in the second. */
  priority: 'top7' | 'other';
  /** Language category copied into the JSON config printed at the end of the report. */
  category: 'english' | 'chinese';
}
|
||||
|
||||
/**
 * Sites under test, in report order. Ids are the 1-based position in the list:
 * 1–7 are the "top 7" priority sites, 8–18 the other commonly used ones.
 */
const TEST_SITES: TestSite[] = (() => {
  type Row = [TestSite['priority'], TestSite['category'], string, string];
  const rows: Row[] = [
    // ── Top 7: highest priority ──
    ['top7', 'english', 'PubMed', 'https://pubmed.ncbi.nlm.nih.gov/'],
    ['top7', 'english', 'ClinicalTrials.gov', 'https://clinicaltrials.gov/'],
    ['top7', 'chinese', '中华医学期刊网', 'https://medjournals.cn/'],
    ['top7', 'chinese', '中国知网 CNKI', 'https://www.cnki.net/'],
    ['top7', 'chinese', '万方数据', 'https://www.wanfangdata.com.cn/'],
    ['top7', 'chinese', '维普 VIP', 'https://www.cqvip.com/'],
    ['top7', 'chinese', '中国临床试验注册中心', 'http://www.chictr.org.cn/'],
    // ── Other commonly used sites ──
    ['other', 'chinese', 'CBM/SinoMed', 'http://www.sinomed.ac.cn/'],
    ['other', 'english', 'Web of Science', 'https://www.webofscience.com/'],
    ['other', 'english', 'Embase', 'http://www.embase.com/'],
    ['other', 'english', 'Cochrane Library', 'https://www.cochranelibrary.com/'],
    ['other', 'english', 'Google Scholar', 'https://scholar.google.com/'],
    ['other', 'english', 'Ovid', 'http://ovidsp.ovid.com/'],
    ['other', 'english', 'Scopus', 'https://www.scopus.com/'],
    ['other', 'chinese', '中国中医药数据库', 'https://cintmed.cintcm.cn/'],
    ['other', 'english', 'GeenMedical', 'https://www.geenmedical.com/'],
    ['other', 'chinese', 'NSTL 国家科技图书文献中心', 'https://www.nstl.gov.cn/'],
    ['other', 'english', 'NCBI (全站)', 'https://www.ncbi.nlm.nih.gov/'],
  ];
  return rows.map(([priority, category, name, url], index) => ({
    id: index + 1,
    name,
    url,
    priority,
    category,
  }));
})();
|
||||
|
||||
// ========== 结果结构 ==========
|
||||
/** Outcome of testing one site, as consumed by the report functions. */
interface TaskResult {
  /** The site this result belongs to. */
  site: TestSite;
  /** Task id returned by `create_task`, or `null` when creation failed. */
  taskId: string | null;
  /**
   * `'success'`: the task completed; `'failed'`: the API reported failure;
   * `'timeout'`: the task did not finish within `MAX_WAIT`;
   * `'create_error'`: the task could not be created.
   */
  status: 'success' | 'failed' | 'timeout' | 'create_error';
  /** `statistics.search_count` of the completed task. */
  searchCount: number;
  /** `statistics.read_count` of the completed task. */
  readCount: number;
  /** `statistics.iterations` of the completed task. */
  iterations: number;
  /** Length, in characters, of the result content. */
  contentLength: number;
  /** Length, in characters, of the reasoning content. */
  reasoningLength: number;
  /** Number of links to this site found in the content. */
  referencesFound: number;
  /** Number of other links found in the content. */
  otherLinksFound: number;
  /** Wall-clock duration of the whole test for this site, in seconds. */
  durationSec: number;
  /** Failure description; empty on success. */
  errorMessage: string;
  /** Up to the first 5 on-site links found. */
  sampleLinks: string[];
}
|
||||
|
||||
// ========== API 封装 ==========
|
||||
|
||||
/**
 * Creates a DeepSearch task that runs `QUERY` restricted to a single site.
 *
 * Never rejects: transport failures, non-JSON bodies and API-level errors are all
 * folded into the `{ error }` variant so the caller can branch on the result.
 *
 * @param site Site under test; its `url` is sent as the only `domain_scope` entry.
 * @returns `{ taskId }` when the body has `code === 0` and a task id, otherwise `{ error }`.
 */
async function createTask(site: TestSite): Promise<{ taskId: string } | { error: string }> {
  const payload = {
    model: 's2',
    messages: [{ role: 'user', content: QUERY }],
    introduction: '你是一名专业的临床研究文献检索专家。请在指定数据库中尽可能多地检索相关文献,输出每篇文献的标题、作者、年份、链接。',
    max_depth: MAX_DEPTH,
    domain_scope: [site.url],
    reference_style: 'link',
    generate_summary: true,
  };

  try {
    const res = await fetch(`${BASE_URL}/create_task`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });

    // Read the body as text first so that a non-JSON error page is reported with
    // its HTTP status instead of surfacing as an opaque JSON parse error.
    const raw = await res.text();
    const httpPrefix = res.ok ? '' : `HTTP ${res.status} `;

    let body: { code?: number; message?: string; data?: { task_id?: string } } | null;
    try {
      body = JSON.parse(raw);
    } catch {
      return { error: `API 返回错误: ${httpPrefix}${raw.slice(0, 200)}` };
    }

    if (body?.code === 0 && body.data?.task_id) {
      return { taskId: body.data.task_id };
    }
    return { error: `API 返回错误: ${httpPrefix}${body?.message || JSON.stringify(body)}` };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    return { error: `请求失败: ${reason}` };
  }
}
|
||||
|
||||
/**
 * Fetches the current state of a task from the `query_task` endpoint.
 *
 * @param taskId Identifier returned by `create_task`.
 * @returns The parsed JSON body, untyped. Rejects if the request or JSON parsing fails.
 */
async function queryTask(taskId: string): Promise<any> {
  const search = new URLSearchParams({ task_id: taskId }).toString();
  const endpoint = `${BASE_URL}/query_task?${search}`;
  const response = await fetch(endpoint, {
    headers: { 'Authorization': `Bearer ${API_KEY}` },
  });
  return response.json();
}
|
||||
|
||||
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
// ========== 链接提取 ==========
|
||||
|
||||
/**
 * Returns the distinct URLs in `content` that point at the given site.
 *
 * A URL counts as on-site when its hostname equals the site's hostname (ignoring a
 * leading `www.`) or is a subdomain of it. Matching is done on the parsed hostname:
 * a plain substring test would also accept look-alike hosts (`notcnki.net`) and
 * URLs that merely mention the domain in their path or query string.
 *
 * @param content Text to scan.
 * @param siteUrl Absolute URL of the site; must be parseable by `URL`.
 * @returns Matching URLs, de-duplicated, in first-seen order.
 */
function extractDomainLinks(content: string, siteUrl: string): string[] {
  // Anchor the prefix: only a leading "www." is stripped.
  const domain = new URL(siteUrl).hostname.toLowerCase().replace(/^www\./, '');
  const urlPattern = /https?:\/\/[^\s)\]"'>]+/gi;
  const allUrls = content.match(urlPattern) || [];

  const isOnSite = (candidate: string): boolean => {
    try {
      const host = new URL(candidate).hostname.toLowerCase();
      return host === domain || host.endsWith(`.${domain}`);
    } catch {
      // Text that matched the pattern but is not a parseable URL cannot be on-site.
      return false;
    }
  };

  return [...new Set(allUrls.filter(isOnSite))];
}
|
||||
|
||||
/**
 * Returns every distinct http(s) URL found in `content`, in first-seen order.
 *
 * @param content Text to scan.
 */
function extractAllLinks(content: string): string[] {
  const hits = content.match(/https?:\/\/[^\s)\]"'>]+/gi);
  if (hits === null) {
    return [];
  }
  return Array.from(new Set(hits));
}
|
||||
|
||||
// ========== 单站点完整流程 ==========
|
||||
|
||||
/**
 * Runs the full flow for one site: creates the task, polls it every `POLL_INTERVAL`
 * until it completes, fails or `MAX_WAIT` elapses, and returns the measurements.
 *
 * Polling is best-effort: a failed poll or a response without `data` does not
 * abort the run, but it is logged (once per distinct message) so that a bad
 * credential or an outage is visible instead of ending as an unexplained timeout.
 *
 * @param site Site to test.
 * @returns The populated result; this function reports failures through
 *   `status` / `errorMessage` rather than by rejecting.
 */
async function testSingleSite(site: TestSite): Promise<TaskResult> {
  const startTime = Date.now();
  const elapsedSec = (): number => (Date.now() - startTime) / 1000;
  // Common prefix of every log line for this site.
  const tag = ` [${site.id.toString().padStart(2)}] ${site.name}`;

  const baseResult: TaskResult = {
    site,
    taskId: null,
    status: 'failed',
    searchCount: 0,
    readCount: 0,
    iterations: 0,
    contentLength: 0,
    reasoningLength: 0,
    referencesFound: 0,
    otherLinksFound: 0,
    durationSec: 0,
    errorMessage: '',
    sampleLinks: [],
  };

  // 1. Create the task.
  console.log(`${tag} → 创建任务...`);
  const createResult = await createTask(site);

  if ('error' in createResult) {
    baseResult.status = 'create_error';
    baseResult.errorMessage = createResult.error;
    baseResult.durationSec = elapsedSec();
    console.log(`${tag} ✗ 创建失败: ${createResult.error}`);
    return baseResult;
  }

  baseResult.taskId = createResult.taskId;
  console.log(`${tag} → task_id: ${createResult.taskId}`);

  // 2. Poll until the task reaches a final state or the deadline passes.
  const deadline = Date.now() + MAX_WAIT;
  let lastStatus = '';
  let lastNotice = '';

  // Logs a diagnostic only when it differs from the previous one, to avoid
  // repeating the same message on every poll.
  const notice = (text: string): void => {
    if (text === lastNotice) return;
    console.log(`${tag} ⚠️ ${text}`);
    lastNotice = text;
  };

  while (Date.now() < deadline) {
    await sleep(POLL_INTERVAL);

    try {
      const json = await queryTask(createResult.taskId);
      const data = json?.data;

      if (!data) {
        notice(`响应缺少 data: ${json?.message || JSON.stringify(json)}`);
        continue;
      }

      const currentStatus = data.status;
      if (currentStatus !== lastStatus) {
        const progress = data.progress ? `${data.progress.current}/${data.progress.total}` : '?';
        const stats = data.statistics
          ? `搜索${data.statistics.search_count || 0} 阅读${data.statistics.read_count || 0}`
          : '';
        console.log(`${tag} → ${currentStatus} (${progress}) ${stats}`);
        lastStatus = currentStatus;
      }

      if (currentStatus === 'completed') {
        const content = data.result?.content || '';
        const reasoning = data.result?.reasoning_content || '';
        const stats = data.statistics || {};

        const siteLinks = extractDomainLinks(content, site.url);
        const allLinks = extractAllLinks(content);

        baseResult.status = 'success';
        baseResult.contentLength = content.length;
        baseResult.reasoningLength = reasoning.length;
        baseResult.searchCount = stats.search_count || 0;
        baseResult.readCount = stats.read_count || 0;
        baseResult.iterations = stats.iterations || 0;
        baseResult.referencesFound = siteLinks.length;
        baseResult.otherLinksFound = allLinks.length - siteLinks.length;
        baseResult.sampleLinks = siteLinks.slice(0, 5);
        baseResult.durationSec = elapsedSec();

        console.log(`${tag} ✓ 完成 | 站内链接:${siteLinks.length} 其他链接:${allLinks.length - siteLinks.length} | ${baseResult.durationSec.toFixed(0)}s`);
        return baseResult;
      }

      if (currentStatus === 'failed') {
        baseResult.status = 'failed';
        baseResult.errorMessage = data.result?.content || '任务失败';
        baseResult.durationSec = elapsedSec();
        console.log(`${tag} ✗ 失败: ${baseResult.errorMessage.slice(0, 80)}`);
        return baseResult;
      }
    } catch (err: unknown) {
      // Keep retrying until the deadline, but surface the reason.
      const reason = err instanceof Error ? err.message : String(err);
      notice(`轮询出错,继续重试: ${reason}`);
    }
  }

  // Deadline reached without a final state.
  baseResult.status = 'timeout';
  baseResult.errorMessage = `超时(${MAX_WAIT / 1000}s)`;
  baseResult.durationSec = elapsedSec();
  console.log(`${tag} ⏰ 超时`);
  return baseResult;
}
|
||||
|
||||
// ========== 结果报告 ==========
|
||||
|
||||
/**
 * Prints the full coverage report to stdout: run header, one table per priority
 * group, summary counts, per-category site lists, and finally a JSON array
 * describing every site.
 *
 * A site is "confirmed searchable" when its task succeeded and at least one
 * on-site link was found; it is "reachable" when the task succeeded at all.
 *
 * @param results One entry per tested site.
 */
function printReport(results: TaskResult[]) {
  const wideRule = '='.repeat(100);
  const sectionRule = '━'.repeat(100);

  console.log('\n');
  console.log(wideRule);
  console.log(' Unifuncs DeepSearch API 网站覆盖能力测试报告');
  console.log(wideRule);
  console.log(`测试时间: ${new Date().toISOString()}`);
  console.log(`测试查询: "${QUERY}"`);
  console.log(`配置: max_depth=${MAX_DEPTH}, poll_interval=${POLL_INTERVAL / 1000}s`);
  console.log('');

  // ── Top 7 results ──
  console.log(sectionRule);
  console.log(' ★ Top 7 最关注站点');
  console.log(sectionRule);
  printTable(results.filter(r => r.site.priority === 'top7'));

  // ── Other results ──
  console.log('');
  console.log(sectionRule);
  console.log(' 其他常用站点');
  console.log(sectionRule);
  printTable(results.filter(r => r.site.priority === 'other'));

  // ── Summary ──
  console.log('');
  console.log(sectionRule);
  console.log(' 汇总统计');
  console.log(sectionRule);

  const successSites = results.filter(r => r.status === 'success' && r.referencesFound > 0);
  const reachableSites = results.filter(r => r.status === 'success');
  const failedSites = results.filter(r => r.status !== 'success');

  console.log(` 可搜索并返回站内链接: ${successSites.length}/${results.length} 个站点`);
  console.log(` 可到达但无站内链接: ${reachableSites.length - successSites.length} 个站点`);
  console.log(` 不可用/失败/超时: ${failedSites.length} 个站点`);
  console.log('');

  if (successSites.length > 0) {
    console.log(' ✅ 确认可搜索的站点:');
    for (const r of successSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.referencesFound} 个站内链接`);
    }
  }

  console.log('');

  const noLinkSites = reachableSites.filter(r => r.referencesFound === 0);
  if (noLinkSites.length > 0) {
    console.log(' ⚠️ 任务完成但无站内链接(可能搜索到了但链接指向其他站点):');
    for (const r of noLinkSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → 其他链接 ${r.otherLinksFound} 个`);
      if (r.sampleLinks.length === 0) {
        console.log(` 内容长度: ${r.contentLength} 字符, 搜索${r.searchCount}次, 阅读${r.readCount}次`);
      }
    }
  }

  console.log('');

  if (failedSites.length > 0) {
    console.log(' ❌ 不可用站点:');
    for (const r of failedSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.status}: ${r.errorMessage.slice(0, 100)}`);
    }
  }

  console.log('');
  console.log(wideRule);

  // ── JSON intended for the front-end data-source configuration ──
  console.log('');
  console.log('前端可用数据源配置(可直接用于 V2.0 数据源选择):');
  console.log('');
  const configList = results.map(r => ({
    name: r.site.name,
    url: r.site.url,
    category: r.site.category,
    available: r.status === 'success' && r.referencesFound > 0,
    reachable: r.status === 'success',
    siteLinksFound: r.referencesFound,
    searchCount: r.searchCount,
    readCount: r.readCount,
  }));
  console.log(JSON.stringify(configList, null, 2));
}
|
||||
|
||||
/**
 * Prints a header, a dashed separator and one row per result.
 *
 * The status icon is ✅ for a success with on-site links, ⚠️ for a success without
 * any, ⏰ for a timeout and ❌ otherwise. The last column holds the truncated error
 * message, the first sample link, or the content length, in that order of preference.
 *
 * @param results Rows to print, in the given order.
 */
function printTable(results: TaskResult[]) {
  const header = ' 序号 | 状态 | 站点名称 | 站内链接 | 其他链接 | 搜索/阅读 | 耗时 | 说明';
  console.log(header);
  console.log(' ' + '-'.repeat(header.length - 2));

  results.forEach((r) => {
    const succeeded = r.status === 'success';
    const hasSiteLinks = r.referencesFound > 0;

    let statusIcon: string;
    if (succeeded && hasSiteLinks) {
      statusIcon = '✅';
    } else if (succeeded) {
      statusIcon = '⚠️';
    } else if (r.status === 'timeout') {
      statusIcon = '⏰';
    } else {
      statusIcon = '❌';
    }

    let note: string;
    if (!succeeded) {
      note = r.errorMessage.slice(0, 25);
    } else if (hasSiteLinks) {
      note = r.sampleLinks[0]?.slice(0, 35) || '';
    } else {
      note = `内容${r.contentLength}字`;
    }

    const cells = [
      ` ${r.site.id.toString().padStart(4)}`,
      statusIcon,
      r.site.name.padEnd(24),
      r.referencesFound.toString().padStart(8),
      r.otherLinksFound.toString().padStart(8),
      `${r.searchCount}/${r.readCount}`.padStart(9),
      `${r.durationSec.toFixed(0).padStart(5)}s`,
      note,
    ];
    console.log(cells.join(' | '));
  });
}
|
||||
|
||||
// ========== 主入口 ==========
|
||||
|
||||
/** Prints the run header, tests every entry of `TEST_SITES` concurrently, then prints the report. */
async function main() {
  const intro = [
    '╔════════════════════════════════════════════════════════════╗',
    '║ Unifuncs DeepSearch API — 医学网站覆盖能力测试 ║',
    '╚════════════════════════════════════════════════════════════╝',
    '',
    `查询: "${QUERY}"`,
    `站点数: ${TEST_SITES.length} | max_depth: ${MAX_DEPTH} | 超时: ${MAX_WAIT / 1000}s`,
    `API: ${BASE_URL}`,
    '',
    '并行创建所有任务...\n',
  ];
  for (const line of intro) {
    console.log(line);
  }

  // Every site test is started before any of them is awaited, so they run in parallel.
  const pending: Promise<TaskResult>[] = [];
  for (const site of TEST_SITES) {
    pending.push(testSingleSite(site));
  }
  const results = await Promise.all(pending);

  printReport(results);
}
|
||||
|
||||
// Script entry point: any rejection escaping main() is reported and turned into exit code 1.
main().catch((failure) => {
  console.error('脚本执行失败:', failure);
  process.exit(1);
});
|
||||
@@ -1,10 +1,11 @@
|
||||
# AIclinicalresearch 系统当前状态与开发指南
|
||||
|
||||
> **文档版本:** v5.9
|
||||
> **文档版本:** v6.0
|
||||
> **创建日期:** 2025-11-28
|
||||
> **维护者:** 开发团队
|
||||
> **最后更新:** 2026-02-22
|
||||
> **🎉 重大里程碑:**
|
||||
> - **🆕 2026-02-22:ASL Deep Research V2.0 开发计划确认 + Unifuncs API 网站覆盖测试完成!** 18站点实测,9个一级可用,ClinicalTrials.gov 专项验证通过
|
||||
> - **🆕 2026-02-22:SSA Phase I-IV 开发完成!** Session 黑板 + 对话层 LLM + 方法咨询 + 对话驱动分析,E2E 107/107 通过
|
||||
> - **2026-02-21:SSA QPER 智能化主线闭环完成!** Q→P→E→R 四层架构全部开发完成,端到端 40/40 测试通过
|
||||
> - **2026-02-20:SSA Phase 2A 前端集成完成!** 多步骤工作流端到端 + V11 UI联调 + Block-based 架构共识
|
||||
@@ -25,11 +26,13 @@
|
||||
> - **2026-01-24:Protocol Agent 框架完成!** 可复用Agent框架+5阶段对话流程
|
||||
> - **2026-01-22:OSS 存储集成完成!** 阿里云 OSS 正式接入平台基础层
|
||||
>
|
||||
> **🆕 最新进展(SSA Phase I-IV 开发完成 2026-02-22):**
|
||||
> - ✅ **🎉 Phase I-IV 全部开发完成** — Session 黑板 + 意图路由器 + 对话层 LLM + 方法咨询 + AskUser 标准化 + 对话驱动分析 + QPER 集成
|
||||
> - ✅ **E2E 测试全部通过** — Phase I 31/31 + Phase II 38/38 + Phase III 13/13 + Phase IV 25/25 = 共 107 项
|
||||
> - ✅ **团队审查全部落地** — Phase II H1-H4 + Phase III H1-H3+P1 + Phase IV H1-H3+B1-B2,共 12 条反馈全部实现
|
||||
> - ✅ **开发计划 v1.8** — Phase I-IV 完成(99h),剩余 Phase V(18h) + Phase VI(10h)
|
||||
> **🆕 最新进展(ASL V2.0 + SSA Phase I-IV 2026-02-22):**
|
||||
> - ✅ **🎉 ASL Deep Research V2.0 开发计划确认** — 四步瀑布流 + 异步模式 + HITL + 多站点搜索 + Word 导出
|
||||
> - ✅ **Unifuncs API 网站覆盖测试完成** — 18 站点实测,9 个一级可用(PubMed/ClinicalTrials/NCBI/Scholar/Cochrane/CNKI/SinoMed/GeenMedical/维普)
|
||||
> - ✅ **ClinicalTrials.gov 专项验证通过** — 英文查询 + max_depth≥10,120s 获取 38 个 NCT 编号链接
|
||||
> - ✅ **DeepSearch 通用能力指南发布** — `docs/02-通用能力层/04-DeepResearch引擎/`
|
||||
> - ✅ **🎉 SSA Phase I-IV 全部开发完成** — Session 黑板 + 意图路由器 + 对话层 LLM + 方法咨询 + AskUser 标准化 + 对话驱动分析 + QPER 集成
|
||||
> - ✅ **SSA E2E 测试全部通过** — Phase I 31/31 + Phase II 38/38 + Phase III 13/13 + Phase IV 25/25 = 共 107 项
|
||||
>
|
||||
> **部署状态:** ✅ 生产环境运行中 | 公网地址:http://8.140.53.236/
|
||||
> **REDCap 状态:** ✅ 生产环境运行中 | 地址:https://redcap.xunzhengyixue.com/
|
||||
@@ -67,7 +70,7 @@
|
||||
|---------|---------|---------|---------|---------|--------|
|
||||
| **AIA** | AI智能问答 | 12个智能体 + Protocol Agent(全流程方案) | ⭐⭐⭐⭐⭐ | 🎉 **V3.1 MVP完整交付(90%)** - 一键生成+Word导出 | **P0** |
|
||||
| **PKB** | 个人知识库 | RAG问答、私人文献库 | ⭐⭐⭐ | 🎉 **Dify已替换!自研RAG上线(95%)** | P1 |
|
||||
| **ASL** | AI智能文献 | 文献筛选、Meta分析、证据图谱 | ⭐⭐⭐⭐⭐ | 🎉 **智能检索MVP完成(60%)** - DeepSearch集成 | **P0** |
|
||||
| **ASL** | AI智能文献 | 文献筛选、Deep Research、证据图谱 | ⭐⭐⭐⭐⭐ | 🚀 **V2.0 计划确认(65%)** - Unifuncs 18站实测 + 5天开发计划 | **P0** |
|
||||
| **DC** | 数据清洗整理 | ETL + 医学NER(百万行级数据) | ⭐⭐⭐⭐⭐ | ✅ **Tool B完成 + Tool C 99%(异步架构+性能优化-99%+多指标转换+7大功能)** | **P0** |
|
||||
| **IIT** | IIT Manager Agent | AI驱动IIT研究助手 - 双脑架构+REDCap集成 | ⭐⭐⭐⭐⭐ | 🎉 **事件级质控V3.1完成(设计100%,代码60%)** | **P0** |
|
||||
| **SSA** | 智能统计分析 | **QPER架构** + 四层七工具 + 对话层LLM + 意图路由器 | ⭐⭐⭐⭐⭐ | 🎉 **Phase I-IV 开发完成** — QPER闭环 + Session黑板 + 意图路由 + 对话LLM + 方法咨询 + 对话驱动分析,E2E 107/107 | **P1** |
|
||||
@@ -1459,7 +1462,7 @@ npm run dev # http://localhost:3000
|
||||
|
||||
### 模块完成度
|
||||
- ✅ **已完成**:AIA V2.0(85%,核心功能完成)、平台基础层(100%)、RVW(95%)、通用能力层升级(100%)、**PKB(95%,Dify已替换)** 🎉
|
||||
- 🚧 **开发中**:ASL(80%)、DC(Tool C 98%,Tool B后端100%,Tool B前端0%)、IIT(60%,Phase 1.5完成)、**SSA(QPER主线100% + Phase I-IV 全部完成,E2E 107/107,Phase VI 待启动)** 🎉
|
||||
- 🚧 **开发中**:**ASL(65%,V2.0 计划确认 + 18站Unifuncs测试完成)**、DC(Tool C 98%,Tool B后端100%,Tool B前端0%)、IIT(60%,Phase 1.5完成)、**SSA(QPER主线100% + Phase I-IV 全部完成,E2E 107/107,Phase VI 待启动)** 🎉
|
||||
- 📋 **未开始**:ST
|
||||
|
||||
### 部署完成度
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
| **认证授权** | `common/auth/` | ✅ | JWT认证 + 权限控制 |
|
||||
| **Prompt管理** | `common/prompt/` | ✅ | 动态Prompt配置 |
|
||||
| **🆕R统计引擎** | `r-statistics-service/` | ✅ | Docker化R统计服务(plumber) |
|
||||
| **🆕DeepResearch引擎** | `common/deepsearch/` | ✅ | Unifuncs DeepSearch API封装(9站点验证) |
|
||||
|
||||
### 前端通用能力
|
||||
|
||||
|
||||
@@ -0,0 +1,295 @@
|
||||
# Unifuncs DeepSearch API 使用指南
|
||||
|
||||
> **文档版本:** v1.0
|
||||
> **创建日期:** 2026-02-22
|
||||
> **维护者:** 开发团队
|
||||
> **文档目的:** 指导业务模块正确使用 Unifuncs DeepSearch API,明确可用网站与最佳策略
|
||||
|
||||
---
|
||||
|
||||
## 1. 概述
|
||||
|
||||
Unifuncs DeepSearch 是一个 AI 驱动的深度搜索引擎,可以在指定的网站范围内自动搜索、阅读和汇总信息。在本平台中,它作为**通用能力层**的一部分,为文献检索、临床试验查找等场景提供底层搜索能力。
|
||||
|
||||
### 核心能力
|
||||
- 自然语言输入 → AI 自动生成搜索策略
|
||||
- 多轮迭代搜索(最大深度可配置)
|
||||
- 自动阅读网页内容并提取关键信息
|
||||
- 返回结构化结果 + 综合报告
|
||||
|
||||
### API 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|---|
|
||||
| 基础 URL | `https://api.unifuncs.com/deepsearch/v1` |
|
||||
| 模型 | `s2` |
|
||||
| 认证 | `Authorization: Bearer {UNIFUNCS_API_KEY}` |
|
||||
| 环境变量 | `UNIFUNCS_API_KEY`(已配置在 `backend/.env`) |
|
||||
|
||||
---
|
||||
|
||||
## 2. 网站覆盖能力(2026-02-22 实测)
|
||||
|
||||
### 2.1 测试条件
|
||||
|
||||
- **查询**:他汀类药物预防心血管疾病的随机对照试验和Meta分析,近5年高质量研究
|
||||
- **配置**:max_depth=5,异步模式(create_task + query_task)
|
||||
- **ClinicalTrials.gov 专项**:4 种策略对比测试,max_depth=5~15
|
||||
|
||||
### 2.2 可用性分级
|
||||
|
||||
#### 一级:确认可搜索(返回站内直接链接)
|
||||
|
||||
| 站点 | 域名 | 类型 | 站内链接数 | 搜索/阅读 | 最佳策略 |
|
||||
|------|------|------|-----------|-----------|---------|
|
||||
| **PubMed** | pubmed.ncbi.nlm.nih.gov | 英文 | 28 | 9/29 | 中/英文查询均可,效果最佳 |
|
||||
| **NCBI/PMC** | www.ncbi.nlm.nih.gov | 英文 | 18 | 24/19 | 含 PMC 全文链接 |
|
||||
| **ClinicalTrials.gov** | clinicaltrials.gov | 英文 | 38 | 6/24 | **必须英文查询**,max_depth≥10 |
|
||||
| **Google Scholar** | scholar.google.com | 英文 | 10 | 22/26 | 跨库聚合搜索 |
|
||||
| **CBM/SinoMed** | www.sinomed.ac.cn | 中文 | 9 | 17/12 | 中文生物医学文献数据库 |
|
||||
| **中国知网 CNKI** | www.cnki.net | 中文 | 7 | 40/6 | 中文核心期刊 |
|
||||
| **GeenMedical** | www.geenmedical.com | 英文 | 5 | 38/3 | 医学搜索聚合引擎 |
|
||||
| **Cochrane Library** | www.cochranelibrary.com | 英文 | 4 | 38/12 | 系统综述金标准 |
|
||||
| **维普 VIP** | www.cqvip.com | 中文 | 1 | 33/3 | 可用但链接较少 |
|
||||
|
||||
#### 二级:可到达但链接间接(搜索到内容,但返回链接不指向该站点域名)
|
||||
|
||||
| 站点 | 域名 | 类型 | 其他链接数 | 说明 |
|
||||
|------|------|------|-----------|------|
|
||||
| 中华医学期刊网 | medjournals.cn | 中文 | 12 | 搜索活跃(41次),内容丰富但链接跳转 |
|
||||
| 万方数据 | www.wanfangdata.com.cn | 中文 | 7 | 搜索活跃(42次),链接可能跳转 |
|
||||
| 中国临床试验注册中心 | www.chictr.org.cn | 中文 | 7 | 有内容产出,链接指向其他站 |
|
||||
| 中国中医药数据库 | cintmed.cintcm.cn | 中文 | 22 | 内容最丰富(8631字),链接非直达 |
|
||||
| Scopus | www.scopus.com | 英文 | 15 | 付费墙限制,内容来自外部引用 |
|
||||
| Embase | www.embase.com | 英文 | 14 | 需机构登录 |
|
||||
| Web of Science | www.webofscience.com | 英文 | 6 | 付费墙限制 |
|
||||
|
||||
#### 三级:不可用或受限
|
||||
|
||||
| 站点 | 域名 | 说明 |
|
||||
|------|------|------|
|
||||
| Ovid | ovidsp.ovid.com | 仅搜索未读取内容,需机构登录 |
|
||||
| NSTL | www.nstl.gov.cn | 搜索到但无有效内容和链接 |
|
||||
|
||||
### 2.3 关键发现
|
||||
|
||||
1. **付费库无法穿透**:Unifuncs 只能访问公开可达的网页内容,不支持传入用户名密码。Web of Science、Embase、Scopus、Ovid 等需要机构 IP 或账号登录的库无法直接搜索。
|
||||
|
||||
2. **ClinicalTrials.gov 必须用英文**:该站点为纯英文网站,中文查询效率极低。使用英文查询 + max_depth≥10 时,可稳定返回 30+ 个 NCT 编号和链接。
|
||||
|
||||
3. **中文库表现不一**:CNKI 和 SinoMed 效果较好,能直接返回站内链接;万方和中华医学期刊网可到达但链接不直达。
|
||||
|
||||
---
|
||||
|
||||
## 3. 两种调用模式
|
||||
|
||||
### 3.1 OpenAI 兼容协议(流式,适合实时展示)
|
||||
|
||||
```typescript
|
||||
import OpenAI from 'openai';
|
||||
|
||||
const client = new OpenAI({
|
||||
baseURL: 'https://api.unifuncs.com/deepsearch/v1',
|
||||
apiKey: process.env.UNIFUNCS_API_KEY,
|
||||
});
|
||||
|
||||
const stream = await client.chat.completions.create({
|
||||
model: 's2',
|
||||
messages: [{ role: 'user', content: query }],
|
||||
stream: true,
|
||||
introduction: '你是一名专业的临床研究文献检索专家',
|
||||
max_depth: 15,
|
||||
domain_scope: ['https://pubmed.ncbi.nlm.nih.gov/'],
|
||||
domain_blacklist: [],
|
||||
reference_style: 'link',
|
||||
} as any);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const delta = chunk.choices[0]?.delta;
|
||||
if ((delta as any)?.reasoning_content) {
|
||||
// AI 思考过程(逐字流式)
|
||||
}
|
||||
if (delta?.content) {
|
||||
// 最终结果内容(逐字流式)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**优点:** 实时展示 AI 思考过程,用户体验好
|
||||
**缺点:** 连接不稳定,离开页面任务丢失,长任务容易超时
|
||||
|
||||
### 3.2 异步模式(推荐用于 V2.0)
|
||||
|
||||
#### 创建任务
|
||||
|
||||
```typescript
|
||||
const payload = {
|
||||
model: 's2',
|
||||
messages: [{ role: 'user', content: query }],
|
||||
introduction: '你是一名专业的临床研究文献检索专家',
|
||||
max_depth: 15,
|
||||
domain_scope: ['https://pubmed.ncbi.nlm.nih.gov/'],
|
||||
domain_blacklist: [],
|
||||
reference_style: 'link',
|
||||
generate_summary: true,
|
||||
output_prompt: '请输出结构化报告和文献列表',
|
||||
};
|
||||
|
||||
const res = await fetch('https://api.unifuncs.com/deepsearch/v1/create_task', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${UNIFUNCS_API_KEY}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
});
|
||||
|
||||
const { data } = await res.json();
|
||||
// data.task_id → 保存到数据库
|
||||
```
|
||||
|
||||
#### 轮询任务
|
||||
|
||||
```typescript
|
||||
const params = new URLSearchParams({ task_id: taskId });
|
||||
const res = await fetch(
|
||||
`https://api.unifuncs.com/deepsearch/v1/query_task?${params}`,
|
||||
{ headers: { 'Authorization': `Bearer ${UNIFUNCS_API_KEY}` } }
|
||||
);
|
||||
|
||||
const { data } = await res.json();
|
||||
// data.status: pending / processing / completed / failed
|
||||
// data.result.content: 最终结果
|
||||
// data.result.reasoning_content: AI 思考过程(增量)
|
||||
// data.progress: { current, total, message }
|
||||
// data.statistics: { iterations, search_count, read_count, token_usage }
|
||||
```
|
||||
|
||||
**优点:** 任务持久化,离开页面不中断,可恢复,适合长任务
|
||||
**缺点:** 非实时,需要轮询获取进度
|
||||
|
||||
---
|
||||
|
||||
## 4. 关键参数说明
|
||||
|
||||
| 参数 | 类型 | 推荐值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `model` | string | `"s2"` | 固定值 |
|
||||
| `max_depth` | number | 10~25 | 搜索深度。测试用 5,生产用 15~25。越大越全但越慢 |
|
||||
| `domain_scope` | string[] | 按需配置 | 限定搜索范围。留空则不限 |
|
||||
| `domain_blacklist` | string[] | `[]` | 排除特定站点 |
|
||||
| `introduction` | string | 见下方 | 设定 AI 角色和搜索指导 |
|
||||
| `reference_style` | string | `"link"` | 引用格式,`link` 或 `character` |
|
||||
| `output_prompt` | string | 可选 | 自定义输出格式提示词 |
|
||||
| `generate_summary` | boolean | `true` | 异步模式完成后自动生成摘要 |
|
||||
|
||||
### 推荐的 introduction 模板
|
||||
|
||||
```
|
||||
你是一名专业的临床研究文献检索专家。
|
||||
请根据用户的研究需求,在指定数据库中系统性地检索相关文献。
|
||||
|
||||
检索要求:
|
||||
1. 优先检索高质量研究:系统综述、Meta分析、RCT
|
||||
2. 关注 PICOS 要素(人群、干预、对照、结局、研究设计)
|
||||
3. 优先近 5 年的研究
|
||||
4. 返回每篇文献的完整元数据(标题、作者、期刊、年份、链接)
|
||||
|
||||
输出要求:
|
||||
1. 按研究类型分组
|
||||
2. 每篇文献附带直接链接
|
||||
3. 最后给出综合性研究概述
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. 最佳策略指南
|
||||
|
||||
### 5.1 针对不同站点的策略
|
||||
|
||||
| 目标站点 | 查询语言 | max_depth | 特殊说明 |
|
||||
|---------|---------|-----------|---------|
|
||||
| PubMed / NCBI | 中文或英文均可 | 15~25 | 效果最好,核心数据源 |
|
||||
| ClinicalTrials.gov | **必须英文** | 10~15 | 中文查询极慢甚至超时 |
|
||||
| Cochrane Library | 英文优先 | 10~15 | 系统综述专用 |
|
||||
| Google Scholar | 中文或英文 | 10~15 | 跨库聚合,可能有重复 |
|
||||
| CNKI / SinoMed | 中文 | 10~15 | 中文文献首选 |
|
||||
| GeenMedical | 英文优先 | 5~10 | 聚合搜索,速度快 |
|
||||
|
||||
### 5.2 多站点组合搜索
|
||||
|
||||
```typescript
|
||||
// V2.0 推荐:用户选择多个数据源,合并到 domain_scope
|
||||
const domainScope = [
|
||||
'https://pubmed.ncbi.nlm.nih.gov/',
|
||||
'https://www.cochranelibrary.com/',
|
||||
'https://scholar.google.com/',
|
||||
];
|
||||
|
||||
// 如果包含 ClinicalTrials.gov,需求扩写时自动翻译为英文
|
||||
```
|
||||
|
||||
### 5.3 性能预期
|
||||
|
||||
| max_depth | 预计耗时 | 搜索/阅读量 | 适用场景 |
|
||||
|-----------|---------|------------|---------|
|
||||
| 5 | 1~3 分钟 | 10~40 / 0~20 | 快速探索 |
|
||||
| 10 | 2~5 分钟 | 20~50 / 10~30 | 常规检索 |
|
||||
| 15 | 3~8 分钟 | 30~80 / 20~50 | 深度检索 |
|
||||
| 25 | 5~15 分钟 | 50~150 / 30~80 | 全面研究 |
|
||||
|
||||
### 5.4 成本估算
|
||||
|
||||
- 单次搜索 Token 消耗:5万~30万 tokens(取决于深度和站点数量)
|
||||
- 估算成本:约 ¥0.1~0.5/次(按 unifuncs 定价)
|
||||
|
||||
---
|
||||
|
||||
## 6. 平台集成方式
|
||||
|
||||
### 当前使用(V1.x - ASL 模块)
|
||||
|
||||
```
|
||||
researchService.ts → OpenAI SDK → SSE 流式
|
||||
researchWorker.ts → pg-boss → 异步执行
|
||||
```
|
||||
|
||||
### 计划升级(V2.0 - ASL Deep Research)
|
||||
|
||||
```
|
||||
requirementExpansionService.ts → DeepSeek-V3 需求扩写
|
||||
unifuncsAsyncClient.ts → create_task / query_task 异步模式
|
||||
deepResearchV2Worker.ts → pg-boss Worker → 轮询 + 日志解析
|
||||
```
|
||||
|
||||
### 其他模块可复用场景
|
||||
|
||||
| 模块 | 潜在用途 |
|
||||
|------|---------|
|
||||
| AIA 智能问答 | 智能体联网搜索增强 |
|
||||
| PKB 个人知识库 | 自动补充知识库文献 |
|
||||
| RVW 稿件审查 | 自动查找参考文献验证 |
|
||||
| IIT 研究管理 | 自动检索同类临床试验 |
|
||||
|
||||
---
|
||||
|
||||
## 7. 测试脚本
|
||||
|
||||
项目中已提供三个测试脚本:
|
||||
|
||||
| 脚本 | 路径 | 用途 |
|
||||
|------|------|------|
|
||||
| 全站覆盖测试 | `backend/scripts/test-unifuncs-site-coverage.ts` | 并行测试 18 个医学网站的搜索能力 |
|
||||
| ClinicalTrials 专项 | `backend/scripts/test-unifuncs-clinicaltrials.ts` | 4 种策略对比测试 ClinicalTrials.gov |
|
||||
| 快速验证 | `backend/scripts/test-unifuncs-deepsearch.ts` | 单站点 SSE 流式快速测试 |
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
npx tsx scripts/test-unifuncs-site-coverage.ts
|
||||
npx tsx scripts/test-unifuncs-clinicaltrials.ts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**维护者:** 开发团队
|
||||
**最后更新:** 2026-02-22
|
||||
@@ -1,10 +1,13 @@
|
||||
# AI智能文献模块 - 当前状态与开发指南
|
||||
|
||||
> **文档版本:** v1.5
|
||||
> **文档版本:** v1.6
|
||||
> **创建日期:** 2025-11-21
|
||||
> **维护者:** AI智能文献开发团队
|
||||
> **最后更新:** 2026-01-18 🆕 **智能文献检索(DeepSearch)MVP完成**
|
||||
> **重大进展:** unifuncs DeepSearch API 集成 - AI驱动的 PubMed 自动检索
|
||||
> **最后更新:** 2026-02-22 🆕 **Deep Research V2.0 开发计划确认 + Unifuncs API 网站覆盖测试完成**
|
||||
> **重大进展:**
|
||||
> - 🆕 2026-02-22:V2.0 开发计划确认!四步瀑布流 + 异步模式 + HITL 需求确认 + 务实结果展示
|
||||
> - 🆕 2026-02-22:Unifuncs API 网站覆盖测试完成!18 站点实测,9 个一级可用,ClinicalTrials.gov 专项验证通过
|
||||
> - 2026-01-18:智能文献检索(DeepSearch)MVP完成 - unifuncs API 集成
|
||||
> **文档目的:** 反映模块真实状态,帮助新开发人员快速上手
|
||||
|
||||
---
|
||||
@@ -27,17 +30,50 @@
|
||||
AI智能文献模块是一个基于大语言模型(LLM)的文献筛选系统,用于帮助研究人员根据PICOS标准自动筛选文献。
|
||||
|
||||
### 当前状态
|
||||
- **开发阶段**:🚧 标题摘要初筛MVP已完成,全文复筛后端已完成,智能文献检索MVP已完成
|
||||
- **开发阶段**:🚧 V1.x 完成,V2.0 Deep Research 开发计划已确认,即将启动
|
||||
- **已完成功能**:
|
||||
- ✅ 标题摘要初筛(Title & Abstract Screening)- 完整流程
|
||||
- ✅ 全文复筛后端(Day 2-5)- LLM服务 + API + Excel导出
|
||||
- ✅ **智能文献检索(DeepSearch)MVP** - unifuncs API 集成,SSE 实时流式
|
||||
- **开发中功能**:
|
||||
- 🚧 全文复筛前端UI(Day 6-8,预计2.5天)
|
||||
- **模型支持**:DeepSeek-V3 + Qwen-Max 双模型筛选 + unifuncs DeepSearch
|
||||
- ✅ **智能文献检索(DeepSearch)V1.x MVP** - unifuncs API 集成,SSE 实时流式
|
||||
- ✅ **Unifuncs API 网站覆盖测试** - 18 站点实测,9 个一级可用
|
||||
- ✅ **Deep Research V2.0 开发计划** - 完整技术方案、API 契约、5 天分阶段计划
|
||||
- **V2.0 开发中**:
|
||||
- 🚧 Deep Research V2.0 — 四步瀑布流(Landing→配置→HITL→终端→结果)
|
||||
- 🚧 异步模式改造(SSE → Unifuncs create_task/query_task)
|
||||
- 🚧 需求扩写 + HITL 确认 + Agent 终端 + 简洁结果展示 + Word 导出
|
||||
- **模型支持**:DeepSeek-V3(需求扩写) + unifuncs s2(深度搜索) + Qwen-Max(筛选)
|
||||
- **部署状态**:✅ 本地开发环境运行正常
|
||||
|
||||
### 🆕 智能文献检索 DeepSearch(2026-01-18 MVP完成)
|
||||
### 🆕 Deep Research V2.0(2026-02-22 开发计划确认)
|
||||
|
||||
**V2.0 核心升级:**
|
||||
- 四步瀑布流:Landing → 配置 → HITL 策略确认 → Agent 终端 → 结果展示
|
||||
- LLM 需求扩写(DeepSeek-V3):粗略输入 → 结构化自然语言检索指令书
|
||||
- Human-in-the-Loop:用户可编辑修改 AI 生成的检索需求
|
||||
- **异步模式**:SSE → Unifuncs create_task/query_task + pg-boss 队列(离开页面不中断)
|
||||
- Agent 终端:暗色主题 + 分类结构化日志(每 3-5s 弹出一条)
|
||||
- 简洁结果展示:AI 综合报告(Markdown)+ 文献清单表格 + Word 导出
|
||||
- **多站点搜索**:9 个一级可用站点,用户可在前端选择数据源
|
||||
|
||||
**V2.0 确认可用数据源(2026-02-22 实测):**
|
||||
|
||||
| 站点 | 站内链接数 | 说明 |
|
||||
|------|-----------|------|
|
||||
| PubMed | 28 | 核心数据源,效果最佳 |
|
||||
| ClinicalTrials.gov | 38 | 必须英文查询,max_depth≥10 |
|
||||
| NCBI/PMC | 18 | 含 PMC 全文链接 |
|
||||
| Google Scholar | 10 | 跨库聚合 |
|
||||
| CBM/SinoMed | 9 | 中文生物医学 |
|
||||
| CNKI | 7 | 中文核心期刊 |
|
||||
| GeenMedical | 5 | 医学搜索引擎 |
|
||||
| Cochrane Library | 4 | 系统综述金标准 |
|
||||
| 维普 | 1 | 中文库 |
|
||||
|
||||
**开发计划**:5 天分阶段交付,详见 `04-开发计划/07-Deep Research V2.0 开发计划.md`
|
||||
|
||||
**通用能力指南**:`docs/02-通用能力层/04-DeepResearch引擎/01-Unifuncs DeepSearch API 使用指南.md`
|
||||
|
||||
### 智能文献检索 DeepSearch V1.x(2026-01-18 MVP完成)
|
||||
|
||||
**功能概述:**
|
||||
- AI 驱动的自动化 PubMed 文献检索
|
||||
@@ -50,19 +86,15 @@ AI智能文献模块是一个基于大语言模型(LLM)的文献筛选系统
|
||||
- Server-Sent Events (SSE) 实时流式通信
|
||||
- 数据库存储:`asl_schema.asl_research_tasks`
|
||||
|
||||
**API 端点:**
|
||||
**API 端点(V1.x,保留兼容):**
|
||||
- `POST /api/v1/asl/research/stream` - SSE 流式检索
|
||||
- `POST /api/v1/asl/research/tasks` - 异步任务创建(备用)
|
||||
- `POST /api/v1/asl/research/tasks` - 异步任务创建
|
||||
- `GET /api/v1/asl/research/tasks/:taskId/status` - 任务状态查询
|
||||
|
||||
**前端入口:**
|
||||
- 路由:`/literature/research/search`
|
||||
- 菜单:AI智能文献 → 2. 智能文献检索
|
||||
|
||||
**已知限制:**
|
||||
- ⚠️ SSE 模式,离开页面任务中断
|
||||
- ⚠️ 每次检索成本约 0.3 元(unifuncs API)
|
||||
- ⏳ 搜索历史、高级筛选等功能待开发
|
||||
**已知限制(V2.0 将解决):**
|
||||
- ⚠️ SSE 模式,离开页面任务中断 → V2.0 用异步模式解决
|
||||
- ⚠️ 仅搜索 PubMed → V2.0 支持 9 个数据源
|
||||
- ⏳ 无需求扩写、无 HITL → V2.0 新增
|
||||
|
||||
### 🏆 Postgres-Only 架构改造(2025-12-13完成)
|
||||
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
7个最关注的医学期刊或医学信息网站
|
||||
|
||||
---下面7个是我最关注的----
|
||||
|
||||
1. PubMed
|
||||
网址 :https://pubmed.ncbi.nlm.nih.gov/
|
||||
7. ClinicalTrials.gov
|
||||
网址 :https://clinicaltrials.gov/
|
||||
|
||||
|
||||
15. 中华医学期刊网
|
||||
网址 :https://medjournals.cn/
|
||||
3. 中国知网(CNKI)
|
||||
网址 :https://www.cnki.net/
|
||||
8. 万方数据
|
||||
网址 :https://www.wanfangdata.com.cn/
|
||||
9. 维普(VIP)
|
||||
网址 :https://www.cqvip.com/
|
||||
16. 中国临床试验注册中心
|
||||
网址 :http://www.chictr.org.cn/
|
||||
|
||||
|
||||
----下面是所有常用的期刊网站------
|
||||
|
||||
英文数据库
|
||||
1. PubMed
|
||||
网址 :https://pubmed.ncbi.nlm.nih.gov/
|
||||
|
||||
2. 中国生物医学文献数据库(CBM/SinoMed)
|
||||
网址 :http://www.sinomed.ac.cn/
|
||||
|
||||
3. 中国知网(CNKI)
|
||||
网址 :https://www.cnki.net/
|
||||
|
||||
英文数据库
|
||||
4. Web of Science
|
||||
网址 :https://www.webofscience.com/ 或 http://isiknowledge.com/
|
||||
5. Embase
|
||||
网址 :http://www.embase.com/
|
||||
|
||||
6. Cochrane Library
|
||||
网址 :https://www.cochranelibrary.com/
|
||||
|
||||
7. ClinicalTrials.gov
|
||||
网址 :https://clinicaltrials.gov/
|
||||
中文数据库
|
||||
8. 万方数据
|
||||
网址 :https://www.wanfangdata.com.cn/
|
||||
|
||||
9. 维普(VIP)
|
||||
网址 :https://www.cqvip.com/
|
||||
|
||||
10. NCBI(National Center for Biotechnology Information)
|
||||
|
||||
11. Google Scholar
|
||||
网址 :https://scholar.google.com/
|
||||
|
||||
12. Ovid
|
||||
网址 :http://ovidsp.ovid.com/
|
||||
|
||||
13. Scopus
|
||||
网址 :https://www.scopus.com/
|
||||
|
||||
|
||||
14. 中国中医药数据库
|
||||
网址 :http://cowork.cintcm.com/engine/windex.jsp 或 https://cintmed.cintcm.cn/
|
||||
特点 :中国中医科学院开发,48个数据库,220余万条数据
|
||||
重要性 :中医药研究必备
|
||||
15. 中华医学期刊网
|
||||
网址 :https://medjournals.cn/
|
||||
特点 :中华医学会官方平台,收录中华系列期刊
|
||||
重要性 :中华系顶级期刊聚集地
|
||||
|
||||
16. 中国临床试验注册中心
|
||||
网址 :http://www.chictr.org.cn/
|
||||
特点 :中国临床试验注册平台
|
||||
重要性 :中国临床研究必备
|
||||
|
||||
17. GeenMedical
|
||||
网址 :https://www.geenmedical.com/
|
||||
|
||||
18. 国家科技图书文献中心(NSTL) ⭐⭐
|
||||
网址 :https://www.nstl.gov.cn/
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
# **技术设计文档 (TDD):ASL \- 智能文献检索 (Deep Research) MVP 版**
|
||||
|
||||
**文档版本:** v4.4-Tech (MVP 自然语言确认版)
|
||||
|
||||
**核心架构:** LLM Requirement Expansion (需求扩写) \+ Postgres-Only (pg-boss) \+ Unifuncs Async API
|
||||
|
||||
## **🏗️ 1\. 系统数据流向 (Data Flow)**
|
||||
|
||||
MVP 版本的架构最大程度降低了状态维护的成本,充分利用 Unifuncs 原生支持自然语言查询的能力。
|
||||
|
||||
1. Client 发送原始简短自然语言 \-\> Node.js 调用 DeepSeek-V3 \-\> 返回**结构化、扩写后的自然语言检索需求(Search Requirements)**(非布尔检索式)。
|
||||
2. Client 展示该检索需求(普通文本/Markdown),允许用户直接进行文字修改与补充 \-\> 用户点击执行,发送修改后的 Confirmed Requirement \-\> Node.js 创建 pg-boss 任务。
|
||||
3. Worker 启动 \-\> 将用户确认的自然语言需求直接作为 content 传给 Unifuncs 创建任务 \-\> 每 5 秒轮询一次 Unifuncs \-\> 增量日志写入 PostgreSQL。
|
||||
4. Client 每 3 秒轮询 Node.js 获取日志 \-\> 渲染 Terminal \-\> 任务完成,渲染结果。
|
||||
|
||||
## **🗄️ 2\. 数据库设计 (Prisma)**
|
||||
|
||||
极简表结构,去掉了复杂的澄清记录,只保留原始问题和最终执行的自然语言需求。
|
||||
|
||||
model AslResearchTask {
|
||||
id String @id @default(uuid())
|
||||
user\_id String
|
||||
|
||||
// Step 1 & 2
|
||||
original\_query String @db.Text
|
||||
target\_sources Json // 选中的数据源,如 \["pubmed.ncbi.nlm.nih.gov"\]
|
||||
filters Json // 高级过滤条件
|
||||
confirmed\_requirement String? @db.Text // 核心字段:用户复核并修改后的自然语言检索需求
|
||||
|
||||
// Step 3
|
||||
status String // 'draft', 'pending', 'running', 'completed', 'failed'
|
||||
unifuncs\_task\_id String? // 外部API的ID
|
||||
execution\_logs Json? // 终端日志 \[{type: 'log', text: '...'}, ...\]
|
||||
|
||||
// Step 4
|
||||
result\_list Json?
|
||||
synthesis\_report String? @db.Text
|
||||
|
||||
created\_at DateTime @default(now())
|
||||
updated\_at DateTime @updatedAt
|
||||
}
|
||||
|
||||
## **🔌 3\. 核心 API 契约**
|
||||
|
||||
### **3.1 检索需求扩写接口 (同步, 无状态)**
|
||||
|
||||
* **POST /api/v1/asl/research/generate-requirement**
|
||||
* **处理:** 拦截用户简短输入,拼装 System Prompt,调用系统内置 LLMFactory(DeepSeek-V3)。
|
||||
* *System Prompt 示例:* “你是一个医学检索辅助专家。请将用户简短的研究意图,扩写并梳理成一份条理清晰的自然语言检索需求说明。内容包括:1. 核心检索主题;2. 建议包含的专业关键词(中英文);3. 目标人群及干预措施限定;4. 文献类型建议(如 RCT)。输出纯文本,方便用户二次编辑。”
|
||||
* *同时:* 创建数据库记录(状态为 draft)。
|
||||
* **返回:** { taskId: "uuid", generatedRequirement: "研究主题:他汀类药物...\\n目标人群:...\\n检索要求:..." }
|
||||
|
||||
### **3.2 任务启动接口 (进入异步队列)**
|
||||
|
||||
* **PUT /api/v1/asl/research/tasks/:id/execute**
|
||||
* **请求体:** { confirmedRequirement: string }
|
||||
* **处理:** 1\. 更新 AslResearchTask 的 confirmed\_requirement 字段。
|
||||
2\. jobQueue.createJob('deep-research-worker', { taskId: id })
|
||||
3\. 更新状态为 pending。
|
||||
* **返回:** { success: true }
|
||||
|
||||
### **3.3 任务状态与日志轮询接口**
|
||||
|
||||
* **GET /api/v1/asl/research/tasks/:id**
|
||||
* **返回:** 包含 status, execution\_logs, 以及(若完成)synthesis\_report 和 result\_list。
|
||||
|
||||
## **⚙️ 4\. 后台 Worker 逻辑 (Unifuncs 集成)**
|
||||
|
||||
使用平台现有的 pg-boss 机制。
|
||||
|
||||
// backend/src/modules/asl/workers/DeepResearchWorker.ts
|
||||
|
||||
export async function processDeepResearch(job: Job) {
|
||||
const taskId \= job.data.taskId;
|
||||
const task \= await prisma.aslResearchTask.findUnique({ where: { id: taskId } });
|
||||
|
||||
// 1\. 发起 Unifuncs 创建任务请求
|
||||
const unifuncsPayload \= {
|
||||
model: "s2",
|
||||
// 💡 核心变更:直接将用户确认的、详细的自然语言需求传给 Unifuncs,由 Unifuncs 自己去理解和拆解检索词
|
||||
messages: \[{
|
||||
role: "user",
|
||||
content: \`请根据以下详细检索需求执行深度研究:\\n${task.confirmed\_requirement}\`
|
||||
}\],
|
||||
introduction: "你是一名资深的循证医学研究员。请严格遵循用户的检索需求,在指定数据库中执行详尽的深度检索。",
|
||||
max\_depth: 25,
|
||||
domain\_scope: task.target\_sources,
|
||||
// 强制输出格式以分离报告与文献JSON
|
||||
output\_prompt: \`
|
||||
\<REPORT\_SECTION\>
|
||||
\[撰写综合报告\]
|
||||
\</REPORT\_SECTION\>
|
||||
\<JSON\_LIST\_SECTION\>
|
||||
\[输出严格的文献JSON数组\]
|
||||
\</JSON\_LIST\_SECTION\>
|
||||
\`,
|
||||
reference\_style: "link"
|
||||
};
|
||||
|
||||
const createRes \= await unifuncsClient.createTask(unifuncsPayload);
|
||||
const unifuncsId \= createRes.data.task\_id;
|
||||
|
||||
await prisma.aslResearchTask.update({
|
||||
where: { id: taskId },
|
||||
data: { unifuncs\_task\_id: unifuncsId, status: 'running' }
|
||||
});
|
||||
|
||||
// 2\. 轮询 Unifuncs 状态 (防无限死循环,设置最大重试次数;注意:本示例未处理 Unifuncs 返回 failed 以及重试次数耗尽的情况,实现时需在这两种情况下将任务状态更新为 'failed',否则任务会一直停留在 running)
|
||||
let isCompleted \= false;
|
||||
let maxRetries \= 120; // 假设每5秒查一次,最多查10分钟(max\_depth 为 25 的任务预计耗时 5~15 分钟,可能超过该上限,需按实际耗时调整)
|
||||
|
||||
while (\!isCompleted && maxRetries \> 0\) {
|
||||
await sleep(5000);
|
||||
const queryRes \= await unifuncsClient.queryTask(unifuncsId);
|
||||
|
||||
// 解析增量日志 reasoning\_content
|
||||
const logs \= parseReasoningToLogs(queryRes.data.result?.reasoning\_content);
|
||||
|
||||
// 更新数据库日志 (覆盖或追加)
|
||||
await prisma.aslResearchTask.update({
|
||||
where: { id: taskId },
|
||||
data: { execution\_logs: logs }
|
||||
});
|
||||
|
||||
if (queryRes.data.status \=== 'completed') {
|
||||
isCompleted \= true;
|
||||
// 解析提取 \<REPORT\_SECTION\> 和 \<JSON\_LIST\_SECTION\>
|
||||
const report \= extractReport(queryRes.data.result.content);
|
||||
const list \= extractJsonList(queryRes.data.result.content);
|
||||
|
||||
await prisma.aslResearchTask.update({
|
||||
where: { id: taskId },
|
||||
data: { status: 'completed', synthesis\_report: report, result\_list: list }
|
||||
});
|
||||
}
|
||||
maxRetries--;
|
||||
}
|
||||
}
|
||||
|
||||
## **🛡️ 5\. 技术优势**
|
||||
|
||||
1. **零学习成本**:去除了对医生来说晦涩难懂的“布尔逻辑检索式”,整个确认过程完全采用大白话(自然语言),用户审查和修改都极其自然。
|
||||
2. **充分发挥 API 威力**:将“拆解关键词、发起搜索、阅读网页”的复杂动作全部下放给专业的 Unifuncs 引擎,本系统架构只做轻量级的“需求扩写”和“流式日志透传”,代码稳定性极高。
|
||||
3. **极速上线**:前端页面仅需一个大文本框渲染扩写后的要求,没有任何复杂的 UI 组件开销,是名副其实的 MVP 最优解。
|
||||
@@ -0,0 +1,143 @@
|
||||
# **产品需求文档 (PRD):ASL \- 智能文献检索 (Deep Research) V4.1**
|
||||
|
||||
## **📑 1\. 文档概述**
|
||||
|
||||
| 属性 | 说明 |
|
||||
| :---- | :---- |
|
||||
| **产品名称** | AI Clinical \- ASL 智能文献模块 |
|
||||
| **功能模块** | Deep Research (智能文献检索) |
|
||||
| **文档版本** | V4.1 (自然语言需求确认版) |
|
||||
| **目标受众** | 研发团队、UI/UX 设计师、测试工程师 |
|
||||
| **当前痛点** | 传统医学检索要求医生手动编写复杂的布尔逻辑(Boolean Query),学习成本极高;MVP版本直接盲搜容易偏离意图;强加聊天框(Chat UI)又会破坏工具的沉浸感。 |
|
||||
| **核心目标** | 采用\*\*“单页瀑布流 \+ 自然语言需求扩写 (Requirement Expansion) \+ 人机协同编辑”\*\*的极简专业工作流,彻底消除布尔逻辑的门槛,充分发挥底层深搜大模型的能力。 |
|
||||
|
||||
## **🎯 2\. 用户故事与核心工作流 (User Workflow)**
|
||||
|
||||
**UX 核心理念:单页瀑布流 (Progressive Disclosure) \+ 大白话指令驱动**
|
||||
|
||||
本版本摒弃了传统数据库必须的“检索式”,改为生成一份\*\*“深度检索指令书”\*\*。
|
||||
|
||||
**标准工作流 (The Happy Path):**
|
||||
|
||||
1. **粗略输入 (Input):** 医生在顶部文本框中输入一个极其粗略的想法(如“他汀预防心血管疾病,要能下载PDF的”),并勾选数据库范围和年份。
|
||||
2. **意图扩写 (Generate):** 点击“生成需求书”,系统内置的 LLM 将这句话扩写为一份结构严谨、逻辑清晰的\*\*《自然语言检索指令书》\*\*,并在下方展开【策略确认区】。
|
||||
3. **人工核验与修改 (Edit \- HITL):** 医生在可编辑的文本区内审查这份指令书。如果发现 AI 遗漏了限定条件(如未限定 RCT),可以直接像写邮件一样用大白话打字补充进去。
|
||||
4. **异步深搜 (Execute):** 确认无误后点击“执行”,系统展开【AI 执行终端】。该自然语言指令被直接发送给底层 Unifuncs 深搜引擎,任务进入后台队列。
|
||||
5. **透明执行 (Monitor):** 终端实时打印深搜引擎跨库检索、阅读、抓取 PDF 的进度日志。
|
||||
6. **成果交付 (Results):** 任务完成后,最下方展开包含图表、文献清单、综合报告的【结果看板】。
|
||||
|
||||
## **💻 3\. 详细功能需求说明 (Functional Requirements)**
|
||||
|
||||
### **模块 3.1:检索立项配置区 (Step 1: Setup)**
|
||||
|
||||
* **自然语言输入区 (Textarea):** 提供大号文本域,支持用户随意输入临床问题。
|
||||
* **数据源与高级过滤 (Options):**
|
||||
* 目标数据源 (Checkbox):PubMed/PMC, BMJ Open/Lancet(OA), Cochrane Library。
|
||||
* 发表年份 (Dropdown):2010至今, 过去5年, 不限。
|
||||
* 目标文献数 (Dropdown):\~100篇(测试集), 全面检索。
|
||||
* 强制约束项 (Checkbox):高亮显示“强制要求:仅限可获取免费全文 (PDF Open Access)”。
|
||||
* **触发动作:** 点击“解析并生成检索需求书”按钮。系统进行短暂 Loading 后,平滑向下滑出 Step 2。
|
||||
|
||||
### **模块 3.2:检索策略确认区 (Step 2: Strategy HITL)**
|
||||
|
||||
**设计意图:** 破除代码恐惧感,建立基于自然语言的专业信任。
|
||||
|
||||
* **左侧 \- 核心意图提炼 (Read-only 卡片):**
|
||||
* 用精简的 UI 块展示 AI 提取出的结构化要素(例如🎯核心目标、💊干预/疾病、📚文献标准)。
|
||||
* 包含一个“预估命中率”的视觉反馈标签。
|
||||
* **右侧 \- 深度检索指令书编辑器 (Code Editor UI, but Plain Text):**
|
||||
* **(核心交互)** 这是一个必须可编辑的宽大文本框 (Textarea)。
|
||||
* 里面展示由 AI 生成的一篇结构化大白话要求(如:【核心主题】...【目标文献类型】...【强制数据要求】...)。
|
||||
* 提示文案引导用户:“您可以像写邮件一样在这里补充任何大白话要求,底层的深搜大模型会完美理解。”
|
||||
* **触发动作:** 点击“确认需求,启动 Deep Research”。向下滑出 Step 3。
|
||||
|
||||
### **模块 3.3:AI 执行终端区 (Step 3: Agent Terminal)**
|
||||
|
||||
* **极客暗黑终端 (Dark Mode Log View):** 固定高度(550px),内部滚动,顶部带脉冲闪烁的 Running 状态灯。
|
||||
* **阶段化日志流 (Event Stream):** 实时打印后台 Worker 轮询获取到的状态,分为不同的视觉样式:
|
||||
* \[Thinking\] 紫色:AI 分析问题、过滤非 RCT 文献的思考过程。
|
||||
* \[Action\] 蓝色:执行具体站点的 Search 动作。
|
||||
* \[Done\] 绿色:阶段性爬取成功。
|
||||
* \[Summary\] 黄色:阶段性总结。
|
||||
* *前端交互:日志增加时,容器需自动滚动到最底部。*
|
||||
|
||||
### **模块 3.4:最终交付结果大屏 (Step 4: Results Dashboard)**
|
||||
|
||||
* **统计看板 (Top Row):**
  * 数量概览卡片(包含 OA 获取率、研究类型占比)。
|
||||
* 动态图表(Chart.js):文献来源分布(饼图)、发表年份趋势(柱状图)。
|
||||
* **详情双 Tab 视图:**
|
||||
* **Tab A \- 核心文献清单:** 表格展示 Title, Authors, Journal, Year, Type, PDF状态。
|
||||
* **Tab B \- 智能综合报告:** AI 基于摘要生成的 Markdown 深度报告(背景、共识、研究空白 Gap)。
|
||||
* **科研资产流转 (Actions):**
|
||||
* 推送至 ASL 初筛池(无缝流入下一阶段的筛选工作流)。
|
||||
* 导出 Excel、Word 报告、RIS 引文。
|
||||
|
||||
## **⚙️ 4\. 技术架构与底层实现规约 (Architecture Specs)**
|
||||
|
||||
### **4.1 系统数据流向**
|
||||
|
||||
1. **指令扩写层 (本系统):** 前端提交 original\_query \-\> Node.js 调用内部 DeepSeek-V3 \-\> 返回一段 Markdown 格式的**自然语言检索需求书 (Requirement)**。
|
||||
2. **异步执行层 (pg-boss \+ Unifuncs):** 用户确认后的 confirmed\_requirement \-\> 压入 pg-boss 队列 \-\> Worker 将该自然语言原封不动地发给 Unifuncs /deepsearch/v1/create\_task 接口。由 Unifuncs 内部去理解大白话并自主执行复杂爬虫。
|
||||
3. **状态透传层:** Worker 轮询 Unifuncs 的 reasoning\_content,正则提取并写入 PostgreSQL 的 execution\_logs 数组。前端只负责拉取 DB 渲染,实现状态解耦。
|
||||
|
||||
### **4.2 数据库 Schema (Prisma)**
|
||||
|
||||
model AslResearchTask {
|
||||
id String @id @default(uuid())
|
||||
user\_id String
|
||||
|
||||
// Step 1: 原始输入配置
|
||||
original\_query String @db.Text
|
||||
target\_sources Json
|
||||
filters Json
|
||||
|
||||
// Step 2: HITL 策略确认阶段
|
||||
ai\_intent\_summary Json? // 左侧小卡片用的结构化摘要
|
||||
confirmed\_requirement String? @db.Text // 核心:用户最终复核并修改后的自然语言指令书
|
||||
|
||||
// Step 3: 执行与状态
|
||||
status String // 'draft', 'pending', 'running', 'completed', 'failed'
|
||||
unifuncs\_task\_id String? // 绑定的深搜任务ID
|
||||
execution\_logs Json? // 终端执行日志数组 (增量更新)
|
||||
|
||||
// Step 4: 交付资产
|
||||
result\_list Json? // 抓取到的文献元数据列表
|
||||
synthesis\_report String? @db.Text // Markdown 综合报告
|
||||
|
||||
created\_at DateTime @default(now())
|
||||
updated\_at DateTime @updatedAt
|
||||
}
|
||||
|
||||
### **4.3 Unifuncs 核心 Payload 规约**
|
||||
|
||||
在 Worker 请求 Unifuncs 时,利用其强大的指令遵循能力,强制分离报告与 JSON 列表:
|
||||
|
||||
const unifuncsPayload \= {
|
||||
model: "s2",
|
||||
messages: \[{
|
||||
role: "user",
|
||||
content: \`请严格根据以下检索需求执行深度研究:\\n${task.confirmed\_requirement}\`
|
||||
}\],
|
||||
// 核心约束:强制要求 XML tag 隔离输出,确保前端渲染不崩溃
|
||||
output\_prompt: \`
|
||||
请严格按照以下两部分输出结果,不要包含任何其他废话:
|
||||
\<REPORT\_SECTION\>
|
||||
\[此处撰写深度综合报告,包括研究背景、核心共识、分歧点\]
|
||||
\</REPORT\_SECTION\>
|
||||
|
||||
\<JSON\_LIST\_SECTION\>
|
||||
\[此处输出文献元数据的严格 JSON 数组结构\]
|
||||
\</JSON\_LIST\_SECTION\>
|
||||
\`
|
||||
};
|
||||
|
||||
## **📅 5\. 敏捷迭代开发计划建议 (Sprint Plan)**
|
||||
|
||||
**周期:1.5 周 (约 8 个工作日)**
|
||||
|
||||
| 阶段 | 时间 | 前端任务 (Frontend) | 后端任务 (Backend/Python) |
|
||||
| :---- | :---- | :---- | :---- |
|
||||
| **Phase 1: 单页交互与需求扩写** | Day 1-3 | 搭建 V4.1 瀑布流单页布局;开发 Step 1 和 Step 2 的 UI 联动;支持 Textarea 编辑。 | 升级 Schema;编写“需求扩写 (Requirement Expansion)” 的 Prompt;提供生成草稿接口。 |
|
||||
| **Phase 2: 异步队列与终端日志** | Day 4-6 | 开发暗黑 Terminal 组件;实现轮询接口对接与 auto-scroll 日志滚动逻辑。 | 对接 Unifuncs API;实现 pg-boss 长任务 Worker;解析 reasoning\_content 转为增量 JSON 存入 DB。 |
|
||||
| **Phase 3: 结果解析与多维导出** | Day 7-8 | 集成 Chart.js 绘制双图表;完成 Tab 列表和 Markdown 报告渲染。 | 后端正则切割 \<REPORT\_SECTION\> 与 \<JSON\_LIST\_SECTION\>;提供 Word (复用 Pandoc) 和 .ris 格式的导出接口联调。 |
|
||||
|
||||
@@ -0,0 +1,627 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN" class="scroll-smooth">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>ASL - 智能文献检索 (Deep Research) V4.2</title>
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
tailwind.config = {
|
||||
theme: {
|
||||
extend: {
|
||||
colors: {
|
||||
primary: '#1677ff',
|
||||
primaryHover: '#4096ff',
|
||||
bgBase: '#f5f7fa',
|
||||
panelBg: '#ffffff',
|
||||
terminalBg: '#0f172a',
|
||||
terminalHeader: '#1e293b',
|
||||
},
|
||||
animation: {
|
||||
'pulse-fast': 'pulse 1.5s cubic-bezier(0.4, 0, 0.6, 1) infinite',
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
<style>
|
||||
/* 自定义滚动条 */
|
||||
::-webkit-scrollbar { width: 8px; height: 8px; }
|
||||
::-webkit-scrollbar-track { background: transparent; }
|
||||
::-webkit-scrollbar-thumb { background: #cbd5e1; border-radius: 4px; }
|
||||
::-webkit-scrollbar-thumb:hover { background: #94a3b8; }
|
||||
|
||||
.terminal-scroll::-webkit-scrollbar-thumb { background: #475569; }
|
||||
.terminal-scroll::-webkit-scrollbar-thumb:hover { background: #64748b; }
|
||||
|
||||
/* 瀑布流渐入动画 */
|
||||
.reveal-section {
|
||||
opacity: 0;
|
||||
transform: translateY(20px);
|
||||
transition: opacity 0.6s ease-out, transform 0.6s ease-out;
|
||||
}
|
||||
.reveal-visible {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
|
||||
.tab-active { color: #1677ff; border-bottom: 2px solid #1677ff; font-weight: 500; }
|
||||
.tab-inactive { color: #64748b; border-bottom: 2px solid transparent; }
|
||||
.tab-inactive:hover { color: #1677ff; }
|
||||
|
||||
/* 工作流连接线 */
|
||||
.workflow-connector {
|
||||
width: 2px;
|
||||
height: 32px;
|
||||
background-color: #cbd5e1;
|
||||
margin: 0 auto;
|
||||
position: relative;
|
||||
}
|
||||
.workflow-connector::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
bottom: -4px;
|
||||
left: -4px;
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
border-radius: 50%;
|
||||
background-color: #cbd5e1;
|
||||
}
|
||||
.workflow-connector.active {
|
||||
background-color: #1677ff;
|
||||
}
|
||||
.workflow-connector.active::after {
|
||||
background-color: #1677ff;
|
||||
box-shadow: 0 0 8px #1677ff;
|
||||
}
|
||||
|
||||
/* 落地页平滑消失动画 */
|
||||
.landing-fade-out {
|
||||
opacity: 0;
|
||||
transform: translateY(-30px);
|
||||
pointer-events: none;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body class="bg-bgBase text-gray-800 font-sans h-screen flex overflow-hidden">
|
||||
|
||||
<!-- 左侧导航栏 -->
|
||||
<aside class="w-64 bg-slate-900 text-white flex flex-col h-full flex-shrink-0 shadow-xl z-20">
|
||||
<div class="h-16 flex items-center px-6 border-b border-slate-800">
|
||||
<i class="fa-solid fa-notes-medical text-blue-400 text-xl mr-3"></i>
|
||||
<span class="text-lg font-bold tracking-wide">AI Clinical</span>
|
||||
</div>
|
||||
<div class="p-4 text-xs font-semibold text-slate-400 uppercase tracking-wider">业务模块</div>
|
||||
<nav class="flex-1 px-3 space-y-1">
|
||||
<a href="#" class="flex items-center px-3 py-2.5 bg-blue-600/20 text-blue-400 rounded-lg group transition-colors">
|
||||
<i class="fa-solid fa-magnifying-glass-chart w-6 text-center"></i>
|
||||
<span class="ml-2 font-medium">智能文献检索</span>
|
||||
</a>
|
||||
<a href="#" class="flex items-center px-3 py-2.5 text-slate-300 hover:bg-slate-800 hover:text-white rounded-lg transition-colors">
|
||||
<i class="fa-solid fa-filter w-6 text-center"></i><span class="ml-2">标题摘要初筛</span>
|
||||
</a>
|
||||
<a href="#" class="flex items-center px-3 py-2.5 text-slate-300 hover:bg-slate-800 hover:text-white rounded-lg transition-colors">
|
||||
<i class="fa-solid fa-file-pdf w-6 text-center"></i><span class="ml-2">全文复筛</span>
|
||||
</a>
|
||||
<a href="#" class="flex items-center px-3 py-2.5 text-slate-300 hover:bg-slate-800 hover:text-white rounded-lg transition-colors">
|
||||
<i class="fa-solid fa-chart-network w-6 text-center"></i><span class="ml-2">自动化证据合成</span>
|
||||
</a>
|
||||
</nav>
|
||||
<div class="p-4 border-t border-slate-800 flex items-center">
|
||||
<div class="w-8 h-8 rounded-full bg-blue-500 flex items-center justify-center text-white font-bold text-sm">DR</div>
|
||||
<div class="ml-3">
|
||||
<div class="text-sm font-medium">Dr. 研究员</div>
|
||||
<div class="text-xs text-slate-400">专业版用户</div>
|
||||
</div>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<!-- 右侧工作区 -->
|
||||
<main class="flex-1 flex flex-col h-full relative overflow-hidden bg-bgBase">
|
||||
|
||||
<!-- 顶部 Header -->
|
||||
<header class="h-16 bg-panelBg shadow-sm flex items-center justify-between px-6 z-10 flex-shrink-0 relative">
|
||||
<h1 class="text-lg font-semibold text-gray-800">
|
||||
智能文献检索 (Deep Research)
|
||||
<span class="text-xs text-primary bg-blue-50 px-2 py-1 rounded ml-2 border border-blue-100">V4.2 极简引导版</span>
|
||||
</h1>
|
||||
</header>
|
||||
|
||||
<!-- 滚动主容器 -->
|
||||
<div class="flex-1 overflow-y-auto relative" id="scroll-container">
|
||||
|
||||
<!-- ================= 第一眼:落地页大搜索框 (Landing View) ================= -->
|
||||
<div id="landing-view" class="absolute inset-0 flex flex-col items-center justify-center px-6 pb-20 transition-all duration-500 z-10 bg-bgBase">
|
||||
|
||||
<!-- 徽标与标题 -->
|
||||
<div class="text-center mb-10">
|
||||
<div class="inline-flex items-center justify-center w-16 h-16 rounded-2xl bg-white text-primary mb-6 shadow-sm border border-blue-100">
|
||||
<i class="fa-solid fa-sparkles text-3xl"></i>
|
||||
</div>
|
||||
<h1 class="text-3xl md:text-4xl font-extrabold text-slate-800 mb-4 tracking-tight">
|
||||
AI Clinical <span class="text-transparent bg-clip-text bg-gradient-to-r from-blue-600 to-cyan-500">Deep Research</span>
|
||||
</h1>
|
||||
<p class="text-slate-500 text-lg">今天想探索什么临床问题?让 AI 为您构建专业的循证医学检索方案。</p>
|
||||
</div>
|
||||
|
||||
<!-- 核心大搜索框 -->
|
||||
<div class="w-full max-w-3xl relative group">
|
||||
<div class="absolute inset-y-0 left-0 pl-5 flex items-center pointer-events-none">
|
||||
<i class="fa-solid fa-magnifying-glass text-gray-400 text-lg group-focus-within:text-primary transition-colors"></i>
|
||||
</div>
|
||||
<input type="text" id="landing-input" class="w-full pl-14 pr-36 py-4 rounded-2xl border-2 border-white bg-white focus:border-primary focus:ring-4 focus:ring-primary/10 shadow-lg text-lg transition-all outline-none text-slate-700" placeholder="例如:他汀类药物预防心血管疾病的高质量临床研究..." onkeydown="if(event.key === 'Enter') proceedToSetup()">
|
||||
<button onclick="proceedToSetup()" class="absolute right-2 top-2 bottom-2 bg-primary hover:bg-primaryHover text-white px-6 rounded-xl font-medium transition-colors flex items-center shadow-md">
|
||||
开始研究 <i class="fa-solid fa-arrow-right ml-2"></i>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- 推荐预置词 -->
|
||||
<div class="mt-8 flex flex-wrap justify-center gap-3 max-w-2xl items-center">
|
||||
<span class="text-sm text-slate-400">试着探索:</span>
|
||||
<button onclick="fillLanding('他汀类药物预防心血管疾病的Meta分析系统测试语料库,重点寻找高质量的临床研究文献(包含RCT、队列研究、Meta分析),必须包含可下载的PDF全文。')" class="text-sm bg-white border border-slate-200 hover:border-blue-300 hover:bg-blue-50 text-slate-600 hover:text-primary px-4 py-1.5 rounded-full transition-all shadow-sm">
|
||||
他汀类心血管一级预防
|
||||
</button>
|
||||
<button onclick="fillLanding('SGLT2抑制剂对比安慰剂在射血分数降低的心力衰竭 (HFrEF) 患者中的疗效与安全性,需要最新的RCT。')" class="text-sm bg-white border border-slate-200 hover:border-blue-300 hover:bg-blue-50 text-slate-600 hover:text-primary px-4 py-1.5 rounded-full transition-all shadow-sm">
|
||||
SGLT2 抑制剂治疗 HFrEF
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ================= 工作台流容器 (初始隐藏) ================= -->
|
||||
<div id="workflow-container" class="max-w-5xl mx-auto py-8 pb-32 hidden">
|
||||
|
||||
<!-- ================= Step 1: 检索立项配置 (继承自Landing输入) ================= -->
|
||||
<section id="step1-setup" class="reveal-section bg-panelBg rounded-xl shadow-sm border border-gray-200 p-8">
|
||||
<div class="flex items-center mb-6">
|
||||
<div class="w-10 h-10 bg-blue-50 text-primary rounded-lg flex items-center justify-center mr-4">
|
||||
<i class="fa-solid fa-1 text-xl"></i>
|
||||
</div>
|
||||
<div>
|
||||
<h2 class="text-xl font-bold text-gray-800">核对研究问题与范围</h2>
|
||||
<p class="text-sm text-gray-500 mt-1">确认您的初步想法并配置检索边界,AI将为您扩写为自然语言深搜指令。</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="space-y-6">
|
||||
<!-- 搜索框 (动态填充) -->
|
||||
<div>
|
||||
<textarea id="search-input" rows="3" class="w-full px-4 py-3 rounded-lg border border-gray-300 focus:outline-none focus:ring-2 focus:ring-primary/50 focus:border-primary resize-none text-base shadow-sm font-medium text-gray-700"></textarea>
|
||||
</div>
|
||||
|
||||
<!-- 基础配置 -->
|
||||
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||
<div class="p-5 bg-gray-50/50 rounded-lg border border-gray-100">
|
||||
<h3 class="text-sm font-semibold text-gray-700 mb-3"><i class="fa-solid fa-database mr-2 text-primary"></i>目标数据源</h3>
|
||||
<div class="flex flex-wrap gap-3">
|
||||
<label class="flex items-center px-3 py-1.5 bg-white border border-blue-200 rounded-md cursor-pointer hover:bg-blue-50 transition-colors">
|
||||
<input type="checkbox" checked class="text-primary rounded border-gray-300 focus:ring-primary w-4 h-4">
|
||||
<span class="ml-2 text-sm text-gray-700 font-medium">PubMed / PMC</span>
|
||||
</label>
|
||||
<label class="flex items-center px-3 py-1.5 bg-white border border-blue-200 rounded-md cursor-pointer hover:bg-blue-50 transition-colors">
|
||||
<input type="checkbox" checked class="text-primary rounded border-gray-300 focus:ring-primary w-4 h-4">
|
||||
<span class="ml-2 text-sm text-gray-700 font-medium">BMJ Open / Lancet (OA)</span>
|
||||
</label>
|
||||
<label class="flex items-center px-3 py-1.5 bg-white border border-gray-200 rounded-md cursor-pointer hover:bg-gray-50 transition-colors">
|
||||
<input type="checkbox" checked class="text-primary rounded border-gray-300 focus:ring-primary w-4 h-4">
|
||||
<span class="ml-2 text-sm text-gray-700">Cochrane Library</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="p-5 bg-gray-50/50 rounded-lg border border-gray-100">
|
||||
<h3 class="text-sm font-semibold text-gray-700 mb-3"><i class="fa-solid fa-sliders mr-2 text-primary"></i>高级过滤</h3>
|
||||
<div class="grid grid-cols-2 gap-4">
|
||||
<div>
|
||||
<label class="block text-xs text-gray-500 mb-1">发表年份</label>
|
||||
<select class="w-full text-sm border-gray-300 rounded-md py-1.5 pl-2 pr-6 border bg-white focus:ring-primary focus:border-primary">
|
||||
<option>2010 至今</option>
|
||||
<option>过去 5 年</option>
|
||||
<option>不限</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-xs text-gray-500 mb-1">目标文献数</label>
|
||||
<select class="w-full text-sm border-gray-300 rounded-md py-1.5 pl-2 pr-6 border bg-white focus:ring-primary focus:border-primary">
|
||||
<option>~100 篇 (测试集)</option>
|
||||
<option>全面检索 (不限)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="col-span-2 flex items-center mt-1">
|
||||
<label class="flex items-center cursor-pointer">
|
||||
<input type="checkbox" checked class="text-primary rounded border-gray-300 focus:ring-primary w-4 h-4">
|
||||
<span class="ml-2 text-sm text-gray-700 font-medium text-green-600">强制要求:仅限可获取免费全文 (PDF Open Access)</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 动作按钮 -->
|
||||
<div class="flex justify-end pt-2">
|
||||
<button id="btn-generate" class="bg-primary hover:bg-primaryHover text-white px-6 py-2.5 rounded-lg text-sm font-medium transition-all shadow-md hover:shadow-lg flex items-center" onclick="revealStep2()">
|
||||
<i class="fa-solid fa-wand-magic-sparkles mr-2"></i> 解析并生成检索需求书
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- 连接线 1 -->
|
||||
<div id="conn-1" class="workflow-connector hidden my-2"></div>
|
||||
|
||||
<!-- ================= Step 2: 检索策略确认 (HITL) ================= -->
|
||||
<section id="step2-strategy" class="hidden reveal-section bg-panelBg rounded-xl shadow-sm border-2 border-blue-200 p-8 relative overflow-hidden">
|
||||
<div class="absolute top-0 right-0 w-32 h-32 bg-blue-50 rounded-bl-full -z-10 opacity-50"></div>
|
||||
|
||||
<div class="flex items-center justify-between mb-6">
|
||||
<div class="flex items-center">
|
||||
<div class="w-10 h-10 bg-blue-100 text-primary rounded-lg flex items-center justify-center mr-4">
|
||||
<i class="fa-solid fa-2 text-xl"></i>
|
||||
</div>
|
||||
<div>
|
||||
<h2 class="text-xl font-bold text-gray-800">检索需求确认与完善 (人机协同)</h2>
|
||||
<p class="text-sm text-gray-500 mt-1">请核对 AI 扩写的检索指令。确认无误后,系统将把这段大白话交由 Unifuncs 引擎执行。</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="bg-orange-50 text-orange-600 border border-orange-200 px-3 py-1 rounded text-xs font-medium flex items-center animate-pulse">
|
||||
<i class="fa-solid fa-hand-pointer mr-1.5"></i> 待您确认与修改
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6">
|
||||
<!-- 左侧:AI 解析摘要 -->
|
||||
<div class="lg:col-span-1 space-y-3">
|
||||
<h3 class="text-sm font-semibold text-gray-700 flex items-center"><i class="fa-solid fa-lightbulb text-primary mr-2"></i>核心意图提炼</h3>
|
||||
<div class="bg-gray-50 p-4 rounded-lg border border-gray-200 text-sm text-gray-700 space-y-4">
|
||||
<div>
|
||||
<span class="text-gray-400 font-bold block mb-1">🎯 核心目标</span>
|
||||
为Meta分析构建测试语料库
|
||||
</div>
|
||||
<div class="border-t border-gray-200 pt-3">
|
||||
<span class="text-blue-500 font-bold block mb-1">💊 干预 / 疾病</span>
|
||||
他汀类药物 (Statins) / 心血管疾病 (CVD)
|
||||
</div>
|
||||
<div class="border-t border-gray-200 pt-3">
|
||||
<span class="text-purple-500 font-bold block mb-1">📚 文献标准</span>
|
||||
要求有PDF全文、高质量临床研究 (RCT/队列/Meta分析)
|
||||
</div>
|
||||
</div>
|
||||
<div class="bg-green-50 border border-green-200 p-3 rounded-lg flex justify-between items-center mt-2">
|
||||
<span class="text-xs text-gray-600">目标完成率预估: <span class="font-bold text-green-600">极高 (可获100+篇)</span></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 右侧:自然语言指令编辑器 -->
|
||||
<div class="lg:col-span-2 flex flex-col">
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<h3 class="text-sm font-semibold text-gray-700 flex items-center"><i class="fa-solid fa-file-lines text-primary mr-2"></i>深度检索指令书 (可直接编辑)</h3>
|
||||
<button class="text-xs text-gray-500 hover:text-primary transition-colors"><i class="fa-solid fa-rotate-left mr-1"></i>重置</button>
|
||||
</div>
|
||||
<textarea class="flex-1 w-full p-5 text-sm leading-relaxed text-gray-800 bg-white rounded-lg border border-gray-300 focus:ring-2 focus:ring-primary shadow-inner resize-y min-h-[220px]" spellcheck="false">请帮我执行一次深度的医学文献检索,目标是构建一个高质量的测试语料库。以下是具体的检索要求:
|
||||
|
||||
【核心主题】
|
||||
评估他汀类药物(Statins)预防心血管疾病(CVD)的疗效与安全性。包含一级预防与二级预防。
|
||||
|
||||
【目标文献类型】
|
||||
- 随机对照试验(RCT)
|
||||
- 前瞻性队列研究(Cohort Study)
|
||||
- 现有的系统综述与Meta分析(Systematic Review & Meta-analysis)
|
||||
|
||||
【强制数据要求】
|
||||
- 必须是开放获取(Open Access)的文献,且必须能直接下载 PDF 全文。
|
||||
- 文章中最好包含标准的基线特征表(Table 1)以及统计学结局数据(HR/OR/RR值及95% CI)。</textarea>
|
||||
<p class="text-xs text-gray-400 mt-2"><i class="fa-solid fa-circle-info mr-1"></i>您可以像写邮件一样在这里补充任何大白话要求。</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 动作按钮 -->
|
||||
<div class="flex justify-end pt-6 mt-4 border-t border-gray-100 gap-3">
|
||||
<button id="btn-execute" class="bg-primary hover:bg-primaryHover text-white px-8 py-2.5 rounded-lg text-sm font-medium transition-all shadow-md flex items-center" onclick="revealStep3()">
|
||||
<i class="fa-solid fa-rocket mr-2"></i> 确认需求,启动 Deep Research
|
||||
</button>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- 连接线 2 -->
|
||||
<div id="conn-2" class="workflow-connector hidden my-2"></div>
|
||||
|
||||
<!-- ================= Step 3: Deep Research AI Agent 执行终端 ================= -->
|
||||
<section id="step3-terminal" class="hidden reveal-section">
|
||||
<div class="bg-terminalBg rounded-xl shadow-2xl border border-slate-700 overflow-hidden flex flex-col h-[550px]">
|
||||
<div class="bg-terminalHeader px-4 py-3 flex items-center justify-between border-b border-slate-700 shrink-0">
|
||||
<div class="flex space-x-2">
|
||||
<div class="w-3 h-3 rounded-full bg-red-500"></div>
|
||||
<div class="w-3 h-3 rounded-full bg-yellow-500"></div>
|
||||
<div class="w-3 h-3 rounded-full bg-green-500"></div>
|
||||
</div>
|
||||
<div class="text-xs text-slate-400 font-mono flex items-center">
|
||||
<i class="fa-solid fa-microchip mr-2 text-blue-400"></i> Deep Research Agent - 实时执行日志
|
||||
</div>
|
||||
<div id="terminal-status" class="text-xs font-mono text-green-400 flex items-center">
|
||||
<span class="w-2 h-2 rounded-full bg-green-400 mr-2 animate-pulse-fast"></span> Running
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="terminal-body" class="p-6 flex-1 overflow-y-auto terminal-scroll space-y-4 font-mono text-sm">
|
||||
<div class="text-slate-400">>> Initializing pg-boss worker for Deep Research... OK</div>
|
||||
<div class="text-slate-400">>> Sending natural language requirements to Unifuncs API... OK</div>
|
||||
<div class="text-slate-400">>> Starting autonomous research loop. Target: 100 high-quality PDFs.</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- 连接线 3 -->
|
||||
<div id="conn-3" class="workflow-connector hidden my-2"></div>
|
||||
|
||||
<!-- ================= Step 4: 最终交付结果 ================= -->
|
||||
<section id="step4-results" class="hidden reveal-section space-y-6">
|
||||
<div class="bg-green-50 border border-green-200 rounded-xl p-4 flex items-center justify-between shadow-sm">
|
||||
<div class="flex items-center">
|
||||
<div class="w-10 h-10 bg-green-100 text-green-600 rounded-full flex items-center justify-center mr-4">
|
||||
<i class="fa-solid fa-check text-xl"></i>
|
||||
</div>
|
||||
<div>
|
||||
<h2 class="text-lg font-bold text-gray-800">Deep Research 任务圆满完成</h2>
|
||||
<p class="text-xs text-gray-600 mt-0.5">历时 3 分 12 秒,成功提取并验证 <span class="font-bold">103</span> 篇高质量 OA 文献,并已生成智能综合报告。</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex space-x-3">
|
||||
<button class="px-4 py-2 bg-white text-gray-700 hover:text-primary hover:border-primary border border-gray-300 rounded-lg text-sm font-medium transition-colors shadow-sm flex items-center">
|
||||
<i class="fa-solid fa-share-from-square mr-2"></i> 推送至 ASL 初筛池
|
||||
</button>
|
||||
<button class="px-4 py-2 bg-primary text-white hover:bg-primaryHover rounded-lg text-sm font-medium transition-colors shadow-sm flex items-center">
|
||||
<i class="fa-solid fa-download mr-2"></i> 导出科研结果资产 <i class="fa-solid fa-chevron-down ml-2 text-[10px]"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
<div class="bg-panelBg rounded-xl p-5 border border-gray-200 shadow-sm flex flex-col justify-center">
|
||||
<div class="text-sm font-medium text-gray-500 mb-1">高质量文献库构建</div>
|
||||
<div class="text-4xl font-bold text-gray-800 mb-4">103<span class="text-sm text-gray-400 font-normal ml-1">篇</span></div>
|
||||
<div class="space-y-2">
|
||||
<div class="flex justify-between text-xs">
|
||||
<span class="text-gray-500"><i class="fa-solid fa-unlock text-green-500 w-4"></i> PDF 成功获取率</span>
|
||||
<span class="font-medium text-green-600">100%</span>
|
||||
</div>
|
||||
<div class="flex justify-between text-xs">
|
||||
<span class="text-gray-500"><i class="fa-solid fa-vial text-purple-500 w-4"></i> RCT & 队列研究</span>
|
||||
<span class="font-medium text-gray-700">78 篇</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="bg-panelBg rounded-xl p-5 border border-gray-200 shadow-sm">
|
||||
<h4 class="text-xs font-semibold text-gray-500 uppercase mb-3">文献来源分布</h4>
|
||||
<div class="relative h-32 w-full"><canvas id="sourceChart"></canvas></div>
|
||||
</div>
|
||||
<div class="bg-panelBg rounded-xl p-5 border border-gray-200 shadow-sm">
|
||||
<h4 class="text-xs font-semibold text-gray-500 uppercase mb-3">发表年份趋势</h4>
|
||||
<div class="relative h-32 w-full"><canvas id="yearChart"></canvas></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="bg-panelBg rounded-xl border border-gray-200 shadow-sm overflow-hidden flex flex-col" style="min-height: 500px;">
|
||||
<div class="flex px-6 pt-4 border-b border-gray-200 bg-gray-50/50">
|
||||
<button class="pb-3 px-4 tab-active text-sm flex items-center transition-colors" onclick="switchTab('list')">
|
||||
<i class="fa-solid fa-list-ul mr-2"></i>核心文献清单 (103)
|
||||
</button>
|
||||
<button class="pb-3 px-4 tab-inactive text-sm flex items-center relative transition-colors" onclick="switchTab('report')">
|
||||
<i class="fa-solid fa-file-signature mr-2"></i>智能综合报告 (AI生成)
|
||||
</button>
|
||||
</div>
|
||||
<div id="tab-list" class="flex-1 overflow-auto bg-white">
|
||||
<table class="w-full text-left text-sm text-gray-600">
|
||||
<thead class="bg-gray-50 text-gray-700 text-xs uppercase border-b border-gray-200 sticky top-0 z-10">
|
||||
<tr>
|
||||
<th class="px-6 py-4 font-semibold">文献信息 (Title / Authors / Journal)</th>
|
||||
<th class="px-6 py-4 font-semibold w-24">年份</th>
|
||||
<th class="px-6 py-4 font-semibold w-32">研究类型</th>
|
||||
<th class="px-6 py-4 font-semibold w-32">获取状态</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="divide-y divide-gray-100">
|
||||
<tr class="hover:bg-blue-50/30 transition-colors">
|
||||
<td class="px-6 py-4">
|
||||
<div class="font-medium text-primary hover:underline cursor-pointer text-base">Efficacy and safety of cholesterol-lowering treatment: prospective meta-analysis</div>
|
||||
<div class="text-xs text-gray-500 mt-1">Cholesterol Treatment Trialists' (CTT) Collaborators. <span class="text-gray-800 font-medium">The Lancet</span></div>
|
||||
</td>
|
||||
<td class="px-6 py-4">2010</td>
|
||||
<td class="px-6 py-4"><span class="px-2 py-1 bg-purple-50 text-purple-600 rounded text-xs border border-purple-200">Meta-analysis</span></td>
|
||||
<td class="px-6 py-4"><span class="text-green-600 text-xs font-medium flex items-center bg-green-50 px-2 py-1 rounded border border-green-100 w-max"><i class="fa-solid fa-file-pdf mr-1.5"></i>PDF 已缓存</span></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div id="tab-report" class="hidden flex-1 overflow-auto p-10 bg-white prose prose-slate max-w-none">
|
||||
<h2 class="text-2xl font-bold text-gray-900 border-b pb-4 mb-6">他汀类药物在心血管疾病预防中的应用</h2>
|
||||
<p class="text-sm text-gray-600">本报告基于此次 Deep Research 提取的 103 篇高质量文献摘要,由 AI 自动交叉验证并总结生成。</p>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<!-- 交互逻辑 -->
|
||||
<script>
|
||||
// 辅助工具:填充测试用例
|
||||
/**
 * Test-case helper: writes a sample query into the landing page input.
 *
 * @param {string} text - Query text assigned to the `landing-input` field.
 */
function fillLanding(text) {
    const landingField = document.getElementById('landing-input');
    landingField.value = text;
}
|
||||
|
||||
// 辅助工具:平滑滚动
|
||||
/**
 * Smoothly scrolls the element with the given id to the top of the viewport.
 *
 * The lookup and the scroll are deferred by 50 ms via `setTimeout`
 * (presumably so a section that was just un-hidden has been laid out first;
 * the source does not state the reason).
 *
 * If no element carries that id when the timer fires, the call is a no-op.
 * Without the guard a missing id raised an uncaught TypeError inside the
 * timer callback.
 *
 * @param {string} id - DOM id of the element to scroll into view.
 */
function scrollToElement(id) {
    setTimeout(() => {
        const el = document.getElementById(id);
        if (!el) {
            return;
        }
        el.scrollIntoView({ behavior: 'smooth', block: 'start' });
    }, 50);
}
|
||||
|
||||
// 核心交互一:从落地页进入工作台 (Landing -> Step 1)
|
||||
/**
 * Core interaction 1: leave the landing view and enter the workbench
 * (Landing -> Step 1).
 *
 * With an empty (whitespace-only) landing input, the input border is flashed
 * red for one second and nothing else happens. Otherwise the trimmed query is
 * copied into the workbench search box, the landing view starts its fade-out,
 * and 400 ms later the landing view is hidden and Step 1 is revealed.
 */
function proceedToSetup() {
    const landingInput = document.getElementById('landing-input');
    const query = landingInput.value.trim();

    if (!query) {
        // Empty query: briefly highlight the field and stay on the landing view.
        const errorClass = 'border-red-400';
        landingInput.classList.add(errorClass);
        setTimeout(() => {
            landingInput.classList.remove(errorClass);
        }, 1000);
        return;
    }

    // 1. Carry the landing query over to the workbench input.
    document.getElementById('search-input').value = query;

    // 2. Start the landing view's disappear animation.
    const landingView = document.getElementById('landing-view');
    landingView.classList.add('landing-fade-out');

    // 3. After the animation, really hide the landing view and show the workflow.
    const showWorkflow = () => {
        landingView.classList.add('hidden');
        document.getElementById('workflow-container').classList.remove('hidden');

        // Kick off the Step 1 reveal animation.
        const setupSection = document.getElementById('step1-setup');
        void setupSection.offsetWidth; // force a reflow so the class change animates
        setupSection.classList.add('reveal-visible');
    };

    // The original note says this roughly matches a CSS "duration-500"
    // transition, although the delay used is 400 ms.
    setTimeout(showWorkflow, 400);
}
|
||||
|
||||
// 核心交互二:生成需求 (Step 1 -> Step 2)
|
||||
/**
 * Core interaction 2: the requirement has been generated (Step 1 -> Step 2).
 *
 * Turns the "generate" button into a disabled green "done" button, then after
 * 500 ms activates connector 1, reveals the Step 2 section and scrolls to it.
 */
function revealStep2() {
    const generateButton = document.getElementById('btn-generate');
    generateButton.innerHTML = '<i class="fa-solid fa-check mr-2"></i> 需求已生成';

    // Swap the primary colour classes for their green counterparts.
    const classSwaps = [
        ['bg-primary', 'bg-green-500'],
        ['hover:bg-primaryHover', 'hover:bg-green-600'],
    ];
    for (const [fromClass, toClass] of classSwaps) {
        generateButton.classList.replace(fromClass, toClass);
    }
    generateButton.disabled = true;

    const showStrategyStep = () => {
        const connector = document.getElementById('conn-1');
        connector.classList.remove('hidden');
        connector.classList.add('active');

        const strategySection = document.getElementById('step2-strategy');
        strategySection.classList.remove('hidden');
        void strategySection.offsetWidth; // force a reflow so the reveal animates
        strategySection.classList.add('reveal-visible');

        scrollToElement('step2-strategy');
    };
    setTimeout(showStrategyStep, 500);
}
|
||||
|
||||
// 核心交互三:执行深搜 (Step 2 -> Step 3)
|
||||
/**
 * Core interaction 3: run the deep search (Step 2 -> Step 3).
 *
 * Locks the "execute" button, then after 500 ms activates connector 2,
 * reveals the terminal section, scrolls to it and starts the simulated
 * agent log.
 */
function revealStep3() {
    const executeButton = document.getElementById('btn-execute');
    executeButton.innerHTML = '<i class="fa-solid fa-lock mr-2"></i> 需求已锁定,执行中...';
    executeButton.disabled = true;

    const showTerminalStep = () => {
        const connector = document.getElementById('conn-2');
        connector.classList.remove('hidden');
        connector.classList.add('active');

        const terminalSection = document.getElementById('step3-terminal');
        terminalSection.classList.remove('hidden');
        void terminalSection.offsetWidth; // force a reflow so the reveal animates
        terminalSection.classList.add('reveal-visible');

        scrollToElement('step3-terminal');
        startAIExecutionLog();
    };
    setTimeout(showTerminalStep, 500);
}
|
||||
|
||||
// 模拟 AI Agent 思考过程
|
||||
/**
 * Simulates the AI agent's thinking process in the terminal panel.
 *
 * A fixed script of events is scheduled with `setTimeout`; each event's
 * `delay` is added to a running total, so entries appear one after another.
 * Every event except the last is rendered as a fading-in entry appended to
 * `terminal-body`. The final `finish` event switches the status indicator to
 * "Finished" and hands over to `revealStep4()`.
 */
function startAIExecutionLog() {
    const logContainer = document.getElementById('terminal-body');
    const statusBadge = document.getElementById('terminal-status');

    // Scripted timeline; `delay` is milliseconds after the previous event.
    const timeline = [
        { delay: 1000, type: 'think', title: '任务理解 (Unifuncs Engine)', text: '已收到自然语言指令。目标:他汀类药物心血管疾病预防文献库,需PDF全文。' },
        { delay: 1500, type: 'action', title: 'Action: Search', text: '> executing search across PubMed & PMC...' },
        { delay: 1500, type: 'done', title: '搜索轮次完成', text: '' },
        { delay: 1000, type: 'think', title: '过滤阶段', text: '正在过滤非 RCT 与非开放获取 (OA) 资源...' },
        { delay: 1500, type: 'action', title: 'Action: Scrape', text: '> extracting PDF links and Abstract data [103/103] OK.' },
        { delay: 1000, type: 'summary', title: '初步发现汇总', text: '✅ 已成功获取 103 篇高质量目标文献。开始生成综述报告。' },
        { delay: 2000, type: 'finish', title: '', text: '' },
    ];

    // One markup builder per entry type; a type without a builder yields an empty entry.
    const markupByType = new Map([
        ['think', (title, text) => `<div class="flex items-start"><i class="fa-solid fa-brain mt-1 mr-3 text-purple-400 w-4"></i><div class="flex-1 border-l-2 border-slate-700 pl-3"><div class="text-purple-400 font-bold mb-1">[Thinking] ${title}</div><div class="text-slate-300">${text}</div></div></div>`],
        ['action', (title, text) => `<div class="flex items-start bg-slate-800 p-3 rounded border border-slate-700"><i class="fa-solid fa-terminal mt-1 mr-3 text-blue-400 w-4"></i><div class="flex-1"><div class="text-blue-400 font-bold mb-1">${title}</div><div class="text-green-400">${text}</div></div></div>`],
        ['done', (title) => `<div class="flex items-start"><i class="fa-solid fa-check mt-1 mr-3 text-green-500 w-4"></i><div class="text-green-500 font-bold">${title}</div></div>`],
        ['summary', (title, text) => `<div class="flex items-start mt-4 bg-slate-800/50 p-4 border border-slate-600 rounded"><i class="fa-solid fa-flag-checkered mt-1 mr-3 text-yellow-400 w-4 text-lg"></i><div class="flex-1"><div class="text-yellow-400 font-bold mb-2">${title}</div><div class="text-slate-300">${text}</div></div></div>`],
    ]);

    const finishRun = () => {
        statusBadge.innerHTML = '<span class="w-2 h-2 rounded-full bg-slate-500 mr-2"></span> Finished';
        statusBadge.className = "text-xs font-mono text-slate-400 flex items-center";
        revealStep4();
    };

    const appendEntry = (step) => {
        const entry = document.createElement('div');
        entry.className = 'opacity-0 transform translate-y-2 transition-all duration-300';

        const buildMarkup = markupByType.get(step.type);
        if (buildMarkup) {
            entry.innerHTML = buildMarkup(step.title, step.text);
        }

        logContainer.appendChild(entry);
        void entry.offsetWidth; // force a reflow so the fade-in transition runs
        entry.classList.remove('opacity-0', 'translate-y-2');
        // Keep the newest entry in view.
        logContainer.scrollTop = logContainer.scrollHeight;
    };

    let elapsed = 0;
    for (const step of timeline) {
        elapsed += step.delay;
        const fire = step.type === 'finish' ? finishRun : () => appendEntry(step);
        setTimeout(fire, elapsed);
    }
}
|
||||
|
||||
// 第 3 步 -> 第 4 步
|
||||
/**
 * Step 3 -> Step 4: shows the final results section.
 *
 * Activates connector 3, reveals `step4-results`, scrolls to it and renders
 * the charts 300 ms later.
 */
function revealStep4() {
    const connector = document.getElementById('conn-3');
    const connectorClasses = connector.classList;
    connectorClasses.remove('hidden');
    connectorClasses.add('active');

    const resultsSection = document.getElementById('step4-results');
    resultsSection.classList.remove('hidden');
    void resultsSection.offsetWidth; // force a reflow so the reveal animates
    resultsSection.classList.add('reveal-visible');

    scrollToElement('step4-results');
    setTimeout(renderCharts, 300);
}
|
||||
|
||||
// Tab 切换
|
||||
/**
 * Switches the results panel between the literature list and the AI report.
 *
 * The tab buttons are located through their inline `onclick` attribute. The
 * selected button gets the `tab-active` class set, the other one
 * `tab-inactive`, and the matching content pane is the only one left without
 * the `hidden` class.
 *
 * @param {string} tab - `'list'` selects the literature list; any other value
 *     selects the report tab.
 */
function switchTab(tab) {
    const listButton = document.querySelector(`button[onclick="switchTab('list')"]`);
    const reportButton = document.querySelector(`button[onclick="switchTab('report')"]`);
    const listPane = document.getElementById('tab-list');
    const reportPane = document.getElementById('tab-report');

    const showList = tab === 'list';
    const listState = showList ? 'tab-active' : 'tab-inactive';
    const reportState = showList ? 'tab-inactive' : 'tab-active';

    // Only the report button carries the extra `relative` class.
    listButton.className = `pb-3 px-4 ${listState} text-sm flex items-center transition-colors`;
    reportButton.className = `pb-3 px-4 ${reportState} text-sm flex items-center relative transition-colors`;

    listPane.classList.toggle('hidden', !showList);
    reportPane.classList.toggle('hidden', showList);
}
|
||||
|
||||
// 渲染图表
|
||||
/**
 * Renders the two result charts once: a doughnut of literature sources on
 * `sourceChart` and a bar chart of publication-year buckets on `yearChart`.
 *
 * `window.chartsRendered` is set after both charts are created, and any later
 * call returns immediately. `Chart` is expected to be available as a global.
 */
function renderCharts() {
    if (window.chartsRendered) {
        return;
    }

    const sourceConfig = {
        type: 'doughnut',
        data: {
            labels: ['PMC', 'BMJ Open', 'Cochrane', 'Other OA'],
            datasets: [{
                data: [68, 15, 8, 12],
                backgroundColor: ['#1677ff', '#52c41a', '#faad14', '#13c2c2'],
                borderWidth: 0,
            }],
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: {
                    position: 'right',
                    labels: { boxWidth: 10, font: { size: 10 } },
                },
            },
            cutout: '65%',
        },
    };

    const yearConfig = {
        type: 'bar',
        data: {
            labels: ['2010-12', '2013-15', '2016-18', '2019-21', '2022-24'],
            datasets: [{
                label: '纳入文献数',
                data: [15, 22, 28, 25, 13],
                backgroundColor: '#69b1ff',
                borderRadius: 3,
            }],
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { display: false } },
            scales: {
                x: { grid: { display: false } },
                y: {
                    beginAtZero: true,
                    grid: { borderDash: [2, 4], color: '#f0f0f0' },
                },
            },
        },
    };

    const sourceContext = document.getElementById('sourceChart').getContext('2d');
    new Chart(sourceContext, sourceConfig);

    const yearContext = document.getElementById('yearChart').getContext('2d');
    new Chart(yearContext, yearConfig);

    window.chartsRendered = true;
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
624
docs/03-业务模块/ASL-AI智能文献/04-开发计划/07-Deep Research V2.0 开发计划.md
Normal file
624
docs/03-业务模块/ASL-AI智能文献/04-开发计划/07-Deep Research V2.0 开发计划.md
Normal file
@@ -0,0 +1,624 @@
|
||||
# Deep Research V2.0 开发计划
|
||||
|
||||
> **文档版本:** v1.0
|
||||
> **创建日期:** 2026-02-22
|
||||
> **维护者:** 开发团队
|
||||
> **前置文档:** PRD V4.1 / 原型图 V4.2 / 技术设计 V4.1
|
||||
> **预计工期:** 5 天
|
||||
> **核心理念:** 单页瀑布流 + 自然语言需求扩写 + 异步执行 + 务实结果展示
|
||||
|
||||
---
|
||||
|
||||
## 1. 升级概述
|
||||
|
||||
### 1.1 V1.x → V2.0 变化总结
|
||||
|
||||
| 维度 | V1.x (当前) | V2.0 (目标) |
|
||||
|------|------------|------------|
|
||||
| **交互模式** | 单输入框 → 直接搜索 | Landing 入口 + 四步瀑布流:配置 → HITL 确认 → 终端 → 结果 |
|
||||
| **需求理解** | 用户原文直传 unifuncs | 内置 LLM 需求扩写 + 用户人工核验修改 |
|
||||
| **API 协议** | OpenAI 兼容(SSE 流式) | **Unifuncs 异步模式**(create_task + query_task 轮询) |
|
||||
| **执行展示** | 混合文字流(打字机效果) | 暗黑终端 + 分类结构化日志(每 3-5s 弹出一条) |
|
||||
| **结果展示** | PubMed 链接列表 | 综合报告(Markdown)+ 文献清单表格 + Word 导出 |
|
||||
| **可靠性** | 离开页面任务丢失 | pg-boss 队列,离开页面任务继续,回来可恢复 |
|
||||
|
||||
### 1.2 设计决策记录
|
||||
|
||||
| 决策 | 选择 | 理由 |
|
||||
|------|------|------|
|
||||
| SSE vs 异步 | **异步模式** | Deep Research 任务 3-10 分钟,SSE 连接不稳定;异步模式用户可离开回来,可靠性远高于 SSE |
|
||||
| 异步下的实时性 | **Worker 5s 轮询 + 前端 3s 轮询** | 用户每 3-5s 看到一条新日志,对分钟级 Agent 任务来说体验自然,比逐字流更适合终端 UI |
|
||||
| 结果展示复杂度 | **报告 + 表格,不做图表看板** | 研究人员要的是内容本身(综合报告 + 文献清单),图表是锦上添花非刚需,MVP 不做 |
|
||||
| Word 导出 | **复用 Pandoc** | Protocol Agent 已验证 Pandoc → Word 方案,零额外依赖 |
|
||||
|
||||
---
|
||||
|
||||
## 2. 系统数据流
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────────────────────────────────┐
|
||||
│ Step 1-2: 需求扩写(同步,本系统内部) │
|
||||
│ │
|
||||
│ 前端 Landing/Setup ──POST──→ Node.js ──LLMFactory──→ DeepSeek-V3 │
|
||||
│ original_query "需求扩写Prompt" │
|
||||
│ │
|
||||
│ 返回:taskId + generatedRequirement(结构化自然语言检索指令书) │
|
||||
│ 前端展示指令书,用户可编辑修改 │
|
||||
└──────────────────────────────────────────────────────────────────────┘
|
||||
↓ 用户确认
|
||||
┌──────────────────────────────────────────────────────────────────────┐
|
||||
│ Step 3: 异步执行(pg-boss + Unifuncs 异步 API) │
|
||||
│ │
|
||||
│ 前端 ──PUT──→ Node.js ──pg-boss push──→ Worker │
|
||||
│ confirmed_requirement │
|
||||
│ │
|
||||
│ Worker: │
|
||||
│ 1. POST unifuncs/v1/create_task(传入 confirmed_requirement) │
|
||||
│ 2. 每 5s GET unifuncs/v1/query_task │
|
||||
│ 3. 解析 reasoning_content → 增量日志写 DB (execution_logs) │
|
||||
│ 4. 完成后解析 content → synthesis_report + result_list │
|
||||
│ │
|
||||
│ 前端每 3s GET /tasks/:id → 渲染 execution_logs 到暗黑终端 │
|
||||
└──────────────────────────────────────────────────────────────────────┘
|
||||
↓ status === 'completed'
|
||||
┌──────────────────────────────────────────────────────────────────────┐
|
||||
│ Step 4: 结果展示(读 DB 渲染) │
|
||||
│ │
|
||||
│ 终端折叠 → 白底结果区展开 │
|
||||
│ ├── ✅ 完成横幅(一行 + 导出 Word 按钮) │
|
||||
│ ├── 📄 AI 综合报告(synthesis_report → Markdown 渲染) │
|
||||
│ └── 📋 文献清单表格(result_list → Ant Design Table) │
|
||||
└──────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 数据库 Schema 变更
|
||||
|
||||
在现有 `AslResearchTask` 基础上**新增 6 个字段**,不删除任何现有字段(向后兼容)。
|
||||
|
||||
```prisma
|
||||
model AslResearchTask {
|
||||
// ── 现有字段(保留不动)──────────────────────────
|
||||
id String @id @default(uuid())
|
||||
projectId String @map("project_id")
|
||||
userId String @map("user_id")
|
||||
query String // 原始粗略输入(Step 1)
|
||||
filters Json? // 高级筛选配置
|
||||
externalTaskId String? @map("external_task_id") // unifuncs task_id
|
||||
status String @default("pending")
|
||||
errorMessage String? @map("error_message")
|
||||
resultCount Int? @map("result_count")
|
||||
rawResult String? @map("raw_result") @db.Text
|
||||
reasoningContent String? @map("reasoning_content") @db.Text
|
||||
literatures Json?
|
||||
tokenUsage Json? @map("token_usage")
|
||||
searchCount Int? @map("search_count")
|
||||
readCount Int? @map("read_count")
|
||||
iterations Int?
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
completedAt DateTime? @map("completed_at")
|
||||
|
||||
// ── V2.0 新增字段 ──────────────────────────────
|
||||
targetSources Json? @map("target_sources") // 选中的数据源 ["pubmed.ncbi.nlm.nih.gov", ...]
|
||||
confirmedRequirement String? @map("confirmed_requirement") @db.Text // 用户核验后的自然语言检索指令书
|
||||
aiIntentSummary Json? @map("ai_intent_summary") // AI提炼的结构化摘要(左侧卡片用)
|
||||
executionLogs Json? @map("execution_logs") // 终端日志数组 [{type, title, text, ts}]
|
||||
synthesisReport String? @map("synthesis_report") @db.Text // AI综合报告(Markdown)
|
||||
resultList Json? @map("result_list") // 结构化文献元数据列表
|
||||
|
||||
// ── 索引(保留现有)────────────────────────────
|
||||
@@index([projectId], map: "idx_research_tasks_project_id")
|
||||
@@index([userId], map: "idx_research_tasks_user_id")
|
||||
@@index([status], map: "idx_research_tasks_status")
|
||||
@@index([createdAt], map: "idx_research_tasks_created_at")
|
||||
@@map("research_tasks")
|
||||
@@schema("asl_schema")
|
||||
}
|
||||
```
|
||||
|
||||
**Status 枚举扩展:**
|
||||
|
||||
| 状态 | 含义 | 触发时机 |
|
||||
|------|------|---------|
|
||||
| `draft` | 需求已扩写,等待用户确认 | POST /generate-requirement |
|
||||
| `pending` | 用户已确认,等待 Worker 拾取 | PUT /tasks/:id/execute |
|
||||
| `running` | Worker 已创建 unifuncs 任务,轮询中 | Worker 内部 |
|
||||
| `completed` | unifuncs 完成,结果已解析入库 | Worker 内部 |
|
||||
| `failed` | 执行失败 | Worker 内部 |
|
||||
|
||||
**迁移命令:**
|
||||
```bash
|
||||
npx prisma migrate dev --name add_deep_research_v2_fields
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. API 契约
|
||||
|
||||
### 4.1 需求扩写(同步)
|
||||
|
||||
**POST /api/v1/asl/research/generate-requirement**
|
||||
|
||||
```typescript
|
||||
// 请求
|
||||
{
|
||||
originalQuery: string, // "他汀预防心血管疾病,要能下载PDF的"
|
||||
targetSources: string[], // ["pubmed.ncbi.nlm.nih.gov", "bmjopen.bmj.com"]
|
||||
filters: {
|
||||
yearRange?: string, // "2010至今" | "过去5年" | "不限"
|
||||
targetCount?: string, // "~100篇" | "全面检索"
|
||||
requireOpenAccess?: boolean // true
|
||||
}
|
||||
}
|
||||
|
||||
// 响应
|
||||
{
|
||||
success: true,
|
||||
data: {
|
||||
taskId: "uuid", // 已创建DB记录(status=draft)
|
||||
generatedRequirement: "请帮我执行一次深度的医学文献检索...", // LLM扩写结果
|
||||
intentSummary: { // 结构化摘要
|
||||
objective: "为Meta分析构建测试语料库",
|
||||
intervention: "他汀类药物 (Statins)",
|
||||
condition: "心血管疾病 (CVD)",
|
||||
literatureStandard: "高质量临床研究,PDF全文可下载"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**实现要点:**
|
||||
- 调用 `LLMFactory.getAdapter('deepseek-v3')` 进行需求扩写
|
||||
- System Prompt 要求 LLM 输出结构化自然语言指令(非布尔检索式)
|
||||
- 同时创建 DB 记录,status = `draft`
|
||||
|
||||
### 4.2 启动执行(进入异步队列)
|
||||
|
||||
**PUT /api/v1/asl/research/tasks/:id/execute**
|
||||
|
||||
```typescript
|
||||
// 请求
|
||||
{
|
||||
confirmedRequirement: string // 用户核验修改后的最终指令书
|
||||
}
|
||||
|
||||
// 响应
|
||||
{ success: true }
|
||||
```
|
||||
|
||||
**实现要点:**
|
||||
- 更新 DB 的 `confirmed_requirement`;`target_sources` 沿用 4.1 创建 draft 记录时保存的值(本接口请求体不含该字段)
|
||||
- `jobQueue.push('asl_deep_research_v2', { taskId })` 推入 pg-boss
|
||||
- status 更新为 `pending`
|
||||
|
||||
### 4.3 任务状态与日志轮询
|
||||
|
||||
**GET /api/v1/asl/research/tasks/:id**
|
||||
|
||||
```typescript
|
||||
// 响应
|
||||
{
|
||||
success: true,
|
||||
data: {
|
||||
taskId: "uuid",
|
||||
status: "running", // draft/pending/running/completed/failed
|
||||
executionLogs: [ // 终端日志(增量)
|
||||
{ type: "think", title: "任务理解", text: "已收到检索需求...", ts: "..." },
|
||||
{ type: "action", title: "Search", text: "executing search across PubMed...", ts: "..." },
|
||||
{ type: "done", title: "搜索轮次完成", text: "", ts: "..." },
|
||||
],
|
||||
progress: { current: 60, total: 100 },
|
||||
// 仅 completed 时有:
|
||||
synthesisReport: "## 研究背景\n他汀类药物...",
|
||||
resultList: [
|
||||
{ title: "...", authors: "...", journal: "...", year: 2010, type: "Meta-analysis", pmid: "...", doi: "...", pdfStatus: "OA" },
|
||||
],
|
||||
resultCount: 103,
|
||||
errorMessage: null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4.4 Word 导出
|
||||
|
||||
**GET /api/v1/asl/research/tasks/:id/export-word**
|
||||
|
||||
- 读取 DB 的 `synthesis_report`(Markdown)和 `result_list`(JSON)
|
||||
- 拼接为完整 Markdown(报告 + 文献清单表格)
|
||||
- 调用 Pandoc 转 Word
|
||||
- 返回 `.docx` 文件流
|
||||
|
||||
### 4.5 路由汇总
|
||||
|
||||
| 方法 | 路径 | 说明 | 新增/改造 |
|
||||
|------|------|------|----------|
|
||||
| POST | `/research/generate-requirement` | 需求扩写 | **新增** |
|
||||
| PUT | `/research/tasks/:id/execute` | 启动执行 | **新增** |
|
||||
| GET | `/research/tasks/:id` | 状态+日志+结果 | **改造** |
|
||||
| GET | `/research/tasks/:id/export-word` | Word 导出 | **新增** |
|
||||
| POST | `/research/stream` | V1 SSE(保留兼容) | 不动 |
|
||||
| POST | `/research/tasks` | V1 异步创建(保留) | 不动 |
|
||||
|
||||
---
|
||||
|
||||
## 5. 后台 Worker 逻辑
|
||||
|
||||
### 5.1 核心流程(伪代码)
|
||||
|
||||
```typescript
|
||||
// backend/src/modules/asl/workers/deepResearchV2Worker.ts
|
||||
|
||||
/**
 * Worker entry point for a Deep Research V2 task (design-level pseudo-code).
 *
 * Flow:
 *  1. Load the task row and create an asynchronous Unifuncs task from the
 *     user-confirmed requirement.
 *  2. Poll Unifuncs every 5 s, at most MAX_POLLS times, appending any newly
 *     produced reasoning text to the task's execution logs.
 *  3. On completion, split the output into report / literature list and
 *     persist both together with the usage statistics.
 *
 * NOTE(review): this function only throws on failure/timeout; marking the
 * task row as failed is presumably done by the caller — confirm.
 *
 * @param job Queue job whose `data.taskId` identifies the research task.
 * @throws Error when the task row does not exist, when Unifuncs reports
 *   `failed`, or when the task is still unfinished after the polling budget.
 */
export async function processDeepResearchV2(job: Job) {
  const POLL_INTERVAL_MS = 5000;
  const MAX_POLLS = 180; // at most 15 minutes (180 × 5 s)
  const DEFAULT_SCOPE = ["https://pubmed.ncbi.nlm.nih.gov/"];
  const DEFAULT_BLACKLIST = ["wanfang.com", "cnki.net"];

  const { taskId } = job.data;
  const task = await prisma.aslResearchTask.findUnique({ where: { id: taskId } });
  if (!task) {
    // findUnique resolves to null for an unknown id; fail with a clear message
    // instead of a TypeError on `task.confirmedRequirement` below.
    throw new Error(`ASL research task not found: ${taskId}`);
  }

  // An empty selection must also fall back to the default: `[] || x` keeps [].
  const domainScope =
    Array.isArray(task.targetSources) && task.targetSources.length > 0
      ? task.targetSources
      : DEFAULT_SCOPE;

  // Never blacklist a domain the user explicitly selected as a source.
  const domainBlacklist = DEFAULT_BLACKLIST.filter(
    (blocked) => !domainScope.some((source: string) => String(source).includes(blocked)),
  );

  // 1. Create the asynchronous Unifuncs task
  const unifuncsPayload = {
    model: "s2",
    messages: [{
      role: "user",
      content: `请根据以下详细检索需求执行深度研究:\n${task.confirmedRequirement}`
    }],
    introduction: buildIntroduction(),
    max_depth: 25,
    domain_scope: domainScope,
    domain_blacklist: domainBlacklist,
    output_prompt: buildOutputPrompt(),
    reference_style: "link",
    generate_summary: true,
  };

  const createRes = await unifuncsClient.createTask(unifuncsPayload);
  const unifuncsTaskId = createRes.data.task_id;

  await prisma.aslResearchTask.update({
    where: { id: taskId },
    data: { externalTaskId: unifuncsTaskId, status: 'running' }
  });

  // 2. Poll Unifuncs until the task finishes
  let previousReasoning = '';

  for (let i = 0; i < MAX_POLLS; i++) {
    await sleep(POLL_INTERVAL_MS);
    const queryRes = await unifuncsClient.queryTask(unifuncsTaskId);
    const data = queryRes.data;

    // Only the text appended since the previous poll is parsed and stored.
    // NOTE(review): a line that is still being written when the poll happens
    // is logged in two pieces — consider consuming up to the last '\n' only.
    const currentReasoning = data.result?.reasoning_content || '';
    if (currentReasoning.length > previousReasoning.length) {
      const increment = currentReasoning.slice(previousReasoning.length);
      const newLogs = parseReasoningToLogs(increment);
      await appendExecutionLogs(taskId, newLogs);
      previousReasoning = currentReasoning;
    }

    // `data.progress` is intentionally not persisted separately: progress can
    // be reflected through the last entry of the execution logs.

    // Completed: split the output and persist everything in one update
    if (data.status === 'completed') {
      const content = data.result?.content || '';
      const report = extractSection(content, 'REPORT_SECTION');
      const jsonList = extractSection(content, 'JSON_LIST_SECTION');
      const parsedList = safeParseJsonList(jsonList);

      await prisma.aslResearchTask.update({
        where: { id: taskId },
        data: {
          status: 'completed',
          rawResult: content,
          reasoningContent: currentReasoning,
          // Fall back to the raw output when the report section is missing/empty
          synthesisReport: report || content,
          resultList: parsedList,
          resultCount: parsedList?.length ?? 0,
          tokenUsage: data.statistics?.token_usage,
          searchCount: data.statistics?.search_count,
          readCount: data.statistics?.read_count,
          iterations: data.statistics?.iterations,
          completedAt: new Date(),
        }
      });
      return;
    }

    if (data.status === 'failed') {
      throw new Error(data.result?.content || 'Unifuncs 任务失败');
    }
  }

  throw new Error('任务超时(15分钟)');
}
|
||||
```
|
||||
|
||||
### 5.2 日志解析逻辑
|
||||
|
||||
```typescript
|
||||
/**
 * Converts a newly received chunk of reasoning text into structured log
 * entries for the execution terminal.
 *
 * Each non-blank line is classified by the first rule that matches:
 *  1. search keyword  -> { type: 'action',  title: 'Search' }
 *  2. read keyword    -> { type: 'action',  title: 'Read' }
 *  3. completion word -> { type: 'done',    title: '阶段完成' }
 *  4. summary word    -> { type: 'summary', title: '阶段总结' }
 *  5. longer than 10 characters -> { type: 'think', title: 'Thinking' }
 * Lines matching none of the rules (short filler text) are dropped.
 *
 * English keywords are matched case-insensitively and only at the start of a
 * word, so "research", "already" or "TOKEN" no longer trigger the
 * search / read / OK rules the way a bare substring test did.
 *
 * @param increment Reasoning text appended since the previous poll.
 * @returns Log entries in input order; all entries of one call share one
 *   ISO-8601 timestamp.
 */
function parseReasoningToLogs(increment: string): LogEntry[] {
  // A keyword counts only when it is not glued to a preceding ASCII letter.
  const searchWord = /(?<![a-z])search/i;
  const readWord = /(?<![a-z])read(?:s|ing)?(?![a-z])/i;
  const okWord = /(?<![A-Za-z])OK(?![A-Za-z])/;

  const ts = new Date().toISOString();
  const logs: LogEntry[] = [];

  for (const rawLine of increment.split('\n')) {
    const text = rawLine.trim();
    if (!text) continue;

    if (text.includes('搜索') || searchWord.test(text)) {
      logs.push({ type: 'action', title: 'Search', text, ts });
    } else if (text.includes('阅读') || readWord.test(text)) {
      logs.push({ type: 'action', title: 'Read', text, ts });
    } else if (text.includes('完成') || text.includes('成功') || okWord.test(text)) {
      logs.push({ type: 'done', title: '阶段完成', text, ts });
    } else if (text.includes('汇总') || text.includes('总结') || text.includes('发现')) {
      logs.push({ type: 'summary', title: '阶段总结', text, ts });
    } else if (text.length > 10) {
      logs.push({ type: 'think', title: 'Thinking', text, ts });
    }
  }
  return logs;
}
|
||||
```
|
||||
|
||||
### 5.3 output_prompt 设计
|
||||
|
||||
```typescript
|
||||
/**
 * Builds the text passed as `output_prompt` in the Unifuncs task payload.
 *
 * The prompt instructs the model to emit two tagged sections:
 *  - `<REPORT_SECTION>`: a Markdown synthesis report (background and purpose,
 *    key findings and consensus, disagreements and gaps, numbered references).
 *  - `<JSON_LIST_SECTION>`: a strict JSON array of literature metadata with
 *    title, authors, journal, year, type, pmid, doi, pdfStatus and url.
 * The section names match the tags the worker passes to `extractSection`.
 *
 * NOTE(review): the lone `|` / `||||` lines inside the template look like
 * diff-viewer residue rather than intended prompt text — confirm against the
 * original document before using this string verbatim.
 *
 * @returns The fixed prompt string; it takes no input and has no side effects.
 */
function buildOutputPrompt(): string {
|
||||
return `请严格按照以下格式输出结果:
|
||||
|
||||
<REPORT_SECTION>
|
||||
[此处撰写深度综合研究报告,使用 Markdown 格式,包括:
|
||||
- 研究背景与目的
|
||||
- 核心发现与共识
|
||||
- 分歧点与研究空白
|
||||
- 参考文献列表(带编号和PubMed链接)]
|
||||
</REPORT_SECTION>
|
||||
|
||||
<JSON_LIST_SECTION>
|
||||
[此处输出文献元数据的严格 JSON 数组,每条包含:
|
||||
{"title":"...", "authors":"...", "journal":"...", "year":2024, "type":"RCT|Meta-analysis|Cohort|SR", "pmid":"...", "doi":"...", "pdfStatus":"OA|Restricted", "url":"https://pubmed.ncbi.nlm.nih.gov/..."}]
|
||||
</JSON_LIST_SECTION>`;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 前端组件设计
|
||||
|
||||
### 6.1 页面结构
|
||||
|
||||
```
|
||||
frontend-v2/src/modules/asl/pages/
|
||||
└── DeepResearchPage.tsx # V2.0 主页面(替代 ResearchSearch.tsx)
|
||||
|
||||
frontend-v2/src/modules/asl/components/
|
||||
├── deep-research/
|
||||
│ ├── LandingView.tsx # Landing 大搜索框
|
||||
│ ├── SetupPanel.tsx # Step 1: 配置面板
|
||||
│ ├── StrategyConfirm.tsx # Step 2: HITL 策略确认(左右分栏)
|
||||
│ ├── AgentTerminal.tsx # Step 3: 暗黑执行终端
|
||||
│ └── ResultsView.tsx # Step 4: 结果展示(报告+表格)
|
||||
```
|
||||
|
||||
### 6.2 状态管理
|
||||
|
||||
```typescript
|
||||
// Page-level state (plain useState is enough, no Zustand needed)
/**
 * State held by the Deep Research page while the user moves through the
 * landing -> setup -> strategy -> terminal -> results flow.
 */
|
||||
interface DeepResearchState {
|
||||
currentStep: 'landing' | 'setup' | 'strategy' | 'terminal' | 'results'; // step of the flow currently shown
|
||||
taskId: string | null; // research task id; null until a task exists
|
||||
originalQuery: string; // the user's initial free-text query
|
||||
generatedRequirement: string; // expanded requirement text shown in the editable textarea
|
||||
intentSummary: IntentSummary | null; // read-only AI intent summary card; null when not available
|
||||
isGenerating: boolean; // requirement expansion in progress
|
||||
}
|
||||
```
|
||||
|
||||
### 6.3 各组件核心逻辑
|
||||
|
||||
**LandingView(Landing 大搜索框)**
|
||||
- 居中大输入框 + "开始研究"按钮 + 推荐预置词
|
||||
- 点击后携带输入值,平滑过渡到 SetupPanel
|
||||
- 参考原型图 V4.2 的 `#landing-view` 部分
|
||||
|
||||
**SetupPanel(Step 1: 配置)**
|
||||
- 继承 Landing 输入值到 textarea
|
||||
- 数据源 Checkbox(PubMed/PMC, BMJ Open, Cochrane)
|
||||
- 高级过滤(年份下拉、目标数量、OA 强制)
|
||||
- 点击"解析并生成检索需求书" → POST /generate-requirement
|
||||
- Loading 后平滑展开 Step 2
|
||||
|
||||
**StrategyConfirm(Step 2: HITL 确认)**
|
||||
- 左侧 1/3:AI 意图提炼卡片(只读,来自 `intentSummary`)
|
||||
- 右侧 2/3:可编辑 textarea(内容为 `generatedRequirement`)
|
||||
- 提示文案:"您可以像写邮件一样在这里补充任何大白话要求"
|
||||
- 点击"确认需求,启动 Deep Research" → PUT /execute
|
||||
|
||||
**AgentTerminal(Step 3: 暗黑终端)**
|
||||
- 暗色背景(bg-slate-900),固定高度 550px,内部滚动
|
||||
- 顶部状态栏:红/黄/绿圆点 + "Running" 脉冲指示灯
|
||||
- 日志渲染:
|
||||
- `think` → 紫色 + 🧠 图标
|
||||
- `action` → 蓝色 + 💻 图标
|
||||
- `done` → 绿色 + ✅ 图标
|
||||
- `summary` → 黄色 + 📋 图标
|
||||
- 轮询逻辑:`useQuery` + refetchInterval: 3000(任务状态为 pending 或 running 时启用)
|
||||
- 新日志出现时 auto-scroll 到底部
|
||||
- 完成后状态灯变灰 "Finished",终端可折叠
|
||||
|
||||
**ResultsView(Step 4: 结果)**
|
||||
- 白色背景,与终端形成视觉分界
|
||||
- 完成横幅(一行):文献数 + 耗时 + "导出 Word" 按钮
|
||||
- AI 综合报告区:`react-markdown` 渲染 `synthesisReport`,可折叠,默认展开
|
||||
- 文献清单表格:Ant Design Table
|
||||
- 列:标题(可点击跳转 PubMed)、期刊、年份、类型 Tag、PDF 状态
|
||||
- 支持简单搜索过滤
|
||||
- 分页(前端分页即可,数据量 ~100 条)
|
||||
|
||||
### 6.4 轮询 Hook
|
||||
|
||||
```typescript
|
||||
// hooks/useDeepResearchTask.ts
|
||||
/**
 * Polling hook for a single Deep Research task.
 *
 * The query stays disabled while `taskId` is null/empty. Once enabled it
 * refetches every 3 s as long as the last response reports the task as
 * `pending` or `running`, and stops polling for any other status.
 *
 * @param taskId Id of the task to watch, or null when no task exists yet.
 * @returns The `useQuery` result for `GET /api/v1/asl/research/tasks/:id`.
 */
function useDeepResearchTask(taskId: string | null) {
  const fetchTask = () => apiClient.get(`/api/v1/asl/research/tasks/${taskId}`);

  return useQuery({
    queryKey: ['deep-research-task', taskId],
    queryFn: fetchTask,
    enabled: Boolean(taskId),
    refetchInterval: ({ state }) => {
      switch (state.data?.data?.status) {
        case 'pending':
        case 'running':
          return 3000;
        default:
          return false;
      }
    },
  });
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. 复用清单(不重复造轮子)
|
||||
|
||||
| 能力 | 来源 | 用法 |
|
||||
|------|------|------|
|
||||
| LLM 调用 | `common/llm/LLMFactory` | DeepSeek-V3 需求扩写 |
|
||||
| pg-boss 队列 | `common/jobs/jobQueue` | Worker 注册与任务推送 |
|
||||
| 日志服务 | `common/logging/logger` | 全程结构化日志 |
|
||||
| 认证中间件 | `common/auth/authenticate` | 所有 API 路由 |
|
||||
| Prisma 全局实例 | `config/database` | 数据库操作 |
|
||||
| Word 导出 | Pandoc(Python 微服务) | 复用 Protocol Agent 验证的方案 |
|
||||
| 前端 API Client | `common/api/axios` | 带认证的请求 |
|
||||
| 前端布局 | `ASLLayout.tsx` | 左侧导航 |
|
||||
|
||||
---
|
||||
|
||||
## 8. 分阶段开发计划
|
||||
|
||||
### Phase 1: 数据库 + 需求扩写(Day 1)
|
||||
|
||||
**目标:** 用户输入粗略想法 → AI 扩写为结构化指令书 → 用户可编辑修改
|
||||
|
||||
| 任务 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| Schema 迁移 | `prisma/schema.prisma` | 新增 6 个字段,`prisma migrate dev` |
|
||||
| 需求扩写 Prompt | `services/requirementExpansionService.ts` | 新建服务,调用 DeepSeek-V3 扩写 |
|
||||
| 扩写 API | `controllers/researchController.ts` | 新增 `POST /generate-requirement` |
|
||||
| 启动 API | `controllers/researchController.ts` | 新增 `PUT /tasks/:id/execute` |
|
||||
| 状态 API 改造 | `controllers/researchController.ts` | 改造 `GET /tasks/:id`,返回新字段 |
|
||||
| 路由注册 | `routes/index.ts` | 注册新端点 |
|
||||
|
||||
**验收标准:**
|
||||
- [ ] `POST /generate-requirement` 返回扩写后的指令书
|
||||
- [ ] `PUT /tasks/:id/execute` 成功推入 pg-boss 队列
|
||||
- [ ] `GET /tasks/:id` 返回含新字段的完整数据
|
||||
|
||||
### Phase 2: Worker 改造 — Unifuncs 异步模式(Day 2)
|
||||
|
||||
**目标:** Worker 使用 create_task + query_task 轮询,增量日志写入 DB
|
||||
|
||||
| 任务 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| Unifuncs 异步客户端 | `services/unifuncsAsyncClient.ts` | 新建,封装 create_task / query_task |
|
||||
| V2 Worker | `workers/deepResearchV2Worker.ts` | 新建,轮询 + 日志解析 + 结果切割 |
|
||||
| 日志解析器 | `utils/reasoningParser.ts` | 新建,reasoning_content → 结构化日志 |
|
||||
| 结果解析器 | `utils/resultParser.ts` | 新建,XML 标签切割报告与 JSON 列表 |
|
||||
| Worker 注册 | `workers/researchWorker.ts` | 注册新 Worker `asl_deep_research_v2` |
|
||||
|
||||
**验收标准:**
|
||||
- [ ] Worker 成功调用 unifuncs create_task
|
||||
- [ ] 轮询期间 execution_logs 持续增量更新
|
||||
- [ ] 完成后 synthesis_report 和 result_list 正确入库
|
||||
- [ ] 超时保护(15 分钟)和错误处理正常
|
||||
|
||||
### Phase 3: 前端 — Landing + 配置 + HITL 确认(Day 3)
|
||||
|
||||
**目标:** 完成 Step 1-2 的前端交互,瀑布流渐进展开
|
||||
|
||||
| 任务 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| 主页面骨架 | `pages/DeepResearchPage.tsx` | 新建,管理瀑布流状态 |
|
||||
| Landing 组件 | `components/deep-research/LandingView.tsx` | 大搜索框 + 推荐预置词 |
|
||||
| 配置面板 | `components/deep-research/SetupPanel.tsx` | 数据源 + 高级过滤 + 生成按钮 |
|
||||
| HITL 确认 | `components/deep-research/StrategyConfirm.tsx` | 左右分栏 + 可编辑 textarea |
|
||||
| API 函数 | `api/index.ts` | 新增 generateRequirement / executeTask |
|
||||
| 路由注册 | `pages/index.tsx` | 新增 V2 路由 |
|
||||
|
||||
**验收标准:**
|
||||
- [ ] Landing 输入 → Step 1 配置面板流畅过渡
|
||||
- [ ] 点击"生成需求书" → Loading → Step 2 展开
|
||||
- [ ] Step 2 左侧摘要卡片正确展示,右侧 textarea 可编辑
|
||||
- [ ] 点击"启动 Deep Research" → 进入 Step 3
|
||||
|
||||
### Phase 4: 前端 — 终端 + 结果展示(Day 4)
|
||||
|
||||
**目标:** 完成 Step 3-4,终端实时日志 + 结果报告/表格
|
||||
|
||||
| 任务 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| 暗黑终端 | `components/deep-research/AgentTerminal.tsx` | 日志渲染 + auto-scroll + 状态灯 |
|
||||
| 结果视图 | `components/deep-research/ResultsView.tsx` | 横幅 + 报告 + 文献表格 |
|
||||
| 轮询 Hook | `hooks/useDeepResearchTask.ts` | 3s 轮询,pending / running 时启用 |
|
||||
| 终端样式 | CSS / Tailwind | 暗色主题 + 日志类型着色 |
|
||||
|
||||
**验收标准:**
|
||||
- [ ] 终端日志按类型着色,新日志 auto-scroll
|
||||
- [ ] 完成后终端折叠,结果区展开
|
||||
- [ ] 综合报告 Markdown 渲染正确
|
||||
- [ ] 文献清单表格展示(标题可点击跳转 PubMed)
|
||||
- [ ] 全流程端到端联调通过
|
||||
|
||||
### Phase 5: Word 导出 + 收尾(Day 5)
|
||||
|
||||
**目标:** Word 导出功能 + 全流程打磨 + 测试
|
||||
|
||||
| 任务 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| Word 导出 API | `controllers/researchController.ts` | `GET /tasks/:id/export-word` |
|
||||
| Markdown 拼接 | `services/wordExportService.ts` | 报告 + 文献表格 → 完整 Markdown |
|
||||
| Pandoc 调用 | 复用 Python 微服务 | Markdown → .docx |
|
||||
| 前端导出按钮 | `ResultsView.tsx` | 下载 Word 文件 |
|
||||
| 全流程测试 | 手动 + 脚本 | 端到端验证 |
|
||||
| 文档更新 | 模块状态文档 | 更新 ASL 模块当前状态 |
|
||||
|
||||
**验收标准:**
|
||||
- [ ] 点击"导出 Word" → 下载包含报告和文献清单的 .docx
|
||||
- [ ] 全流程:Landing → 配置 → 扩写 → 确认 → 执行 → 日志 → 结果 → 导出
|
||||
- [ ] 离开页面回来,能恢复查看正在执行/已完成的任务
|
||||
- [ ] 错误情况处理(unifuncs 超时、API 报错、网络中断)
|
||||
|
||||
---
|
||||
|
||||
## 9. 验收标准总览
|
||||
|
||||
### 功能验收
|
||||
|
||||
- [ ] **Landing 引导**:用户输入粗略想法 → 进入配置
|
||||
- [ ] **需求扩写**:AI 自动扩写为结构化自然语言指令书
|
||||
- [ ] **HITL 核验**:用户可直接编辑修改指令书
|
||||
- [ ] **异步执行**:pg-boss 队列,离开页面任务不中断
|
||||
- [ ] **终端日志**:每 3-5s 弹出一条结构化日志
|
||||
- [ ] **综合报告**:Markdown 渲染,内容来自 Unifuncs 输出
|
||||
- [ ] **文献清单**:表格展示,标题可跳转 PubMed
|
||||
- [ ] **Word 导出**:一键导出报告 + 文献清单
|
||||
|
||||
### 非功能验收
|
||||
|
||||
- [ ] V1.x SSE 端点保留不动(向后兼容)
|
||||
- [ ] 所有 API 经过 authenticate 中间件
|
||||
- [ ] 日志使用 `logger`(非 console.log)
|
||||
- [ ] 无硬编码配置(API Key 来自环境变量)
|
||||
- [ ] 数据库变更通过 Prisma migrate(非 db push)
|
||||
|
||||
---
|
||||
|
||||
## 10. 风险与应对
|
||||
|
||||
| 风险 | 概率 | 影响 | 应对措施 |
|
||||
|------|------|------|---------|
|
||||
| Unifuncs 异步模式下 reasoning_content 不增量更新 | 低 | 终端日志为空 | 降级方案:只显示 progress.message |
|
||||
| output_prompt XML 标签分割不可靠 | 中 | 报告和列表无法分离 | 降级方案:整体作为报告展示,文献从 PubMed 链接提取 |
|
||||
| Unifuncs 长任务超时 | 低 | 任务失败 | MAX_POLLS=180(15分钟),超时标记 failed,用户可重试 |
|
||||
| Pandoc Word 导出在 SAE 不可用 | 低 | 导出失败 | 降级方案:导出为 Markdown 文件 |
|
||||
|
||||
---
|
||||
|
||||
**文档维护者:** 开发团队
|
||||
**最后更新:** 2026-02-22
|
||||
**文档状态:** ✅ 方案确认,待开发启动
|
||||
@@ -0,0 +1,93 @@
|
||||
# Unifuncs DeepSearch API 网站覆盖能力测试记录
|
||||
|
||||
> **日期:** 2026-02-22
|
||||
> **目的:** 在 Deep Research V2.0 开发前,验证 Unifuncs API 对中国医生常用医学期刊网站的搜索能力
|
||||
> **结论:** 18 个站点测试完毕:9 个一级可用(含首轮超时、后经专项测试确认可用的 ClinicalTrials.gov),8 个二级可达,Ovid 未返回任何链接、未列入上述分级
|
||||
|
||||
---
|
||||
|
||||
## 1. 测试背景
|
||||
|
||||
V2.0 升级需要明确 Unifuncs DeepSearch API 能覆盖哪些医学期刊网站,以便在前端展示可选数据源。本次测试覆盖了 7 个最关注站点 + 11 个其他常用站点,共 18 个。
|
||||
|
||||
## 2. 测试配置
|
||||
|
||||
- **脚本**:`backend/scripts/test-unifuncs-site-coverage.ts`
|
||||
- **模式**:异步模式(create_task + query_task 轮询)
|
||||
- **查询**:`他汀类药物预防心血管疾病的随机对照试验和Meta分析,近5年高质量研究`
|
||||
- **max_depth**:5(测试用低深度加快速度)
|
||||
- **超时**:600 秒
|
||||
- **并行**:18 个任务同时创建、各自轮询
|
||||
|
||||
## 3. 全站测试结果
|
||||
|
||||
### 3.1 Top 7 最关注站点
|
||||
|
||||
| 序号 | 站点 | 状态 | 站内链接 | 其他链接 | 搜索/阅读 | 耗时 |
|
||||
|------|------|------|---------|---------|-----------|------|
|
||||
| 1 | PubMed | ✅ | 28 | 0 | 9/29 | 177s |
|
||||
| 2 | ClinicalTrials.gov | ⏰ 超时 | 0 | 0 | 0/0 | 611s |
|
||||
| 3 | 中华医学期刊网 | ⚠️ | 0 | 12 | 41/17 | 376s |
|
||||
| 4 | 中国知网 CNKI | ✅ | 7 | 0 | 40/6 | 189s |
|
||||
| 5 | 万方数据 | ⚠️ | 0 | 7 | 42/15 | 258s |
|
||||
| 6 | 维普 VIP | ✅ | 1 | 0 | 33/3 | 153s |
|
||||
| 7 | 中国临床试验注册中心 | ⚠️ | 0 | 7 | 36/11 | 200s |
|
||||
|
||||
### 3.2 其他常用站点
|
||||
|
||||
| 序号 | 站点 | 状态 | 站内链接 | 其他链接 | 搜索/阅读 | 耗时 |
|
||||
|------|------|------|---------|---------|-----------|------|
|
||||
| 8 | CBM/SinoMed | ✅ | 9 | 1 | 17/12 | 200s |
|
||||
| 9 | Web of Science | ⚠️ | 0 | 6 | 41/4 | 211s |
|
||||
| 10 | Embase | ⚠️ | 0 | 14 | 24/24 | 247s |
|
||||
| 11 | Cochrane Library | ✅ | 4 | 7 | 38/12 | 235s |
|
||||
| 12 | Google Scholar | ✅ | 10 | 0 | 22/26 | 247s |
|
||||
| 13 | Ovid | ⚠️ | 0 | 0 | 18/0 | 95s |
|
||||
| 14 | Scopus | ⚠️ | 0 | 15 | 36/30 | 305s |
|
||||
| 15 | 中国中医药数据库 | ⚠️ | 0 | 22 | 30/19 | 317s |
|
||||
| 16 | GeenMedical | ✅ | 5 | 0 | 38/3 | 131s |
|
||||
| 17 | NSTL | ⚠️ | 0 | 0 | 27/0 | 96s |
|
||||
| 18 | NCBI (全站) | ✅ | 18 | 0 | 24/19 | 246s |
|
||||
|
||||
## 4. ClinicalTrials.gov 专项测试
|
||||
|
||||
ClinicalTrials.gov 在全站测试中超时,进行了 4 种策略对比测试:
|
||||
|
||||
**脚本**:`backend/scripts/test-unifuncs-clinicaltrials.ts`
|
||||
|
||||
| 策略 | 查询语言 | domain_scope | max_depth | 耗时 | CT 链接 | NCT 编号 |
|
||||
|------|---------|-------------|-----------|------|---------|---------|
|
||||
| A 英文+限定CT | English | clinicaltrials.gov | 10 | **120s** | **38** | **37** |
|
||||
| B 英文+不限域 | English | 不限 | 10 | 145s | 13 | 12 |
|
||||
| C 中文+限定CT+高深度 | 中文 | clinicaltrials.gov | 15 | 179s | 22 | 21 |
|
||||
| D 简短英文+限定CT | English | clinicaltrials.gov | 5 | 179s | 21 | 20 |
|
||||
|
||||
**结论**:ClinicalTrials.gov **完全可用**!策略 A(英文查询 + 限定域名 + max_depth=10)效果最佳,120 秒获取 38 个链接。上次超时推测与中文查询 + 低深度(max_depth=5)的组合有关:策略 C(中文 + max_depth=15)与策略 D(英文 + max_depth=5)均可正常返回。
|
||||
|
||||
## 5. 关键发现
|
||||
|
||||
### 可用性总结
|
||||
|
||||
- **一级可用(9 个)**:PubMed, ClinicalTrials.gov, NCBI/PMC, Google Scholar, CBM/SinoMed, CNKI, GeenMedical, Cochrane Library, 维普
|
||||
- **二级可达(8 个)**:中华医学期刊网, 万方, 中国临床试验注册中心, 中国中医药数据库, Scopus, Embase, Web of Science, NSTL
|
||||
- **需特殊策略(1 个,已计入一级可用)**:ClinicalTrials.gov → 推荐英文查询 + max_depth≥10(策略 A 效果最佳;中文查询或 max_depth=5 也能返回结果,但链接数较少)
|
||||
|
||||
### 关于付费库
|
||||
|
||||
Unifuncs API **不支持传入用户名密码**登录搜索。付费库(Web of Science, Embase, Scopus, Ovid)只能访问公开摘要页面。
|
||||
|
||||
## 6. V2.0 前端数据源配置建议
|
||||
|
||||
根据测试结果,前端数据源选择分三档:
|
||||
- **推荐**:PubMed, ClinicalTrials.gov, NCBI/PMC, Google Scholar, Cochrane Library, CNKI, CBM/SinoMed, GeenMedical, 维普
|
||||
- **可选**:中华医学期刊网, 万方数据, 中国临床试验注册中心, 中国中医药数据库
|
||||
- **暂不支持**:Web of Science, Embase, Scopus, Ovid, NSTL
|
||||
|
||||
## 7. 产出物
|
||||
|
||||
| 产出 | 路径 |
|
||||
|------|------|
|
||||
| 全站覆盖测试脚本 | `backend/scripts/test-unifuncs-site-coverage.ts` |
|
||||
| ClinicalTrials 专项测试脚本 | `backend/scripts/test-unifuncs-clinicaltrials.ts` |
|
||||
| DeepSearch API 使用指南 | `docs/02-通用能力层/04-DeepResearch引擎/01-Unifuncs DeepSearch API 使用指南.md` |
|
||||
| V2.0 开发计划 | `docs/03-业务模块/ASL-AI智能文献/04-开发计划/07-Deep Research V2.0 开发计划.md` |
|
||||
Reference in New Issue
Block a user