feat(asl): Add Deep Research V2.0 development plan and Unifuncs API site coverage testing

Completed:
- Unifuncs DeepSearch API site coverage test (18 medical sites, 9 tier-1 available)
- ClinicalTrials.gov dedicated test (4 strategies, English query + depth>=10 works best)
- Deep Research V2.0 development plan (5-day phased delivery)
- DeepResearch engine capability guide (docs/02-common-capability/)
- Test scripts: test-unifuncs-site-coverage.ts, test-unifuncs-clinicaltrials.ts

Key findings:
- Tier-1 sites: PubMed(28), ClinicalTrials(38), NCBI(18), Scholar(10), Cochrane(4), CNKI(7), SinoMed(9), GeenMedical(5), VIP(1)
- Paid databases (WoS/Embase/Scopus/Ovid) cannot be accessed (no credential support)
- ClinicalTrials.gov requires English queries with max_depth>=10

Updated: ASL module status doc, system status doc, common capability list
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 22:44:41 +08:00
parent 3446909ff7
commit b06daecacd
12 changed files with 2662 additions and 27 deletions

View File

@@ -0,0 +1,374 @@
/**
* Unifuncs API 网站覆盖能力测试脚本
*
* 测试 unifuncs DeepSearch 对中国医生常用医学期刊网站的搜索能力
* 使用异步模式(create_task + query_task)并行测试所有站点
*
* 运行方式:
* cd backend
* npx tsx scripts/test-unifuncs-site-coverage.ts
*/
// ========== Configuration ==========
/**
 * Unifuncs API key, read from the UNIFUNCS_API_KEY environment variable.
 *
 * Credentials must never be committed to source control. Any key that was
 * previously hard-coded in this file should be treated as leaked and revoked.
 * When the variable is unset the key is an empty string, so every request is
 * sent without a usable credential and the failure is reported per site.
 */
const API_KEY = process.env.UNIFUNCS_API_KEY ?? '';
if (API_KEY === '') {
  console.warn('⚠️ 未设置环境变量 UNIFUNCS_API_KEY,API 请求将因缺少密钥而失败');
}
/** Base URL of the Unifuncs DeepSearch v1 endpoints. */
const BASE_URL = 'https://api.unifuncs.com/deepsearch/v1';
/** Search depth sent as `max_depth`; kept low so the test finishes faster. */
const MAX_DEPTH = 5;
/** Delay between two status polls of the same task, in milliseconds (10 s). */
const POLL_INTERVAL = 10000;
/** Maximum time to wait for a single task, in milliseconds (10 minutes). */
const MAX_WAIT = 600000;
/** The query sent, unchanged, for every tested site. */
const QUERY = '他汀类药物预防心血管疾病的随机对照试验和Meta分析近5年高质量研究';
// ========== Test site list ==========
/** Report bucket a site belongs to: the seven most-watched sites, or the rest. */
type SitePriority = 'top7' | 'other';
/** Language group of a site. */
type SiteCategory = 'english' | 'chinese';

/** One website whose coverage by the DeepSearch API is being tested. */
interface TestSite {
  /** Sequence number shown in log prefixes and in the report table. */
  id: number;
  /** Human-readable site name used in logs and reports. */
  name: string;
  /** Site URL; sent as the search scope and used to recognise in-site links. */
  url: string;
  /** Which report section the site is listed under. */
  priority: SitePriority;
  /** Language group, copied into the JSON configuration output. */
  category: SiteCategory;
}
/**
 * All sites under test. Ids are consecutive: the top-7 group takes 1-7 and
 * the remaining commonly used sites take 8-18, in the order listed below.
 */
const TEST_SITES: TestSite[] = (() => {
  type SiteRow = readonly [string, string, TestSite['category']];

  // ── Top 7: the sites of greatest interest ──
  const top7Rows: readonly SiteRow[] = [
    ['PubMed', 'https://pubmed.ncbi.nlm.nih.gov/', 'english'],
    ['ClinicalTrials.gov', 'https://clinicaltrials.gov/', 'english'],
    ['中华医学期刊网', 'https://medjournals.cn/', 'chinese'],
    ['中国知网 CNKI', 'https://www.cnki.net/', 'chinese'],
    ['万方数据', 'https://www.wanfangdata.com.cn/', 'chinese'],
    ['维普 VIP', 'https://www.cqvip.com/', 'chinese'],
    ['中国临床试验注册中心', 'http://www.chictr.org.cn/', 'chinese'],
  ];

  // ── Other commonly used sites ──
  const otherRows: readonly SiteRow[] = [
    ['CBM/SinoMed', 'http://www.sinomed.ac.cn/', 'chinese'],
    ['Web of Science', 'https://www.webofscience.com/', 'english'],
    ['Embase', 'http://www.embase.com/', 'english'],
    ['Cochrane Library', 'https://www.cochranelibrary.com/', 'english'],
    ['Google Scholar', 'https://scholar.google.com/', 'english'],
    ['Ovid', 'http://ovidsp.ovid.com/', 'english'],
    ['Scopus', 'https://www.scopus.com/', 'english'],
    ['中国中医药数据库', 'https://cintmed.cintcm.cn/', 'chinese'],
    ['GeenMedical', 'https://www.geenmedical.com/', 'english'],
    ['NSTL 国家科技图书文献中心', 'https://www.nstl.gov.cn/', 'chinese'],
    ['NCBI (全站)', 'https://www.ncbi.nlm.nih.gov/', 'english'],
  ];

  // Expand compact rows into full records, numbering them from `firstId`.
  const expand = (
    rows: readonly SiteRow[],
    priority: TestSite['priority'],
    firstId: number,
  ): TestSite[] =>
    rows.map(([name, url, category], offset) => ({
      id: firstId + offset,
      name,
      url,
      priority,
      category,
    }));

  return [
    ...expand(top7Rows, 'top7', 1),
    ...expand(otherRows, 'other', top7Rows.length + 1),
  ];
})();
// ========== Result structure ==========
/** Final state recorded for one site's test run. */
type TaskStatus = 'success' | 'failed' | 'timeout' | 'create_error';

/** Everything recorded about the test of a single site. */
interface TaskResult {
  /** The site that was tested. */
  site: TestSite;
  /** Task id returned when the task was created; null if creation failed. */
  taskId: string | null;
  /** Outcome of the run. */
  status: TaskStatus;
  /** Search count taken from the task statistics. */
  searchCount: number;
  /** Read count taken from the task statistics. */
  readCount: number;
  /** Iteration count taken from the task statistics. */
  iterations: number;
  /** Length of the result content, in characters. */
  contentLength: number;
  /** Length of the reasoning content, in characters. */
  reasoningLength: number;
  /** Number of links found in the content that point at this site. */
  referencesFound: number;
  /** Number of other links found in the content. */
  otherLinksFound: number;
  /** Wall-clock duration of the whole run, in seconds. */
  durationSec: number;
  /** Error description; empty when nothing went wrong. */
  errorMessage: string;
  /** The first 5 in-site links that were found. */
  sampleLinks: string[];
}
// ========== API wrappers ==========
/**
 * Creates an asynchronous DeepSearch task whose search scope is limited to
 * one site.
 *
 * Never throws: every failure is returned as `{ error }` so the caller can
 * record it against the site.
 *
 * @param site The site whose URL is sent as the only `domain_scope` entry.
 * @returns `{ taskId }` when the API answers with `code === 0` and a task id,
 *          otherwise `{ error }` with a human-readable description.
 */
async function createTask(site: TestSite): Promise<{ taskId: string } | { error: string }> {
  type CreateTaskResponse = {
    code?: number;
    message?: string;
    data?: { task_id?: string };
  } | null;

  const payload = {
    model: 's2',
    messages: [{ role: 'user', content: QUERY }],
    introduction: '你是一名专业的临床研究文献检索专家。请在指定数据库中尽可能多地检索相关文献,输出每篇文献的标题、作者、年份、链接。',
    max_depth: MAX_DEPTH,
    domain_scope: [site.url],
    reference_style: 'link',
    generate_summary: true,
  };

  try {
    const res = await fetch(`${BASE_URL}/create_task`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });

    // Read the body as text first: an error page or an empty body would make
    // res.json() fail with a parse error that hides the HTTP status.
    const raw = await res.text();
    let parsed: CreateTaskResponse;
    try {
      parsed = JSON.parse(raw) as CreateTaskResponse;
    } catch {
      return { error: `API 返回错误: HTTP ${res.status} 响应不是合法 JSON: ${raw.slice(0, 200)}` };
    }

    if (parsed?.code === 0 && parsed.data?.task_id) {
      return { taskId: parsed.data.task_id };
    }

    // Prefix the HTTP status only for non-2xx answers, so the message for a
    // normal API-level error stays exactly as before.
    const httpNote = res.ok ? '' : `HTTP ${res.status} `;
    return { error: `API 返回错误: ${httpNote}${parsed?.message || JSON.stringify(parsed)}` };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    return { error: `请求失败: ${reason}` };
  }
}
/**
 * Fetches the current state of a task from the `query_task` endpoint.
 *
 * @param taskId Id of the task to look up; sent as the `task_id` query parameter.
 * @returns The response body parsed as JSON, unvalidated.
 */
async function queryTask(taskId: string): Promise<any> {
  const query = new URLSearchParams();
  query.set('task_id', taskId);

  const endpoint = `${BASE_URL}/query_task?${query.toString()}`;
  const authHeaders = { 'Authorization': `Bearer ${API_KEY}` };

  const response = await fetch(endpoint, { headers: authHeaders });
  return response.json();
}
/**
 * Returns a promise that resolves after a delay.
 *
 * @param ms Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
// ========== Link extraction ==========
/**
 * Collects the distinct URLs in `content` that point at the given site.
 *
 * A URL belongs to the site when its hostname equals the site's hostname
 * (ignoring a leading `www.`) or is a subdomain of it. The comparison is made
 * on the parsed hostname rather than by substring search, so a URL on another
 * host that only mentions the domain in its path or query string, or a
 * lookalike host such as `notcnki.net`, is not counted.
 *
 * @param content Text to scan for http(s) URLs.
 * @param siteUrl URL of the site; only its hostname is used.
 * @returns Matching URLs exactly as they appear in `content`, de-duplicated,
 *          in order of first appearance.
 */
function extractDomainLinks(content: string, siteUrl: string): string[] {
  const siteHost = new URL(siteUrl).hostname.toLowerCase().replace(/^www\./, '');
  const urlPattern = /https?:\/\/[^\s)\]"'>]+/gi;
  const candidates = content.match(urlPattern) ?? [];

  const belongsToSite = (link: string): boolean => {
    // The URL pattern can capture trailing sentence punctuation; drop it
    // before parsing so it cannot end up inside the hostname.
    const trimmed = link.replace(/[.,;:!?,。;:!?、)】》]+$/u, '');
    let host: string;
    try {
      host = new URL(trimmed).hostname.toLowerCase().replace(/\.$/, '');
    } catch {
      return false; // not a parseable URL, so it cannot be attributed to a site
    }
    return host === siteHost || host.endsWith(`.${siteHost}`);
  };

  // The returned strings are the raw matches, so they stay comparable with
  // other extractions that use the same URL pattern.
  return [...new Set(candidates.filter(belongsToSite))];
}
/**
 * Collects every distinct http(s) URL found in `content`.
 *
 * @param content Text to scan.
 * @returns The URLs in order of first appearance, without duplicates; an
 *          empty array when there are none.
 */
function extractAllLinks(content: string): string[] {
  const matches = content.match(/https?:\/\/[^\s)\]"'>]+/gi);
  if (!matches) {
    return [...new Set<string>([])];
  }
  const unique = new Set<string>(matches);
  return [...unique];
}
// ========== Full flow for a single site ==========
/**
 * Runs the complete test for one site: creates the task, then polls it every
 * POLL_INTERVAL milliseconds until it completes, fails, or MAX_WAIT elapses.
 *
 * Progress is logged to the console. Errors while polling do not abort the
 * run; they are logged (once per distinct message) and polling continues. If
 * the run ends in a timeout while the latest poll was failing, that poll
 * error is included in the result's `errorMessage`.
 *
 * @param site The site to test.
 * @returns The populated result; `status` tells which way the run ended.
 */
async function testSingleSite(site: TestSite): Promise<TaskResult> {
  const startTime = Date.now();
  const tag = ` [${site.id.toString().padStart(2)}] ${site.name}`;
  const elapsedSec = (): number => (Date.now() - startTime) / 1000;

  const baseResult: TaskResult = {
    site,
    taskId: null,
    status: 'failed',
    searchCount: 0,
    readCount: 0,
    iterations: 0,
    contentLength: 0,
    reasoningLength: 0,
    referencesFound: 0,
    otherLinksFound: 0,
    durationSec: 0,
    errorMessage: '',
    sampleLinks: [],
  };

  // 1. Create the task
  console.log(`${tag} → 创建任务...`);
  const createResult = await createTask(site);
  if ('error' in createResult) {
    baseResult.status = 'create_error';
    baseResult.errorMessage = createResult.error;
    baseResult.durationSec = elapsedSec();
    console.log(`${tag} ✗ 创建失败: ${createResult.error}`);
    return baseResult;
  }
  const taskId = createResult.taskId;
  baseResult.taskId = taskId;
  console.log(`${tag} → task_id: ${taskId}`);

  // 2. Poll until the task reaches a final state
  const deadline = Date.now() + MAX_WAIT;
  let lastStatus = '';
  let lastPollError = '';

  // Log a polling problem only when its message changes, so a persistent
  // failure does not flood the console on every poll.
  const notePollError = (reason: string): void => {
    if (reason !== lastPollError) {
      console.warn(`${tag} ⚠ ${reason}`);
      lastPollError = reason;
    }
  };

  while (Date.now() < deadline) {
    await sleep(POLL_INTERVAL);
    try {
      const json = await queryTask(taskId) as any;
      const data = json?.data;
      if (!data) {
        const detail = json?.message || String(JSON.stringify(json)).slice(0, 200);
        notePollError(`轮询响应缺少 data: ${detail}`);
        continue;
      }
      lastPollError = '';

      const currentStatus = data.status;
      if (currentStatus !== lastStatus) {
        const progress = data.progress ? `${data.progress.current}/${data.progress.total}` : '?';
        const stats = data.statistics
          ? `搜索${data.statistics.search_count || 0} 阅读${data.statistics.read_count || 0}`
          : '';
        console.log(`${tag} → ${currentStatus} (${progress}) ${stats}`);
        lastStatus = currentStatus;
      }

      if (currentStatus === 'completed') {
        const content = data.result?.content || '';
        const reasoning = data.result?.reasoning_content || '';
        const stats = data.statistics || {};
        const siteLinks = extractDomainLinks(content, site.url);
        const allLinks = extractAllLinks(content);
        const otherLinkCount = allLinks.length - siteLinks.length;

        baseResult.status = 'success';
        baseResult.contentLength = content.length;
        baseResult.reasoningLength = reasoning.length;
        baseResult.searchCount = stats.search_count || 0;
        baseResult.readCount = stats.read_count || 0;
        baseResult.iterations = stats.iterations || 0;
        baseResult.referencesFound = siteLinks.length;
        baseResult.otherLinksFound = otherLinkCount;
        baseResult.sampleLinks = siteLinks.slice(0, 5);
        baseResult.durationSec = elapsedSec();
        console.log(`${tag} ✓ 完成 | 站内链接:${siteLinks.length} 其他链接:${otherLinkCount} | ${baseResult.durationSec.toFixed(0)}s`);
        return baseResult;
      }

      if (currentStatus === 'failed') {
        baseResult.status = 'failed';
        baseResult.errorMessage = data.result?.content || '任务失败';
        baseResult.durationSec = elapsedSec();
        console.log(`${tag} ✗ 失败: ${baseResult.errorMessage.slice(0, 80)}`);
        return baseResult;
      }
    } catch (err: unknown) {
      // An error during a poll is treated as transient: report it and retry
      // on the next tick instead of abandoning the task.
      const reason = err instanceof Error ? err.message : String(err);
      notePollError(`轮询请求失败: ${reason}`);
    }
  }

  // Timed out
  baseResult.status = 'timeout';
  baseResult.errorMessage = lastPollError
    ? `超时(${MAX_WAIT / 1000}s),最后一次轮询错误: ${lastPollError}`
    : `超时(${MAX_WAIT / 1000}s)`;
  baseResult.durationSec = elapsedSec();
  console.log(`${tag} ⏰ 超时`);
  return baseResult;
}
// ========== Result report ==========
/**
 * Prints the full coverage report to the console: a header with the test
 * parameters, one table for the top-7 sites and one for the others, summary
 * counts, per-category site listings, and finally a JSON array describing
 * each site's availability.
 *
 * A site counts as "searchable" when its task succeeded and at least one
 * in-site link was found, and as "reachable" when the task succeeded at all.
 *
 * @param results One result per tested site.
 */
function printReport(results: TaskResult[]): void {
  console.log('\n');
  console.log('='.repeat(100));
  console.log(' Unifuncs DeepSearch API 网站覆盖能力测试报告');
  console.log('='.repeat(100));
  console.log(`测试时间: ${new Date().toISOString()}`);
  console.log(`测试查询: "${QUERY}"`);
  console.log(`配置: max_depth=${MAX_DEPTH}, poll_interval=${POLL_INTERVAL / 1000}s`);
  console.log('');

  // ── Top 7 results ──
  console.log('━'.repeat(100));
  console.log(' ★ Top 7 最关注站点');
  console.log('━'.repeat(100));
  printTable(results.filter(r => r.site.priority === 'top7'));

  // ── Other results ──
  console.log('');
  console.log('━'.repeat(100));
  console.log(' 其他常用站点');
  console.log('━'.repeat(100));
  printTable(results.filter(r => r.site.priority === 'other'));

  // ── Summary ──
  console.log('');
  console.log('━'.repeat(100));
  console.log(' 汇总统计');
  console.log('━'.repeat(100));
  const successSites = results.filter(r => r.status === 'success' && r.referencesFound > 0);
  const reachableSites = results.filter(r => r.status === 'success');
  const failedSites = results.filter(r => r.status !== 'success');
  console.log(` 可搜索并返回站内链接: ${successSites.length}/${results.length} 个站点`);
  console.log(` 可到达但无站内链接: ${reachableSites.length - successSites.length} 个站点`);
  console.log(` 不可用/失败/超时: ${failedSites.length} 个站点`);
  console.log('');

  if (successSites.length > 0) {
    console.log(' ✅ 确认可搜索的站点:');
    for (const r of successSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.referencesFound} 个站内链接`);
    }
  }
  console.log('');

  const noLinkSites = reachableSites.filter(r => r.referencesFound === 0);
  if (noLinkSites.length > 0) {
    console.log(' ⚠️ 任务完成但无站内链接(可能搜索到了但链接指向其他站点):');
    for (const r of noLinkSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → 其他链接 ${r.otherLinksFound}`);
      if (r.sampleLinks.length === 0) {
        // With no sample link to show, print the volume of work done instead.
        console.log(` 内容长度: ${r.contentLength} 字符, 搜索${r.searchCount}次, 阅读${r.readCount}次`);
      }
    }
  }
  console.log('');

  if (failedSites.length > 0) {
    console.log(' ❌ 不可用站点:');
    for (const r of failedSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.status}: ${r.errorMessage.slice(0, 100)}`);
    }
  }
  console.log('');
  console.log('='.repeat(100));

  // ── JSON usable as front-end data source configuration ──
  console.log('');
  console.log('前端可用数据源配置(可直接用于 V2.0 数据源选择):');
  console.log('');
  const configList = results.map(r => ({
    name: r.site.name,
    url: r.site.url,
    category: r.site.category,
    available: r.status === 'success' && r.referencesFound > 0,
    reachable: r.status === 'success',
    siteLinksFound: r.referencesFound,
    searchCount: r.searchCount,
    readCount: r.readCount,
  }));
  console.log(JSON.stringify(configList, null, 2));
}
/**
 * Prints one table to the console: a header line, a dashed rule, then one
 * row per result.
 *
 * The status icon is ✅ for a successful run with in-site links, ⚠️ for a
 * successful run without any, ⏰ for a timeout and ❌ for anything else. The
 * note column shows the truncated error message for unsuccessful runs, the
 * first sample link for runs with in-site links, and the content length
 * otherwise.
 *
 * @param results The results to list, in display order.
 */
function printTable(results: TaskResult[]) {
  const header = ' 序号 | 状态 | 站点名称 | 站内链接 | 其他链接 | 搜索/阅读 | 耗时 | 说明';
  console.log(header);
  console.log(' ' + '-'.repeat(header.length - 2));

  results.forEach((row) => {
    const succeeded = row.status === 'success';
    const hasSiteLinks = row.referencesFound > 0;

    let icon: string;
    if (succeeded && hasSiteLinks) {
      icon = '✅';
    } else if (succeeded) {
      icon = '⚠️';
    } else if (row.status === 'timeout') {
      icon = '⏰';
    } else {
      icon = '❌';
    }

    let note: string;
    if (!succeeded) {
      note = row.errorMessage.slice(0, 25);
    } else if (hasSiteLinks) {
      note = row.sampleLinks[0]?.slice(0, 35) || '';
    } else {
      note = `内容${row.contentLength}`;
    }

    const cells = [
      ` ${row.site.id.toString().padStart(4)}`,
      icon,
      row.site.name.padEnd(24),
      row.referencesFound.toString().padStart(8),
      row.otherLinksFound.toString().padStart(8),
      `${row.searchCount}/${row.readCount}`.padStart(9),
      `${row.durationSec.toFixed(0).padStart(5)}s`,
      note,
    ];
    console.log(cells.join(' | '));
  });
}
// ========== Entry point ==========
/**
 * Prints the run banner, starts the test for every site in TEST_SITES at the
 * same time, waits for all of them to finish, and prints the report.
 */
async function main(): Promise<void> {
  const bannerLines = [
    '╔════════════════════════════════════════════════════════════╗',
    '║ Unifuncs DeepSearch API — 医学网站覆盖能力测试 ║',
    '╚════════════════════════════════════════════════════════════╝',
    '',
    `查询: "${QUERY}"`,
    `站点数: ${TEST_SITES.length} | max_depth: ${MAX_DEPTH} | 超时: ${MAX_WAIT / 1000}s`,
    `API: ${BASE_URL}`,
    '',
    '并行创建所有任务...\n',
  ];
  for (const bannerLine of bannerLines) {
    console.log(bannerLine);
  }

  // Start every site's full flow at once and wait for all of them.
  const pendingRuns: Promise<TaskResult>[] = [];
  for (const site of TEST_SITES) {
    pendingRuns.push(testSingleSite(site));
  }
  const allResults = await Promise.all(pendingRuns);

  // Print the report
  printReport(allResults);
}

// Run the script; any uncaught failure is reported and ends the process with exit code 1.
main().catch((failure) => {
  console.error('脚本执行失败:', failure);
  process.exit(1);
});