/**
 * Unifuncs API site-coverage test script.
 *
 * Tests how well unifuncs DeepSearch can search the medical journal sites
 * commonly used by doctors in China. Uses the asynchronous mode
 * (create_task + query_task) and tests all sites in parallel.
 *
 * The API key is read from the UNIFUNCS_API_KEY environment variable so that
 * no credential lives in source control.
 *
 * Usage:
 *   cd backend
 *   UNIFUNCS_API_KEY=sk-... npx tsx scripts/test-unifuncs-site-coverage.ts
 */

// ========== Configuration ==========

const API_KEY = process.env.UNIFUNCS_API_KEY ?? '';
const BASE_URL = 'https://api.unifuncs.com/deepsearch/v1';
const MAX_DEPTH = 5; // low depth for testing, to speed things up
const POLL_INTERVAL = 10000; // 10s polling interval
const MAX_WAIT = 600000; // maximum wait per task: 10 minutes
const QUERY = '他汀类药物预防心血管疾病的随机对照试验和Meta分析,近5年高质量研究';

// ========== Test site list ==========

interface TestSite {
  id: number;
  name: string;
  url: string;
  priority: 'top7' | 'other';
  category: 'english' | 'chinese';
}

const TEST_SITES: TestSite[] = [
  // ── Top 7: highest priority ──
  { id: 1, name: 'PubMed', url: 'https://pubmed.ncbi.nlm.nih.gov/', priority: 'top7', category: 'english' },
  { id: 2, name: 'ClinicalTrials.gov', url: 'https://clinicaltrials.gov/', priority: 'top7', category: 'english' },
  { id: 3, name: '中华医学期刊网', url: 'https://medjournals.cn/', priority: 'top7', category: 'chinese' },
  { id: 4, name: '中国知网 CNKI', url: 'https://www.cnki.net/', priority: 'top7', category: 'chinese' },
  { id: 5, name: '万方数据', url: 'https://www.wanfangdata.com.cn/', priority: 'top7', category: 'chinese' },
  { id: 6, name: '维普 VIP', url: 'https://www.cqvip.com/', priority: 'top7', category: 'chinese' },
  { id: 7, name: '中国临床试验注册中心', url: 'http://www.chictr.org.cn/', priority: 'top7', category: 'chinese' },
  // ── Other commonly used sites ──
  { id: 8, name: 'CBM/SinoMed', url: 'http://www.sinomed.ac.cn/', priority: 'other', category: 'chinese' },
  { id: 9, name: 'Web of Science', url: 'https://www.webofscience.com/', priority: 'other', category: 'english' },
  { id: 10, name: 'Embase', url: 'http://www.embase.com/', priority: 'other', category: 'english' },
  { id: 11, name: 'Cochrane Library', url: 'https://www.cochranelibrary.com/', priority: 'other', category: 'english' },
  { id: 12, name: 'Google Scholar', url: 'https://scholar.google.com/', priority: 'other', category: 'english' },
  { id: 13, name: 'Ovid', url: 'http://ovidsp.ovid.com/', priority: 'other', category: 'english' },
  { id: 14, name: 'Scopus', url: 'https://www.scopus.com/', priority: 'other', category: 'english' },
  { id: 15, name: '中国中医药数据库', url: 'https://cintmed.cintcm.cn/', priority: 'other', category: 'chinese' },
  { id: 16, name: 'GeenMedical', url: 'https://www.geenmedical.com/', priority: 'other', category: 'english' },
  { id: 17, name: 'NSTL 国家科技图书文献中心', url: 'https://www.nstl.gov.cn/', priority: 'other', category: 'chinese' },
  { id: 18, name: 'NCBI (全站)', url: 'https://www.ncbi.nlm.nih.gov/', priority: 'other', category: 'english' },
];

// ========== Result structures ==========

interface TaskResult {
  site: TestSite;
  taskId: string | null;
  status: 'success' | 'failed' | 'timeout' | 'create_error';
  searchCount: number;
  readCount: number;
  iterations: number;
  contentLength: number;
  reasoningLength: number;
  referencesFound: number; // number of links to this site found in the content
  otherLinksFound: number; // number of links to any other host
  durationSec: number;
  errorMessage: string;
  sampleLinks: string[]; // the first 5 site links found
}

// Minimal response shapes: only the fields this script reads. Everything is
// optional because the payloads are not validated against a schema.
interface CreateTaskResponse {
  code?: number;
  message?: string;
  data?: { task_id?: string };
}

interface QueryTaskResponse {
  code?: number;
  message?: string;
  data?: {
    status?: string;
    progress?: { current?: number; total?: number };
    statistics?: { search_count?: number; read_count?: number; iterations?: number };
    result?: { content?: string; reasoning_content?: string };
  };
}

/** Turns any thrown value into a printable message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

// ========== API wrappers ==========

/**
 * Creates a DeepSearch task restricted to one site.
 *
 * Never throws: configuration problems, network failures, non-JSON bodies and
 * API-level errors are all returned as `{ error }` so the caller can record
 * them per site.
 *
 * @param site The site whose URL is used as the task's `domain_scope`.
 * @returns `{ taskId }` on success, otherwise `{ error }` with a description.
 */
async function createTask(site: TestSite): Promise<{ taskId: string } | { error: string }> {
  // Without a key every request would be rejected; report it without a round trip.
  if (!API_KEY) {
    return { error: '未设置环境变量 UNIFUNCS_API_KEY' };
  }

  const payload = {
    model: 's2',
    messages: [{ role: 'user', content: QUERY }],
    introduction: '你是一名专业的临床研究文献检索专家。请在指定数据库中尽可能多地检索相关文献,输出每篇文献的标题、作者、年份、链接。',
    max_depth: MAX_DEPTH,
    domain_scope: [site.url],
    reference_style: 'link',
    generate_summary: true,
  };

  try {
    const res = await fetch(`${BASE_URL}/create_task`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });

    // Read as text first so a non-JSON error page yields a useful message
    // (HTTP status + body excerpt) instead of an opaque JSON parse error.
    const raw = await res.text();
    let json: CreateTaskResponse | null;
    try {
      json = JSON.parse(raw) as CreateTaskResponse | null;
    } catch {
      return { error: `API 返回非 JSON 响应 (HTTP ${res.status}): ${raw.slice(0, 200)}` };
    }

    if (json?.code === 0 && json.data?.task_id) {
      return { taskId: json.data.task_id };
    }
    return { error: `API 返回错误: ${json?.message || JSON.stringify(json)}` };
  } catch (err: unknown) {
    return { error: `请求失败: ${describeError(err)}` };
  }
}

/**
 * Fetches the current state of a task.
 *
 * @param taskId Identifier returned by `createTask`.
 * @returns The parsed JSON response body.
 * @throws Error on a non-2xx HTTP status; also rejects on network failure or
 *         when the body is not valid JSON.
 */
async function queryTask(taskId: string): Promise<QueryTaskResponse> {
  const params = new URLSearchParams({ task_id: taskId });
  const res = await fetch(`${BASE_URL}/query_task?${params.toString()}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` },
  });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status} ${res.statusText}`);
  }
  return (await res.json()) as QueryTaskResponse;
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// ========== Link extraction ==========

// A URL ends at whitespace, at the closing delimiters used by Markdown/HTML,
// or at full-width CJK punctuation, which often directly follows a link in
// Chinese text and is not valid unencoded inside a URL anyway.
const URL_PATTERN = /https?:\/\/[^\s)\]"'<>，。；、）】》]+/gi;

// ASCII punctuation that is far more likely to end a sentence than a URL.
const TRAILING_PUNCTUATION = /[.,;:!?]+$/;

/** Returns the lower-cased hostname of `url`, or null when it does not parse. */
function hostnameOf(url: string): string | null {
  try {
    return new URL(url).hostname;
  } catch {
    return null;
  }
}

/** Finds the distinct, parseable http(s) URLs in `content`, in first-seen order. */
function collectUrls(content: string): string[] {
  const matches = content.match(URL_PATTERN) ?? [];
  const cleaned = matches
    .map(u => u.replace(TRAILING_PUNCTUATION, ''))
    .filter(u => hostnameOf(u) !== null);
  return [...new Set(cleaned)];
}

/**
 * Extracts the distinct links in `content` that point at the given site.
 *
 * A link matches when its hostname equals the site's hostname (ignoring a
 * leading `www.`) or is a subdomain of it. Comparing parsed hostnames avoids
 * counting look-alike hosts (`notcnki.net`) or URLs that merely mention the
 * domain in their path or query string.
 *
 * @param content Text to scan.
 * @param siteUrl Absolute URL of the site.
 * @returns Matching URLs as they appear in the text, de-duplicated.
 * @throws TypeError when `siteUrl` is not a valid absolute URL.
 */
function extractDomainLinks(content: string, siteUrl: string): string[] {
  const domain = new URL(siteUrl).hostname.replace(/^www\./, '');
  return collectUrls(content).filter(u => {
    const host = hostnameOf(u);
    return host !== null && (host === domain || host.endsWith(`.${domain}`));
  });
}

/**
 * Extracts every distinct http(s) link in `content`.
 *
 * @param content Text to scan.
 * @returns URLs as they appear in the text, de-duplicated.
 */
function extractAllLinks(content: string): string[] {
  return collectUrls(content);
}

// ========== Full flow for a single site ==========

/**
 * Runs the whole create → poll → analyse flow for one site.
 *
 * Never throws; every failure mode is encoded in the returned `status` and
 * `errorMessage`.
 *
 * @param site Site to test.
 * @returns The collected metrics and final status for the site.
 */
async function testSingleSite(site: TestSite): Promise<TaskResult> {
  const startTime = Date.now();
  const tag = ` [${site.id.toString().padStart(2)}] ${site.name}`;
  const elapsedSec = (): number => (Date.now() - startTime) / 1000;

  const result: TaskResult = {
    site,
    taskId: null,
    status: 'failed',
    searchCount: 0,
    readCount: 0,
    iterations: 0,
    contentLength: 0,
    reasoningLength: 0,
    referencesFound: 0,
    otherLinksFound: 0,
    durationSec: 0,
    errorMessage: '',
    sampleLinks: [],
  };

  // 1. Create the task.
  console.log(`${tag} → 创建任务...`);
  const createResult = await createTask(site);
  if ('error' in createResult) {
    result.status = 'create_error';
    result.errorMessage = createResult.error;
    result.durationSec = elapsedSec();
    console.log(`${tag} ✗ 创建失败: ${createResult.error}`);
    return result;
  }
  result.taskId = createResult.taskId;
  console.log(`${tag} → task_id: ${createResult.taskId}`);

  // 2. Poll until the task finishes or the deadline passes.
  const deadline = Date.now() + MAX_WAIT;
  let lastStatus: string | undefined = '';
  let lastPollError = '';

  // Remember the most recent polling problem and log it once per distinct
  // message, so a task that ends in a timeout still says why.
  const notePollError = (message: string): void => {
    if (message !== lastPollError) {
      console.log(`${tag} ⚠ 轮询异常: ${message}`);
      lastPollError = message;
    }
  };

  while (Date.now() < deadline) {
    await sleep(POLL_INTERVAL);

    // Only the network call is retried; errors while processing a finished
    // task are handled below and must not send us back into the loop.
    let json: QueryTaskResponse | null;
    try {
      json = await queryTask(createResult.taskId);
    } catch (err: unknown) {
      notePollError(describeError(err));
      continue;
    }

    const data = json?.data;
    if (!data) {
      notePollError(json?.message || '响应缺少 data 字段');
      continue;
    }

    const currentStatus = data.status;
    if (currentStatus !== lastStatus) {
      const progress = data.progress ? `${data.progress.current}/${data.progress.total}` : '?';
      const stats = data.statistics
        ? `搜索${data.statistics.search_count ?? 0} 阅读${data.statistics.read_count ?? 0}`
        : '';
      console.log(`${tag} → ${currentStatus} (${progress}) ${stats}`);
      lastStatus = currentStatus;
    }

    if (currentStatus === 'completed') {
      try {
        const content = data.result?.content ?? '';
        const reasoning = data.result?.reasoning_content ?? '';
        const stats = data.statistics ?? {};
        const siteLinks = extractDomainLinks(content, site.url);
        const allLinks = extractAllLinks(content);

        result.status = 'success';
        result.contentLength = content.length;
        result.reasoningLength = reasoning.length;
        result.searchCount = stats.search_count ?? 0;
        result.readCount = stats.read_count ?? 0;
        result.iterations = stats.iterations ?? 0;
        result.referencesFound = siteLinks.length;
        result.otherLinksFound = allLinks.length - siteLinks.length;
        result.sampleLinks = siteLinks.slice(0, 5);
        result.durationSec = elapsedSec();
        console.log(
          `${tag} ✓ 完成 | 站内链接:${siteLinks.length} 其他链接:${allLinks.length - siteLinks.length} | ${result.durationSec.toFixed(0)}s`,
        );
      } catch (err: unknown) {
        // The task finished but its payload could not be analysed (for
        // example an unexpected field type). Report that instead of polling
        // a finished task until the timeout.
        result.status = 'failed';
        result.errorMessage = `结果解析失败: ${describeError(err)}`;
        result.durationSec = elapsedSec();
        console.log(`${tag} ✗ 失败: ${result.errorMessage.slice(0, 80)}`);
      }
      return result;
    }

    if (currentStatus === 'failed') {
      result.status = 'failed';
      result.errorMessage = data.result?.content || '任务失败';
      result.durationSec = elapsedSec();
      console.log(`${tag} ✗ 失败: ${result.errorMessage.slice(0, 80)}`);
      return result;
    }
  }

  // Timed out.
  result.status = 'timeout';
  result.errorMessage = `超时(${MAX_WAIT / 1000}s)` + (lastPollError ? `,最近一次轮询错误: ${lastPollError}` : '');
  result.durationSec = elapsedSec();
  console.log(`${tag} ⏰ 超时`);
  return result;
}

// ========== Report ==========

/**
 * Prints the full report: per-priority tables, summary statistics and a JSON
 * list intended for front-end data-source configuration.
 *
 * @param results One entry per tested site.
 */
function printReport(results: TaskResult[]): void {
  console.log('\n');
  console.log('='.repeat(100));
  console.log(' Unifuncs DeepSearch API 网站覆盖能力测试报告');
  console.log('='.repeat(100));
  console.log(`测试时间: ${new Date().toISOString()}`);
  console.log(`测试查询: "${QUERY}"`);
  console.log(`配置: max_depth=${MAX_DEPTH}, poll_interval=${POLL_INTERVAL / 1000}s`);
  console.log('');

  // ── Top 7 results ──
  console.log('━'.repeat(100));
  console.log(' ★ Top 7 最关注站点');
  console.log('━'.repeat(100));
  printTable(results.filter(r => r.site.priority === 'top7'));

  // ── Other results ──
  console.log('');
  console.log('━'.repeat(100));
  console.log(' 其他常用站点');
  console.log('━'.repeat(100));
  printTable(results.filter(r => r.site.priority === 'other'));

  // ── Summary ──
  console.log('');
  console.log('━'.repeat(100));
  console.log(' 汇总统计');
  console.log('━'.repeat(100));

  const successSites = results.filter(r => r.status === 'success' && r.referencesFound > 0);
  const reachableSites = results.filter(r => r.status === 'success');
  const failedSites = results.filter(r => r.status !== 'success');

  console.log(` 可搜索并返回站内链接: ${successSites.length}/${results.length} 个站点`);
  console.log(` 可到达但无站内链接: ${reachableSites.length - successSites.length} 个站点`);
  console.log(` 不可用/失败/超时: ${failedSites.length} 个站点`);
  console.log('');

  if (successSites.length > 0) {
    console.log(' ✅ 确认可搜索的站点:');
    for (const r of successSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.referencesFound} 个站内链接`);
    }
  }
  console.log('');

  const noLinkSites = reachableSites.filter(r => r.referencesFound === 0);
  if (noLinkSites.length > 0) {
    console.log(' ⚠️ 任务完成但无站内链接(可能搜索到了但链接指向其他站点):');
    for (const r of noLinkSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → 其他链接 ${r.otherLinksFound} 个`);
      // These sites have no site links, hence no sample links, so the detail
      // line is printed for each of them.
      console.log(` 内容长度: ${r.contentLength} 字符, 搜索${r.searchCount}次, 阅读${r.readCount}次`);
    }
  }
  console.log('');

  if (failedSites.length > 0) {
    console.log(' ❌ 不可用站点:');
    for (const r of failedSites) {
      console.log(` - ${r.site.name} (${r.site.url}) → ${r.status}: ${r.errorMessage.slice(0, 100)}`);
    }
  }
  console.log('');
  console.log('='.repeat(100));

  // ── JSON usable as front-end configuration ──
  console.log('');
  console.log('前端可用数据源配置(可直接用于 V2.0 数据源选择):');
  console.log('');
  const configList = results.map(r => ({
    name: r.site.name,
    url: r.site.url,
    category: r.site.category,
    available: r.status === 'success' && r.referencesFound > 0,
    reachable: r.status === 'success',
    siteLinksFound: r.referencesFound,
    searchCount: r.searchCount,
    readCount: r.readCount,
  }));
  console.log(JSON.stringify(configList, null, 2));
}

/**
 * Prints one table row per result under a fixed header.
 *
 * @param results Results to list, in display order.
 */
function printTable(results: TaskResult[]): void {
  const header = ' 序号 | 状态 | 站点名称 | 站内链接 | 其他链接 | 搜索/阅读 | 耗时 | 说明';
  console.log(header);
  console.log(' ' + '-'.repeat(header.length - 2));

  for (const r of results) {
    const statusIcon =
      r.status === 'success' && r.referencesFound > 0 ? '✅'
      : r.status === 'success' ? '⚠️'
      : r.status === 'timeout' ? '⏰'
      : '❌';

    // Note column: error excerpt for failures, first site link when there is
    // one, otherwise the content length.
    const note =
      r.status !== 'success' ? r.errorMessage.slice(0, 25)
      : r.referencesFound > 0 ? r.sampleLinks[0]?.slice(0, 35) || ''
      : `内容${r.contentLength}字`;

    console.log(
      ` ${r.site.id.toString().padStart(4)} | ${statusIcon} | ` +
      `${(r.site.name).padEnd(24)} | ` +
      `${r.referencesFound.toString().padStart(8)} | ` +
      `${r.otherLinksFound.toString().padStart(8)} | ` +
      `${r.searchCount}/${r.readCount}`.padStart(9) + ' | ' +
      `${r.durationSec.toFixed(0).padStart(5)}s | ` +
      note
    );
  }
}

// ========== Entry point ==========

/** Prints the banner, tests every site in parallel and prints the report. */
async function main(): Promise<void> {
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║ Unifuncs DeepSearch API — 医学网站覆盖能力测试 ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');
  console.log(`查询: "${QUERY}"`);
  console.log(`站点数: ${TEST_SITES.length} | max_depth: ${MAX_DEPTH} | 超时: ${MAX_WAIT / 1000}s`);
  console.log(`API: ${BASE_URL}`);
  console.log('');

  if (!API_KEY) {
    // Not fatal: each site is reported as create_error, like any other
    // request failure, and no network request is made.
    console.warn('⚠ 未设置环境变量 UNIFUNCS_API_KEY,所有站点将记为创建失败。');
    console.log('');
  }

  console.log('并行创建所有任务...\n');

  // Create and run all tasks in parallel. testSingleSite never rejects, so
  // one failing site cannot abort the others.
  const results = await Promise.all(TEST_SITES.map(site => testSingleSite(site)));

  // Print the report.
  printReport(results);
}

main().catch(err => {
  console.error('脚本执行失败:', err);
  process.exit(1);
});