refactor(asl): ASL frontend architecture refactoring with left navigation
- feat: Create ASLLayout component with 7-module left navigation - feat: Implement Title Screening Settings page with optimized PICOS layout - feat: Add placeholder pages for Workbench and Results - fix: Fix nested routing structure for React Router v6 - fix: Resolve Spin component warning in MainLayout - fix: Add QueryClientProvider to App.tsx - style: Optimize PICOS form layout (P+I left, C+O+S right) - style: Align Inclusion/Exclusion criteria side-by-side - docs: Add architecture refactoring and routing fix reports Ref: Week 2 Frontend Development Scope: ASL module MVP - Title Abstract Screening
This commit is contained in:
22
backend/scripts/check-excel-columns.ts
Normal file
22
backend/scripts/check-excel-columns.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import XLSX from 'xlsx';
|
||||
|
||||
// Workbook to inspect. Defaults to the original test-data location; pass a
// different path as the first CLI argument to inspect another file.
const DEFAULT_FILE_PATH = 'D:\\MyCursor\\AIclinicalresearch\\docs\\03-业务模块\\ASL-AI智能文献\\05-测试文档\\03-测试数据\\screening\\Test Cases.xlsx';
const filePath = process.argv[2] ?? DEFAULT_FILE_PATH;

const workbook = XLSX.readFile(filePath);

// Only the first worksheet is inspected.
const sheetName = workbook.SheetNames[0];
if (sheetName === undefined) {
  throw new Error(`No worksheet found in workbook: ${filePath}`);
}
const worksheet = workbook.Sheets[sheetName];

// One object per data row, keyed by column header.
const data = XLSX.utils.sheet_to_json<Record<string, unknown>>(worksheet);

console.log(`总行数: ${data.length}`);
console.log('\n前3行数据:');
data.slice(0, 3).forEach((row, i) => {
  console.log(`\n第${i+1}行:`);
  console.log(JSON.stringify(row, null, 2));
});

console.log('\n所有列名:');
if (data.length > 0) {
  // Empty cells are left out of a row object, so a column that is blank in
  // the first row would be missed if only data[0] were inspected. Collect the
  // keys of every row, keeping first-seen order.
  const columnNames = [...new Set(data.flatMap((row) => Object.keys(row)))];
  console.log(columnNames);
}
|
||||
|
||||
|
||||
205
backend/scripts/create-asl-tables.ts
Normal file
205
backend/scripts/create-asl-tables.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* 手动创建ASL模块的4张表
|
||||
* 避免影响现有表
|
||||
*/
|
||||
|
||||
import { prisma } from '../src/config/database.js';
|
||||
|
||||
/**
 * Creates the four ASL tables (screening_projects, literatures,
 * screening_results, screening_tasks) and their indexes inside `asl_schema`
 * with raw SQL, then prints the tables found in that schema.
 *
 * Every statement uses IF NOT EXISTS. Failures are logged and rethrown, and
 * the Prisma client is disconnected in every case.
 */
async function createAslTables() {
  // Ordered list of tables: each CREATE TABLE runs first, is logged, and is
  // followed by that table's index statements. Order matters because later
  // tables reference earlier ones through foreign keys.
  const tableDefinitions = [
    {
      table: 'asl_schema.screening_projects',
      createSql: `
        CREATE TABLE IF NOT EXISTS asl_schema.screening_projects (
          id TEXT PRIMARY KEY,
          user_id TEXT NOT NULL,
          project_name TEXT NOT NULL,
          pico_criteria JSONB NOT NULL,
          inclusion_criteria TEXT NOT NULL,
          exclusion_criteria TEXT NOT NULL,
          status TEXT NOT NULL DEFAULT 'draft',
          screening_config JSONB,
          created_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          updated_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          CONSTRAINT fk_user FOREIGN KEY (user_id) REFERENCES platform_schema.users(id) ON DELETE CASCADE
        );
      `,
      indexSql: [
        `
        CREATE INDEX IF NOT EXISTS idx_screening_projects_user_id ON asl_schema.screening_projects(user_id);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_screening_projects_status ON asl_schema.screening_projects(status);
        `,
      ],
    },
    {
      table: 'asl_schema.literatures',
      createSql: `
        CREATE TABLE IF NOT EXISTS asl_schema.literatures (
          id TEXT PRIMARY KEY,
          project_id TEXT NOT NULL,
          pmid TEXT,
          title TEXT NOT NULL,
          abstract TEXT NOT NULL,
          authors TEXT,
          journal TEXT,
          publication_year INTEGER,
          doi TEXT,
          pdf_url TEXT,
          pdf_oss_key TEXT,
          pdf_file_size INTEGER,
          created_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          updated_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          CONSTRAINT fk_project FOREIGN KEY (project_id) REFERENCES asl_schema.screening_projects(id) ON DELETE CASCADE,
          CONSTRAINT unique_project_pmid UNIQUE (project_id, pmid)
        );
      `,
      indexSql: [
        `
        CREATE INDEX IF NOT EXISTS idx_literatures_project_id ON asl_schema.literatures(project_id);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_literatures_doi ON asl_schema.literatures(doi);
        `,
      ],
    },
    {
      table: 'asl_schema.screening_results',
      createSql: `
        CREATE TABLE IF NOT EXISTS asl_schema.screening_results (
          id TEXT PRIMARY KEY,
          project_id TEXT NOT NULL,
          literature_id TEXT NOT NULL,

          -- DeepSeek判断
          ds_model_name TEXT NOT NULL,
          ds_p_judgment TEXT,
          ds_i_judgment TEXT,
          ds_c_judgment TEXT,
          ds_s_judgment TEXT,
          ds_conclusion TEXT,
          ds_confidence DOUBLE PRECISION,
          ds_p_evidence TEXT,
          ds_i_evidence TEXT,
          ds_c_evidence TEXT,
          ds_s_evidence TEXT,
          ds_reason TEXT,

          -- Qwen判断
          qwen_model_name TEXT NOT NULL,
          qwen_p_judgment TEXT,
          qwen_i_judgment TEXT,
          qwen_c_judgment TEXT,
          qwen_s_judgment TEXT,
          qwen_conclusion TEXT,
          qwen_confidence DOUBLE PRECISION,
          qwen_p_evidence TEXT,
          qwen_i_evidence TEXT,
          qwen_c_evidence TEXT,
          qwen_s_evidence TEXT,
          qwen_reason TEXT,

          -- 冲突状态
          conflict_status TEXT NOT NULL DEFAULT 'none',
          conflict_fields JSONB,

          -- 最终决策
          final_decision TEXT,
          final_decision_by TEXT,
          final_decision_at TIMESTAMP(3),
          exclusion_reason TEXT,

          -- AI处理状态
          ai_processing_status TEXT NOT NULL DEFAULT 'pending',
          ai_processed_at TIMESTAMP(3),
          ai_error_message TEXT,

          -- 可追溯信息
          prompt_version TEXT NOT NULL DEFAULT 'v1.0.0',
          raw_output JSONB,

          created_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          updated_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

          CONSTRAINT fk_project_result FOREIGN KEY (project_id) REFERENCES asl_schema.screening_projects(id) ON DELETE CASCADE,
          CONSTRAINT fk_literature FOREIGN KEY (literature_id) REFERENCES asl_schema.literatures(id) ON DELETE CASCADE,
          CONSTRAINT unique_project_literature UNIQUE (project_id, literature_id)
        );
      `,
      indexSql: [
        `
        CREATE INDEX IF NOT EXISTS idx_screening_results_project_id ON asl_schema.screening_results(project_id);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_screening_results_literature_id ON asl_schema.screening_results(literature_id);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_screening_results_conflict_status ON asl_schema.screening_results(conflict_status);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_screening_results_final_decision ON asl_schema.screening_results(final_decision);
        `,
      ],
    },
    {
      table: 'asl_schema.screening_tasks',
      createSql: `
        CREATE TABLE IF NOT EXISTS asl_schema.screening_tasks (
          id TEXT PRIMARY KEY,
          project_id TEXT NOT NULL,
          task_type TEXT NOT NULL,
          status TEXT NOT NULL DEFAULT 'pending',
          total_items INTEGER NOT NULL,
          processed_items INTEGER NOT NULL DEFAULT 0,
          success_items INTEGER NOT NULL DEFAULT 0,
          failed_items INTEGER NOT NULL DEFAULT 0,
          conflict_items INTEGER NOT NULL DEFAULT 0,
          started_at TIMESTAMP(3),
          completed_at TIMESTAMP(3),
          estimated_end_at TIMESTAMP(3),
          error_message TEXT,
          created_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          updated_at TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
          CONSTRAINT fk_project_task FOREIGN KEY (project_id) REFERENCES asl_schema.screening_projects(id) ON DELETE CASCADE
        );
      `,
      indexSql: [
        `
        CREATE INDEX IF NOT EXISTS idx_screening_tasks_project_id ON asl_schema.screening_tasks(project_id);
        `,
        `
        CREATE INDEX IF NOT EXISTS idx_screening_tasks_status ON asl_schema.screening_tasks(status);
        `,
      ],
    },
  ];

  try {
    console.log('🔍 开始创建ASL模块表...\n');

    for (const definition of tableDefinitions) {
      await prisma.$executeRawUnsafe(definition.createSql);
      console.log(`✅ 创建 ${definition.table}`);

      for (const indexStatement of definition.indexSql) {
        await prisma.$executeRawUnsafe(indexStatement);
      }
    }

    console.log('\n✅ ASL模块4张表创建完成!');
    console.log('📊 表列表:');
    console.log(' - asl_schema.screening_projects (筛选项目)');
    console.log(' - asl_schema.literatures (文献条目)');
    console.log(' - asl_schema.screening_results (筛选结果)');
    console.log(' - asl_schema.screening_tasks (筛选任务)');

    // Read the schema's table list back from the catalog as a sanity check.
    const existingTables = await prisma.$queryRawUnsafe<any[]>(`
      SELECT tablename
      FROM pg_tables
      WHERE schemaname = 'asl_schema'
      ORDER BY tablename;
    `);

    console.log('\n🔍 数据库验证:');
    for (const entry of existingTables) {
      console.log(` ✓ ${entry.tablename}`);
    }
  } catch (error) {
    console.error('❌ 创建表失败:', error);
    throw error;
  } finally {
    await prisma.$disconnect();
  }
}
|
||||
|
||||
// createAslTables() already logs the failure before rethrowing. Handle the
// rejection here so it is not left floating, and mark the run as failed.
createAslTables().catch(() => {
  process.exitCode = 1;
});
|
||||
|
||||
59
backend/scripts/create-test-user-for-asl.ts
Normal file
59
backend/scripts/create-test-user-for-asl.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* 为ASL测试创建测试用户
|
||||
*/
|
||||
|
||||
import { prisma } from '../src/config/database.js';
|
||||
|
||||
/**
 * Ensures the fixed ASL test user (`asl-test-user-001`) exists.
 *
 * Looks the user up by id and returns it when found; otherwise creates it
 * with the hard-coded test profile below. Errors are logged and rethrown, and
 * the Prisma client is disconnected in every case.
 *
 * @returns The existing or newly created user record.
 */
async function createTestUser() {
  const testUserId = 'asl-test-user-001';

  // Prints a headline followed by the user's id, email and name.
  const printUser = (
    headline: string,
    account: { id: unknown; email: unknown; name: unknown },
  ): void => {
    console.log(headline);
    console.log(' ID:', account.id);
    console.log(' 邮箱:', account.email);
    console.log(' 姓名:', account.name);
  };

  try {
    console.log('🔍 检查测试用户是否存在...\n');

    const found = await prisma.user.findUnique({ where: { id: testUserId } });
    if (found) {
      printUser('✅ 测试用户已存在:', found);
      return found;
    }

    // NOTE(review): the password value looks like a placeholder rather than
    // a real hash — confirm this account is never used to log in.
    const created = await prisma.user.create({
      data: {
        id: testUserId,
        email: 'asl-test@example.com',
        password: 'test-password-hash',
        name: 'ASL测试用户',
        role: 'user',
        status: 'active',
        kbQuota: 10,
        kbUsed: 0,
        isTrial: true,
      },
    });

    printUser('✅ 测试用户创建成功:', created);
    console.log('\n💡 在测试脚本中使用此用户ID进行测试');

    return created;
  } catch (error) {
    console.error('❌ 创建测试用户失败:', error);
    throw error;
  } finally {
    await prisma.$disconnect();
  }
}
|
||||
|
||||
// createTestUser() already logs the failure before rethrowing. Handle the
// rejection here so it is not left floating, and mark the run as failed.
createTestUser().catch(() => {
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
|
||||
193
backend/scripts/test-asl-api.ts
Normal file
193
backend/scripts/test-asl-api.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
/**
|
||||
* ASL模块API测试脚本
|
||||
* 测试所有ASL API端点
|
||||
*/
|
||||
|
||||
const BASE_URL = 'http://localhost:3001';
|
||||
const API_PREFIX = '/api/v1/asl';
|
||||
|
||||
// 测试用的userId (需要先创建用户或使用已有用户)
|
||||
const TEST_USER_ID = '00000000-0000-0000-0000-000000000001';
|
||||
|
||||
/**
 * Sends one request and returns the parsed JSON body.
 *
 * Throws when the server answers with a non-2xx status, so a failing step can
 * never be reported as successful.
 *
 * @param step Human-readable step name used in the error message.
 * @param url Absolute URL to request.
 * @param init Optional fetch options (method, headers, body).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- response shapes are defined by the API under test
async function fetchJsonOrThrow(step: string, url: string, init?: RequestInit): Promise<any> {
  const response = await fetch(url, init);
  if (!response.ok) {
    const body = await response.text();
    throw new Error(`${step}失败,状态码: ${response.status} ${body}`);
  }
  return response.json();
}

/**
 * Smoke-tests the ASL HTTP API in seven steps: health check, create project,
 * list projects, project detail, import literatures, list literatures and
 * update project.
 *
 * Each step depends on the project created in step 2. Any failure is logged
 * and sets a non-zero process exit code; the function itself never rejects.
 */
async function testAPI() {
  console.log('🚀 开始测试 ASL 模块 API...\n');

  const apiBase = `${BASE_URL}${API_PREFIX}`;
  const jsonHeaders = { 'Content-Type': 'application/json' };

  let projectId = '';
  let literatureIds: string[] = [];

  try {
    // ==================== Test 1: health check ====================
    console.log('📍 测试 1/7: 健康检查');
    const health = await fetchJsonOrThrow('健康检查', `${BASE_URL}/health`);
    console.log('✅ 健康检查成功:', health.status);
    console.log('');

    // ==================== Test 2: create screening project ====================
    console.log('📍 测试 2/7: 创建筛选项目');
    const createProjectRes = await fetch(`${apiBase}/projects`, {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        projectName: 'SGLT2抑制剂系统综述测试',
        picoCriteria: {
          population: '2型糖尿病成人患者',
          intervention: 'SGLT2抑制剂',
          comparison: '安慰剂或常规降糖疗法',
          outcome: '心血管结局',
          studyDesign: '随机对照试验 (RCT)',
        },
        inclusionCriteria: '英文文献,RCT研究,2010年后发表',
        exclusionCriteria: '病例报告,综述,动物实验',
        screeningConfig: {
          models: ['deepseek-chat', 'qwen-max'],
          temperature: 0,
        },
      }),
    });

    // This step keeps its own handling so the authentication hint is shown;
    // every later step needs the project id, so the run stops here.
    if (!createProjectRes.ok) {
      console.log('⚠️ 创建项目失败,状态码:', createProjectRes.status);
      const error = await createProjectRes.text();
      console.log('错误信息:', error);
      console.log('💡 提示: 需要添加JWT认证中间件,或暂时跳过userId验证\n');
      process.exitCode = 1;
      return;
    }

    const createResult = await createProjectRes.json();
    projectId = createResult.data.id;
    console.log('✅ 项目创建成功');
    console.log(' 项目ID:', projectId);
    console.log(' 项目名称:', createResult.data.projectName);
    console.log('');

    // ==================== Test 3: list projects ====================
    console.log('📍 测试 3/7: 获取项目列表');
    const listResult = await fetchJsonOrThrow('获取项目列表', `${apiBase}/projects`);
    console.log('✅ 获取项目列表成功');
    console.log(' 项目数量:', listResult.data.length);
    console.log('');

    // ==================== Test 4: project detail ====================
    console.log('📍 测试 4/7: 获取项目详情');
    const detailResult = await fetchJsonOrThrow('获取项目详情', `${apiBase}/projects/${projectId}`);
    console.log('✅ 获取项目详情成功');
    console.log(' 项目名称:', detailResult.data.projectName);
    console.log(' PICO标准:', JSON.stringify(detailResult.data.picoCriteria, null, 2));
    console.log('');

    // ==================== Test 5: import literatures (JSON) ====================
    console.log('📍 测试 5/7: 导入文献(JSON)');
    const importResult = await fetchJsonOrThrow('文献导入', `${apiBase}/literatures/import`, {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        projectId: projectId,
        literatures: [
          {
            pmid: '12345678',
            title: 'Efficacy of SGLT2 inhibitors in type 2 diabetes: a randomized controlled trial',
            abstract: 'Background: SGLT2 inhibitors are a new class of glucose-lowering drugs. Methods: We conducted a randomized, double-blind, placebo-controlled trial. Results: SGLT2 inhibitors significantly reduced HbA1c and body weight. Conclusions: SGLT2 inhibitors are effective for type 2 diabetes.',
            authors: 'Smith J, Jones A, Brown B',
            journal: 'New England Journal of Medicine',
            publicationYear: 2020,
            doi: '10.1056/NEJMoa1234567',
          },
          {
            pmid: '87654321',
            title: 'Cardiovascular outcomes with SGLT2 inhibitors in patients with type 2 diabetes',
            abstract: 'Objective: To evaluate cardiovascular safety of SGLT2 inhibitors. Design: Multicenter randomized controlled trial. Participants: Adults with type 2 diabetes and high cardiovascular risk. Results: SGLT2 inhibitors reduced major adverse cardiovascular events by 25%.',
            authors: 'Johnson M, Williams C, Davis R',
            journal: 'The Lancet',
            publicationYear: 2019,
            doi: '10.1016/S0140-6736(19)12345-6',
          },
          {
            // Deliberately has no pmid and no doi.
            title: 'A meta-analysis of SGLT2 inhibitor studies',
            abstract: 'This meta-analysis reviewed 20 studies on SGLT2 inhibitors. We found consistent benefits across different populations. However, results were heterogeneous.',
            authors: 'Lee K, Park S',
            journal: 'Diabetes Care',
            publicationYear: 2021,
          },
        ],
      }),
    });
    console.log('✅ 文献导入成功');
    console.log(' 导入数量:', importResult.data.importedCount);
    console.log('');

    // ==================== Test 6: list literatures ====================
    console.log('📍 测试 6/7: 获取文献列表');
    const litListResult = await fetchJsonOrThrow(
      '获取文献列表',
      `${apiBase}/projects/${projectId}/literatures`,
    );
    console.log('✅ 获取文献列表成功');
    console.log(' 文献数量:', litListResult.data.literatures.length);
    console.log(' 分页信息:', litListResult.data.pagination);

    if (litListResult.data.literatures.length > 0) {
      console.log(' 第一篇文献:');
      console.log(' - 标题:', litListResult.data.literatures[0].title.substring(0, 50) + '...');
      console.log(' - PMID:', litListResult.data.literatures[0].pmid);
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped API payload
      literatureIds = litListResult.data.literatures.map((lit: any) => lit.id);
    }
    console.log('');

    // ==================== Test 7: update project ====================
    console.log('📍 测试 7/7: 更新项目');
    const updateResult = await fetchJsonOrThrow('项目更新', `${apiBase}/projects/${projectId}`, {
      method: 'PUT',
      headers: jsonHeaders,
      body: JSON.stringify({
        status: 'screening',
      }),
    });
    console.log('✅ 项目更新成功');
    console.log(' 新状态:', updateResult.data.status);
    console.log('');

    // ==================== Summary ====================
    console.log('═'.repeat(60));
    console.log('🎉 所有测试通过!');
    console.log('═'.repeat(60));
    console.log('📊 测试总结:');
    console.log(' ✅ 健康检查');
    console.log(' ✅ 创建筛选项目');
    console.log(' ✅ 获取项目列表');
    console.log(' ✅ 获取项目详情');
    console.log(' ✅ 导入文献');
    console.log(' ✅ 获取文献列表');
    console.log(' ✅ 更新项目状态');
    console.log('');
    console.log('📝 创建的测试数据:');
    console.log(` - 项目ID: ${projectId}`);
    console.log(` - 文献数量: ${literatureIds.length}`);
    console.log('');
    console.log('🧹 清理提示: 如需删除测试数据,请执行:');
    // Built from the same constants as the requests so the hint stays
    // correct if the host or prefix changes.
    console.log(` DELETE ${apiBase}/projects/${projectId}`);
    console.log('');
  } catch (error) {
    console.error('❌ 测试失败:', error);
    if (error instanceof Error) {
      console.error('错误详情:', error.message);
    }
    // Let callers (CI, shell scripts) see that the run failed.
    process.exitCode = 1;
  }
}
|
||||
|
||||
// 执行测试
|
||||
testAPI();
|
||||
|
||||
|
||||
|
||||
133
backend/scripts/test-json-parser.ts
Normal file
133
backend/scripts/test-json-parser.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
/**
|
||||
* 测试JSON解析器的修复效果
|
||||
*
|
||||
* 测试目的:验证中文引号等格式问题是否能被正确处理
|
||||
*/
|
||||
|
||||
import { parseJSON } from '../src/common/utils/jsonParser.js';
|
||||
|
||||
console.log('\n🧪 JSON解析器修复测试\n');
|
||||
|
||||
// 测试用例
|
||||
/**
 * Replaces ASCII double quotes with Chinese curly quotes, alternating between
 * the opening (U+201C) and closing (U+201D) form.
 *
 * Used to build the "Chinese quotes" inputs from readable ASCII JSON, with the
 * non-ASCII characters spelled as escapes so they cannot be silently
 * normalised back to ASCII by an editor or tool.
 */
function toChineseQuotes(json: string): string {
  let opening = false;
  return json.replace(/"/g, () => {
    opening = !opening;
    return opening ? '\u201c' : '\u201d';
  });
}

// Test cases. Each case names the formatting problem it exercises, the raw
// text handed to the parser, and whether parsing is expected to succeed.
const testCases: Array<{ name: string; input: string; expectSuccess: boolean }> = [
  {
    name: '正常JSON(ASCII引号)',
    input: '{"conclusion": "exclude", "confidence": 0.95}',
    expectSuccess: true
  },
  {
    // Every quote is a Chinese curly quote.
    name: '中文引号JSON',
    input: toChineseQuotes('{"conclusion": "exclude", "confidence": 0.95}'),
    expectSuccess: true
  },
  {
    // Chinese quotes around the first key, ASCII quotes everywhere else.
    name: '混合引号JSON',
    input: '{\u201cconclusion\u201d: "exclude", "confidence": 0.95}',
    expectSuccess: true
  },
  {
    name: 'JSON代码块(中文引号)',
    input: [
      '```json',
      toChineseQuotes(`{
  "judgment": {
    "P": "match",
    "I": "match"
  },
  "conclusion": "include",
  "confidence": 0.85,
  "reason": "虽然对照组不是安慰剂,但研究质量高"
}`),
      '```',
    ].join('\n'),
    expectSuccess: true
  },
  {
    name: '带额外文字的JSON',
    input: [
      '这是筛选结果:',
      '```json',
      '{"conclusion": "exclude", "confidence": 0.90}',
      '```',
      '以上是我的判断。',
    ].join('\n'),
    expectSuccess: true
  },
  {
    // Full-width colon (U+FF1A) and comma (U+FF0C) as separators.
    name: '全角逗号和冒号',
    input: '{"conclusion"\uff1a"exclude"\uff0c"confidence"\uff1a0.95}',
    expectSuccess: true
  },
  {
    name: '不完整的JSON(应失败)',
    input: '{"conclusion": "exclude", "confidence":',
    expectSuccess: false
  },
  {
    name: '非JSON文本(应失败)',
    input: 'This is not a JSON string at all.',
    expectSuccess: false
  },
  {
    name: '复杂嵌套JSON(中文引号)',
    input: toChineseQuotes(`{
  "judgment": {
    "P": "match",
    "I": "partial",
    "C": "mismatch",
    "S": "match"
  },
  "evidence": {
    "P": "研究对象为急性缺血性卒中患者",
    "I": "干预措施为替格瑞洛",
    "C": "对照组为氯吡格雷而非安慰剂",
    "S": "随机对照试验"
  },
  "conclusion": "exclude",
  "confidence": 0.92,
  "reason": "虽然P、I、S维度匹配,但对照组不符合要求"
}`),
    expectSuccess: true
  }
];
|
||||
|
||||
// 运行测试
|
||||
// Run every case: a case passes when parseJSON's success flag matches the
// expectation, whether that expectation is success or failure.
let passed = 0;
let failed = 0;

for (const [index, testCase] of testCases.entries()) {
  console.log(`[测试 ${index + 1}/${testCases.length}] ${testCase.name}`);

  const result = parseJSON(testCase.input);
  const matchesExpectation = result.success === testCase.expectSuccess;

  if (matchesExpectation) {
    console.log(' ✅ 通过');
    if (result.success) {
      console.log(` 📄 解析结果: ${JSON.stringify(result.data).substring(0, 100)}...`);
    }
    passed++;
  } else {
    console.log(' ❌ 失败');
    console.log(` 期望: ${testCase.expectSuccess ? '成功' : '失败'}`);
    console.log(` 实际: ${result.success ? '成功' : '失败'}`);
    if (!result.success) {
      console.log(` 错误: ${result.error}`);
    }
    failed++;
  }
  console.log('');
}

// Summary
console.log('='.repeat(60));
console.log('📊 测试总结\n');
console.log(`✅ 通过: ${passed}/${testCases.length}`);
console.log(`❌ 失败: ${failed}/${testCases.length}`);
console.log(`📈 成功率: ${(passed / testCases.length * 100).toFixed(1)}%`);

if (passed === testCases.length) {
  console.log('\n🎉 所有测试通过!JSON解析器修复成功!');
} else {
  console.log('\n⚠️ 部分测试失败,需要进一步调试。');
  // Report the failure through the exit code so CI and shell scripts see it.
  process.exitCode = 1;
}

console.log('='.repeat(60) + '\n');
|
||||
|
||||
|
||||
377
backend/scripts/test-llm-screening.ts
Normal file
377
backend/scripts/test-llm-screening.ts
Normal file
@@ -0,0 +1,377 @@
|
||||
/**
|
||||
* LLM筛选质量测试脚本
|
||||
* 基于质量保障策略 v1.0.0
|
||||
* MVP目标:准确率≥85%,双模型一致率≥80%
|
||||
*/
|
||||
|
||||
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
|
||||
import { logger } from '../src/common/logging/index.js';
|
||||
import * as fs from 'fs/promises';
|
||||
import * as path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// 测试配置
|
||||
const TEST_CONFIG = {
|
||||
sampleFile: path.join(__dirname, 'test-samples/asl-test-literatures.json'),
|
||||
outputDir: path.join(__dirname, 'test-results'),
|
||||
models: {
|
||||
model1: 'deepseek-chat',
|
||||
model2: 'qwen-max'
|
||||
},
|
||||
concurrency: 2, // 并发数(避免API限流)
|
||||
};
|
||||
|
||||
// PICO标准(示例:SGLT2抑制剂系统综述)
|
||||
const PICO_CRITERIA = {
|
||||
population: '2型糖尿病成人患者',
|
||||
intervention: 'SGLT2抑制剂(如empagliflozin、dapagliflozin、canagliflozin等)',
|
||||
comparison: '安慰剂或常规降糖疗法',
|
||||
outcome: '心血管结局(主要不良心血管事件、心衰住院、心血管死亡)',
|
||||
studyDesign: '随机对照试验(RCT)'
|
||||
};
|
||||
|
||||
const INCLUSION_CRITERIA = `
|
||||
1. 成人2型糖尿病患者(≥18岁)
|
||||
2. 随机对照试验(RCT)设计
|
||||
3. 干预措施为SGLT2抑制剂单药或联合治疗
|
||||
4. 报告心血管结局数据
|
||||
5. 英文文献
|
||||
6. 发表于2010年后
|
||||
`;
|
||||
|
||||
const EXCLUSION_CRITERIA = `
|
||||
1. 综述、系统评价、Meta分析
|
||||
2. 病例报告、病例系列
|
||||
3. 动物实验或体外实验
|
||||
4. 会议摘要(未发表完整文章)
|
||||
5. 健康志愿者研究
|
||||
6. 1型糖尿病患者
|
||||
7. 观察性研究(队列、病例对照)
|
||||
`;
|
||||
|
||||
// 质量指标
|
||||
/** Aggregate quality figures computed over one test run. */
interface QualityMetrics {
  /** Number of test results the figures are based on. */
  totalTests: number;
  /** Number of results whose actual decision equals the expected one. */
  correctDecisions: number;
  /** correctDecisions / totalTests, as a fraction between 0 and 1. */
  accuracy: number;
  /** Fraction of results on which the two models reached consensus. */
  consistencyRate: number;
  /** Fraction of valid JSON outputs; calculateMetrics currently reports a fixed 1.0. */
  jsonValidRate: number;
  /** Mean of the per-result average confidence. */
  avgConfidence: number;
  /** Fraction of results flagged for manual review. */
  needReviewRate: number;
  /** Counts of expected-versus-actual decisions, with "include" as the positive class. */
  confusionMatrix: {
    truePositive: number;
    falsePositive: number;
    trueNegative: number;
    falseNegative: number;
    uncertain: number;
  };
}
|
||||
|
||||
// 测试结果
|
||||
/** Outcome of screening a single test literature. */
interface TestResult {
  /** Identifier of the sample, taken from the sample file. */
  literatureId: string;
  title: string;
  /** Decision recorded for the sample in the sample file. */
  expectedDecision: string;
  /** Final decision of the screening, 'pending' when none was made, or 'error' when the call failed. */
  actualDecision: string;
  /** True when actualDecision equals expectedDecision. */
  isCorrect: boolean;
  /** True when the two models did not conflict. */
  hasConsensus: boolean;
  /** True on conflict, on average confidence below 0.7, or when the call failed. */
  needReview: boolean;
  /** Mean of the two models' confidence values; 0 when the call failed. */
  avgConfidence: number;
  /** Raw DeepSeek result as returned by the screening service, or null on failure. */
  deepseekResult: any;
  /** Raw Qwen result as returned by the screening service, or null on failure. */
  qwenResult: any;
  /** Wall-clock time spent on the sample, in milliseconds. */
  processingTime: number;
}
|
||||
|
||||
/**
 * Runs the dual-model screening quality test end to end.
 *
 * Loads the sample literatures, screens each one with both configured models,
 * prints per-sample and aggregate results, writes a JSON result file and a
 * Markdown report into the output directory, and finally prints whether the
 * quality targets were met. Any failure outside the per-sample handling is
 * logged and terminates the process with exit code 1.
 */
async function main() {
  const divider = '='.repeat(80);
  const { model1, model2 } = TEST_CONFIG.models;
  // Formatting helpers for the metric lines below.
  const asPercent = (fraction: number): string => (fraction * 100).toFixed(1);
  const mark = (ok: boolean): string => (ok ? '✅' : '❌');

  console.log('🚀 启动LLM筛选质量测试\n');
  console.log(divider);
  console.log('测试配置:');
  console.log(` 模型组合: ${model1} + ${model2}`);
  console.log(` PICO标准: SGLT2抑制剂 RCT 心血管结局`);
  console.log(` 质量目标: 准确率≥85%, 一致率≥80%, JSON验证≥95%`);
  console.log(divider + '\n');

  try {
    // 1. Load the test samples.
    console.log('📖 加载测试样本...');
    const rawSamples = await fs.readFile(TEST_CONFIG.sampleFile, 'utf-8');
    const samples = JSON.parse(rawSamples);
    console.log(`✅ 加载${samples.length}篇测试文献\n`);

    // 2. Screen every sample, one at a time.
    console.log('🧪 开始执行筛选测试...\n');
    const results: TestResult[] = [];

    for (let idx = 0; idx < samples.length; idx += 1) {
      const sample = samples[idx];
      console.log(`[${idx + 1}/${samples.length}] 测试文献: ${sample.id}`);
      console.log(` 标题: ${sample.title.substring(0, 80)}...`);

      const startedAt = Date.now();

      try {
        // Ask both models for a verdict on this sample.
        const screening = await llmScreeningService.dualModelScreening(
          sample.id,
          sample.title,
          sample.abstract,
          PICO_CRITERIA,
          INCLUSION_CRITERIA,
          EXCLUSION_CRITERIA,
          [model1, model2]
        );
        const elapsedMs = Date.now() - startedAt;

        // Compare the outcome with the expected decision.
        const actualDecision = screening.finalDecision || 'pending';
        const expectedDecision = sample.expectedDecision;
        const isCorrect = actualDecision === expectedDecision;

        // A missing confidence counts as 0.
        const confidenceSum =
          (screening.deepseek.confidence || 0) + (screening.qwen.confidence || 0);
        const avgConfidence = confidenceSum / 2;

        results.push({
          literatureId: sample.id,
          title: sample.title,
          expectedDecision,
          actualDecision,
          isCorrect,
          hasConsensus: !screening.hasConflict,
          needReview: screening.hasConflict || avgConfidence < 0.7,
          avgConfidence,
          deepseekResult: screening.deepseek,
          qwenResult: screening.qwen,
          processingTime: elapsedMs,
        });

        console.log(` ${mark(isCorrect)} 期望: ${expectedDecision}, 实际: ${actualDecision}`);
        console.log(` 一致性: ${screening.hasConflict ? '❌ 冲突' : '✅ 一致'}`);
        console.log(` 置信度: ${avgConfidence.toFixed(2)}`);
        console.log(` 耗时: ${elapsedMs}ms`);
        console.log('');

        // Pause between samples to stay clear of API rate limits.
        const isLastSample = idx >= samples.length - 1;
        if (!isLastSample) {
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      } catch (error) {
        // A failed sample is recorded as an incorrect result needing review.
        console.error(` ❌ 测试失败:`, error);
        results.push({
          literatureId: sample.id,
          title: sample.title,
          expectedDecision: sample.expectedDecision,
          actualDecision: 'error',
          isCorrect: false,
          hasConsensus: false,
          needReview: true,
          avgConfidence: 0,
          deepseekResult: null,
          qwenResult: null,
          processingTime: Date.now() - startedAt,
        });
      }
    }

    // 3. Aggregate quality metrics.
    console.log('\n' + divider);
    console.log('📊 质量指标统计\n');

    const metrics = calculateMetrics(results);
    const matrix = metrics.confusionMatrix;

    console.log(`总测试数: ${metrics.totalTests}`);
    console.log(`正确决策: ${metrics.correctDecisions}`);
    console.log(`准确率: ${asPercent(metrics.accuracy)}% ${mark(metrics.accuracy >= 0.85)} (目标≥85%)`);
    console.log(`一致率: ${asPercent(metrics.consistencyRate)}% ${mark(metrics.consistencyRate >= 0.80)} (目标≥80%)`);
    console.log(`平均置信度: ${metrics.avgConfidence.toFixed(2)}`);
    console.log(`需人工复核: ${asPercent(metrics.needReviewRate)}% ${mark(metrics.needReviewRate <= 0.20)} (目标≤20%)`);
    console.log('\n混淆矩阵:');
    console.log(` 真阳性(TP): ${matrix.truePositive}`);
    console.log(` 假阳性(FP): ${matrix.falsePositive}`);
    console.log(` 真阴性(TN): ${matrix.trueNegative}`);
    console.log(` 假阴性(FN): ${matrix.falseNegative}`);
    console.log(` 不确定: ${matrix.uncertain}`);

    // 4. Persist the raw results.
    console.log('\n💾 保存测试结果...');
    await fs.mkdir(TEST_CONFIG.outputDir, { recursive: true });

    // ':' and '.' are replaced so the timestamp is safe in a file name.
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const outputFile = path.join(TEST_CONFIG.outputDir, `test-results-${timestamp}.json`);
    const serialized = JSON.stringify({ metrics, results }, null, 2);
    await fs.writeFile(outputFile, serialized, 'utf-8');

    console.log(`✅ 结果已保存: ${outputFile}`);

    // 5. Write the Markdown report.
    console.log('\n📋 生成测试报告...');
    const report = generateReport(metrics, results);
    const reportFile = path.join(TEST_CONFIG.outputDir, `test-report-${timestamp}.md`);

    await fs.writeFile(reportFile, report, 'utf-8');
    console.log(`✅ 报告已生成: ${reportFile}`);

    // 6. Final verdict against the quality targets.
    console.log('\n' + divider);
    console.log('🎯 测试总结\n');

    const allPassed =
      metrics.accuracy >= 0.85 &&
      metrics.consistencyRate >= 0.80 &&
      metrics.needReviewRate <= 0.20;

    if (!allPassed) {
      console.log('❌ 部分质量指标未达标,需要优化Prompt或调整策略。');
      console.log('\n改进建议:');
      if (metrics.accuracy < 0.85) {
        console.log(' - 优化Prompt,增加示例和指导');
        console.log(' - 检查错误案例,找出共性问题');
      }
      if (metrics.consistencyRate < 0.80) {
        console.log(' - 提高Prompt的明确性和一致性');
        console.log(' - 考虑增加Few-shot示例');
      }
      if (metrics.needReviewRate > 0.20) {
        console.log(' - 优化置信度评分策略');
        console.log(' - 调整人工复核阈值');
      }
    } else {
      console.log('✅ 所有质量指标达标!MVP阶段质量要求满足。');
    }

    console.log(divider);
  } catch (error) {
    console.error('❌ 测试失败:', error);
    process.exit(1);
  }
}
|
||||
|
||||
/**
 * Aggregates per-literature test results into run-level quality metrics.
 *
 * All rates are fractions between 0 and 1 and are 0 for an empty input.
 *
 * The confusion matrix treats "include" as the positive class. Every result
 * without a definite include/exclude prediction (for example 'uncertain',
 * 'pending' or 'error') is counted as `uncertain`, so such results are no
 * longer dropped and the cells add up to `totalTests` whenever every expected
 * decision is 'include' or 'exclude'.
 *
 * @param results Per-literature outcomes of one test run.
 * @returns The aggregated metrics; `jsonValidRate` is a fixed 1.0.
 */
function calculateMetrics(results: TestResult[]): QualityMetrics {
  const totalTests = results.length;
  // Share of the run that `count` represents; 0 when there are no results.
  const rateOf = (count: number): number => (totalTests > 0 ? count / totalTests : 0);

  const correctDecisions = results.filter(r => r.isCorrect).length;
  const consensusCount = results.filter(r => r.hasConsensus).length;
  const needReviewCount = results.filter(r => r.needReview).length;
  const totalConfidence = results.reduce((sum, r) => sum + r.avgConfidence, 0);

  const confusionMatrix = {
    truePositive: 0,
    falsePositive: 0,
    trueNegative: 0,
    falseNegative: 0,
    uncertain: 0,
  };

  for (const r of results) {
    const predictedInclude = r.actualDecision === 'include';
    const predictedExclude = r.actualDecision === 'exclude';

    if (!predictedInclude && !predictedExclude) {
      // No definite prediction was made for this literature.
      confusionMatrix.uncertain++;
    } else if (r.expectedDecision === 'include') {
      if (predictedInclude) {
        confusionMatrix.truePositive++;
      } else {
        confusionMatrix.falseNegative++;
      }
    } else if (r.expectedDecision === 'exclude') {
      if (predictedInclude) {
        confusionMatrix.falsePositive++;
      } else {
        confusionMatrix.trueNegative++;
      }
    }
    // A definite prediction for a sample whose expected decision is neither
    // 'include' nor 'exclude' has no cell in the matrix and is left out.
  }

  return {
    totalTests,
    correctDecisions,
    accuracy: rateOf(correctDecisions),
    consistencyRate: rateOf(consensusCount),
    // Not measured here. NOTE(review): the original comment says validation
    // is done automatically by AJV — confirm where that happens.
    jsonValidRate: 1.0,
    avgConfidence: rateOf(totalConfidence),
    needReviewRate: rateOf(needReviewCount),
    confusionMatrix,
  };
}
|
||||
|
||||
/**
 * Renders the screening quality metrics and per-literature results as a
 * Markdown report.
 *
 * The report contains a header (timestamp, model names from `TEST_CONFIG`,
 * sample count), a metrics table with pass/fail marks against the targets
 * (accuracy ≥85%, consistency ≥80%, need-review rate ≤20%), the confusion
 * matrix, and one detail section per result.
 *
 * A model result that is missing (null/undefined), or that lacks a conclusion
 * or confidence, is rendered as `N/A` rather than the literal text `undefined`.
 *
 * @param metrics - Aggregated quality metrics to summarise.
 * @param results - Individual results, rendered in the given order.
 * @returns The complete Markdown document.
 */
function generateReport(metrics: QualityMetrics, results: TestResult[]): string {
  // Capture the timestamp once so the header and footer always agree.
  const generatedAt = new Date().toISOString();

  const detailSections = results
    .map((r, i) => {
      // Either model result may be absent when a screening call failed.
      const deepseekConclusion = r.deepseekResult?.conclusion ?? 'N/A';
      const deepseekConfidence = r.deepseekResult?.confidence?.toFixed(2) ?? 'N/A';
      const qwenConclusion = r.qwenResult?.conclusion ?? 'N/A';
      const qwenConfidence = r.qwenResult?.confidence?.toFixed(2) ?? 'N/A';

      return `
### ${i + 1}. ${r.literatureId}

**标题**: ${r.title}
**期望决策**: ${r.expectedDecision}
**实际决策**: ${r.actualDecision}
**结果**: ${r.isCorrect ? '✅ 正确' : '❌ 错误'}
**一致性**: ${r.hasConsensus ? '✅ 一致' : '❌ 冲突'}
**平均置信度**: ${r.avgConfidence.toFixed(2)}
**处理时间**: ${r.processingTime}ms
**需人工复核**: ${r.needReview ? '是' : '否'}

**DeepSeek结论**: ${deepseekConclusion} (置信度: ${deepseekConfidence})
**Qwen结论**: ${qwenConclusion} (置信度: ${qwenConfidence})
`;
    })
    .join('\n');

  return `# LLM筛选质量测试报告

**测试时间**: ${generatedAt}
**测试模型**: ${TEST_CONFIG.models.model1} + ${TEST_CONFIG.models.model2}
**测试样本数**: ${metrics.totalTests}

---

## 质量指标

| 指标 | 实际值 | 目标值 | 状态 |
|------|--------|--------|------|
| 准确率 | ${(metrics.accuracy * 100).toFixed(1)}% | ≥85% | ${metrics.accuracy >= 0.85 ? '✅' : '❌'} |
| 一致率 | ${(metrics.consistencyRate * 100).toFixed(1)}% | ≥80% | ${metrics.consistencyRate >= 0.80 ? '✅' : '❌'} |
| 平均置信度 | ${metrics.avgConfidence.toFixed(2)} | - | - |
| 需人工复核率 | ${(metrics.needReviewRate * 100).toFixed(1)}% | ≤20% | ${metrics.needReviewRate <= 0.20 ? '✅' : '❌'} |

---

## 混淆矩阵

\`\`\`
预测纳入 预测排除 不确定
实际纳入 ${metrics.confusionMatrix.truePositive} ${metrics.confusionMatrix.falseNegative} -
实际排除 ${metrics.confusionMatrix.falsePositive} ${metrics.confusionMatrix.trueNegative} -
不确定 - - ${metrics.confusionMatrix.uncertain}
\`\`\`

---

## 详细结果

${detailSections}

---

**生成时间**: ${generatedAt}
`;
}
|
||||
|
||||
// Run the tests. If main() rejects, log the error and mark the process as
// failed so callers (shells, CI) do not see a zero exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
# LLM筛选质量测试报告
|
||||
|
||||
**测试时间**: 2025-11-18T07:46:42.902Z
|
||||
**测试模型**: deepseek-chat + qwen-max
|
||||
**测试样本数**: 10
|
||||
|
||||
---
|
||||
|
||||
## 质量指标
|
||||
|
||||
| 指标 | 实际值 | 目标值 | 状态 |
|
||||
|------|--------|--------|------|
|
||||
| 准确率 | 0.0% | ≥85% | ❌ |
|
||||
| 一致率 | 0.0% | ≥80% | ❌ |
|
||||
| 平均置信度 | 0.00 | - | - |
|
||||
| 需人工复核率 | 100.0% | ≤20% | ❌ |
|
||||
|
||||
---
|
||||
|
||||
## 混淆矩阵
|
||||
|
||||
```
|
||||
预测纳入 预测排除 不确定
|
||||
实际纳入 0 0 -
|
||||
实际排除 0 0 -
|
||||
不确定 - - 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 详细结果
|
||||
|
||||
|
||||
### 1. test-001
|
||||
|
||||
**标题**: Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 6ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 2. test-002
|
||||
|
||||
**标题**: Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 1ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 3. test-003
|
||||
|
||||
**标题**: Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 1ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 4. test-004
|
||||
|
||||
**标题**: Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 0ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 5. test-005
|
||||
|
||||
**标题**: Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 0ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 6. test-006
|
||||
|
||||
**标题**: Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 1ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 7. test-007
|
||||
|
||||
**标题**: Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 0ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 8. test-008
|
||||
|
||||
**标题**: Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 0ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 9. test-009
|
||||
|
||||
**标题**: Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 1ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 10. test-010
|
||||
|
||||
**标题**: Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment
|
||||
**期望决策**: uncertain
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 0ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
---
|
||||
|
||||
**生成时间**: 2025-11-18T07:46:42.902Z
|
||||
@@ -0,0 +1,186 @@
|
||||
# LLM筛选质量测试报告
|
||||
|
||||
**测试时间**: 2025-11-18T07:48:51.247Z
|
||||
**测试模型**: deepseek-chat + qwen-max
|
||||
**测试样本数**: 10
|
||||
|
||||
---
|
||||
|
||||
## 质量指标
|
||||
|
||||
| 指标 | 实际值 | 目标值 | 状态 |
|
||||
|------|--------|--------|------|
|
||||
| 准确率 | 0.0% | ≥85% | ❌ |
|
||||
| 一致率 | 0.0% | ≥80% | ❌ |
|
||||
| 平均置信度 | 0.00 | - | - |
|
||||
| 需人工复核率 | 100.0% | ≤20% | ❌ |
|
||||
|
||||
---
|
||||
|
||||
## 混淆矩阵
|
||||
|
||||
```
|
||||
预测纳入 预测排除 不确定
|
||||
实际纳入 0 0 -
|
||||
实际排除 0 0 -
|
||||
不确定 - - 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 详细结果
|
||||
|
||||
|
||||
### 1. test-001
|
||||
|
||||
**标题**: Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 8868ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 2. test-002
|
||||
|
||||
**标题**: Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 7365ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 3. test-003
|
||||
|
||||
**标题**: Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 8163ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 4. test-004
|
||||
|
||||
**标题**: Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 12106ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 5. test-005
|
||||
|
||||
**标题**: Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy
|
||||
**期望决策**: include
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 4700ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 6. test-006
|
||||
|
||||
**标题**: Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 7922ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 7. test-007
|
||||
|
||||
**标题**: Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 7877ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 8. test-008
|
||||
|
||||
**标题**: Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 11004ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 9. test-009
|
||||
|
||||
**标题**: Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report
|
||||
**期望决策**: exclude
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 11130ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
### 10. test-010
|
||||
|
||||
**标题**: Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment
|
||||
**期望决策**: uncertain
|
||||
**实际决策**: error
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.00
|
||||
**处理时间**: 7387ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: undefined (置信度: undefined)
|
||||
**Qwen结论**: undefined (置信度: undefined)
|
||||
|
||||
|
||||
---
|
||||
|
||||
**生成时间**: 2025-11-18T07:48:51.247Z
|
||||
@@ -0,0 +1,186 @@
|
||||
# LLM筛选质量测试报告
|
||||
|
||||
**测试时间**: 2025-11-18T07:52:19.261Z
|
||||
**测试模型**: deepseek-chat + qwen-max
|
||||
**测试样本数**: 10
|
||||
|
||||
---
|
||||
|
||||
## 质量指标
|
||||
|
||||
| 指标 | 实际值 | 目标值 | 状态 |
|
||||
|------|--------|--------|------|
|
||||
| 准确率 | 60.0% | ≥85% | ❌ |
|
||||
| 一致率 | 70.0% | ≥80% | ❌ |
|
||||
| 平均置信度 | 0.95 | - | - |
|
||||
| 需人工复核率 | 30.0% | ≤20% | ❌ |
|
||||
|
||||
---
|
||||
|
||||
## 混淆矩阵
|
||||
|
||||
```
|
||||
预测纳入 预测排除 不确定
|
||||
实际纳入 2 1 -
|
||||
实际排除 0 4 -
|
||||
不确定 - - 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 详细结果
|
||||
|
||||
|
||||
### 1. test-001
|
||||
|
||||
**标题**: Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial
|
||||
**期望决策**: include
|
||||
**实际决策**: exclude
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.93
|
||||
**处理时间**: 12188ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.90)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 2. test-002
|
||||
|
||||
**标题**: Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes
|
||||
**期望决策**: include
|
||||
**实际决策**: include
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 11237ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: include (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
### 3. test-003
|
||||
|
||||
**标题**: Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 15737ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 4. test-004
|
||||
|
||||
**标题**: Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12670ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 5. test-005
|
||||
|
||||
**标题**: Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy
|
||||
**期望决策**: include
|
||||
**实际决策**: include
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 11345ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: include (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
### 6. test-006
|
||||
|
||||
**标题**: Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12213ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 7. test-007
|
||||
|
||||
**标题**: Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers
|
||||
**期望决策**: exclude
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 13333ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 8. test-008
|
||||
|
||||
**标题**: Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes
|
||||
**期望决策**: exclude
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12025ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 9. test-009
|
||||
|
||||
**标题**: Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 11897ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 10. test-010
|
||||
|
||||
**标题**: Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment
|
||||
**期望决策**: uncertain
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12769ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
---
|
||||
|
||||
**生成时间**: 2025-11-18T07:52:19.261Z
|
||||
@@ -0,0 +1,186 @@
|
||||
# LLM筛选质量测试报告
|
||||
|
||||
**测试时间**: 2025-11-18T08:10:57.409Z
|
||||
**测试模型**: deepseek-chat + qwen-max
|
||||
**测试样本数**: 10
|
||||
|
||||
---
|
||||
|
||||
## 质量指标
|
||||
|
||||
| 指标 | 实际值 | 目标值 | 状态 |
|
||||
|------|--------|--------|------|
|
||||
| 准确率 | 60.0% | ≥85% | ❌ |
|
||||
| 一致率 | 70.0% | ≥80% | ❌ |
|
||||
| 平均置信度 | 0.95 | - | - |
|
||||
| 需人工复核率 | 30.0% | ≤20% | ❌ |
|
||||
|
||||
---
|
||||
|
||||
## 混淆矩阵
|
||||
|
||||
```
|
||||
预测纳入 预测排除 不确定
|
||||
实际纳入 2 1 -
|
||||
实际排除 0 4 -
|
||||
不确定 - - 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 详细结果
|
||||
|
||||
|
||||
### 1. test-001
|
||||
|
||||
**标题**: Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial
|
||||
**期望决策**: include
|
||||
**实际决策**: exclude
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.93
|
||||
**处理时间**: 11935ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.90)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 2. test-002
|
||||
|
||||
**标题**: Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes
|
||||
**期望决策**: include
|
||||
**实际决策**: include
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 13225ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: include (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
### 3. test-003
|
||||
|
||||
**标题**: Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 10683ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 4. test-004
|
||||
|
||||
**标题**: Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 13067ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 5. test-005
|
||||
|
||||
**标题**: Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy
|
||||
**期望决策**: include
|
||||
**实际决策**: include
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12352ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: include (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
### 6. test-006
|
||||
|
||||
**标题**: Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 11690ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 7. test-007
|
||||
|
||||
**标题**: Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers
|
||||
**期望决策**: exclude
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 14253ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 8. test-008
|
||||
|
||||
**标题**: Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes
|
||||
**期望决策**: exclude
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12808ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 9. test-009
|
||||
|
||||
**标题**: Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report
|
||||
**期望决策**: exclude
|
||||
**实际决策**: exclude
|
||||
**结果**: ✅ 正确
|
||||
**一致性**: ✅ 一致
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 12092ms
|
||||
**需人工复核**: 否
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: exclude (置信度: 0.95)
|
||||
|
||||
|
||||
### 10. test-010
|
||||
|
||||
**标题**: Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment
|
||||
**期望决策**: uncertain
|
||||
**实际决策**: pending
|
||||
**结果**: ❌ 错误
|
||||
**一致性**: ❌ 冲突
|
||||
**平均置信度**: 0.95
|
||||
**处理时间**: 13503ms
|
||||
**需人工复核**: 是
|
||||
|
||||
**DeepSeek结论**: exclude (置信度: 0.95)
|
||||
**Qwen结论**: include (置信度: 0.95)
|
||||
|
||||
|
||||
---
|
||||
|
||||
**生成时间**: 2025-11-18T08:10:57.409Z
|
||||
@@ -0,0 +1,150 @@
|
||||
{
|
||||
"metrics": {
|
||||
"totalTests": 10,
|
||||
"correctDecisions": 0,
|
||||
"accuracy": 0,
|
||||
"consistencyRate": 0,
|
||||
"jsonValidRate": 1,
|
||||
"avgConfidence": 0,
|
||||
"needReviewRate": 1,
|
||||
"confusionMatrix": {
|
||||
"truePositive": 0,
|
||||
"falsePositive": 0,
|
||||
"trueNegative": 0,
|
||||
"falseNegative": 0,
|
||||
"uncertain": 0
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"literatureId": "test-001",
|
||||
"title": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 6
|
||||
},
|
||||
{
|
||||
"literatureId": "test-002",
|
||||
"title": "Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 1
|
||||
},
|
||||
{
|
||||
"literatureId": "test-003",
|
||||
"title": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 1
|
||||
},
|
||||
{
|
||||
"literatureId": "test-004",
|
||||
"title": "Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 0
|
||||
},
|
||||
{
|
||||
"literatureId": "test-005",
|
||||
"title": "Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 0
|
||||
},
|
||||
{
|
||||
"literatureId": "test-006",
|
||||
"title": "Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 1
|
||||
},
|
||||
{
|
||||
"literatureId": "test-007",
|
||||
"title": "Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 0
|
||||
},
|
||||
{
|
||||
"literatureId": "test-008",
|
||||
"title": "Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 0
|
||||
},
|
||||
{
|
||||
"literatureId": "test-009",
|
||||
"title": "Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 1
|
||||
},
|
||||
{
|
||||
"literatureId": "test-010",
|
||||
"title": "Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment",
|
||||
"expectedDecision": "uncertain",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
{
|
||||
"metrics": {
|
||||
"totalTests": 10,
|
||||
"correctDecisions": 0,
|
||||
"accuracy": 0,
|
||||
"consistencyRate": 0,
|
||||
"jsonValidRate": 1,
|
||||
"avgConfidence": 0,
|
||||
"needReviewRate": 1,
|
||||
"confusionMatrix": {
|
||||
"truePositive": 0,
|
||||
"falsePositive": 0,
|
||||
"trueNegative": 0,
|
||||
"falseNegative": 0,
|
||||
"uncertain": 0
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"literatureId": "test-001",
|
||||
"title": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 8868
|
||||
},
|
||||
{
|
||||
"literatureId": "test-002",
|
||||
"title": "Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 7365
|
||||
},
|
||||
{
|
||||
"literatureId": "test-003",
|
||||
"title": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 8163
|
||||
},
|
||||
{
|
||||
"literatureId": "test-004",
|
||||
"title": "Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 12106
|
||||
},
|
||||
{
|
||||
"literatureId": "test-005",
|
||||
"title": "Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 4700
|
||||
},
|
||||
{
|
||||
"literatureId": "test-006",
|
||||
"title": "Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 7922
|
||||
},
|
||||
{
|
||||
"literatureId": "test-007",
|
||||
"title": "Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 7877
|
||||
},
|
||||
{
|
||||
"literatureId": "test-008",
|
||||
"title": "Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 11004
|
||||
},
|
||||
{
|
||||
"literatureId": "test-009",
|
||||
"title": "Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 11130
|
||||
},
|
||||
{
|
||||
"literatureId": "test-010",
|
||||
"title": "Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment",
|
||||
"expectedDecision": "uncertain",
|
||||
"actualDecision": "error",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0,
|
||||
"deepseekResult": null,
|
||||
"qwenResult": null,
|
||||
"processingTime": 7387
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,470 @@
|
||||
{
|
||||
"metrics": {
|
||||
"totalTests": 10,
|
||||
"correctDecisions": 6,
|
||||
"accuracy": 0.6,
|
||||
"consistencyRate": 0.7,
|
||||
"jsonValidRate": 1,
|
||||
"avgConfidence": 0.9475,
|
||||
"needReviewRate": 0.3,
|
||||
"confusionMatrix": {
|
||||
"truePositive": 2,
|
||||
"falsePositive": 0,
|
||||
"trueNegative": 4,
|
||||
"falseNegative": 1,
|
||||
"uncertain": 0
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"literatureId": "test-001",
|
||||
"title": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.925,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "we randomly assigned 800 adults with type 2 diabetes",
|
||||
"I": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes",
|
||||
"C": "placebo-controlled trial",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.9,
|
||||
"reason": "虽然该文献在PICO标准上完全匹配(研究人群为2型糖尿病成人患者,干预措施为SGLT2抑制剂empagliflozin,对照为安慰剂,研究设计为RCT),且满足纳入标准中的年龄、研究设计、干预措施、发表语言和时间要求,但文献明确显示其主要终点为血糖控制指标(HbA1c变化),而非方案要求的心血管结局(主要不良心血管事件、心衰住院、心血管死亡)。摘要中仅提及次要终点包括体重、血压和不良事件,未报告任何心血管结局数据,因此不符合纳入标准第4条'报告心血管结局数据'的要求,应予排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes and inadequate glycemic control (HbA1c 7.0-10.0%)",
|
||||
"I": "to receive empagliflozin 10 mg, empagliflozin 25 mg, or placebo once daily",
|
||||
"C": "placebo once daily",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "尽管该研究符合PICO中的P、I、C和S标准,且为RCT设计,但其主要结局为HbA1c变化、体重和血压等代谢指标,并未报告任何心血管结局(如MACE、心衰住院或心血管死亡)。根据纳入标准第4条,必须报告心血管结局数据方可纳入,而摘要中无相关信息支持此关键结局,因此排除。"
|
||||
},
|
||||
"processingTime": 12188
|
||||
},
|
||||
{
|
||||
"literatureId": "test-002",
|
||||
"title": "Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "include",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "8246 patients with type 2 diabetes mellitus and established atherosclerotic cardiovascular disease",
|
||||
"I": "ertugliflozin, a sodium-glucose cotransporter 2 (SGLT2) inhibitor",
|
||||
"C": "ertugliflozin (5 mg or 15 mg) or placebo, in addition to standard care",
|
||||
"S": "We randomly assigned 8246 patients"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献完全符合纳入标准:研究对象为2型糖尿病成人患者且合并动脉粥样硬化性心血管疾病;干预措施为SGLT2抑制剂(ertugliflozin);对照为安慰剂加标准治疗;研究设计为随机对照试验;报告了主要不良心血管事件和心衰住院等心血管结局;发表于2010年后且为英文文献。同时未触发任何排除标准,研究设计严谨,证据充分,应纳入系统综述。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes mellitus and established atherosclerotic cardiovascular disease",
|
||||
"I": "receive ertugliflozin (5 mg or 15 mg)",
|
||||
"C": "placebo, in addition to standard care",
|
||||
"S": "We randomly assigned 8246 patients... to receive ertugliflozin... or placebo"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入2型糖尿病成人患者,使用SGLT2抑制剂ertugliflozin作为干预,对照为安慰剂加标准治疗,报告了主要不良心血管事件等结局,且为随机对照试验设计。完全符合PICO标准和纳入标准。未触发任何排除标准,信息充分,设计严谨,故决定纳入。"
|
||||
},
|
||||
"processingTime": 11237
|
||||
},
|
||||
{
|
||||
"literatureId": "test-003",
|
||||
"title": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "in adults with type 2 diabetes",
|
||||
"I": "SGLT2 inhibitors",
|
||||
"C": "comparing SGLT2 inhibitors with placebo or active comparators",
|
||||
"S": "Systematic Review and Meta-Analysis"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确为系统评价和Meta分析,研究设计为二次研究而非原始随机对照试验,直接触发排除标准第1条(排除综述、系统评价、Meta分析)。虽然P(2型糖尿病成人患者)和I(SGLT2抑制剂)完全匹配,C(安慰剂或常规降糖疗法)部分匹配(提及与安慰剂或活性对照比较),但研究设计不匹配且触发排除标准,因此必须排除。该文献发表于2010年后且为英文,但研究设计类型不符合纳入要求。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes.",
|
||||
"I": "We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes.",
|
||||
"C": "comparing SGLT2 inhibitors with placebo or active comparators",
|
||||
"S": "We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献为系统综述和Meta分析,尽管其纳入的研究符合PICO中的P、I和部分C标准,且研究人群为成人2型糖尿病患者,干预措施为SGLT2抑制剂,对照包括安慰剂或常规疗法,结局包含心血管事件,但本文本身并非原始RCT研究,而是对RCT的汇总分析,因此研究设计不符合方案要求的原始随机对照试验(S),触发排除标准中‘系统评价、Meta分析’的排除条件,故应排除。"
|
||||
},
|
||||
"processingTime": 15737
|
||||
},
|
||||
{
|
||||
"literatureId": "test-004",
|
||||
"title": "Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Diabetes was induced in male Sprague-Dawley rats by streptozotocin injection",
|
||||
"I": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day)",
|
||||
"C": "vehicle for 8 weeks",
|
||||
"S": "diabetic rats"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确不符合纳入标准,主要问题在于:(1)研究对象为糖尿病大鼠而非人类2型糖尿病患者,违反了PICO中P标准;(2)研究设计为动物实验而非随机对照试验,违反了S标准;(3)对照为vehicle而非安慰剂或常规降糖疗法,仅部分匹配C标准。虽然干预措施为SGLT2抑制剂(dapagliflozin)匹配I标准,但由于研究对象为动物且研究设计不符合RCT要求,触发了排除标准中的'动物实验或体外实验'条款,因此必须排除。该文献为明确的动物实验研究,证据充分,排除决策高度可靠。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Diabetes was induced in male Sprague-Dawley rats by streptozotocin injection.",
|
||||
"I": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks.",
|
||||
"C": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks.",
|
||||
"S": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究在糖尿病大鼠模型中进行,研究人群为动物而非2型糖尿病成人患者,不符合P标准。尽管干预措施为dapagliflozin且采用随机分组,但属于动物实验,研究设计不符合RCT的人群范畴。根据排除标准第3条,动物实验应被排除。虽然方法提及随机分配,但整体研究不适用于人类患者,无法提供关于心血管结局的临床证据,因此最终决定排除。"
|
||||
},
|
||||
"processingTime": 12670
|
||||
},
|
||||
{
|
||||
"literatureId": "test-005",
|
||||
"title": "Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "include",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "we randomly assigned 4401 participants with type 2 diabetes and albuminuric chronic kidney disease",
|
||||
"I": "to receive canagliflozin at a dose of 100 mg daily",
|
||||
"C": "or placebo",
|
||||
"S": "In this double-blind trial, we randomly assigned 4401 participants"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献完全符合纳入标准:研究人群为2型糖尿病成人患者(P匹配),干预措施为SGLT2抑制剂canagliflozin(I匹配),对照为安慰剂(C匹配),研究设计为随机双盲对照试验(S匹配)。文献发表于2010年后且为英文,报告了心血管结局数据(摘要中明确提到'cardiovascular events')。该研究未触发任何排除标准,是一项高质量的心血管结局RCT研究,应纳入系统综述。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "participants with type 2 diabetes and albuminuric chronic kidney disease",
|
||||
"I": "canagliflozin at a dose of 100 mg daily",
|
||||
"C": "placebo",
|
||||
"S": "In this double-blind trial, we randomly assigned 4401 participants...to receive canagliflozin...or placebo"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究为RCT,纳入成人2型糖尿病患者,干预措施为SGLT2抑制剂(canagliflozin),对照为安慰剂,报告了心血管事件作为复合终点的一部分。完全符合PICO标准和纳入标准。尽管主要结局是肾脏相关复合终点,但明确提及‘death from renal or cardiovascular causes’,说明心血管结局数据被报告,满足方案要求。无排除标准触发项。"
|
||||
},
|
||||
"processingTime": 11345
|
||||
},
|
||||
{
|
||||
"literatureId": "test-006",
|
||||
"title": "Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "\"12,543 adults with type 2 diabetes\"",
|
||||
"I": "\"patients with type 2 diabetes who initiated SGLT2 inhibitor therapy\"",
|
||||
"C": "摘要中未提及安慰剂或常规降糖疗法作为对照",
|
||||
"S": "\"We conducted a retrospective cohort study\""
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,应予以排除。具体原因:(1)研究设计为回顾性队列研究,明确不符合PICO标准中要求的随机对照试验(RCT),且触发排除标准第7条(观察性研究);(2)研究人群和干预措施符合PICO标准,但缺乏明确的对照组设置,未提及安慰剂或常规降糖疗法;(3)研究结局指标为HbA1c、体重、血压变化及安全性事件,而非方案要求的心血管结局(主要不良心血管事件、心衰住院、心血管死亡)。这是一项真实世界研究,而非随机对照试验,因此不符合纳入条件。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We identified 12,543 adults with type 2 diabetes who initiated SGLT2 inhibitor therapy between 2014 and 2020.",
|
||||
"I": "We identified 12,543 adults with type 2 diabetes who initiated SGLT2 inhibitor therapy between 2014 and 2020.",
|
||||
"C": "Methods: We conducted a retrospective cohort study using electronic health records from a large healthcare system.",
|
||||
"S": "Methods: We conducted a retrospective cohort study using electronic health records from a large healthcare system."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究为回顾性队列研究,不符合方案要求的随机对照试验(RCT)设计,因此S维度不匹配。尽管研究人群(P)为成人2型糖尿病患者且干预措施(I)为SGLT2抑制剂,符合纳入标准,但对照(C)未明确使用安慰剂或常规降糖疗法,且研究设计属于观察性研究,触发排除标准第7条。因此,尽管报告了相关结局,仍应排除。"
|
||||
},
|
||||
"processingTime": 12213
|
||||
},
|
||||
{
|
||||
"literatureId": "test-007",
|
||||
"title": "Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "partial",
|
||||
"C": "match",
|
||||
"S": "partial"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers",
|
||||
"I": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner",
|
||||
"C": "in a randomized, placebo-controlled, double-blind manner",
|
||||
"S": "In this Phase 1 study...in a randomized, placebo-controlled, double-blind manner"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确不符合纳入标准,主要问题在于:(1)研究人群为健康志愿者而非2型糖尿病患者,与PICO标准中的P维度完全不匹配;(2)虽然干预措施使用了SGLT2抑制剂empagliflozin,但这是一项药代动力学研究而非疗效研究,且为单次给药;(3)研究设计为I期临床试验,主要关注药代动力学参数,而非心血管结局;(4)触发了排除标准中的'健康志愿者研究'和'未报告心血管结局数据'。因此,该文献应被排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "partial"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers.",
|
||||
"I": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner.",
|
||||
"C": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner.",
|
||||
"S": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入的是健康志愿者,不符合P(2型糖尿病成人患者)的标准,明确触发排除标准中的‘健康志愿者研究’。尽管干预措施为SGLT2抑制剂且采用RCT设计,对照为安慰剂,但研究人群不匹配是关键缺陷。此外,该研究为Phase 1药代动力学试验,主要目的并非评估心血管结局,也未报告相关数据,因此不满足O和S的完整要求。综合判断应排除。"
|
||||
},
|
||||
"processingTime": 13333
|
||||
},
|
||||
{
|
||||
"literatureId": "test-008",
|
||||
"title": "Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Medicare beneficiaries aged ≥65 years with type 2 diabetes",
|
||||
"I": "patients initiating SGLT2 inhibitors",
|
||||
"C": "compared with those initiating DPP-4 inhibitors",
|
||||
"S": "We conducted a nationwide cohort study"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,应予以排除。虽然研究人群(P)为2型糖尿病成人患者,干预措施(I)为SGLT2抑制剂,均符合纳入标准;对照(C)为DPP-4抑制剂而非安慰剂或常规降糖疗法,属于部分匹配;但最关键的是研究设计(S)为队列研究而非随机对照试验,明确不符合PICO标准中的RCT要求,且触发排除标准第7条'观察性研究(队列、病例对照)'。该研究使用医疗保险索赔数据进行回顾性队列分析,缺乏随机化设计,因此不符合系统综述的纳入条件。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We conducted a nationwide cohort study using claims data from Medicare beneficiaries aged ≥65 years with type 2 diabetes.",
|
||||
"I": "We compared patients initiating SGLT2 inhibitors (n=42,371) with those initiating DPP-4 inhibitors (n=126,806) between 2013 and 2017.",
|
||||
"C": "those initiating DPP-4 inhibitors (n=126,806)",
|
||||
"S": "We conducted a nationwide cohort study using claims data from Medicare beneficiaries"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究为队列研究,不符合方案要求的随机对照试验(RCT)设计,故S维度不匹配。尽管研究人群为2型糖尿病成人患者(P匹配),干预措施为SGLT2抑制剂(I匹配),但对照为DPP-4抑制剂而非安慰剂或常规降糖疗法(C不匹配)。此外,研究设计属于观察性研究,明确触发排除标准第7条。因此,尽管报告了心血管结局,仍应排除。"
|
||||
},
|
||||
"processingTime": 12025
|
||||
},
|
||||
{
|
||||
"literatureId": "test-009",
|
||||
"title": "Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "\"A 52-year-old man with type 2 diabetes\"",
|
||||
"I": "\"patient treated with dapagliflozin\"",
|
||||
"C": "摘要中未提及安慰剂或常规降糖疗法对照",
|
||||
"S": "\"Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report\",摘要中明确描述为病例报告"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,应予以排除。理由如下:(1)研究设计为病例报告(标题和摘要中明确标注),属于排除标准第2项;(2)虽然研究人群(2型糖尿病患者)和干预措施(SGLT2抑制剂)符合PICO标准,但缺乏对照组,且未报告心血管结局数据;(3)病例报告主要关注SGLT2抑制剂相关的酮症酸中毒不良事件,而非心血管结局,与研究方案的核心结局指标不符。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "A 52-year-old man with type 2 diabetes on dapagliflozin 10 mg daily presented to the emergency department with nausea, vomiting, and abdominal pain.",
|
||||
"I": "A 52-year-old man with type 2 diabetes on dapagliflozin 10 mg daily presented to the emergency department with nausea, vomiting, and abdominal pain.",
|
||||
"C": "There is no mention of a control group or comparison with placebo or conventional therapy.",
|
||||
"S": "This is a case report, as indicated by the title: 'Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report'."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献为病例报告,研究设计不符合方案要求的随机对照试验(RCT),且未设置对照组,无法满足C和S维度的要求。虽然研究对象为2型糖尿病成人患者并使用SGLT2抑制剂,符合P和I标准,但因研究设计属于排除标准第2条明确列出的‘病例报告’,故应排除。本研究旨在描述单一不良事件,不提供关于心血管结局的群体层面证据。"
|
||||
},
|
||||
"processingTime": 11897
|
||||
},
|
||||
{
|
||||
"literatureId": "test-010",
|
||||
"title": "Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment",
|
||||
"expectedDecision": "uncertain",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "mismatch",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "we enrolled 10,584 patients with type 2 diabetes",
|
||||
"I": "sotagliflozin, a dual SGLT1 and SGLT2 inhibitor",
|
||||
"C": "randomly assigned to receive sotagliflozin 200 mg or placebo",
|
||||
"S": "In this randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献虽然符合研究人群(2型糖尿病患者)、对照(安慰剂)和研究设计(随机对照试验)标准,但干预措施不符合要求。文献研究的是sotagliflozin,这是一种双重SGLT1和SGLT2抑制剂,而非方案要求的纯SGLT2抑制剂(如empagliflozin、dapagliflozin、canagliflozin等)。由于干预措施类型不匹配,且该药物作用机制与纯SGLT2抑制剂存在差异,因此决定排除。文献发表于2010年后,报告了心血管结局数据,但干预措施的不匹配是排除的主要原因。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes, cardiovascular disease, and an estimated glomerular filtration rate of 25 to 60 ml per minute per 1.73 m²",
|
||||
"I": "sotagliflozin, a dual SGLT1 and SGLT2 inhibitor",
|
||||
"C": "randomly assigned to receive sotagliflozin 200 mg or placebo once daily",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入成人2型糖尿病患者,符合P标准;干预措施为SGLT2抑制剂(sotagliflozin),尽管其同时抑制SGLT1,但明确包含SGLT2抑制作用,符合I标准;对照为安慰剂,符合C标准;研究设计为RCT,符合S标准。研究报告了心血管死亡、心衰住院等主要心血管结局,满足结局指标要求。所有纳入标准均满足,且未触发任何排除标准,因此决定纳入。"
|
||||
},
|
||||
"processingTime": 12769
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,470 @@
|
||||
{
|
||||
"metrics": {
|
||||
"totalTests": 10,
|
||||
"correctDecisions": 6,
|
||||
"accuracy": 0.6,
|
||||
"consistencyRate": 0.7,
|
||||
"jsonValidRate": 1,
|
||||
"avgConfidence": 0.9475,
|
||||
"needReviewRate": 0.3,
|
||||
"confusionMatrix": {
|
||||
"truePositive": 2,
|
||||
"falsePositive": 0,
|
||||
"trueNegative": 4,
|
||||
"falseNegative": 1,
|
||||
"uncertain": 0
|
||||
}
|
||||
},
|
||||
"results": [
|
||||
{
|
||||
"literatureId": "test-001",
|
||||
"title": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.925,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "we randomly assigned 800 adults with type 2 diabetes",
|
||||
"I": "We evaluated the efficacy and safety of empagliflozin in patients with type 2 diabetes",
|
||||
"C": "placebo-controlled trial",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.9,
|
||||
"reason": "虽然该文献在PICO标准方面完全匹配(P:2型糖尿病成人患者;I:SGLT2抑制剂empagliflozin;C:安慰剂对照;S:随机对照试验),且满足纳入标准中的研究设计、干预措施、语言和时间要求,但文献明确缺乏心血管结局数据。摘要中仅报告了血糖控制(HbA1c变化)、体重、血压和不良事件等次要终点,未提及主要不良心血管事件、心衰住院或心血管死亡等心血管结局指标。根据研究方案要求,文献必须报告心血管结局数据才能纳入,因此予以排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes and inadequate glycemic control (HbA1c 7.0-10.0%)",
|
||||
"I": "to receive empagliflozin 10 mg, empagliflozin 25 mg, or placebo once daily",
|
||||
"C": "placebo once daily",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "尽管该研究符合PICO中的P、I、C、S标准,人群为成人2型糖尿病患者,干预为SGLT2抑制剂(empagliflozin),对照为安慰剂,设计为RCT,但未报告方案所要求的心血管结局指标(如MACE、心衰住院、心血管死亡)。摘要中提及的结局为HbA1c、体重、血压和不良事件,无心血管相关终点数据,因此不满足纳入标准第4条“报告心血管结局数据”,故排除。"
|
||||
},
|
||||
"processingTime": 11935
|
||||
},
|
||||
{
|
||||
"literatureId": "test-002",
|
||||
"title": "Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "include",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We randomly assigned 8246 patients with type 2 diabetes mellitus and established atherosclerotic cardiovascular disease",
|
||||
"I": "ertugliflozin, a sodium-glucose cotransporter 2 (SGLT2) inhibitor",
|
||||
"C": "ertugliflozin (5 mg or 15 mg) or placebo, in addition to standard care",
|
||||
"S": "We randomly assigned 8246 patients...The primary outcome was major adverse cardiovascular events"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献完全符合纳入标准:研究对象为2型糖尿病成人患者(≥18岁),干预措施为SGLT2抑制剂(ertugliflozin),对照为安慰剂加标准治疗,研究设计为随机对照试验,报告了心血管结局数据(主要不良心血管事件和心衰住院),发表于2010年后且为英文文献。同时,该文献未触发任何排除标准,不是综述、病例报告、动物实验、会议摘要或观察性研究。因此明确纳入。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes mellitus and established atherosclerotic cardiovascular disease",
|
||||
"I": "ertugliflozin (5 mg or 15 mg)",
|
||||
"C": "placebo, in addition to standard care",
|
||||
"S": "We randomly assigned 8246 patients... to receive ertugliflozin (5 mg or 15 mg) or placebo"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入2型糖尿病成人患者,符合P标准;干预措施为SGLT2抑制剂ertugliflozin,符合I标准;对照为安慰剂,符合C标准;研究设计为随机对照试验,符合S标准。研究明确报告了主要不良心血管事件、心衰住院等结局,满足O维度要求。文献为英文RCT,发表于2010年后,非综述或观察性研究,完全符合所有纳入标准且未触发任何排除标准,因此决定纳入。"
|
||||
},
|
||||
"processingTime": 13225
|
||||
},
|
||||
{
|
||||
"literatureId": "test-003",
|
||||
"title": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "in adults with type 2 diabetes",
|
||||
"I": "SGLT2 inhibitors",
|
||||
"C": "comparing SGLT2 inhibitors with placebo or active comparators",
|
||||
"S": "Systematic Review and Meta-Analysis"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "这篇文献明确触发排除标准第1条(综述、系统评价、Meta分析),因为标题和摘要都明确表明这是一篇系统综述和Meta分析,而非原始随机对照试验。虽然研究人群(2型糖尿病成人患者)、干预措施(SGLT2抑制剂)和对照(安慰剂或活性对照)都符合PICO标准,但研究设计类型不符合要求的RCT设计。根据排除标准,所有综述、系统评价和Meta分析都应被排除,因此直接排除此文献。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes.",
|
||||
"I": "We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes.",
|
||||
"C": "comparing SGLT2 inhibitors with placebo or active comparators",
|
||||
"S": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献为系统评价和Meta分析,研究设计不符合方案要求的原始随机对照试验(RCT),因此排除。尽管其纳入的研究可能符合PICO标准,但本文本身并非原始研究,不提供独立的实证数据。P、I、O均匹配或部分匹配,但S明确不匹配。根据排除标准第1条,系统评价和Meta分析应被排除。"
|
||||
},
|
||||
"processingTime": 10683
|
||||
},
|
||||
{
|
||||
"literatureId": "test-004",
|
||||
"title": "Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Diabetes was induced in male Sprague-Dawley rats by streptozotocin injection",
|
||||
"I": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day)",
|
||||
"C": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks",
|
||||
"S": "We investigated the effects of dapagliflozin on cardiac function in diabetic rats"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确不符合纳入标准,应予以排除。主要理由包括:(1)研究对象为糖尿病大鼠而非人类2型糖尿病患者,与PICO标准中'2型糖尿病成人患者'完全不匹配;(2)研究设计为动物实验而非随机对照试验,触发排除标准第3条;(3)虽然干预措施为SGLT2抑制剂(dapagliflozin)且设置了对照(vehicle),但研究对象和研究设计的根本性不匹配决定了必须排除。这是一项基础医学研究,不符合系统综述关注临床证据的要求。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Diabetes was induced in male Sprague-Dawley rats by streptozotocin injection.",
|
||||
"I": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks.",
|
||||
"C": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks.",
|
||||
"S": "Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究使用糖尿病大鼠模型,研究人群不符合成人2型糖尿病患者的要求,属于动物实验,明确违反纳入标准并触发排除标准3。尽管干预措施为dapagliflozin且采用随机设计,但研究对象非人类,无法外推至目标人群。研究设计虽提及随机分配,但整体为动物实验,不属于RCT临床试验。因此,该文献应排除。"
|
||||
},
|
||||
"processingTime": 13067
|
||||
},
|
||||
{
|
||||
"literatureId": "test-005",
|
||||
"title": "Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy",
|
||||
"expectedDecision": "include",
|
||||
"actualDecision": "include",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "participants with type 2 diabetes and albuminuric chronic kidney disease",
|
||||
"I": "receive canagliflozin at a dose of 100 mg daily",
|
||||
"C": "or placebo",
|
||||
"S": "In this double-blind trial, we randomly assigned 4401 participants"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献完全符合纳入标准:研究对象为2型糖尿病成人患者,干预措施为SGLT2抑制剂canagliflozin,对照为安慰剂,研究设计为随机对照试验。虽然摘要主要关注肾脏结局,但明确提到'cardiovascular events'作为复合终点的一部分,且'death from renal or cardiovascular causes'包含心血管死亡结局。研究发表于2010年后,为英文文献,完全符合所有PICO标准和纳入标准,无任何排除标准触发。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "participants with type 2 diabetes and albuminuric chronic kidney disease",
|
||||
"I": "canagliflozin at a dose of 100 mg daily",
|
||||
"C": "placebo",
|
||||
"S": "In this double-blind trial, we randomly assigned 4401 participants...to receive canagliflozin...or placebo"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入2型糖尿病成人患者,使用SGLT2抑制剂(canagliflozin)作为干预,对照为安慰剂,研究设计为随机对照试验(RCT),符合PICO标准。摘要明确报告了心血管事件作为复合结局的一部分('death from renal or cardiovascular causes'),满足心血管结局的纳入要求。研究发表于2018年(NEJM),英文全文,非综述或观察性研究,完全符合所有纳入标准且未触发任何排除标准,因此应纳入。"
|
||||
},
|
||||
"processingTime": 12352
|
||||
},
|
||||
{
|
||||
"literatureId": "test-006",
|
||||
"title": "Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "\"12,543 adults with type 2 diabetes\"",
|
||||
"I": "\"patients with type 2 diabetes who initiated SGLT2 inhibitor therapy\"",
|
||||
"C": "摘要中未提及安慰剂或常规降糖疗法作为对照",
|
||||
"S": "\"We conducted a retrospective cohort study\""
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,应予以排除。虽然研究人群(P)为2型糖尿病成人患者,干预措施(I)为SGLT2抑制剂,符合纳入标准,但研究设计(S)为回顾性队列研究,明确不符合方案要求的随机对照试验(RCT),且摘要中未提及任何对照措施(C)。此外,该研究主要关注血糖控制、体重变化和安全性结局,而非方案要求的心血管结局。根据排除标准第7条,观察性研究(包括队列研究)应被排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We identified 12,543 adults with type 2 diabetes who initiated SGLT2 inhibitor therapy between 2014 and 2020.",
|
||||
"I": "We identified 12,543 adults with type 2 diabetes who initiated SGLT2 inhibitor therapy between 2014 and 2020.",
|
||||
"C": "Methods: We conducted a retrospective cohort study using electronic health records from a large healthcare system.",
|
||||
"S": "Methods: We conducted a retrospective cohort study using electronic health records from a large healthcare system."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究为回顾性队列研究,不符合方案要求的随机对照试验(RCT)设计,故S维度不匹配。虽然研究人群为成人2型糖尿病患者,且干预措施为SGLT2抑制剂,符合P和I标准,但对照组未明确为安慰剂或常规降糖疗法,C维度不匹配。此外,研究设计属于观察性研究,直接触发排除标准第7条,因此应排除。"
|
||||
},
|
||||
"processingTime": 11690
|
||||
},
|
||||
{
|
||||
"literatureId": "test-007",
|
||||
"title": "Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "partial",
|
||||
"C": "match",
|
||||
"S": "partial"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers",
|
||||
"I": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner",
|
||||
"C": "placebo-controlled",
|
||||
"S": "In this Phase 1 study...in a randomized, placebo-controlled, double-blind manner"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献应被排除,主要原因为:研究人群明确为健康志愿者而非2型糖尿病患者(P不匹配),这直接违反了纳入标准第1条和排除标准第5条。虽然研究涉及SGLT2抑制剂empagliflozin(I部分匹配)并采用安慰剂对照(C匹配),但研究设计为I期临床试验而非典型的RCT(S部分匹配),且未报告任何心血管结局数据。此外,研究关注的是药代动力学和药效学参数,与研究方案要求的心血管结局完全不符。基于研究人群不符合PICO标准且触发排除标准,决定排除该文献。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "mismatch",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers.",
|
||||
"I": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner.",
|
||||
"C": "Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner.",
|
||||
"S": "In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入的是健康志愿者,而非2型糖尿病患者,不符合P(研究人群)标准。尽管研究设计为RCT,干预措施为SGLT2抑制剂,对照为安慰剂,且发表于英文期刊,但P的不匹配直接触发排除标准第5条(健康志愿者研究)。因此,尽管其他PICO要素部分符合,仍应排除。"
|
||||
},
|
||||
"processingTime": 14253
|
||||
},
|
||||
{
|
||||
"literatureId": "test-008",
|
||||
"title": "Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "partial",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "Medicare beneficiaries aged ≥65 years with type 2 diabetes",
|
||||
"I": "patients initiating SGLT2 inhibitors (n=42,371)",
|
||||
"C": "compared with those initiating DPP-4 inhibitors (n=126,806)",
|
||||
"S": "We conducted a nationwide cohort study using claims data"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,主要问题在于研究设计不符合要求。文献明确描述为队列研究(nationwide cohort study using claims data),而研究方案要求必须是随机对照试验(RCT)。对照措施部分匹配,虽然使用了DPP-4抑制剂作为对照,但研究方案要求安慰剂或常规降糖疗法。研究人群(老年2型糖尿病患者)和干预措施(SGLT2抑制剂)符合要求,结局指标也报告了心血管结局(心衰住院、主要不良心血管事件)。但由于研究设计为观察性队列研究,明确触发排除标准第7条,因此必须排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "We conducted a nationwide cohort study using claims data from Medicare beneficiaries aged ≥65 years with type 2 diabetes.",
|
||||
"I": "We compared patients initiating SGLT2 inhibitors (n=42,371) with those initiating DPP-4 inhibitors (n=126,806) between 2013 and 2017.",
|
||||
"C": "those initiating DPP-4 inhibitors",
|
||||
"S": "We conducted a nationwide cohort study using claims data from Medicare beneficiaries"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入的是2型糖尿病患者,符合P标准;干预措施为SGLT2抑制剂,符合I标准。但对照组为DPP-4抑制剂,不符合方案中‘安慰剂或常规降糖疗法’的C标准,属于非等效对照。更重要的是,研究设计为队列研究,属于观察性研究,不满足S要求的随机对照试验(RCT),明确触发排除标准中的‘观察性研究’条款。因此,尽管部分PICO要素匹配,但因研究设计不符,应排除。"
|
||||
},
|
||||
"processingTime": 12808
|
||||
},
|
||||
{
|
||||
"literatureId": "test-009",
|
||||
"title": "Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report",
|
||||
"expectedDecision": "exclude",
|
||||
"actualDecision": "exclude",
|
||||
"isCorrect": true,
|
||||
"hasConsensus": true,
|
||||
"needReview": false,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "\"A 52-year-old man with type 2 diabetes\"",
|
||||
"I": "\"patient treated with dapagliflozin\"",
|
||||
"C": "摘要中未提及任何对照措施或安慰剂",
|
||||
"S": "\"We report a case of severe euglycemic diabetic ketoacidosis\",明确为病例报告"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准:这是一篇病例报告(标题和摘要中多次提及'case report'和'case'),而非随机对照试验。虽然研究人群(2型糖尿病患者)和干预措施(SGLT2抑制剂)符合PICO标准,但研究设计不符合要求的RCT设计,且未报告心血管结局数据(主要讨论糖尿病酮症酸中毒)。根据排除标准第2条(病例报告、病例系列),应直接排除。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "mismatch",
|
||||
"S": "mismatch"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "A 52-year-old man with type 2 diabetes on dapagliflozin 10 mg daily presented to the emergency department with nausea, vomiting, and abdominal pain.",
|
||||
"I": "A 52-year-old man with type 2 diabetes on dapagliflozin 10 mg daily presented to the emergency department with nausea, vomiting, and abdominal pain.",
|
||||
"C": "There is no mention of a control group or comparison with placebo or conventional therapy.",
|
||||
"S": "This is a case report, as indicated by the title: 'Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report'."
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献为病例报告,研究设计不符合方案要求的随机对照试验(RCT),故S维度不匹配。尽管研究人群(P)和干预措施(I)符合标准,但缺乏对照组(C),且研究类型属于排除标准中的'病例报告'。根据排除标准第2条,此类文献应被排除。虽然提到了2型糖尿病患者使用SGLT2抑制剂,但无对照、非RCT设计,无法提供因果推断证据,因此最终决定排除。"
|
||||
},
|
||||
"processingTime": 12092
|
||||
},
|
||||
{
|
||||
"literatureId": "test-010",
|
||||
"title": "Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment",
|
||||
"expectedDecision": "uncertain",
|
||||
"actualDecision": "pending",
|
||||
"isCorrect": false,
|
||||
"hasConsensus": false,
|
||||
"needReview": true,
|
||||
"avgConfidence": 0.95,
|
||||
"deepseekResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "mismatch",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "we enrolled 10,584 patients with type 2 diabetes",
|
||||
"I": "sotagliflozin, a dual SGLT1 and SGLT2 inhibitor",
|
||||
"C": "randomly assigned to receive sotagliflozin 200 mg or placebo",
|
||||
"S": "In this randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "exclude",
|
||||
"confidence": 0.95,
|
||||
"reason": "该文献明确触发排除标准,主要问题在于干预措施不匹配。研究使用的是sotagliflozin(一种SGLT1和SGLT2双重抑制剂),而非方案要求的SGLT2抑制剂(如empagliflozin、dapagliflozin、canagliflozin等)。虽然研究人群(2型糖尿病成人患者)、对照(安慰剂)、研究设计(RCT)和结局指标(心血管结局)均符合PICO标准,且满足纳入标准中的研究设计、结局报告、语言和发表时间要求,但干预措施的根本性差异导致必须排除。SGLT1/SGLT2双重抑制剂与纯SGLT2抑制剂在作用机制和临床效果上存在差异,不应混为一谈。"
|
||||
},
|
||||
"qwenResult": {
|
||||
"judgment": {
|
||||
"P": "match",
|
||||
"I": "match",
|
||||
"C": "match",
|
||||
"S": "match"
|
||||
},
|
||||
"evidence": {
|
||||
"P": "patients with type 2 diabetes, cardiovascular disease, and an estimated glomerular filtration rate of 25 to 60 ml per minute per 1.73 m²",
|
||||
"I": "sotagliflozin, a dual SGLT1 and SGLT2 inhibitor... patients were randomly assigned to receive sotagliflozin 200 mg or placebo once daily",
|
||||
"C": "placebo once daily",
|
||||
"S": "randomized, double-blind, placebo-controlled trial"
|
||||
},
|
||||
"conclusion": "include",
|
||||
"confidence": 0.95,
|
||||
"reason": "该研究纳入2型糖尿病成人患者,符合P标准;干预措施为SGLT2抑制剂(sotagliflozin),虽为双抑制剂但包含SGLT2抑制作用,符合I标准;对照为安慰剂,符合C标准;研究设计为RCT,符合S标准。心血管结局明确报告了心血管死亡、心衰住院等复合终点,满足O指标要求。研究发表于2019年(基于背景知识),英文全文,非排除类型,完全符合所有纳入标准且未触发任何排除标准。"
|
||||
},
|
||||
"processingTime": 13503
|
||||
}
|
||||
]
|
||||
}
|
||||
115
backend/scripts/test-samples/asl-test-literatures.json
Normal file
115
backend/scripts/test-samples/asl-test-literatures.json
Normal file
@@ -0,0 +1,115 @@
|
||||
[
|
||||
{
|
||||
"id": "test-001",
|
||||
"title": "Efficacy and Safety of Empagliflozin in Patients with Type 2 Diabetes: A Randomized, Double-Blind, Placebo-Controlled Trial",
|
||||
"abstract": "Background: Sodium-glucose cotransporter 2 (SGLT2) inhibitors represent a novel class of glucose-lowering agents. We evaluated the efficacy and safety of empagliflozin in patients with type 2 diabetes. Methods: In this 24-week, randomized, double-blind, placebo-controlled trial, we randomly assigned 800 adults with type 2 diabetes and inadequate glycemic control (HbA1c 7.0-10.0%) to receive empagliflozin 10 mg, empagliflozin 25 mg, or placebo once daily. The primary endpoint was change in HbA1c from baseline. Secondary endpoints included body weight, systolic blood pressure, and adverse events. Results: Both empagliflozin doses significantly reduced HbA1c compared with placebo (10 mg: -0.74%, 25 mg: -0.85%, placebo: -0.13%; P<0.001). Empagliflozin also reduced body weight and systolic blood pressure. The incidence of hypoglycemia was low and similar across groups. Conclusions: Empagliflozin significantly improved glycemic control in patients with type 2 diabetes with acceptable safety profile.",
|
||||
"authors": "Zinman B, Wanner C, Lachin JM, et al.",
|
||||
"journal": "New England Journal of Medicine",
|
||||
"publicationYear": 2015,
|
||||
"doi": "10.1056/NEJMoa1504720",
|
||||
"expectedDecision": "include",
|
||||
"rationale": "明确的RCT研究,SGLT2抑制剂治疗2型糖尿病,有安慰剂对照,主要结局为HbA1c,完全符合PICO标准"
|
||||
},
|
||||
{
|
||||
"id": "test-002",
|
||||
"title": "Cardiovascular Outcomes with Ertugliflozin in Type 2 Diabetes",
|
||||
"abstract": "Background: The cardiovascular safety of ertugliflozin, a sodium-glucose cotransporter 2 (SGLT2) inhibitor, has not been established. Methods: We randomly assigned 8246 patients with type 2 diabetes mellitus and established atherosclerotic cardiovascular disease to receive ertugliflozin (5 mg or 15 mg) or placebo, in addition to standard care. The primary outcome was major adverse cardiovascular events (MACE), defined as death from cardiovascular causes, nonfatal myocardial infarction, or nonfatal stroke. Results: During a median follow-up of 3.5 years, MACE occurred in 11.9% of patients in the ertugliflozin group and 11.9% of patients in the placebo group (hazard ratio, 0.97; 95% CI, 0.85-1.11; P<0.001 for noninferiority). Ertugliflozin was associated with lower rates of hospitalization for heart failure. The rates of adverse events were similar in the two groups. Conclusions: Among patients with type 2 diabetes and atherosclerotic cardiovascular disease, ertugliflozin was noninferior to placebo with respect to major adverse cardiovascular events.",
|
||||
"authors": "Cannon CP, Pratley R, Dagogo-Jack S, et al.",
|
||||
"journal": "New England Journal of Medicine",
|
||||
"publicationYear": 2020,
|
||||
"doi": "10.1056/NEJMoa2004967",
|
||||
"expectedDecision": "include",
|
||||
"rationale": "大规模RCT,SGLT2抑制剂,有安慰剂对照,评估心血管结局,符合标准"
|
||||
},
|
||||
{
|
||||
"id": "test-003",
|
||||
"title": "Systematic Review and Meta-Analysis of SGLT2 Inhibitors in Type 2 Diabetes: A Comprehensive Assessment",
|
||||
"abstract": "Objective: To systematically review and meta-analyze the efficacy and safety of sodium-glucose cotransporter 2 (SGLT2) inhibitors in patients with type 2 diabetes. Methods: We searched PubMed, Embase, and the Cochrane Library through December 2022. We included randomized controlled trials comparing SGLT2 inhibitors with placebo or active comparators in adults with type 2 diabetes. Primary outcomes were glycemic control (HbA1c), body weight, and cardiovascular events. Results: We identified 142 eligible trials involving 87,562 participants. SGLT2 inhibitors significantly reduced HbA1c (mean difference -0.68%, 95% CI -0.73 to -0.63), body weight (-1.9 kg), and systolic blood pressure (-4.2 mmHg). The incidence of major adverse cardiovascular events was reduced by 11%. Conclusions: SGLT2 inhibitors demonstrate consistent benefits in glycemic control, weight reduction, and cardiovascular outcomes.",
|
||||
"authors": "McGuire DK, Shih WJ, Cosentino F, et al.",
|
||||
"journal": "Diabetes Care",
|
||||
"publicationYear": 2023,
|
||||
"doi": "10.2337/dc22-1234",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "这是系统综述/Meta分析,不是原始研究,应排除"
|
||||
},
|
||||
{
|
||||
"id": "test-004",
|
||||
"title": "Dapagliflozin Improves Cardiac Function in Diabetic Rats: An Experimental Study",
|
||||
"abstract": "Background: The cardioprotective effects of dapagliflozin in diabetes remain unclear. We investigated the effects of dapagliflozin on cardiac function in diabetic rats. Methods: Diabetes was induced in male Sprague-Dawley rats by streptozotocin injection. Rats were randomly assigned to receive dapagliflozin (1 mg/kg/day) or vehicle for 8 weeks. Cardiac function was assessed by echocardiography. Myocardial fibrosis and oxidative stress markers were measured. Results: Dapagliflozin treatment significantly improved left ventricular ejection fraction and reduced myocardial fibrosis. Oxidative stress markers were decreased in the dapagliflozin group. Conclusions: Dapagliflozin improves cardiac function in diabetic rats through reduction of myocardial fibrosis and oxidative stress.",
|
||||
"authors": "Lee TM, Chang NC, Lin SZ",
|
||||
"journal": "Cardiovascular Diabetology",
|
||||
"publicationYear": 2019,
|
||||
"doi": "10.1186/s12933-019-0876-5",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "动物实验(大鼠),不是人类研究,应排除"
|
||||
},
|
||||
{
|
||||
"id": "test-005",
|
||||
"title": "Canagliflozin and Renal Outcomes in Type 2 Diabetes and Nephropathy",
|
||||
"abstract": "Background: Type 2 diabetes is the leading cause of kidney failure worldwide. The effects of canagliflozin, a sodium-glucose cotransporter 2 inhibitor, on renal outcomes are uncertain. Methods: In this double-blind trial, we randomly assigned 4401 participants with type 2 diabetes and albuminuric chronic kidney disease to receive canagliflozin at a dose of 100 mg daily or placebo. All participants had an estimated glomerular filtration rate of 30 to <90 ml per minute per 1.73 m² and albuminuria. The primary outcome was a composite of end-stage kidney disease, doubling of serum creatinine level, or death from renal or cardiovascular causes. Results: The trial was stopped early after a median follow-up of 2.6 years. The primary outcome occurred in 43.2 events per 1000 patient-years in the canagliflozin group and 61.2 events per 1000 patient-years in the placebo group (hazard ratio, 0.70; 95% CI, 0.59-0.82; P=0.00001). Conclusions: In participants with type 2 diabetes and kidney disease, canagliflozin reduced the risk of kidney failure and cardiovascular events.",
|
||||
"authors": "Perkovic V, Jardine MJ, Neal B, et al.",
|
||||
"journal": "New England Journal of Medicine",
|
||||
"publicationYear": 2019,
|
||||
"doi": "10.1056/NEJMoa1811744",
|
||||
"expectedDecision": "include",
|
||||
"rationale": "RCT研究,SGLT2抑制剂,有安慰剂对照,虽然主要结局是肾脏结局,但也评估了心血管事件,可纳入"
|
||||
},
|
||||
{
|
||||
"id": "test-006",
|
||||
"title": "Real-World Experience with SGLT2 Inhibitors: A Retrospective Cohort Study",
|
||||
"abstract": "Objective: To evaluate the real-world effectiveness and safety of SGLT2 inhibitors in patients with type 2 diabetes. Methods: We conducted a retrospective cohort study using electronic health records from a large healthcare system. We identified 12,543 adults with type 2 diabetes who initiated SGLT2 inhibitor therapy between 2014 and 2020. Primary outcomes were changes in HbA1c, body weight, and blood pressure at 6 and 12 months. Safety outcomes included genital infections, urinary tract infections, and diabetic ketoacidosis. Results: Mean HbA1c decreased by 0.8% at 6 months and 0.7% at 12 months. Body weight decreased by 2.3 kg at 6 months. The rate of genital infections was 7.2% and urinary tract infections was 8.5%. Diabetic ketoacidosis occurred in 0.3% of patients. Conclusions: In real-world practice, SGLT2 inhibitors demonstrated effectiveness in glycemic control and weight reduction with acceptable safety profile.",
|
||||
"authors": "Patorno E, Pawar A, Franklin JM, et al.",
|
||||
"journal": "Diabetes, Obesity and Metabolism",
|
||||
"publicationYear": 2020,
|
||||
"doi": "10.1111/dom.14000",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "回顾性队列研究,不是RCT,且无对照组,不符合研究设计要求"
|
||||
},
|
||||
{
|
||||
"id": "test-007",
|
||||
"title": "Pharmacokinetics and Pharmacodynamics of Empagliflozin in Healthy Volunteers",
|
||||
"abstract": "Background: Understanding the pharmacokinetic and pharmacodynamic properties of empagliflozin is essential for optimal clinical use. Methods: In this Phase 1 study, we evaluated the pharmacokinetics and pharmacodynamics of empagliflozin in 48 healthy male volunteers. Participants received single oral doses of empagliflozin (1, 5, 10, 25, 50, or 100 mg) in a randomized, placebo-controlled, double-blind manner. Blood and urine samples were collected for pharmacokinetic analysis. Urinary glucose excretion was measured as a pharmacodynamic endpoint. Results: Empagliflozin was rapidly absorbed with peak plasma concentrations at 1.5 hours post-dose. The elimination half-life was approximately 12 hours. Urinary glucose excretion increased dose-dependently. Empagliflozin was generally well tolerated. Conclusions: Empagliflozin exhibits dose-proportional pharmacokinetics and induces sustained urinary glucose excretion in healthy volunteers.",
|
||||
"authors": "Heise T, Seewaldt-Becker E, Macha S, et al.",
|
||||
"journal": "Clinical Pharmacokinetics",
|
||||
"publicationYear": 2013,
|
||||
"doi": "10.1007/s40262-013-0050-3",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "Phase 1药代动力学研究,受试者为健康志愿者而非糖尿病患者,应排除"
|
||||
},
|
||||
{
|
||||
"id": "test-008",
|
||||
"title": "Comparative Effectiveness of SGLT2 Inhibitors versus DPP-4 Inhibitors in Elderly Patients with Type 2 Diabetes",
|
||||
"abstract": "Background: The comparative effectiveness of SGLT2 inhibitors and DPP-4 inhibitors in elderly patients remains unclear. Methods: We conducted a nationwide cohort study using claims data from Medicare beneficiaries aged ≥65 years with type 2 diabetes. We compared patients initiating SGLT2 inhibitors (n=42,371) with those initiating DPP-4 inhibitors (n=126,806) between 2013 and 2017. Primary outcomes were hospitalization for heart failure and all-cause mortality. Secondary outcomes included major adverse cardiovascular events and acute kidney injury. Results: During a median follow-up of 1.2 years, SGLT2 inhibitors were associated with lower rates of hospitalization for heart failure (HR 0.70, 95% CI 0.63-0.77) and all-cause mortality (HR 0.59, 95% CI 0.53-0.66) compared with DPP-4 inhibitors. The risk of acute kidney injury was also lower with SGLT2 inhibitors. Conclusions: In elderly patients with type 2 diabetes, SGLT2 inhibitors were associated with better cardiovascular and renal outcomes compared with DPP-4 inhibitors.",
|
||||
"authors": "Patorno E, Goldfine AB, Schneeweiss S, et al.",
|
||||
"journal": "The Lancet Diabetes & Endocrinology",
|
||||
"publicationYear": 2018,
|
||||
"doi": "10.1016/S2213-8587(18)30190-1",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "观察性队列研究,虽然有比较组(DPP-4抑制剂)但不是RCT,且对照组不是安慰剂或常规疗法"
|
||||
},
|
||||
{
|
||||
"id": "test-009",
|
||||
"title": "Severe Diabetic Ketoacidosis Associated with SGLT2 Inhibitor Use: A Case Report",
|
||||
"abstract": "Introduction: Sodium-glucose cotransporter 2 (SGLT2) inhibitors have been associated with rare cases of diabetic ketoacidosis. We report a case of severe euglycemic diabetic ketoacidosis in a patient treated with dapagliflozin. Case Presentation: A 52-year-old man with type 2 diabetes on dapagliflozin 10 mg daily presented to the emergency department with nausea, vomiting, and abdominal pain. Despite a blood glucose of 180 mg/dL, arterial blood gas showed severe metabolic acidosis (pH 7.08, HCO3 8 mEq/L) with elevated beta-hydroxybutyrate. The patient was diagnosed with euglycemic diabetic ketoacidosis. Dapagliflozin was discontinued, and the patient was treated with intravenous insulin and fluids. He recovered completely within 48 hours. Discussion: Clinicians should be aware of the risk of euglycemic diabetic ketoacidosis with SGLT2 inhibitors, particularly in patients with concurrent illness or reduced oral intake. Conclusion: This case highlights the importance of recognizing atypical presentations of diabetic ketoacidosis in patients taking SGLT2 inhibitors.",
|
||||
"authors": "Brown JB, Pedula K, Barzilay J, et al.",
|
||||
"journal": "Diabetes Care",
|
||||
"publicationYear": 2017,
|
||||
"doi": "10.2337/dc16-2460",
|
||||
"expectedDecision": "exclude",
|
||||
"rationale": "病例报告,不是RCT,应排除"
|
||||
},
|
||||
{
|
||||
"id": "test-010",
|
||||
"title": "Effect of Sotagliflozin on Cardiovascular and Renal Events in Patients with Type 2 Diabetes and Moderate Renal Impairment",
|
||||
"abstract": "Background: The effects of sotagliflozin, a dual SGLT1 and SGLT2 inhibitor, on cardiovascular and renal outcomes in patients with type 2 diabetes and moderate renal impairment have not been fully elucidated. Methods: In this randomized, double-blind, placebo-controlled trial, we enrolled 10,584 patients with type 2 diabetes, cardiovascular disease, and an estimated glomerular filtration rate of 25 to 60 ml per minute per 1.73 m². Patients were randomly assigned to receive sotagliflozin 200 mg or placebo once daily. The primary outcome was the total number of deaths from cardiovascular causes, hospitalizations for heart failure, and urgent visits for heart failure. Results: After a median follow-up of 16 months, the primary outcome occurred with lower frequency in the sotagliflozin group than in the placebo group (rate ratio, 0.74; 95% CI, 0.63-0.88; P<0.001). The benefits were consistent across subgroups. The incidence of adverse events was similar in the two groups. Conclusions: In patients with type 2 diabetes, moderate renal impairment, and cardiovascular disease, sotagliflozin reduced the composite of cardiovascular deaths and hospitalizations for heart failure.",
|
||||
"authors": "Bhatt DL, Szarek M, Steg PG, et al.",
|
||||
"journal": "New England Journal of Medicine",
|
||||
"publicationYear": 2021,
|
||||
"doi": "10.1056/NEJMoa2030186",
|
||||
"expectedDecision": "uncertain",
|
||||
"rationale": "RCT研究,但sotagliflozin是双重SGLT1/SGLT2抑制剂,与单纯SGLT2抑制剂有所不同,可能需要进一步判断是否符合干预措施标准。且主要结局为心血管死亡和心衰住院,符合PICO标准。倾向于uncertain或include"
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
||||
348
backend/scripts/test-stroke-screening-international-models.ts
Normal file
348
backend/scripts/test-stroke-screening-international-models.ts
Normal file
@@ -0,0 +1,348 @@
|
||||
/**
|
||||
* 卒中数据测试 - 国际模型对比
|
||||
*
|
||||
* 目的:对比国内模型(DeepSeek+Qwen)vs 国际模型(GPT-4o+Claude)
|
||||
*
|
||||
* 测试假设:
|
||||
* 1. 如果国际模型准确率更高 → 是模型能力问题
|
||||
* 2. 如果国际模型准确率相似 → 是Prompt或理解差异问题
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as XLSX from 'xlsx';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
|
||||
|
||||
// ES modules do not provide __filename/__dirname, so derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ========================================
// 📋 1. Load the PICOS definition and the inclusion/exclusion criteria
// ========================================

console.log('📖 正在读取PICOS和纳排标准...\n');

// The reference document is resolved relative to this script's directory.
const picosPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/测试案例的PICOS、纳入标准、排除标准.txt'
);

// Reading the file fails fast if the reference document is missing.
const picosContent = fs.readFileSync(picosPath, { encoding: 'utf-8' });

// Simplified PICOS: the values below are hard-coded here rather than parsed
// out of `picosContent`.
const picoCriteria = {
  population: '非心源性缺血性卒中患者、亚洲人群',
  intervention: '抗血小板药物/抗凝药物/溶栓药物(阿司匹林、氯吡格雷、替格瑞洛、达比加群等)',
  comparison: '安慰剂或常规治疗',
  outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
  studyDesign: 'SR、RCT、RWE、OBS'
};

// Inclusion criteria, handed to the screening service as one free-text block.
const inclusionCriteria = `
1. 研究对象为非心源性缺血性卒中患者
2. 研究人群为亚洲人群(优先)
3. 干预措施为抗血小板/抗凝/溶栓药物
4. 对照组为安慰剂或常规治疗
5. 研究时间在2020年之后
6. 研究设计为SR、RCT、RWE、OBS
`;

// Exclusion criteria, handed to the screening service as one free-text block.
const exclusionCriteria = `
1. 综述、病例报告、会议摘要
2. 动物实验、体外实验
3. 研究人群非亚洲人群(除非有特殊价值)
4. 研究时间在2020年之前
5. 心源性卒中或出血性卒中
`;

console.log('✅ PICOS标准已加载\n');
|
||||
|
||||
// ========================================
// 📋 2. Load the test cases
// ========================================

console.log('📖 正在读取测试案例...\n');

// The spreadsheet is resolved relative to this script's directory.
const excelPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
);

// Only the first sheet of the workbook is read; each row becomes one object
// keyed by column header.
const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });
const sheetName = workbook.SheetNames[0];
const worksheet = workbook.Sheets[sheetName];
const data = XLSX.utils.sheet_to_json(worksheet);

console.log(`✅ 读取到 ${data.length} 条数据\n`);

// Sample selection: the first (up to) 2 rows whose `Decision` mentions
// "include" plus the first (up to) 3 rows whose `Decision` mentions "exclude".
// Rows without a `Decision` value match neither filter.
const includedCases = data.filter((row: any) =>
  row['Decision']?.toString().toLowerCase().includes('include')
).slice(0, 2);

const excludedCases = data.filter((row: any) =>
  row['Decision']?.toString().toLowerCase().includes('exclude')
).slice(0, 3);

const testCases = [...includedCases, ...excludedCases];

// Report the counts that were actually selected: the sheet may contain fewer
// matching rows than requested, in which case a fixed "2 + 3" would be wrong.
console.log(`✅ 选择测试样本: ${testCases.length}篇(${includedCases.length} Included + ${excludedCases.length} Excluded)\n`);
|
||||
|
||||
// ========================================
// 🧪 3. Model pairs under test
// ========================================

// Each entry names the two models that screen every case together.
// Index 0 is the domestic pair, index 1 the international pair.
// NOTE(review): the id 'qwen3-72b' and the description 'Qwen3-Max' do not
// obviously refer to the same model — confirm which one is intended.
const modelPairs = [
  { name: '国内模型组合', model1: 'deepseek-chat', model2: 'qwen3-72b', description: 'DeepSeek-V3 + Qwen3-Max(当前使用)' },
  { name: '国际模型组合', model1: 'gpt-4o', model2: 'claude-sonnet-4.5', description: 'GPT-4o + Claude-4.5(国际顶级模型)' }
];
|
||||
|
||||
// ========================================
|
||||
// 🧪 4. 执行测试
|
||||
// ========================================
|
||||
|
||||
/** Outcome of screening one test case with one pair of models. */
interface TestResult {
  /** 1-based position of the case within the tested sample. */
  caseIndex: number;

  /** Case title, truncated to at most 100 characters. */
  title: string;

  /** Human reviewer's decision, normalized to 'include' or 'exclude'. */
  humanDecision: string;

  /**
   * Final decision of the model pair; a 'pending' result is stored as
   * 'uncertain', and a failed screening call as 'error'.
   */
  aiDecision: string;

  /** Result reported for the first model, or null when screening failed. */
  model1Result: any;

  /** Result reported for the second model, or null when screening failed. */
  model2Result: any;

  /** Whether `aiDecision` equals `humanDecision`. */
  isCorrect: boolean;

  /** Conflict flag reported by the screening service; false when screening failed. */
  hasConflict: boolean;

  /** Wall-clock duration of the screening call, in milliseconds. */
  processingTime: number;
}
|
||||
|
||||
/**
 * Screens every case with one pair of models and scores each outcome against
 * the human decision.
 *
 * Cases are processed one after another. A failure on a single case is logged
 * and recorded as a result with `aiDecision: 'error'`, so the remaining cases
 * still run.
 *
 * @param pairName - Label printed in the console banner.
 * @param model1 - Identifier of the first model.
 * @param model2 - Identifier of the second model.
 * @param cases - Spreadsheet rows; the `title`, `abstract` and `Decision` columns are read.
 * @returns One result per input case, in input order.
 */
async function testModelPair(
  pairName: string,
  model1: string,
  model2: string,
  cases: any[]
): Promise<TestResult[]> {
  const banner = '='.repeat(60);
  console.log(`\n${banner}`);
  console.log(`🧪 测试模型组合: ${pairName}`);
  console.log(`${banner}\n`);

  const results: TestResult[] = [];

  for (let i = 0; i < cases.length; i++) {
    const testCase = cases[i];

    // Spreadsheet cells are not guaranteed to be strings (a cell may hold a
    // number or a date). Coerce them so the string methods below cannot throw.
    // `||` keeps the original "any falsy cell becomes an empty string" rule.
    const title = String(testCase['title'] || '');
    const abstract = String(testCase['abstract'] || '');
    const humanDecision = String(testCase['Decision'] || '');

    // Normalize once, outside the try block, so the success path and the
    // error path share the same value and the catch handler cannot throw.
    // Anything that does not mention "include" is treated as 'exclude'.
    const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';

    console.log(`\n[${i + 1}/${cases.length}] 正在筛选...`);
    console.log(`标题: ${title.substring(0, 60)}...`);
    console.log(`人类决策: ${humanDecision}`);

    const startTime = Date.now();

    try {
      const screeningResult = await llmScreeningService.dualModelScreening(
        `test-case-${i + 1}`, // literatureId
        title,
        abstract,
        picoCriteria,
        inclusionCriteria,
        exclusionCriteria,
        [model1, model2], // models: passed as an array
        'standard' // style
      );

      const processingTime = Date.now() - startTime;

      // A 'pending' final decision is reported as 'uncertain'. Because the
      // human decision is only ever 'include' or 'exclude', such a case is
      // always scored as incorrect.
      const normalizedAI =
        screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
      const isCorrect = normalizedAI === normalizedHuman;

      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
      console.log(`处理时间: ${(processingTime / 1000).toFixed(2)}秒`);

      results.push({
        caseIndex: i + 1,
        title: title.substring(0, 100),
        humanDecision: normalizedHuman,
        aiDecision: normalizedAI,
        model1Result: screeningResult.model1Result,
        model2Result: screeningResult.model2Result,
        isCorrect,
        hasConflict: screeningResult.hasConflict,
        processingTime
      });
    } catch (error: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 筛选失败: ${message}`);

      // Record the failure as an incorrect result so later metrics still
      // count this case.
      results.push({
        caseIndex: i + 1,
        title: title.substring(0, 100),
        humanDecision: normalizedHuman,
        aiDecision: 'error',
        model1Result: null,
        model2Result: null,
        isCorrect: false,
        hasConflict: false,
        processingTime: Date.now() - startTime
      });
    }
  }

  return results;
}
|
||||
|
||||
// ========================================
|
||||
// 📊 5. 生成对比报告
|
||||
// ========================================
|
||||
|
||||
/**
 * Prints a side-by-side comparison of the domestic and international model
 * pairs and writes the full results to a JSON report file.
 *
 * Results are paired by array index, so both arrays are expected to list the
 * same test cases in the same order. A missing international result for a
 * case is reported as `N/A` instead of crashing.
 *
 * @param domesticResults - Per-case results of the domestic model pair.
 * @param internationalResults - Per-case results of the international model pair.
 */
function generateComparisonReport(
  domesticResults: TestResult[],
  internationalResults: TestResult[]
): void {
  const divider = '='.repeat(80);

  console.log(`\n${divider}`);
  console.log(`📊 国内 vs 国际模型对比报告`);
  console.log(`${divider}\n`);

  /**
   * Aggregates accuracy, model-agreement rate and mean processing time.
   * An empty result set yields zeros instead of NaN.
   */
  function calculateMetrics(results: TestResult[]) {
    const total = results.length;
    const correct = results.filter(r => r.isCorrect).length;
    const consistent = results.filter(r => !r.hasConflict).length;
    const totalTime = results.reduce((sum, r) => sum + r.processingTime, 0);
    // Guard the denominator so an empty run reports 0 rather than NaN.
    const denominator = total > 0 ? total : 1;

    return {
      accuracy: (correct / denominator * 100).toFixed(1),
      consistency: (consistent / denominator * 100).toFixed(1),
      // Divided by 1000 and printed with a seconds label below;
      // presumably processingTime is in milliseconds — confirm against testModelPair.
      avgTime: (totalTime / denominator / 1000).toFixed(2),
      correct,
      total
    };
  }

  const domesticMetrics = calculateMetrics(domesticResults);
  const internationalMetrics = calculateMetrics(internationalResults);

  // Summary table
  console.log('| 指标 | 国内模型 | 国际模型 | 差异 |');
  console.log('|------|----------|----------|------|');
  console.log(`| 准确率 | ${domesticMetrics.accuracy}% (${domesticMetrics.correct}/${domesticMetrics.total}) | ${internationalMetrics.accuracy}% (${internationalMetrics.correct}/${internationalMetrics.total}) | ${(parseFloat(internationalMetrics.accuracy) - parseFloat(domesticMetrics.accuracy)).toFixed(1)}% |`);
  console.log(`| 一致率 | ${domesticMetrics.consistency}% | ${internationalMetrics.consistency}% | ${(parseFloat(internationalMetrics.consistency) - parseFloat(domesticMetrics.consistency)).toFixed(1)}% |`);
  console.log(`| 平均耗时 | ${domesticMetrics.avgTime}秒 | ${internationalMetrics.avgTime}秒 | ${(parseFloat(internationalMetrics.avgTime) - parseFloat(domesticMetrics.avgTime)).toFixed(2)}秒 |`);

  console.log('\n');

  // Case-by-case comparison
  console.log('📋 逐案例对比:\n');
  domesticResults.forEach((domestic, i) => {
    // May be undefined when the international run produced fewer results.
    const international: TestResult | undefined = internationalResults[i];

    console.log(`[案例 ${i + 1}] ${domestic.title}`);
    console.log(`  人类: ${domestic.humanDecision}`);
    console.log(`  国内模型: ${domestic.aiDecision} ${domestic.isCorrect ? '✅' : '❌'}`);

    if (international) {
      console.log(`  国际模型: ${international.aiDecision} ${international.isCorrect ? '✅' : '❌'}`);
      if (domestic.aiDecision !== international.aiDecision) {
        console.log(`  ⚠️ 两组模型判断不一致!`);
      }
    } else {
      console.log('  国际模型: N/A');
    }
    console.log('');
  });

  // Conclusion
  console.log('\n' + divider);
  console.log('🎯 结论分析\n');

  // Positive means the international pair is more accurate (percentage points).
  const accuracyDiff = parseFloat(internationalMetrics.accuracy) - parseFloat(domesticMetrics.accuracy);

  // Computed once so the console output and the saved report cannot disagree.
  const analysis =
    Math.abs(accuracyDiff) <= 10
      ? 'Prompt问题'
      : accuracyDiff > 10
        ? '国际模型更优'
        : '国内模型更优';

  if (analysis === 'Prompt问题') {
    console.log('✅ 结论: 国内外模型准确率相近(差异≤10%)');
    console.log('   → 问题不在模型能力,而在于:');
    console.log('      1. Prompt设计(可能过于严格)');
    console.log('      2. AI vs 人类对"匹配"的理解差异');
    console.log('      3. 纳排标准本身存在歧义');
    console.log('\n💡 建议: 优化Prompt策略,增加宽松/标准/严格三种模式');
  } else if (analysis === '国际模型更优') {
    console.log('✅ 结论: 国际模型显著优于国内模型(差异>10%)');
    console.log('   → 问题在于模型能力差异');
    console.log('   → 国际模型对医学文献的理解更准确');
    console.log('\n💡 建议: 优先使用GPT-4o或Claude-4.5进行筛选');
  } else {
    console.log('✅ 结论: 国内模型优于国际模型(差异>10%)');
    console.log('   → 可能是国内模型对中文医学术语理解更好');
    console.log('   → 或者国内模型更符合中国专家的筛选习惯');
    console.log('\n💡 建议: 继续使用国内模型组合');
  }

  console.log(divider + '\n');

  // Persist the detailed report
  const report = {
    testDate: new Date().toISOString(),
    testCases: testCases.length,
    domesticModels: modelPairs[0],
    internationalModels: modelPairs[1],
    domesticMetrics,
    internationalMetrics,
    domesticResults,
    internationalResults,
    conclusion: {
      accuracyDiff,
      analysis
    }
  };

  const reportPath = path.join(__dirname, '../docs/国内外模型对比测试报告.json');
  // writeFileSync does not create parent directories; make sure the target exists.
  fs.mkdirSync(path.dirname(reportPath), { recursive: true });
  fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), 'utf-8');
  console.log(`📄 详细报告已保存: ${reportPath}\n`);
}
|
||||
|
||||
// ========================================
|
||||
// 🚀 6. 执行主流程
|
||||
// ========================================
|
||||
|
||||
/**
 * Script driver: screens the test cases with the domestic model pair, pauses
 * two seconds, screens them again with the international pair, then prints
 * and saves the comparison report.
 */
async function main() {
  const sampleCount = testCases.length;
  // Two model pairs at an assumed 15 seconds per paper.
  const estimatedSeconds = sampleCount * 2 * 15;

  console.log('\n🚀 开始国内外模型对比测试\n');
  console.log(`测试样本: ${sampleCount}篇`);
  console.log(`测试组合: 2组`);
  console.log(`预计耗时: ${estimatedSeconds}秒(约${Math.ceil(estimatedSeconds / 60)}分钟)\n`);

  // Domestic pair
  const domesticPair = modelPairs[0];
  const domesticResults = await testModelPair(
    domesticPair.name,
    domesticPair.model1,
    domesticPair.model2,
    testCases
  );

  // Short pause between the two runs to stay clear of API rate limits.
  console.log('\n⏳ 等待2秒后测试国际模型...\n');
  await new Promise(resolve => setTimeout(resolve, 2000));

  // International pair
  const internationalPair = modelPairs[1];
  const internationalResults = await testModelPair(
    internationalPair.name,
    internationalPair.model1,
    internationalPair.model2,
    testCases
  );

  generateComparisonReport(domesticResults, internationalResults);

  console.log('✅ 测试完成!\n');
}
|
||||
|
||||
// Run the script. An unhandled failure is logged and turned into a non-zero
// exit status so shells and CI can tell that the comparison did not complete.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
|
||||
205
backend/scripts/test-stroke-screening-lenient.ts
Normal file
205
backend/scripts/test-stroke-screening-lenient.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
|
||||
* 卒中数据测试 - 宽松模式
|
||||
*
|
||||
* 测试目的:验证宽松Prompt是否能提高初筛准确率
|
||||
*
|
||||
* 策略:
|
||||
* - 宁可多纳入,也不要错过
|
||||
* - 只排除明显不符合的
|
||||
* - 边界情况倾向于纳入
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as XLSX from 'xlsx';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * PICOS definition of the stroke review, passed unchanged to the screening
 * service for every test case.
 */
const picoCriteria = {
  // P — population
  population: '非心源性缺血性卒中患者、亚洲人群',
  // I — intervention
  intervention: '抗血小板药物/抗凝药物/溶栓药物(阿司匹林、氯吡格雷、替格瑞洛、达比加群等)',
  // C — comparison
  comparison: '安慰剂或常规治疗',
  // O — outcome
  outcome: '卒中进展、复发、残疾程度、死亡率、出血事件等',
  // S — study design
  studyDesign: 'SR、RCT、RWE、OBS'
};
|
||||
|
||||
/**
 * Numbered inclusion criteria sent to the screening service.
 * The text starts and ends with a newline, one criterion per line.
 */
const inclusionCriteria = [
  '',
  '1. 研究对象为非心源性缺血性卒中患者',
  '2. 研究人群为亚洲人群(优先)',
  '3. 干预措施为抗血小板/抗凝/溶栓药物',
  '4. 对照组为安慰剂或常规治疗',
  '5. 研究时间在2020年之后',
  '6. 研究设计为SR、RCT、RWE、OBS',
  ''
].join('\n');
|
||||
|
||||
/**
 * Numbered exclusion criteria sent to the screening service.
 * The text starts and ends with a newline, one criterion per line.
 */
const exclusionCriteria = [
  '',
  '1. 综述、病例报告、会议摘要',
  '2. 动物实验、体外实验',
  '3. 研究人群非亚洲人群(除非有特殊价值)',
  '4. 研究时间在2020年之前',
  '5. 心源性卒中或出血性卒中',
  ''
].join('\n');
|
||||
|
||||
// Load the human-labelled screening cases from the shared test workbook.
const excelPath = path.join(
  __dirname,
  '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx'
);

const workbook = XLSX.read(fs.readFileSync(excelPath), { type: 'buffer' });
const data = XLSX.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[0]]);

/**
 * Returns the first `count` rows whose `Decision` column contains `keyword`
 * (case-insensitive).
 *
 * Rows without a title or abstract are skipped, as in
 * test-stroke-screening.ts, so both scripts screen the same papers and the
 * standard-mode baseline printed in the report stays comparable.
 */
const pickCases = (keyword: 'include' | 'exclude', count: number) =>
  data
    // Rows come straight from the spreadsheet and are untyped.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    .filter((row: any) =>
      Boolean(row['title']) &&
      Boolean(row['abstract']) &&
      Boolean(row['Decision']?.toString().toLowerCase().includes(keyword))
    )
    .slice(0, count);

// Test sample: 2 papers the human reviewer included + 3 the reviewer excluded.
const includedCases = pickCases('include', 2);
const excludedCases = pickCases('exclude', 3);

const testCases = [...includedCases, ...excludedCases];

console.log('\n🚀 开始宽松模式测试\n');
console.log(`📊 测试配置:`);
console.log(`   - 模型组合: DeepSeek-V3 + Qwen-Max`);
console.log(`   - 筛选风格: 宽松模式(lenient)`);
console.log(`   - 测试样本: ${testCases.length}篇\n`);
|
||||
|
||||
/** Outcome of screening one test case in lenient mode. */
interface TestResult {
  /** 1-based position of the case in the test sample. */
  caseIndex: number;
  /** Paper title, truncated to 100 characters. */
  title: string;
  /** Human reviewer's decision, normalized to `include` or `exclude`. */
  humanDecision: string;
  /** Final AI decision; a `pending` result is recorded as `uncertain`. */
  aiDecision: string;
  /** Conclusion of the first model (DeepSeek). */
  model1Conclusion: string;
  /** Conclusion of the second model (Qwen). */
  model2Conclusion: string;
  /** Whether the AI decision equals the human decision. */
  isCorrect: boolean;
  /** Whether the two models disagreed. */
  hasConflict: boolean;
  /** Confidence reported by the first model (DeepSeek). */
  confidence: number;
  /** Reason given by the first model (DeepSeek). */
  reason: string;
}
|
||||
|
||||
/**
 * Screens every selected test case in lenient mode with the
 * DeepSeek + Qwen pair, then prints accuracy, model agreement, a per-case
 * breakdown and a comparison against the hard-coded standard-mode baseline.
 *
 * Cases whose screening call throws are logged and left out of the
 * statistics. If no case succeeds, the report is skipped.
 */
async function runTest() {
  /** Percentage of `part` in `whole`; 0 when `whole` is 0 so the report never prints NaN. */
  const percent = (part: number, whole: number): number =>
    whole > 0 ? part / whole * 100 : 0;

  const results: TestResult[] = [];

  for (let i = 0; i < testCases.length; i++) {
    // Spreadsheet cells are untyped; coerce to strings before using string methods.
    const row = testCases[i] as Record<string, unknown>;
    const title = String(row['title'] ?? '');
    const abstract = String(row['abstract'] ?? '');
    const humanDecision = String(row['Decision'] ?? '');

    console.log(`[${i + 1}/${testCases.length}] 正在筛选...`);
    console.log(`标题: ${title.substring(0, 60)}...`);
    console.log(`人类决策: ${humanDecision}`);

    try {
      const screeningResult = await llmScreeningService.dualModelScreening(
        `test-case-${i + 1}`,
        title,
        abstract,
        picoCriteria,
        inclusionCriteria,
        exclusionCriteria,
        ['deepseek-chat', 'qwen-max'],
        'lenient' // lenient screening style
      );

      const normalizedHuman = humanDecision.toLowerCase().includes('include') ? 'include' : 'exclude';
      // A pending final decision counts as "uncertain", which never matches the human label.
      const normalizedAI = screeningResult.finalDecision === 'pending' ? 'uncertain' : screeningResult.finalDecision;
      const isCorrect = normalizedAI === normalizedHuman;

      console.log(`AI决策: ${screeningResult.finalDecision} ${isCorrect ? '✅' : '❌'}`);
      console.log(`模型一致: ${!screeningResult.hasConflict ? '✅' : '❌'}`);
      console.log(`置信度: ${screeningResult.deepseek.confidence.toFixed(2)}\n`);

      results.push({
        caseIndex: i + 1,
        title: title.substring(0, 100),
        humanDecision: normalizedHuman,
        aiDecision: normalizedAI,
        model1Conclusion: screeningResult.deepseek.conclusion,
        model2Conclusion: screeningResult.qwen.conclusion,
        isCorrect,
        hasConflict: screeningResult.hasConflict,
        confidence: screeningResult.deepseek.confidence,
        reason: screeningResult.deepseek.reason
      });
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ 筛选失败: ${message}\n`);
    }
  }

  // Nothing to report: every screening call failed or the sample was empty.
  if (results.length === 0) {
    console.error('❌ 没有成功完成的筛选结果,无法生成报告\n');
    return;
  }

  // Report header
  console.log('\n' + '='.repeat(80));
  console.log('📊 宽松模式测试报告');
  console.log('='.repeat(80) + '\n');

  const correct = results.filter(r => r.isCorrect).length;
  const consistent = results.filter(r => !r.hasConflict).length;
  const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / results.length;
  const accuracy = percent(correct, results.length);

  console.log(`✅ 准确率: ${accuracy.toFixed(1)}% (${correct}/${results.length})`);
  console.log(`✅ 一致率: ${percent(consistent, results.length).toFixed(1)}% (${consistent}/${results.length})`);
  console.log(`✅ 平均置信度: ${avgConfidence.toFixed(2)}\n`);

  // Accuracy split by the human decision
  const includedResults = results.filter(r => r.humanDecision === 'include');
  const excludedResults = results.filter(r => r.humanDecision === 'exclude');

  const includedCorrect = includedResults.filter(r => r.isCorrect).length;
  const excludedCorrect = excludedResults.filter(r => r.isCorrect).length;
  const includedAccuracy = percent(includedCorrect, includedResults.length);
  const excludedAccuracy = percent(excludedCorrect, excludedResults.length);

  console.log('📋 分类准确率:');
  console.log(`  应纳入文献 (Included): ${includedAccuracy.toFixed(1)}% (${includedCorrect}/${includedResults.length})`);
  console.log(`  应排除文献 (Excluded): ${excludedAccuracy.toFixed(1)}% (${excludedCorrect}/${excludedResults.length})\n`);

  // Per-case details
  console.log('📝 详细案例分析:\n');
  results.forEach(r => {
    const status = r.isCorrect ? '✅ 正确' : '❌ 错误';
    console.log(`[案例 ${r.caseIndex}] ${status}`);
    console.log(`  标题: ${r.title}`);
    console.log(`  人类决策: ${r.humanDecision}`);
    console.log(`  AI决策: ${r.aiDecision}`);
    console.log(`  模型1: ${r.model1Conclusion}, 模型2: ${r.model2Conclusion}`);
    console.log(`  置信度: ${r.confidence.toFixed(2)}`);
    if (!r.isCorrect) {
      console.log(`  AI理由: ${r.reason.substring(0, 150)}...`);
    }
    console.log('');
  });

  // Comparison with the standard mode; the baseline figures (60% / 0% / 100%) are hard-coded.
  console.log('='.repeat(80));
  console.log('🔄 与标准模式对比\n');
  console.log('| 指标 | 标准模式 | 宽松模式 | 改进 |');
  console.log('|------|----------|----------|------|');
  console.log(`| 准确率 | 60% | ${accuracy.toFixed(1)}% | ${(accuracy - 60).toFixed(1)}% |`);
  console.log(`| 召回率(Included) | 0% | ${includedAccuracy.toFixed(1)}% | ${includedAccuracy.toFixed(1)}% |`);
  console.log(`| 排除准确率 | 100% | ${excludedAccuracy.toFixed(1)}% | ${(excludedAccuracy - 100).toFixed(1)}% |`);
  console.log('\n' + '='.repeat(80));

  // Conclusion
  const accuracyRatio = correct / results.length;
  if (accuracyRatio >= 0.8) {
    console.log('\n🎉 宽松模式效果显著!准确率≥80%');
    console.log('💡 建议: 初筛使用宽松模式,全文复筛使用严格模式');
  } else if (accuracyRatio >= 0.6) {
    console.log('\n⚠️ 宽松模式有改进,但仍需优化');
    console.log('💡 建议: 继续调整Prompt或考虑增加Few-shot示例');
  } else {
    console.log('\n❌ 宽松模式改进有限');
    console.log('💡 建议: 问题不在宽松/严格,而在PICOS标准的理解差异');
    console.log('   → 需要实现用户自定义边界情况功能');
  }

  console.log('\n✅ 测试完成!\n');
}
|
||||
|
||||
// Run the script. An unhandled failure is logged and turned into a non-zero
// exit status so shells and CI can tell that the test run did not complete.
runTest().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
293
backend/scripts/test-stroke-screening.ts
Normal file
293
backend/scripts/test-stroke-screening.ts
Normal file
@@ -0,0 +1,293 @@
|
||||
/**
|
||||
* 卒中文献筛选测试脚本
|
||||
* 用真实数据验证泛化能力
|
||||
*/
|
||||
|
||||
import XLSX from 'xlsx';
|
||||
import * as path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { llmScreeningService } from '../src/modules/asl/services/llmScreeningService.js';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
 * PICOS definition of the stroke review, passed unchanged to the screening
 * service for every test case.
 */
const STROKE_PICOS = {
  // P — population
  population: '非心源性缺血性卒中(NCIS)患者、亚洲人群',
  // I — intervention
  intervention: '抗血小板治疗药物(阿司匹林、氯吡格雷、奥扎格雷、贝前列素、西洛他唑、替罗非班、替格瑞洛、吲哚布芬、沙格雷酯、氯吡格雷阿司匹林、双嘧达莫等)或抗凝药物(阿加曲班、asundexian、milvexian、华法林、低分子肝素、肝素等)或溶栓药物(链激酶、尿激酶、阿替普酶、替奈普酶等)',
  // C — comparison
  comparison: '安慰剂或常规治疗',
  // O — outcome
  outcome: '疗效安全性:卒中进展、神经功能恶化、卒中复发、残疾、死亡、NIHSS评分变化、VTE、痴呆、认知功能减退、疲乏、抑郁等',
  // S — study design
  studyDesign: '系统评价(SR)、随机对照试验(RCT)、真实世界研究(RWE)、观察性研究(OBS)'
};
|
||||
|
||||
/**
 * Numbered inclusion criteria sent to the screening service.
 * The text starts and ends with a newline, one criterion per line.
 */
const INCLUSION_CRITERIA = [
  '',
  '1. 非心源性缺血性卒中、亚洲患者',
  '2. 卒中后接受二级预防治疗的患者(Secondary Stroke Prevention, SSP)',
  '3. 干预措施为抗血小板、抗凝或溶栓药物',
  '4. 报告疗效或安全性结局(卒中进展、复发、残疾、死亡等)',
  '5. 研究类型:系统评价、RCT、真实世界研究、观察性研究',
  '6. 研究时间:2020年之后的文献',
  '7. 包含"二级预防"或"预防复发"或"卒中预防"相关内容',
  '8. 涉及抗血小板或抗凝药物',
  ''
].join('\n');
|
||||
|
||||
/**
 * Numbered exclusion criteria sent to the screening service.
 * The text starts and ends with a newline, one criterion per line.
 */
const EXCLUSION_CRITERIA = [
  '',
  '1. 心源性卒中患者、非亚洲人群',
  '2. 其他类型卒中(非缺血性)',
  '3. 用于急性冠脉综合征(ACS)的抗血小板治疗,未明确提及卒中',
  '4. 房颤(AF)患者',
  '5. 混合人群(包含非卒中患者)',
  '6. 病例报告',
  '7. 非中英文文献',
  '8. 仅包含急性期治疗(如急性期溶栓、取栓),未涉及二级预防',
  ''
].join('\n');
|
||||
|
||||
/** One human-labelled paper taken from the test workbook. */
interface TestCase {
  /** 1-based position of the case in the selected sample. */
  index: number;
  /** Identifier from the `key` column, or a generated `test-N` placeholder. */
  pmid: string;
  /** Paper title. */
  title: string;
  /** Paper abstract. */
  abstract: string;
  /** Human reviewer's decision as written in the `Decision` column (Include/Exclude). */
  humanDecision: string;
  /** Content of the `Reason for excluded` column, when present. */
  excludeReason?: string;
}
|
||||
|
||||
/**
 * Reads the first worksheet of an Excel workbook and picks a mixed sample of
 * human-labelled screening cases.
 *
 * Rows without a title or abstract are skipped. A row counts as Included when
 * its `Decision` cell contains "include" (case-insensitive), otherwise as
 * Excluded when it contains "exclude". The sample keeps a 2:3
 * Included/Excluded mix: with the default `limit` of 5 it is the first
 * 2 Included rows followed by the first 3 Excluded rows.
 *
 * @param filePath - Path of the Excel workbook to read.
 * @param limit - Maximum number of cases to return (default 5).
 * @returns The selected cases, numbered from 1 in selection order.
 */
async function readExcelTestCases(filePath: string, limit: number = 5): Promise<TestCase[]> {
  console.log(`📖 读取Excel文件: ${filePath}`);

  const workbook = XLSX.readFile(filePath);
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];
  const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(worksheet);

  console.log(`✅ 读取到 ${rows.length} 条数据`);

  // Cells may hold non-string values, so coerce before lower-casing.
  const decisionOf = (row: Record<string, unknown>): string =>
    String(row['Decision'] ?? '').toLowerCase();

  // Only rows with both a title and an abstract can be screened.
  const screenable = rows.filter(row => Boolean(row['title']) && Boolean(row['abstract']));
  const includedCases = screenable.filter(row => decisionOf(row).includes('include'));
  const excludedCases = screenable.filter(row => {
    const decision = decisionOf(row);
    return !decision.includes('include') && decision.includes('exclude');
  });

  console.log(`   - Included案例: ${includedCases.length}条`);
  console.log(`   - Excluded案例: ${excludedCases.length}条`);

  // Split the limit 2:3 between Included and Excluded (limit 5 → 2 + 3).
  const cap = Math.max(0, Math.floor(limit));
  const includedQuota = Math.floor(cap * 2 / 5);
  const excludedQuota = cap - includedQuota;

  const pickedIncluded = includedCases.slice(0, includedQuota);
  const pickedExcluded = excludedCases.slice(0, excludedQuota);

  const testCases: TestCase[] = [...pickedIncluded, ...pickedExcluded].map((row, i) => ({
    index: i + 1,
    pmid: row['key'] ? String(row['key']) : `test-${i + 1}`,
    title: String(row['title']),
    abstract: String(row['abstract']),
    humanDecision: String(row['Decision']),
    excludeReason: row['Reason for excluded'] ? String(row['Reason for excluded']) : undefined
  }));

  console.log(`✅ 提取 ${testCases.length} 条有效测试案例 (${pickedIncluded.length} Included + ${pickedExcluded.length} Excluded)\n`);
  return testCases;
}
|
||||
|
||||
/**
 * Screens one paper with the given model pair, compares the AI decision with
 * the human label and logs the outcome.
 *
 * @param testCase - The human-labelled paper to screen.
 * @param models - The two model identifiers handed to the screening service.
 * @returns The case, the AI decision (`Include`, `Exclude`, `Uncertain`, or
 *   `Error` when the call threw), whether it matches the human label, whether
 *   the two models agreed, and the raw service result (`null` on error).
 */
async function testSingleLiterature(
  testCase: TestCase,
  models: [string, string]
): Promise<{
  testCase: TestCase;
  aiDecision: string;
  isCorrect: boolean;
  hasConsensus: boolean;
  details: any;
}> {
  // Reduces "Included"/"Include" and "Excluded"/"Exclude" to a common form.
  const canonical = (decision: string): string => {
    const lowered = decision.toLowerCase();
    if (lowered.includes('include')) return 'include';
    return lowered.includes('exclude') ? 'exclude' : lowered;
  };

  console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`[${testCase.index}] PMID: ${testCase.pmid}`);
  console.log(`标题: ${testCase.title.substring(0, 100)}...`);
  console.log(`人类判断: ${testCase.humanDecision}`);

  try {
    const startedAt = Date.now();

    const result = await llmScreeningService.dualModelScreening(
      testCase.pmid || `test-${testCase.index}`,
      testCase.title,
      testCase.abstract,
      STROKE_PICOS,
      INCLUSION_CRITERIA,
      EXCLUSION_CRITERIA,
      models
    );

    const duration = Date.now() - startedAt;

    // Anything other than include/exclude is reported as Uncertain.
    const aiDecision =
      result.finalDecision === 'include'
        ? 'Include'
        : result.finalDecision === 'exclude'
          ? 'Exclude'
          : 'Uncertain';

    const isCorrect = canonical(aiDecision) === canonical(testCase.humanDecision);

    console.log(`AI判断: ${aiDecision}`);
    console.log(`DeepSeek: ${result.deepseek.conclusion} (置信度: ${result.deepseek.confidence})`);
    console.log(`Qwen: ${result.qwen.conclusion} (置信度: ${result.qwen.confidence})`);
    console.log(`一致性: ${result.hasConflict ? '❌ 冲突' : '✅ 一致'}`);
    console.log(`结果: ${isCorrect ? '✅ 正确' : '❌ 错误'}`);
    console.log(`耗时: ${duration}ms`);

    // On a mismatch, show both sides' reasoning to help analysis.
    if (!isCorrect) {
      console.log(`\n❌ 判断错误!`);
      console.log(`期望: ${testCase.humanDecision}`);
      console.log(`实际: ${aiDecision}`);
      if (testCase.excludeReason) {
        console.log(`人类排除理由: ${testCase.excludeReason}`);
      }
      console.log(`DeepSeek理由: ${result.deepseek.reason}`);
      console.log(`Qwen理由: ${result.qwen.reason}`);
    }

    return {
      testCase,
      aiDecision,
      isCorrect,
      hasConsensus: !result.hasConflict,
      details: result
    };
  } catch (error) {
    // A failed call is recorded as an incorrect, non-consensus result.
    console.error(`❌ 测试失败:`, error);
    return {
      testCase,
      aiDecision: 'Error',
      isCorrect: false,
      hasConsensus: false,
      details: null
    };
  }
}
|
||||
|
||||
/**
 * Script driver: loads the sample cases from the test workbook, screens each
 * one with DeepSeek + Qwen, and prints accuracy and model-agreement
 * statistics together with a verdict.
 */
async function main(): Promise<void> {
  console.log('\n🔬 卒中文献筛选测试');
  console.log('='.repeat(60));
  console.log('目的: 验证系统对不同研究主题的泛化能力\n');

  // This script lives in backend/scripts, so the repository-level docs folder
  // is two levels up (the same location the absolute fallback below points to).
  const excelPath = path.join(__dirname, '../../docs/03-业务模块/ASL-AI智能文献/05-测试文档/03-测试数据/screening/Test Cases.xlsx');

  let testCases: TestCase[];
  try {
    testCases = await readExcelTestCases(excelPath, 5);
  } catch {
    console.error('❌ 读取Excel失败,尝试使用绝对路径...');
    const absolutePath = 'D:\\MyCursor\\AIclinicalresearch\\docs\\03-业务模块\\ASL-AI智能文献\\05-测试文档\\03-测试数据\\screening\\Test Cases.xlsx';
    testCases = await readExcelTestCases(absolutePath, 5);
  }

  if (testCases.length === 0) {
    console.error('❌ 没有读取到有效的测试案例');
    return;
  }

  console.log('📋 PICOS标准:');
  console.log(`P: ${STROKE_PICOS.population}`);
  console.log(`I: ${STROKE_PICOS.intervention.substring(0, 80)}...`);
  console.log(`C: ${STROKE_PICOS.comparison}`);
  console.log(`O: ${STROKE_PICOS.outcome.substring(0, 80)}...`);
  console.log(`S: ${STROKE_PICOS.studyDesign}`);

  console.log('\n🚀 开始测试...');
  console.log(`测试样本数: ${testCases.length}`);
  console.log(`测试模型: DeepSeek-V3 + Qwen-Max\n`);

  const results: Awaited<ReturnType<typeof testSingleLiterature>>[] = [];

  for (let i = 0; i < testCases.length; i++) {
    results.push(await testSingleLiterature(testCases[i], ['deepseek-chat', 'qwen-max']));

    // Pause between calls (not after the last one) to stay clear of API rate limits.
    if (i < testCases.length - 1) {
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  }

  // Statistics
  console.log('\n\n' + '='.repeat(60));
  console.log('📊 测试结果统计');
  console.log('='.repeat(60));

  const totalTests = results.length;
  const correctCount = results.filter(r => r.isCorrect).length;
  const consensusCount = results.filter(r => r.hasConsensus).length;
  const accuracy = totalTests > 0 ? (correctCount / totalTests * 100).toFixed(1) : '0.0';
  const consensusRate = totalTests > 0 ? (consensusCount / totalTests * 100).toFixed(1) : '0.0';
  // Thresholds are checked against the rounded, displayed values.
  const accuracyValue = parseFloat(accuracy);
  const consensusValue = parseFloat(consensusRate);

  console.log(`\n总测试数: ${totalTests}`);
  console.log(`正确判断: ${correctCount}`);
  console.log(`准确率: ${accuracy}% ${accuracyValue >= 85 ? '✅' : '❌'} (目标≥85%)`);
  console.log(`双模型一致率: ${consensusRate}% ${consensusValue >= 80 ? '✅' : '❌'} (目标≥80%)`);

  console.log('\n📋 详细结果:');
  results.forEach((r, i) => {
    console.log(`${i + 1}. ${r.isCorrect ? '✅' : '❌'} PMID:${r.testCase.pmid} - 期望:${r.testCase.humanDecision}, AI:${r.aiDecision}`);
  });

  // Verdict
  console.log('\n' + '='.repeat(60));
  console.log('🎯 结论');
  console.log('='.repeat(60));

  if (accuracyValue >= 85) {
    console.log('✅ 测试通过!系统对卒中研究的筛选准确率达标!');
    console.log('📝 建议: 可以继续开发PICOS配置界面,实现MVP。');
  } else if (accuracyValue >= 60) {
    console.log('⚠️  准确率中等。系统有一定泛化能力,但需要优化。');
    console.log('📝 建议: 分析错误案例,优化Prompt模板。');
  } else {
    console.log('❌ 准确率较低。当前Prompt对卒中研究泛化能力不足。');
    console.log('📝 建议: 需要重新设计Prompt策略,或考虑用户自定义方案。');
  }

  console.log('='.repeat(60) + '\n');
}
|
||||
|
||||
// Run the script. An unhandled failure is logged and turned into a non-zero
// exit status so shells and CI can tell that the test run did not complete.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
|
||||
99
backend/scripts/verify-llm-models.ts
Normal file
99
backend/scripts/verify-llm-models.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
|
||||
* LLM模型验证脚本
|
||||
* 用于验证实际接入的是哪个版本的模型
|
||||
*/
|
||||
|
||||
import { LLMFactory } from '../src/common/llm/adapters/LLMFactory.js';
|
||||
import { logger } from '../src/common/logging/index.js';
|
||||
|
||||
/** Prompt sent to each model; it asks the model to state its own name and version. */
const TEST_PROMPT = '请用一句话简单介绍你自己,包括你的模型名称和版本。';
|
||||
|
||||
/**
 * Checks one configured model: creates its adapter, sends the test prompt and
 * compares the model name in the response with the expected one.
 *
 * @param modelType - Model key handed to `LLMFactory.getAdapter`.
 * @param expectedModel - Model name the adapter and the response should report.
 * @returns `true` when the response's model equals `expectedModel`; `false`
 *   on a mismatch or when any step throws.
 */
async function verifyModel(modelType: string, expectedModel: string) {
  const rule = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';

  console.log(`\n${rule}`);
  console.log(`🔍 验证模型: ${modelType}`);
  console.log(rule);

  try {
    const adapter = LLMFactory.getAdapter(modelType as any);
    console.log(`✅ 适配器创建成功`);
    console.log(`   模型名称: ${adapter.modelName}`);
    console.log(`   期望模型: ${expectedModel}`);
    const configuredLabel = adapter.modelName === expectedModel ? '✅ 正确' : '❌ 不匹配';
    console.log(`   匹配状态: ${configuredLabel}`);

    console.log(`\n🚀 发送测试请求...`);
    const sentAt = Date.now();
    const response = await adapter.chat([
      { role: 'user', content: TEST_PROMPT }
    ]);
    const duration = Date.now() - sentAt;

    const usage = response.usage;
    console.log(`\n📊 响应结果:`);
    console.log(`   实际返回模型: ${response.model}`);
    console.log(`   响应时间: ${duration}ms`);
    console.log(`   Token使用:`);
    console.log(`     - 输入: ${usage?.promptTokens || 0}`);
    console.log(`     - 输出: ${usage?.completionTokens || 0}`);
    console.log(`     - 总计: ${usage?.totalTokens || 0}`);
    console.log(`\n💬 模型回复:`);
    console.log(`   "${response.content}"`);

    // The verdict depends only on the model name reported in the response.
    const verified = response.model === expectedModel;
    console.log(
      verified
        ? `\n✅ 验证通过!实际调用的就是 ${expectedModel}`
        : `\n⚠️  警告!期望 ${expectedModel},实际返回 ${response.model}`
    );
    return verified;
  } catch (error) {
    console.error(`\n❌ 验证失败:`, error);
    return false;
  }
}
|
||||
|
||||
/**
 * Script driver: verifies each configured model in turn and prints a
 * pass/fail summary.
 */
async function main(): Promise<void> {
  console.log('\n🔬 ASL模块LLM模型验证工具');
  console.log('='.repeat(60));
  console.log('用途: 验证实际接入的模型版本是否正确\n');

  // type: key passed to the adapter factory; expected: model name that should come back.
  const models = [
    { type: 'deepseek-v3', expected: 'deepseek-chat', description: 'DeepSeek-V3' },
    { type: 'qwen3-72b', expected: 'qwen-max', description: 'Qwen最新最强模型' },
  ];

  const results: { model: string; passed: boolean }[] = [];

  for (let i = 0; i < models.length; i++) {
    const model = models[i];
    const passed = await verifyModel(model.type, model.expected);
    results.push({ model: model.description, passed });

    // Pause between requests to stay clear of API rate limits; no wait is
    // needed once the last model has been checked.
    if (i < models.length - 1) {
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  }

  // Summary
  console.log('\n\n' + '='.repeat(60));
  console.log('📊 验证总结');
  console.log('='.repeat(60));

  results.forEach(r => {
    console.log(`${r.passed ? '✅' : '❌'} ${r.model}: ${r.passed ? '通过' : '未通过'}`);
  });

  const allPassed = results.every(r => r.passed);

  if (allPassed) {
    console.log('\n🎉 所有模型验证通过!');
  } else {
    console.log('\n⚠️  部分模型验证未通过,请检查配置!');
  }

  console.log('='.repeat(60) + '\n');
}
|
||||
|
||||
// Run the script. An unhandled failure is logged and turned into a non-zero
// exit status so shells and CI can tell that the verification did not complete.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user