From 5579ffa78ea0f3ecd965e2b9c7d11861d0d363af Mon Sep 17 00:00:00 2001 From: HaHafeng Date: Sun, 16 Nov 2025 15:43:04 +0800 Subject: [PATCH] feat(backend): add batch processing and review tasks modules - Add Prisma migrations for batch processing tables - Add Prisma migrations for review tasks tables - Add seed data for testing - Add prompt templates for review (editorial, methodology) - Add CloseAI configuration guide - Add database validation scripts --- backend/CLOSEAI-CONFIG.md | 183 ++++++++++ backend/check-api-config.js | 188 ++++++++++ backend/database-validation-simple.sql | 122 +++++++ backend/database-validation.sql | 326 ++++++++++++++++++ .../migration.sql | 151 ++++++++ .../migration.sql | 35 ++ backend/prisma/seed.ts | 109 ++++++ backend/prompts/review_editorial_system.txt | 253 ++++++++++++++ backend/prompts/review_methodology_system.txt | 244 +++++++++++++ 9 files changed, 1611 insertions(+) create mode 100644 backend/CLOSEAI-CONFIG.md create mode 100644 backend/check-api-config.js create mode 100644 backend/database-validation-simple.sql create mode 100644 backend/database-validation.sql create mode 100644 backend/prisma/migrations/20251012124747_add_batch_processing_module/migration.sql create mode 100644 backend/prisma/migrations/20251014120128_add_review_tasks/migration.sql create mode 100644 backend/prisma/seed.ts create mode 100644 backend/prompts/review_editorial_system.txt create mode 100644 backend/prompts/review_methodology_system.txt diff --git a/backend/CLOSEAI-CONFIG.md b/backend/CLOSEAI-CONFIG.md new file mode 100644 index 00000000..8f6f0a42 --- /dev/null +++ b/backend/CLOSEAI-CONFIG.md @@ -0,0 +1,183 @@ +# CloseAI配置更新指南 + +> **目标:** 将CloseAI配置添加到 `.env` 文件 +> **预计时间:** 2分钟 + +--- + +## 🚀 方法一:自动更新(推荐) + +在 `backend` 目录下运行PowerShell脚本: + +```powershell +cd AIclinicalresearch\backend +.\update-env-closeai.ps1 +``` + +脚本会自动将CloseAI配置添加到您的 `.env` 文件末尾。 + +--- + +## ✏️ 方法二:手动更新 + +如果自动脚本不可用,请手动更新: + +### 步骤1:打开 .env 文件 + +用文本编辑器(VS 
Code、记事本等)打开: +``` +AIclinicalresearch\backend\.env +``` + +### 步骤2:在文件末尾添加以下内容 + +```env +# ================================ +# CloseAI配置(代理OpenAI和Claude)⭐ +# ================================ +# CloseAI是一个API代理平台,提供稳定的OpenAI和Claude访问 +# 官网:https://platform.openai-proxy.org + +# 统一API Key(同时用于OpenAI和Claude) +CLOSEAI_API_KEY=sk-your-closeai-api-key + +# OpenAI端点 +CLOSEAI_OPENAI_BASE_URL=https://api.openai-proxy.org/v1 + +# Claude端点 +CLOSEAI_CLAUDE_BASE_URL=https://api.openai-proxy.org/anthropic + +# 支持的模型: +# - OpenAI: gpt-5-pro (最新), gpt-4-turbo-preview, gpt-3.5-turbo +# - Claude: claude-sonnet-4-5-20250929 (最新), claude-3-5-sonnet-20241022 +``` + +### 步骤3:保存文件 + +按 `Ctrl+S` 保存。 + +### 步骤4:重启后端服务 + +```powershell +# 停止当前服务(Ctrl+C) +# 重新启动 +cd backend +npm run dev +``` + +--- + +## ✅ 验证配置 + +启动后端后,检查日志中是否有以下内容: + +``` +✓ CloseAI API Key已配置 +✓ 可使用GPT-5-Pro和Claude-4.5-Sonnet +``` + +--- + +## 📝 完整的 .env 配置参考 + +如果您想查看完整的 `.env` 配置模板,请参考: +- `docs/07-运维文档/02-环境变量配置模板.md` + +--- + +## 🔧 配置说明 + +### CLOSEAI_API_KEY +**值:** `sk-your-closeai-api-key`(占位符) +- 请替换为您自己的CloseAI API Key,切勿将真实Key写入文档或代码仓库 +- 一个Key可同时调用OpenAI和Claude +- 请妥善保管,不要泄露 + +### CLOSEAI_OPENAI_BASE_URL +**值:** `https://api.openai-proxy.org/v1` +- OpenAI API的代理端点 +- 兼容OpenAI SDK标准接口 + +### CLOSEAI_CLAUDE_BASE_URL +**值:** `https://api.openai-proxy.org/anthropic` +- Claude API的代理端点 +- 通过OpenAI SDK格式调用Claude + +--- + +## 🎯 可用模型 + +添加配置后,您可以使用以下模型: + +| 模型 | Model ID | 用途 | +|------|---------|------| +| GPT-5-Pro | `gpt-5-pro` | 最新OpenAI模型,文献精准筛选 ⭐ | +| GPT-4-Turbo | `gpt-4-turbo-preview` | GPT-4高性能版本 | +| Claude-4.5-Sonnet | `claude-sonnet-4-5-20250929` | 最新Claude模型,第三方仲裁 ⭐ | +| Claude-3.5-Sonnet | `claude-3-5-sonnet-20241022` | Claude稳定版本 | + +--- + +## 💡 测试配置 + +配置完成后,可以使用以下代码测试: + +```typescript +// backend/test-closeai.ts +import OpenAI from 'openai'; + +// 测试GPT-5 +const gpt5Client = new OpenAI({ + apiKey: process.env.CLOSEAI_API_KEY, // 从环境变量读取,避免硬编码 + baseURL: 
'https://api.openai-proxy.org/v1', +}); + +const gpt5Response = await gpt5Client.chat.completions.create({ + model: 'gpt-5-pro', + messages: [{ role: 'user', content: '你好,请简单介绍一下自己' }], +}); + +console.log('GPT-5:', gpt5Response.choices[0].message.content); + +// 测试Claude-4.5 +const claudeClient = new OpenAI({ + apiKey: process.env.CLOSEAI_API_KEY, // 从环境变量读取,避免硬编码 + baseURL: 'https://api.openai-proxy.org/anthropic', +}); + +const claudeResponse = await claudeClient.chat.completions.create({ + model: 'claude-sonnet-4-5-20250929', + messages: [{ role: 'user', content: '你好,请简单介绍一下自己' }], +}); + +console.log('Claude-4.5:', claudeResponse.choices[0].message.content); +``` + +--- + +## ⚠️ 注意事项 + +1. **不要提交到Git:** `.env` 文件已在 `.gitignore` 中,确保不会提交 +2. **API Key安全:** 定期更换API Key,避免泄露 +3. **成本监控:** 定期检查CloseAI后台的使用量和费用 + +--- + +## 📚 相关文档 + +- [CloseAI集成指南](../docs/02-通用能力层/01-LLM大模型网关/03-CloseAI集成指南.md) +- [环境配置指南](../docs/07-运维文档/01-环境配置指南.md) +- [环境变量配置模板](../docs/07-运维文档/02-环境变量配置模板.md) + +--- + +**更新完成后,您就可以在ASL模块中使用4个LLM了!** 🎉 + + + + + + + + + diff --git a/backend/check-api-config.js b/backend/check-api-config.js new file mode 100644 index 00000000..fc4c6cc2 --- /dev/null +++ b/backend/check-api-config.js @@ -0,0 +1,188 @@ +/** + * API配置检查脚本 + * 检查DeepSeek和Qwen API配置是否正确 + */ + +import dotenv from 'dotenv'; +import axios from 'axios'; + +// 加载环境变量 +dotenv.config(); + +const colors = { + reset: '\x1b[0m', + green: '\x1b[32m', + red: '\x1b[31m', + yellow: '\x1b[33m', + cyan: '\x1b[36m', +}; + +function log(message, color = 'reset') { + console.log(`${colors[color]}${message}${colors.reset}`); +} + +async function checkDeepSeekAPI() { + log('\n=== 检查 DeepSeek API 配置 ===', 'cyan'); + + const apiKey = process.env.DEEPSEEK_API_KEY; + + if (!apiKey) { + log('❌ 未配置 DEEPSEEK_API_KEY', 'red'); + log('请在 .env 文件中添加: DEEPSEEK_API_KEY=sk-xxx', 'yellow'); + return false; + } + + log(`✅ API Key 已配置: ${apiKey.substring(0, 10)}...`, 'green'); + + // 测试API连接 + try { + 
log('正在测试 DeepSeek API 连接...', 'cyan'); + const response = await axios.post( + 'https://api.deepseek.com/v1/chat/completions', + { + model: 'deepseek-chat', + messages: [ + { role: 'user', content: '你好' } + ], + max_tokens: 10, + }, + { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${apiKey}`, + }, + timeout: 10000, + } + ); + + log('✅ DeepSeek API 连接成功!', 'green'); + log(` 模型: ${response.data.model}`, 'green'); + log(` 响应: ${response.data.choices[0].message.content}`, 'green'); + return true; + } catch (error) { + log('❌ DeepSeek API 连接失败', 'red'); + if (error.response) { + log(` 错误: ${error.response.status} - ${error.response.data?.error?.message || error.response.statusText}`, 'red'); + } else if (error.code === 'ECONNABORTED') { + log(' 错误: 请求超时,请检查网络连接', 'red'); + } else { + log(` 错误: ${error.message}`, 'red'); + } + return false; + } +} + +async function checkQwenAPI() { + log('\n=== 检查 Qwen API 配置 ===', 'cyan'); + + const apiKey = process.env.QWEN_API_KEY; + + if (!apiKey) { + log('❌ 未配置 QWEN_API_KEY', 'red'); + log('请在 .env 文件中添加: QWEN_API_KEY=sk-xxx', 'yellow'); + return false; + } + + log(`✅ API Key 已配置: ${apiKey.substring(0, 10)}...`, 'green'); + + // 测试API连接 + try { + log('正在测试 Qwen API 连接...', 'cyan'); + const response = await axios.post( + 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions', + { + model: 'qwen-plus', + messages: [ + { role: 'user', content: '你好' } + ], + max_tokens: 10, + }, + { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${apiKey}`, + }, + timeout: 10000, + } + ); + + log('✅ Qwen API 连接成功!', 'green'); + log(` 模型: ${response.data.model}`, 'green'); + log(` 响应: ${response.data.choices[0].message.content}`, 'green'); + return true; + } catch (error) { + log('❌ Qwen API 连接失败', 'red'); + if (error.response) { + log(` 错误: ${error.response.status} - ${error.response.data?.error?.message || error.response.data?.message || error.response.statusText}`, 'red'); // check the OpenAI-style nested error first, then the flat message field + } else if (error.code === 
'ECONNABORTED') { + log(' 错误: 请求超时,请检查网络连接', 'red'); + } else { + log(` 错误: ${error.message}`, 'red'); + } + return false; + } +} + +async function main() { + log('\n╔════════════════════════════════════════════════╗', 'cyan'); + log('║ API 配置检查工具 ║', 'cyan'); + log('╚════════════════════════════════════════════════╝', 'cyan'); + + const deepseekOK = await checkDeepSeekAPI(); + const qwenOK = await checkQwenAPI(); + + log('\n=== 检查结果汇总 ===', 'cyan'); + log(`DeepSeek API: ${deepseekOK ? '✅ 正常' : '❌ 异常'}`, deepseekOK ? 'green' : 'red'); + log(`Qwen API: ${qwenOK ? '✅ 正常' : '❌ 异常'}`, qwenOK ? 'green' : 'red'); + + if (!deepseekOK && !qwenOK) { + log('\n⚠️ 所有API都无法使用,请检查配置!', 'yellow'); + log('\n修复建议:', 'cyan'); + log('1. 检查 backend/.env 文件是否存在', 'yellow'); + log('2. 确认API Key已正确配置', 'yellow'); + log('3. 检查网络连接是否正常', 'yellow'); + log('4. 确认API Key有足够的额度', 'yellow'); + } else if (!deepseekOK || !qwenOK) { + log('\n⚠️ 部分API无法使用', 'yellow'); + log('建议使用可用的API进行测试', 'yellow'); + } else { + log('\n✅ 所有API配置正常!', 'green'); + } + + log('\n'); +} + +main().catch(error => { + console.error('脚本执行失败:', error); + process.exit(1); +}); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/backend/database-validation-simple.sql b/backend/database-validation-simple.sql new file mode 100644 index 00000000..dd116fdc --- /dev/null +++ b/backend/database-validation-simple.sql @@ -0,0 +1,122 @@ +-- ======================================== +-- 数据库迁移验证脚本(纯SQL版本) +-- ======================================== + +-- 1. Schema检查 +SELECT '=== 1. Schema检查 ===' as section; +SELECT + nspname as schema_name, + (SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = s.nspname) as table_count +FROM pg_namespace s +WHERE nspname IN ( + 'platform_schema', 'aia_schema', 'pkb_schema', + 'asl_schema', 'common_schema', 'dc_schema', + 'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema' +) +ORDER BY nspname; + +-- 2. 表清单 +SELECT '=== 2. 
Platform Schema 表清单 ===' as section; +SELECT tablename FROM pg_tables WHERE schemaname = 'platform_schema' ORDER BY tablename; + +SELECT '=== 3. AIA Schema 表清单 ===' as section; +SELECT tablename FROM pg_tables WHERE schemaname = 'aia_schema' ORDER BY tablename; + +SELECT '=== 4. PKB Schema 表清单 ===' as section; +SELECT tablename FROM pg_tables WHERE schemaname = 'pkb_schema' ORDER BY tablename; + +-- 3. 数据量统计 +SELECT '=== 5. 数据量统计 ===' as section; +SELECT 'platform_schema.users' AS table_name, COUNT(*) AS row_count FROM platform_schema.users +UNION ALL +SELECT 'aia_schema.projects', COUNT(*) FROM aia_schema.projects +UNION ALL +SELECT 'aia_schema.conversations', COUNT(*) FROM aia_schema.conversations +UNION ALL +SELECT 'aia_schema.messages', COUNT(*) FROM aia_schema.messages +UNION ALL +SELECT 'aia_schema.general_conversations', COUNT(*) FROM aia_schema.general_conversations +UNION ALL +SELECT 'aia_schema.general_messages', COUNT(*) FROM aia_schema.general_messages +UNION ALL +SELECT 'pkb_schema.knowledge_bases', COUNT(*) FROM pkb_schema.knowledge_bases +UNION ALL +SELECT 'pkb_schema.documents', COUNT(*) FROM pkb_schema.documents +UNION ALL +SELECT 'pkb_schema.batch_tasks', COUNT(*) FROM pkb_schema.batch_tasks +UNION ALL +SELECT 'pkb_schema.batch_results', COUNT(*) FROM pkb_schema.batch_results +UNION ALL +SELECT 'pkb_schema.task_templates', COUNT(*) FROM pkb_schema.task_templates; + +-- 4. 数据完整性对比 +SELECT '=== 6. 
数据完整性对比(public vs 新Schema) ===' as section; +SELECT + 'users' AS table_name, + (SELECT COUNT(*) FROM public.users) AS public_count, + (SELECT COUNT(*) FROM platform_schema.users) AS new_count, + CASE + WHEN (SELECT COUNT(*) FROM public.users) = (SELECT COUNT(*) FROM platform_schema.users) + THEN 'OK' + ELSE 'MISMATCH' + END AS status +UNION ALL +SELECT 'projects', + (SELECT COUNT(*) FROM public.projects), + (SELECT COUNT(*) FROM aia_schema.projects), + CASE WHEN (SELECT COUNT(*) FROM public.projects) = (SELECT COUNT(*) FROM aia_schema.projects) THEN 'OK' ELSE 'MISMATCH' END +UNION ALL +SELECT 'conversations', + (SELECT COUNT(*) FROM public.conversations), + (SELECT COUNT(*) FROM aia_schema.conversations), + CASE WHEN (SELECT COUNT(*) FROM public.conversations) = (SELECT COUNT(*) FROM aia_schema.conversations) THEN 'OK' ELSE 'MISMATCH' END +UNION ALL +SELECT 'messages', + (SELECT COUNT(*) FROM public.messages), + (SELECT COUNT(*) FROM aia_schema.messages), + CASE WHEN (SELECT COUNT(*) FROM public.messages) = (SELECT COUNT(*) FROM aia_schema.messages) THEN 'OK' ELSE 'MISMATCH' END +UNION ALL +SELECT 'knowledge_bases', + (SELECT COUNT(*) FROM public.knowledge_bases), + (SELECT COUNT(*) FROM pkb_schema.knowledge_bases), + CASE WHEN (SELECT COUNT(*) FROM public.knowledge_bases) = (SELECT COUNT(*) FROM pkb_schema.knowledge_bases) THEN 'OK' ELSE 'MISMATCH' END +UNION ALL +SELECT 'documents', + (SELECT COUNT(*) FROM public.documents), + (SELECT COUNT(*) FROM pkb_schema.documents), + CASE WHEN (SELECT COUNT(*) FROM public.documents) = (SELECT COUNT(*) FROM pkb_schema.documents) THEN 'OK' ELSE 'MISMATCH' END; + +-- 5. 外键约束统计 +SELECT '=== 7. 外键约束统计 ===' as section; +SELECT + table_schema, + COUNT(*) as fk_count +FROM information_schema.table_constraints +WHERE constraint_type = 'FOREIGN KEY' + AND table_schema IN ('platform_schema', 'aia_schema', 'pkb_schema') +GROUP BY table_schema +ORDER BY table_schema; + +-- 6. 索引统计 +SELECT '=== 8. 
索引统计 ===' as section; +SELECT + schemaname, + COUNT(*) as index_count +FROM pg_indexes +WHERE schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema') +GROUP BY schemaname +ORDER BY schemaname; + +-- 7. 数据采样 +SELECT '=== 9. 数据采样(platform_schema.users 前3条) ===' as section; +SELECT id, email, name, role FROM platform_schema.users ORDER BY created_at DESC LIMIT 3; + +SELECT '=== 10. 数据采样(aia_schema.projects 前3条) ===' as section; +SELECT id, name, research_type FROM aia_schema.projects ORDER BY created_at DESC LIMIT 3; + +SELECT '=== 11. 数据采样(pkb_schema.knowledge_bases 前3条) ===' as section; +SELECT id, name, file_count FROM pkb_schema.knowledge_bases ORDER BY created_at DESC LIMIT 3; + +-- 最终总结 +SELECT '=== 验证完成!✅ ===' as final_summary; + diff --git a/backend/database-validation.sql b/backend/database-validation.sql new file mode 100644 index 00000000..08a0b100 --- /dev/null +++ b/backend/database-validation.sql @@ -0,0 +1,326 @@ +-- ======================================== +-- 数据库迁移验证脚本 +-- 全面检查10个Schema和数据完整性 +-- ======================================== + +\echo '' +\echo '========================================' +\echo '📊 数据库迁移验证报告' +\echo '========================================' +\echo '' + +-- ======================================== +-- 1. Schema检查 +-- ======================================== +\echo '1️⃣ Schema检查' +\echo '----------------------------------------' + +SELECT + nspname AS schema_name, -- pg_namespace names the column nspname; the alias keeps the output header + pg_catalog.obj_description(oid, 'pg_namespace') AS description, + (SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = s.nspname) as table_count +FROM pg_namespace s +WHERE nspname IN ( + 'platform_schema', 'aia_schema', 'pkb_schema', + 'asl_schema', 'common_schema', 'dc_schema', + 'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema' +) +ORDER BY nspname; + +\echo '' + +-- ======================================== +-- 2. 
详细表清单 +-- ======================================== +\echo '2️⃣ 详细表清单' +\echo '----------------------------------------' + +-- Platform Schema +\echo '【Platform Schema】' +SELECT + schemaname, + tablename, + (SELECT COUNT(*) FROM pg_indexes WHERE schemaname = t.schemaname AND tablename = t.tablename) as index_count +FROM pg_tables t +WHERE schemaname = 'platform_schema' +ORDER BY tablename; + +\echo '' + +-- AIA Schema +\echo '【AIA Schema】' +SELECT + schemaname, + tablename, + (SELECT COUNT(*) FROM pg_indexes WHERE schemaname = t.schemaname AND tablename = t.tablename) as index_count +FROM pg_tables t +WHERE schemaname = 'aia_schema' +ORDER BY tablename; + +\echo '' + +-- PKB Schema +\echo '【PKB Schema】' +SELECT + schemaname, + tablename, + (SELECT COUNT(*) FROM pg_indexes WHERE schemaname = t.schemaname AND tablename = t.tablename) as index_count +FROM pg_tables t +WHERE schemaname = 'pkb_schema' +ORDER BY tablename; + +\echo '' + +-- ======================================== +-- 3. 数据量统计 +-- ======================================== +\echo '3️⃣ 数据量统计' +\echo '----------------------------------------' + +-- Platform Schema +\echo '【Platform Schema】' +SELECT 'users' AS table_name, COUNT(*) AS row_count FROM platform_schema.users; + +\echo '' + +-- AIA Schema +\echo '【AIA Schema】' +SELECT 'projects' AS table_name, COUNT(*) AS row_count FROM aia_schema.projects +UNION ALL +SELECT 'conversations', COUNT(*) FROM aia_schema.conversations +UNION ALL +SELECT 'messages', COUNT(*) FROM aia_schema.messages +UNION ALL +SELECT 'general_conversations', COUNT(*) FROM aia_schema.general_conversations +UNION ALL +SELECT 'general_messages', COUNT(*) FROM aia_schema.general_messages; + +\echo '' + +-- PKB Schema +\echo '【PKB Schema】' +SELECT 'knowledge_bases' AS table_name, COUNT(*) AS row_count FROM pkb_schema.knowledge_bases +UNION ALL +SELECT 'documents', COUNT(*) FROM pkb_schema.documents +UNION ALL +SELECT 'batch_tasks', COUNT(*) FROM pkb_schema.batch_tasks +UNION ALL +SELECT 
'batch_results', COUNT(*) FROM pkb_schema.batch_results +UNION ALL +SELECT 'task_templates', COUNT(*) FROM pkb_schema.task_templates; + +\echo '' + +-- ======================================== +-- 4. 数据完整性对比(新Schema vs public) +-- ======================================== +\echo '4️⃣ 数据完整性对比' +\echo '----------------------------------------' + +SELECT + 'users' AS table_name, + (SELECT COUNT(*) FROM public.users) AS public_count, + (SELECT COUNT(*) FROM platform_schema.users) AS new_schema_count, + CASE + WHEN (SELECT COUNT(*) FROM public.users) = (SELECT COUNT(*) FROM platform_schema.users) + THEN '✅ 一致' + ELSE '❌ 不一致' + END AS status + +UNION ALL + +SELECT + 'projects', + (SELECT COUNT(*) FROM public.projects), + (SELECT COUNT(*) FROM aia_schema.projects), + CASE + WHEN (SELECT COUNT(*) FROM public.projects) = (SELECT COUNT(*) FROM aia_schema.projects) + THEN '✅ 一致' + ELSE '❌ 不一致' + END + +UNION ALL + +SELECT + 'conversations', + (SELECT COUNT(*) FROM public.conversations), + (SELECT COUNT(*) FROM aia_schema.conversations), + CASE + WHEN (SELECT COUNT(*) FROM public.conversations) = (SELECT COUNT(*) FROM aia_schema.conversations) + THEN '✅ 一致' + ELSE '❌ 不一致' + END + +UNION ALL + +SELECT + 'messages', + (SELECT COUNT(*) FROM public.messages), + (SELECT COUNT(*) FROM aia_schema.messages), + CASE + WHEN (SELECT COUNT(*) FROM public.messages) = (SELECT COUNT(*) FROM aia_schema.messages) + THEN '✅ 一致' + ELSE '❌ 不一致' + END + +UNION ALL + +SELECT + 'knowledge_bases', + (SELECT COUNT(*) FROM public.knowledge_bases), + (SELECT COUNT(*) FROM pkb_schema.knowledge_bases), + CASE + WHEN (SELECT COUNT(*) FROM public.knowledge_bases) = (SELECT COUNT(*) FROM pkb_schema.knowledge_bases) + THEN '✅ 一致' + ELSE '❌ 不一致' + END + +UNION ALL + +SELECT + 'documents', + (SELECT COUNT(*) FROM public.documents), + (SELECT COUNT(*) FROM pkb_schema.documents), + CASE + WHEN (SELECT COUNT(*) FROM public.documents) = (SELECT COUNT(*) FROM pkb_schema.documents) + THEN '✅ 一致' + ELSE '❌ 不一致' + END; 
+ +\echo '' + +-- ======================================== +-- 5. 外键约束检查 +-- ======================================== +\echo '5️⃣ 外键约束检查' +\echo '----------------------------------------' + +SELECT + tc.table_schema AS schema_name, + tc.table_name, + tc.constraint_name, + kcu.column_name, + ccu.table_schema AS ref_schema, + ccu.table_name AS ref_table, + ccu.column_name AS ref_column +FROM information_schema.table_constraints tc +JOIN information_schema.key_column_usage kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema +JOIN information_schema.constraint_column_usage ccu + ON ccu.constraint_name = tc.constraint_name + AND ccu.constraint_schema = tc.constraint_schema -- join on the constraint's schema so foreign keys that reference another schema are still listed +WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema IN ('platform_schema', 'aia_schema', 'pkb_schema') +ORDER BY tc.table_schema, tc.table_name; + +\echo '' + +-- ======================================== +-- 6. 索引统计 +-- ======================================== +\echo '6️⃣ 索引统计' +\echo '----------------------------------------' + +SELECT + schemaname, + COUNT(*) as index_count +FROM pg_indexes +WHERE schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema') +GROUP BY schemaname +ORDER BY schemaname; + +\echo '' + +-- ======================================== +-- 7. 数据采样(前3条记录) +-- ======================================== +\echo '7️⃣ 数据采样验证' +\echo '----------------------------------------' + +\echo '【platform_schema.users】' +SELECT id, email, name, role, created_at +FROM platform_schema.users +ORDER BY created_at DESC +LIMIT 3; + +\echo '' +\echo '【aia_schema.projects】' +SELECT id, name, research_type, conversation_count, created_at +FROM aia_schema.projects +ORDER BY created_at DESC +LIMIT 3; + +\echo '' +\echo '【pkb_schema.knowledge_bases】' +SELECT id, name, file_count, created_at +FROM pkb_schema.knowledge_bases +ORDER BY created_at DESC +LIMIT 3; + +\echo '' + +-- ======================================== +-- 8. 
跨Schema引用验证 +-- ======================================== +\echo '8️⃣ 跨Schema引用完整性' +\echo '----------------------------------------' + +-- 检查aia_schema.projects的user_id是否都存在 +SELECT + 'aia_schema.projects → platform_schema.users' AS reference, + COUNT(*) AS total_records, + COUNT(u.id) AS valid_references, + COUNT(*) - COUNT(u.id) AS invalid_count, + CASE + WHEN COUNT(*) = COUNT(u.id) THEN '✅ 全部有效' + ELSE '❌ 存在无效引用' + END AS status +FROM aia_schema.projects p +LEFT JOIN platform_schema.users u ON p.user_id = u.id + +UNION ALL + +-- 检查pkb_schema.knowledge_bases的user_id +SELECT + 'pkb_schema.knowledge_bases → platform_schema.users', + COUNT(*), + COUNT(u.id), + COUNT(*) - COUNT(u.id), + CASE + WHEN COUNT(*) = COUNT(u.id) THEN '✅ 全部有效' + ELSE '❌ 存在无效引用' + END +FROM pkb_schema.knowledge_bases kb +LEFT JOIN platform_schema.users u ON kb.user_id = u.id + +UNION ALL + +-- 检查aia_schema.conversations → aia_schema.projects +SELECT + 'aia_schema.conversations → aia_schema.projects', + COUNT(*), + COUNT(p.id), + COUNT(*) - COUNT(p.id), + CASE + WHEN COUNT(*) = COUNT(p.id) OR COUNT(*) = 0 THEN '✅ 全部有效' + ELSE '❌ 存在无效引用' + END +FROM aia_schema.conversations c +LEFT JOIN aia_schema.projects p ON c.project_id = p.id +WHERE c.project_id IS NOT NULL; + +\echo '' + +-- ======================================== +-- 9. 最终总结 +-- ======================================== +\echo '========================================' +\echo '✅ 验证完成!' 
+\echo '========================================' +\echo '' + + + + + diff --git a/backend/prisma/migrations/20251012124747_add_batch_processing_module/migration.sql b/backend/prisma/migrations/20251012124747_add_batch_processing_module/migration.sql new file mode 100644 index 00000000..b0fd3a0e --- /dev/null +++ b/backend/prisma/migrations/20251012124747_add_batch_processing_module/migration.sql @@ -0,0 +1,151 @@ +-- AlterTable +ALTER TABLE "documents" ADD COLUMN "char_count" INTEGER, +ADD COLUMN "extracted_text" TEXT, +ADD COLUMN "extraction_method" TEXT, +ADD COLUMN "extraction_quality" DOUBLE PRECISION, +ADD COLUMN "language" TEXT; + +-- CreateTable +CREATE TABLE "batch_tasks" ( + "id" TEXT NOT NULL, + "user_id" TEXT NOT NULL, + "kb_id" TEXT NOT NULL, + "name" TEXT NOT NULL, + "template_type" TEXT NOT NULL, + "template_id" TEXT, + "prompt" TEXT NOT NULL, + "status" TEXT NOT NULL, + "total_documents" INTEGER NOT NULL, + "completed_count" INTEGER NOT NULL DEFAULT 0, + "failed_count" INTEGER NOT NULL DEFAULT 0, + "model_type" TEXT NOT NULL, + "concurrency" INTEGER NOT NULL DEFAULT 3, + "started_at" TIMESTAMP(3), + "completed_at" TIMESTAMP(3), + "duration_seconds" INTEGER, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "batch_tasks_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "batch_results" ( + "id" TEXT NOT NULL, + "task_id" TEXT NOT NULL, + "document_id" TEXT NOT NULL, + "status" TEXT NOT NULL, + "data" JSONB, + "rawOutput" TEXT, + "error_message" TEXT, + "processing_time_ms" INTEGER, + "tokens_used" INTEGER, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "batch_results_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "task_templates" ( + "id" TEXT NOT NULL, + "user_id" TEXT NOT NULL, + "name" TEXT NOT NULL, + "description" TEXT, + "prompt" TEXT NOT NULL, + "outputFields" JSONB NOT NULL, + "is_public" BOOLEAN NOT NULL DEFAULT false, + 
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "task_templates_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "general_conversations" ( + "id" TEXT NOT NULL, + "user_id" TEXT NOT NULL, + "title" TEXT NOT NULL, + "model_name" TEXT, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "deleted_at" TIMESTAMP(3), + + CONSTRAINT "general_conversations_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "general_messages" ( + "id" TEXT NOT NULL, + "conversation_id" TEXT NOT NULL, + "role" TEXT NOT NULL, + "content" TEXT NOT NULL, + "model" TEXT, + "metadata" JSONB, + "tokens" INTEGER, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "general_messages_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "batch_tasks_user_id_idx" ON "batch_tasks"("user_id"); + +-- CreateIndex +CREATE INDEX "batch_tasks_kb_id_idx" ON "batch_tasks"("kb_id"); + +-- CreateIndex +CREATE INDEX "batch_tasks_status_idx" ON "batch_tasks"("status"); + +-- CreateIndex +CREATE INDEX "batch_tasks_created_at_idx" ON "batch_tasks"("created_at"); + +-- CreateIndex +CREATE INDEX "batch_results_task_id_idx" ON "batch_results"("task_id"); + +-- CreateIndex +CREATE INDEX "batch_results_document_id_idx" ON "batch_results"("document_id"); + +-- CreateIndex +CREATE INDEX "batch_results_status_idx" ON "batch_results"("status"); + +-- CreateIndex +CREATE INDEX "task_templates_user_id_idx" ON "task_templates"("user_id"); + +-- CreateIndex +CREATE INDEX "general_conversations_user_id_idx" ON "general_conversations"("user_id"); + +-- CreateIndex +CREATE INDEX "general_conversations_created_at_idx" ON "general_conversations"("created_at"); + +-- CreateIndex +CREATE INDEX "general_conversations_updated_at_idx" ON "general_conversations"("updated_at"); + +-- CreateIndex +CREATE INDEX 
"general_messages_conversation_id_idx" ON "general_messages"("conversation_id"); + +-- CreateIndex +CREATE INDEX "general_messages_created_at_idx" ON "general_messages"("created_at"); + +-- CreateIndex +CREATE INDEX "documents_extraction_method_idx" ON "documents"("extraction_method"); + +-- AddForeignKey +ALTER TABLE "batch_tasks" ADD CONSTRAINT "batch_tasks_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "users"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "batch_tasks" ADD CONSTRAINT "batch_tasks_kb_id_fkey" FOREIGN KEY ("kb_id") REFERENCES "knowledge_bases"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "batch_results" ADD CONSTRAINT "batch_results_task_id_fkey" FOREIGN KEY ("task_id") REFERENCES "batch_tasks"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "batch_results" ADD CONSTRAINT "batch_results_document_id_fkey" FOREIGN KEY ("document_id") REFERENCES "documents"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "task_templates" ADD CONSTRAINT "task_templates_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "users"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "general_conversations" ADD CONSTRAINT "general_conversations_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "users"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "general_messages" ADD CONSTRAINT "general_messages_conversation_id_fkey" FOREIGN KEY ("conversation_id") REFERENCES "general_conversations"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/backend/prisma/migrations/20251014120128_add_review_tasks/migration.sql b/backend/prisma/migrations/20251014120128_add_review_tasks/migration.sql new file mode 100644 index 00000000..626aa66b --- /dev/null +++ b/backend/prisma/migrations/20251014120128_add_review_tasks/migration.sql @@ -0,0 +1,35 @@ +-- CreateTable +CREATE TABLE "review_tasks" ( + "id" TEXT NOT NULL, + 
"user_id" TEXT NOT NULL, + "file_name" TEXT NOT NULL, + "file_size" INTEGER NOT NULL, + "file_path" TEXT, + "extracted_text" TEXT NOT NULL, + "word_count" INTEGER, + "status" TEXT NOT NULL DEFAULT 'pending', + "editorial_review" JSONB, + "methodology_review" JSONB, + "overall_score" DOUBLE PRECISION, + "model_used" TEXT, + "started_at" TIMESTAMP(3), + "completed_at" TIMESTAMP(3), + "duration_seconds" INTEGER, + "error_message" TEXT, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "review_tasks_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "review_tasks_user_id_idx" ON "review_tasks"("user_id"); + +-- CreateIndex +CREATE INDEX "review_tasks_status_idx" ON "review_tasks"("status"); + +-- CreateIndex +CREATE INDEX "review_tasks_created_at_idx" ON "review_tasks"("created_at"); + +-- AddForeignKey +ALTER TABLE "review_tasks" ADD CONSTRAINT "review_tasks_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "users"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/backend/prisma/seed.ts b/backend/prisma/seed.ts new file mode 100644 index 00000000..278678c5 --- /dev/null +++ b/backend/prisma/seed.ts @@ -0,0 +1,109 @@ +/** + * 数据库种子数据脚本 + * 用于初始化开发环境的测试用户 + */ + +import { PrismaClient } from '@prisma/client'; + +const prisma = new PrismaClient(); + +async function main() { + console.log('🌱 开始初始化数据库种子数据...'); + + // 创建测试用户 + const mockUser = await prisma.user.upsert({ + where: { id: 'user-mock-001' }, + update: {}, + create: { + id: 'user-mock-001', + email: 'test@example.com', + password: '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewY5GyYIkYvKx7ES', // password: "password123" + name: '测试用户', + role: 'user', + status: 'active', + kbQuota: 3, + kbUsed: 0, + isTrial: true, + trialEndsAt: new Date(Date.now() + 30 * 24 * 60 * 60 * 1000), // 30天后 + }, + }); + + console.log('✅ 测试用户创建成功:', { + id: mockUser.id, + email: mockUser.email, + name: mockUser.name, + }); + + // 可选:创建管理员用户 + const 
adminUser = await prisma.user.upsert({ + where: { email: 'admin@example.com' }, + update: {}, + create: { + id: 'user-admin-001', + email: 'admin@example.com', + password: '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewY5GyYIkYvKx7ES', // password: "password123" + name: '管理员', + role: 'admin', + status: 'active', + kbQuota: 10, + kbUsed: 0, + isTrial: false, + }, + }); + + console.log('✅ 管理员用户创建成功:', { + id: adminUser.id, + email: adminUser.email, + name: adminUser.name, + }); + + console.log('\n🎉 数据库种子数据初始化完成!\n'); + console.log('📝 测试账号信息:'); + console.log(' 邮箱: test@example.com'); + console.log(' 密码: password123'); + console.log(' 用户ID: user-mock-001\n'); + console.log('📝 管理员账号信息:'); + console.log(' 邮箱: admin@example.com'); + console.log(' 密码: password123'); + console.log(' 用户ID: user-admin-001\n'); +} + +main() + .catch((e) => { + console.error('❌ 初始化种子数据失败:', e); + process.exit(1); + }) + .finally(async () => { + await prisma.$disconnect(); + }); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/backend/prompts/review_editorial_system.txt b/backend/prompts/review_editorial_system.txt new file mode 100644 index 00000000..80f2b88f --- /dev/null +++ b/backend/prompts/review_editorial_system.txt @@ -0,0 +1,253 @@ +你是一位专业的医学期刊编辑,负责评估稿件的规范性。你将严格按照中华医学超声杂志的稿约标准对稿件进行评估。 + +【你的职责】 +1. 仔细阅读稿件的每个部分 +2. 根据11个评估标准逐项检查 +3. 发现具体问题并给出改进建议 +4. 给出每项的评分和总体评分 + +【评估标准】(共11项) + +1. 文稿科学性与实用性 + - 论点是否明确 + - 资料是否可靠,数据是否准确 + - 层次是否清楚,文字是否精练 + - 用字是否规范 + - 伦理学审批是否说明(人体试验需说明遵循的程序是否符合伦理学标准并得到批准) + - 知情同意是否提及(人体试验需取得受试对象的知情同意) + - 字数要求:论著性文章5000字以内,综述、讲座、论坛可视情况而定,病例报告一般不超过2000字 + +2. 文题 + - 中文文题:不超过20个汉字 + - 英文文题:不超过10个实词 + - 是否力求简明且能反映文章主题 + +3. 作者格式 + - 作者姓名在文题下依次排列是否规范 + - 作者单位格式:邮政编码、所在省市县、单位全称、具体科室的顺序列于文题页左下方 + - 是否符合作者资格:(1)参与选题和设计,或参与资料的分析和解释者;(2)起草或修改论文中主要观点或其他主要内容者;(3)能对编辑部的修改意见进行核修,在学术方面进行答辩,并最终同意该文发表者。以上3条均须具备 + - 外籍作者是否征得本人同意并附证明信 + +4. 
摘要 + - 论著性文章是否附中、英文摘要 + - 字数:300-500字(词)为宜 + - 是否包含目的、方法、结果、结论4个部分,各部分冠以相应的标题 + - 结果部分是否列出主要数据 + - 英文摘要格式是否完整:包括文题、文中所有作者姓名(汉语拼音)、单位名称、所在城市及邮政编码,其后加列国名 + - 作者不属同一单位时,在姓名右上角加注不同的阿拉伯数字序号1,2,3, ……并在其工作单位名称之前(英文)或之后(中文)加注与作者姓名序号相同的数字 + +5. 关键词 + - 数量:2-5个中、英文关键词 + - 是否尽量使用美国国立医学图书馆编辑的最新版《Index Medicus》中《医学主题词表(MeSH)》内所列的词 + - 如果无相应的词,处理方式:(1)可选用直接相关的几个主题词进行组配;(2)可根据树状结构表选用最直接的上位主题词;(3)必要时可采用习用的自由词并列于最后 + - 关键词中的缩写词是否按MeSH表还原为全称(如"HBsAg"应标引为"乙型肝炎表面抗原") + - 关键词之间用";"分隔,每个英文关键词首字母大写 + +6. 医学名词和药物名称 + - 医学名词是否以1989年及其以后由全国自然科学名词审定委员会审定并公布、科学出版社出版的《医学名词》和相关学科的名词为准 + - 尚未公布者是否以人民卫生出版社所编《英汉医学词汇》为准 + - 中文药物名称是否使用化学工业出版社1995年出版的《中华人民共和国药典》或卫生部药典委员会编写的《中国药品通用名称》中的名称 + - 英文药物名称是否采用国际非专利药名,不用商品名 + +7. 缩略语 + - 文中是否尽量少用 + - 必须使用时,于首次出现处是否先列出其全称,然后括号注出中文缩略语或英文全称及其缩略语,后两者间用","分开 + +8. 计量单位 + - 是否执行国务院1984年2月颁布的《中华人民共和国法定计量单位》,并以单位符号表示 + - 具体使用是否参照中华医学会杂志社编写的《法定计量单位在医学上的应用(第3版)》一书 + - 首次出现不常用法定计量单位时,是否在括号内注明与旧制单位的换算关系 + - 量的符号是否一律用斜体字母(如吸光度的符号为A) + +9. 图片格式 + - 每幅图是否单独占1页,集中附于文后,分别按其在正文中出现的先后次序连续编码 + - 每张图片是否有必要的图题及说明性文字置于图的下方 + - 是否在注释中标明图中使用的全部非公知公用的缩写 + - 图中箭头标注是否有文字说明 + - 大体标本图片在图内是否有尺度标记 + - 病理照片是否注明特殊染色方法和高、中、低倍数 + - 图片要求:良好的清晰度和对比度,采用JPG格式,分辨率不低于300像素/英寸 + - 是否经过剪切后充分显示关键部分 + - 说明文字是否简短(不应超过50个字) + - 所有的图在文中相应部分是否提及 + +10. 动态图像 + - 是否分别按其在正文中出现的先后次序连续编码 + - 文中是否标记为"动态图×" + - 每个文件名是否与文中的名称相符(如"动态图×") + - 视频资料要求:图像和声音清晰稳定,剪接顺畅,保持可能获得的最高清晰度模式 + - 视频文件是否采用AVI格式 + +11. 
参考文献 + - 是否按GB/T 7714-2015《信息与文献参考文献著录规则》采用顺序编码制著录 + - 依照其在文中出现的先后顺序用阿拉伯数字加方括号于右上角标出 + - 是否引用摘要作为参考文献(不要引用) + - 参考文献中的作者:1~3名全部列出,3名以上只列前3名,后加",等"或其他与之相应的外文文字 + - 外文期刊名称是否用缩写,以《Index Medicus》中的格式为准 + - 中文期刊是否用全名 + - 每条参考文献题名项后是否均标注文献类型及著录起止页 + - 是否将参考文献按引用先后顺序(用阿拉伯数字标出)排列于文末 + +【输出格式】(必须严格遵守) + +你必须输出一个有效的JSON对象,不要有任何JSON之外的文字。格式如下: + +{ + "overall_score": 85, + "summary": "该稿件整体规范性良好,基本符合期刊要求。主要问题:1)中文标题超过20字;2)摘要中缺少具体数据;3)部分参考文献格式不规范。建议按照以下意见修改后可以接受。", + "items": [ + { + "criterion": "文稿科学性与实用性", + "status": "pass", + "score": 90, + "issues": [], + "suggestions": [] + }, + { + "criterion": "文题", + "status": "warning", + "score": 70, + "issues": [ + "中文标题23个字,超过20字的规定", + "英文标题12个实词,超过10个实词的建议" + ], + "suggestions": [ + "建议精简中文标题至20字以内,可删除修饰性词语", + "建议精简英文标题,保留核心内容词" + ] + }, + { + "criterion": "作者格式", + "status": "pass", + "score": 95, + "issues": [], + "suggestions": [] + }, + { + "criterion": "摘要", + "status": "warning", + "score": 75, + "issues": [ + "结果部分未列出具体数据,只有定性描述" + ], + "suggestions": [ + "建议在结果部分补充主要的统计数据,如具体的数值、P值等" + ] + }, + { + "criterion": "关键词", + "status": "pass", + "score": 90, + "issues": [], + "suggestions": [] + }, + { + "criterion": "医学名词和药物名称", + "status": "pass", + "score": 95, + "issues": [], + "suggestions": [] + }, + { + "criterion": "缩略语", + "status": "pass", + "score": 90, + "issues": [], + "suggestions": [] + }, + { + "criterion": "计量单位", + "status": "pass", + "score": 95, + "issues": [], + "suggestions": [] + }, + { + "criterion": "图片格式", + "status": "warning", + "score": 80, + "issues": [ + "部分图片未标注说明文字", + "图2的分辨率可能不足300像素/英寸" + ], + "suggestions": [ + "建议为所有图片添加说明性文字", + "建议提高图2的分辨率至300像素/英寸以上" + ] + }, + { + "criterion": "动态图像", + "status": "pass", + "score": 100, + "issues": [], + "suggestions": [] + }, + { + "criterion": "参考文献", + "status": "warning", + "score": 75, + "issues": [ + "参考文献5的期刊名称未使用缩写", + "参考文献8未标注文献类型标识" + ], + "suggestions": [ + "建议将参考文献5的期刊名称改为Index Medicus的标准缩写", + "建议为参考文献8添加文献类型标识(如[J]表示期刊)" + ] + } + ] 
+} + +【评分标准】 +- 90-100分:优秀,完全符合规范 +- 80-89分:良好,基本符合,有小问题 +- 70-79分:合格,存在一些问题需要修改 +- 60-69分:需要较大修改 +- 0-59分:不合格,存在严重问题 + +【status 取值】 +- "pass": 通过,没有问题或仅有轻微问题 +- "warning": 警告,有一些问题但不严重 +- "fail": 不合格,有严重问题必须修改 + +【重要提示】 +1. 必须对所有11项都进行评估 +2. 每项都要给出具体的评分 +3. 如果发现问题,必须在issues中具体说明问题所在 +4. 如果有改进建议,必须在suggestions中给出可操作的建议 +5. 输出必须是有效的JSON,不要有任何JSON之外的文字 +6. 如果某项完全符合规范,issues和suggestions可以为空数组[] +7. 问题描述要具体,避免泛泛而谈 +8. 改进建议要可操作,能够指导作者修改 +9. overall_score是11项评分的平均值(可适当调整) +10. summary要概括主要问题和总体评价 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/backend/prompts/review_methodology_system.txt b/backend/prompts/review_methodology_system.txt new file mode 100644 index 00000000..cbafd092 --- /dev/null +++ b/backend/prompts/review_methodology_system.txt @@ -0,0 +1,244 @@ +你是一位资深的医学统计学专家和方法学审稿人,负责评估稿件的科学严谨性和方法学质量。 + +【你的职责】 +1. 评估研究设计的科学性和合理性 +2. 评估统计学方法的正确性和规范性 +3. 评估统计分析的准确性 +4. 发现方法学缺陷并给出专业建议 + +【评估框架】(3个部分,共20个检查点) + +## 第一部分:科研设计评估(9个检查点) + +1. 类型交代不清楚 + - 是否明确说明研究类型(RCT、队列研究、病例对照、横断面研究等) + - 研究设计类型是否清晰易懂 + +2. 缺少研究对象介绍 + - 是否有明确的纳入标准和排除标准 + - 是否说明研究对象的来源和选择方法 + - 研究对象的基线特征是否描述清楚 + +3. 研究对象介绍不完整 + - 纳入标准是否具体明确 + - 排除标准是否合理 + - 是否说明了样本的代表性 + +4. 对照设计不合理且无解释说明 + - 是否设置了合适的对照组 + - 对照组的选择是否合理 + - 是否有充分的理由说明对照设计 + - 对照组与干预组的可比性如何 + +5. 影响、干预因素及观察指标交代不清楚 + - 干预措施是否描述清楚 + - 观察指标是否明确定义 + - 测量方法是否标准化 + - 影响因素是否交代清楚 + +6. 研究效应及评价指标不正确 + - 主要结局指标是否合适 + - 次要结局指标是否合理 + - 指标的临床意义是否明确 + - 指标的测量方法是否恰当 + +7. 研究设计要素描述欠完整、欠准确(随机、对照、盲法、重复等) + - 随机化:是否说明随机方法(如随机数字表、计算机随机等) + - 分配隐藏:是否有分配隐藏方案 + - 对照:是否有合适的对照 + - 盲法:是否实施盲法及盲法类型(单盲、双盲、三盲) + - 重复:样本量是否足够,是否说明样本量计算方法 + +8. 缺少质控措施介绍 + - 是否有质量控制措施 + - 是否有数据监查计划 + - 是否有标准操作规程 + - 数据采集的培训和标准化如何 + +9. 其他科研设计问题 + - 是否有其他方法学缺陷 + - 研究的可行性如何 + - 是否有伦理学考虑 + +## 第二部分:统计学方法描述评估(5个检查点) + +1. 描述不完整(软件、版本、资料类型、表达方式、相应统计方法、检验水准等) + - 是否说明统计软件及版本(如SPSS 26.0、R 4.2.0等) + - 是否说明资料类型(计量资料、计数资料、等级资料) + - 是否说明表达方式(均数±标准差、中位数(四分位数)等) + - 是否说明相应的统计方法(t检验、卡方检验、方差分析等) + - 是否说明检验水准(α值,通常为0.05) + - 是否说明单双侧检验 + +2. 
描述与实际不一致 + - 方法描述与结果展示是否一致 + - 统计方法的描述是否准确 + - 是否存在前后矛盾 + +3. 资料的表达与描述不正确 + - 计量资料是否正确表达(正态分布用均数±标准差,偏态分布用中位数和四分位数) + - 计数资料是否正确表达(例数和百分比) + - 等级资料是否正确表达 + +4. 未调整混杂因素 + - 是否识别了潜在的混杂因素 + - 是否进行了调整分析(如多因素回归分析) + - 混杂因素的控制方法是否恰当 + +5. 其他统计学方法描述问题 + - 是否有其他描述不清楚或不规范的地方 + +## 第三部分:统计分析评估(6个检查点) + +1. 主要研究结果统计方法使用不正确 + - 是否选择了合适的统计方法 + - 统计方法是否符合资料类型和研究设计 + - 是否满足统计方法的前提条件(如正态性、方差齐性等) + - 对于非正态分布数据,是否使用了非参数检验 + +2. 次要研究结果统计方法使用不正确 + - 次要指标的统计方法是否正确 + - 多重比较是否进行了校正 + +3. 统计结果描述不规范 + - P值是否正确表达(精确到小数点后3位或更多位) + - 置信区间是否报告(建议报告95%置信区间) + - 效应量是否报告(如相对危险度RR、比值比OR等) + - 统计量是否报告(如t值、F值、χ²值等) + +4. 主要统计结果错误 + - 计算是否正确 + - 统计推断是否合理 + - 结论是否与结果一致 + +5. 次要统计结果错误 + - 次要指标的统计结果是否有错误 + - 数据是否前后一致 + +6. 其他统计分析问题 + - 是否有其他统计方法或结果方面的问题 + - 是否存在过度解读或解读不足 + +【输出格式】(必须严格遵守) + +你必须输出一个有效的JSON对象,不要有任何JSON之外的文字。格式如下: + +{ + "overall_score": 70, + "summary": "该稿件方法学总体可接受,但存在以下主要问题:1)未明确说明随机化方法;2)统计学方法描述不完整,缺少软件版本和检验水准;3)部分统计方法选择不当。建议修改后重审。", + "parts": [ + { + "part": "科研设计评估", + "score": 70, + "issues": [ + { + "type": "研究设计要素描述欠完整", + "severity": "major", + "description": "文中仅提及'随机分组',但未说明具体的随机化方法(如随机数字表、计算机随机等),无法判断随机化的质量。同时,未提及分配隐藏方案。", + "location": "方法部分第2段", + "suggestion": "建议补充说明具体的随机化方法、分配隐藏方案和随机序列的产生方法。例如:'采用计算机生成的随机数字表进行随机分组,分配隐藏采用不透明信封法'" + }, + { + "type": "缺少质控措施介绍", + "severity": "minor", + "description": "未说明研究过程中的质量控制措施,如数据采集培训、数据核查流程、标准操作规程等", + "location": "方法部分", + "suggestion": "建议补充质量控制措施的描述,如:'所有数据采集人员均经过统一培训,采用标准化的数据采集表,数据录入采用双人录入核对'" + } + ] + }, + { + "part": "统计学方法描述评估", + "score": 65, + "issues": [ + { + "type": "描述不完整", + "severity": "major", + "description": "仅说明'采用SPSS软件进行统计分析',缺少版本号、检验水准、具体统计方法的说明以及数据的表达方式", + "location": "统计学分析部分", + "suggestion": "建议补充完整的统计学方法描述,如:'采用SPSS 26.0软件进行统计分析。计量资料以均数±标准差表示,组间比较采用独立样本t检验;计数资料以例数和百分比表示,组间比较采用卡方检验。检验水准α=0.05,双侧检验'" + }, + { + "type": "未调整混杂因素", + "severity": "minor", + "description": "研究中存在年龄、性别等潜在混杂因素,但未说明是否进行了调整分析", + "location": "统计学分析部分", + "suggestion": "建议进行多因素分析以调整混杂因素的影响,如:'采用多因素Logistic回归分析,调整年龄、性别等混杂因素'" + }
+ ] + }, + { + "part": "统计分析评估", + "score": 75, + "issues": [ + { + "type": "主要研究结果统计方法使用不正确", + "severity": "major", + "description": "对偏态分布的计量资料(如住院时间)使用了t检验,应该使用非参数检验(如Mann-Whitney U检验)", + "location": "结果部分表2", + "suggestion": "建议重新进行统计分析,对偏态分布数据使用非参数检验,并以中位数(四分位数)表示。或者,如果数据经过对数转换后符合正态分布,可说明转换方法后再使用参数检验" + }, + { + "type": "统计结果描述不规范", + "severity": "minor", + "description": "多处P值表示为'P<0.05',对于重要结果应该给出具体P值。同时,缺少置信区间的报告", + "location": "结果部分", + "suggestion": "建议对主要结果给出具体P值(如P=0.023),仅在P<0.001时才用不等号。同时建议报告95%置信区间,如:'RR=1.25, 95%CI: 1.05-1.48, P=0.012'" + } + ] + } + ] +} + +【评分标准】 +- 90-100分:方法学优秀,设计严谨,统计正确 +- 80-89分:方法学良好,有小问题但不影响结论 +- 70-79分:方法学可接受,有一些问题需要修改 +- 60-69分:方法学存在较多问题,需要大幅修改 +- 0-59分:方法学有严重缺陷,不建议发表 + +【severity 取值】 +- "major": 主要问题,严重影响研究质量和结论可靠性,必须修改 +- "minor": 次要问题,需要改进但不严重影响结论,建议修改 + +【重要提示】 +1. 必须对3个部分都进行评估 +2. 每个部分都要给出评分(0-100) +3. 发现的每个问题都必须明确:类型、严重程度、具体描述、位置(如能确定)、改进建议 +4. 输出必须是有效的JSON,不要有任何JSON之外的文字 +5. 如果某个部分没有发现问题,issues可以为空数组[] +6. 问题描述要具体,避免泛泛而谈,最好能指出具体的错误或缺陷 +7. 改进建议要可操作,能够指导作者修改,最好能给出具体的修改示例 +8. overall_score是3个部分评分的加权平均(科研设计40%、统计方法描述30%、统计分析30%) +9. summary要概括主要问题和总体评价,指出最需要改进的地方 +10. 对于没有提及的内容(如未提及随机化),也应作为问题指出 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +